source: sasview/src/sas/sascalc/pr/invertor.py @ b9d74f3

Last change on this file since b9d74f3 was b9d74f3, checked in by andyfaff, 8 years ago

MAINT: use raise Exception() not raise Exception

  • Property mode set to 100644
File size: 25.3 KB
Line 
1# pylint: disable=invalid-name
2"""
3Module to perform P(r) inversion.
4The module contains the Invertor class.
5
6FIXME: The way the Invertor interacts with its C component should be cleaned up
7"""
8
9import numpy as np
10import sys
11import math
12import time
13import copy
14import os
15import re
16import logging
17from numpy.linalg import lstsq
18from scipy import optimize
19from sas.sascalc.pr.core.pr_inversion import Cinvertor
20
21logger = logging.getLogger(__name__)
22
23def help():
24    """
25    Provide general online help text
26    Future work: extend this function to allow topic selection
27    """
28    info_txt = "The inversion approach is based on Moore, J. Appl. Cryst. "
29    info_txt += "(1980) 13, 168-175.\n\n"
30    info_txt += "P(r) is set to be equal to an expansion of base functions "
31    info_txt += "of the type "
32    info_txt += "phi_n(r) = 2*r*sin(pi*n*r/D_max). The coefficient of each "
33    info_txt += "base functions "
34    info_txt += "in the expansion is found by performing a least square fit "
35    info_txt += "with the "
36    info_txt += "following fit function:\n\n"
37    info_txt += "chi**2 = sum_i[ I_meas(q_i) - I_th(q_i) ]**2/error**2 +"
38    info_txt += "Reg_term\n\n"
39    info_txt += "where I_meas(q) is the measured scattering intensity and "
40    info_txt += "I_th(q) is "
41    info_txt += "the prediction from the Fourier transform of the P(r) "
42    info_txt += "expansion. "
43    info_txt += "The Reg_term term is a regularization term set to the second"
44    info_txt += " derivative "
45    info_txt += "d**2P(r)/dr**2 integrated over r. It is used to produce "
46    info_txt += "a smooth P(r) output.\n\n"
47    info_txt += "The following are user inputs:\n\n"
48    info_txt += "   - Number of terms: the number of base functions in the P(r)"
49    info_txt += " expansion.\n\n"
50    info_txt += "   - Regularization constant: a multiplicative constant "
51    info_txt += "to set the size of "
52    info_txt += "the regularization term.\n\n"
53    info_txt += "   - Maximum distance: the maximum distance between any "
54    info_txt += "two points in the system.\n"
55
56    return info_txt
57
58
59class Invertor(Cinvertor):
60    """
61    Invertor class to perform P(r) inversion
62
63    The problem is solved by posing the problem as  Ax = b,
64    where x is the set of coefficients we are looking for.
65
66    Npts is the number of points.
67
68    In the following i refers to the ith base function coefficient.
69    The matrix has its entries j in its first Npts rows set to ::
70
71        A[j][i] = (Fourier transformed base function for point j)
72
73    We them choose a number of r-points, n_r, to evaluate the second
74    derivative of P(r) at. This is used as our regularization term.
75    For a vector r of length n_r, the following n_r rows are set to ::
76
77        A[j+Npts][i] = (2nd derivative of P(r), d**2(P(r))/d(r)**2,
78        evaluated at r[j])
79
80    The vector b has its first Npts entries set to ::
81
82        b[j] = (I(q) observed for point j)
83
84    The following n_r entries are set to zero.
85
86    The result is found by using scipy.linalg.basic.lstsq to invert
87    the matrix and find the coefficients x.
88
89    Methods inherited from Cinvertor:
90
91    * ``get_peaks(pars)``: returns the number of P(r) peaks
92    * ``oscillations(pars)``: returns the oscillation parameters for the output P(r)
93    * ``get_positive(pars)``: returns the fraction of P(r) that is above zero
94    * ``get_pos_err(pars)``: returns the fraction of P(r) that is 1-sigma above zero
95    """
96    ## Chisqr of the last computation
97    chi2 = 0
98    ## Time elapsed for last computation
99    elapsed = 0
100    ## Alpha to get the reg term the same size as the signal
101    suggested_alpha = 0
102    ## Last number of base functions used
103    nfunc = 10
104    ## Last output values
105    out = None
106    ## Last errors on output values
107    cov = None
108    ## Background value
109    background = 0
110    ## Information dictionary for application use
111    info = {}
112
113    def __init__(self):
114        Cinvertor.__init__(self)
115
116    def __setstate__(self, state):
117        """
118        restore the state of invertor for pickle
119        """
120        (self.__dict__, self.alpha, self.d_max,
121         self.q_min, self.q_max,
122         self.x, self.y,
123         self.err, self.has_bck,
124         self.slit_height, self.slit_width) = state
125
126    def __reduce_ex__(self, proto):
127        """
128        Overwrite the __reduce_ex__
129        """
130
131        state = (self.__dict__,
132                 self.alpha, self.d_max,
133                 self.q_min, self.q_max,
134                 self.x, self.y,
135                 self.err, self.has_bck,
136                 self.slit_height, self.slit_width,
137                )
138        return (Invertor, tuple(), state, None, None)
139
140    def __setattr__(self, name, value):
141        """
142        Set the value of an attribute.
143        Access the parent class methods for
144        x, y, err, d_max, q_min, q_max and alpha
145        """
146        if   name == 'x':
147            if 0.0 in value:
148                msg = "Invertor: one of your q-values is zero. "
149                msg += "Delete that entry before proceeding"
150                raise ValueError(msg)
151            return self.set_x(value)
152        elif name == 'y':
153            return self.set_y(value)
154        elif name == 'err':
155            value2 = abs(value)
156            return self.set_err(value2)
157        elif name == 'd_max':
158            if value <= 0.0:
159                msg = "Invertor: d_max must be greater than zero."
160                msg += "Correct that entry before proceeding"
161                raise ValueError(msg)
162            return self.set_dmax(value)
163        elif name == 'q_min':
164            if value is None:
165                return self.set_qmin(-1.0)
166            return self.set_qmin(value)
167        elif name == 'q_max':
168            if value is None:
169                return self.set_qmax(-1.0)
170            return self.set_qmax(value)
171        elif name == 'alpha':
172            return self.set_alpha(value)
173        elif name == 'slit_height':
174            return self.set_slit_height(value)
175        elif name == 'slit_width':
176            return self.set_slit_width(value)
177        elif name == 'has_bck':
178            if value == True:
179                return self.set_has_bck(1)
180            elif value == False:
181                return self.set_has_bck(0)
182            else:
183                raise ValueError("Invertor: has_bck can only be True or False")
184
185        return Cinvertor.__setattr__(self, name, value)
186
187    def __getattr__(self, name):
188        """
189        Return the value of an attribute
190        """
191        #import numpy
192        if name == 'x':
193            out = np.ones(self.get_nx())
194            self.get_x(out)
195            return out
196        elif name == 'y':
197            out = np.ones(self.get_ny())
198            self.get_y(out)
199            return out
200        elif name == 'err':
201            out = np.ones(self.get_nerr())
202            self.get_err(out)
203            return out
204        elif name == 'd_max':
205            return self.get_dmax()
206        elif name == 'q_min':
207            qmin = self.get_qmin()
208            if qmin < 0:
209                return None
210            return qmin
211        elif name == 'q_max':
212            qmax = self.get_qmax()
213            if qmax < 0:
214                return None
215            return qmax
216        elif name == 'alpha':
217            return self.get_alpha()
218        elif name == 'slit_height':
219            return self.get_slit_height()
220        elif name == 'slit_width':
221            return self.get_slit_width()
222        elif name == 'has_bck':
223            value = self.get_has_bck()
224            if value == 1:
225                return True
226            else:
227                return False
228        elif name in self.__dict__:
229            return self.__dict__[name]
230        return None
231
232    def clone(self):
233        """
234        Return a clone of this instance
235        """
236        #import copy
237
238        invertor = Invertor()
239        invertor.chi2 = self.chi2
240        invertor.elapsed = self.elapsed
241        invertor.nfunc = self.nfunc
242        invertor.alpha = self.alpha
243        invertor.d_max = self.d_max
244        invertor.q_min = self.q_min
245        invertor.q_max = self.q_max
246
247        invertor.x = self.x
248        invertor.y = self.y
249        invertor.err = self.err
250        invertor.has_bck = self.has_bck
251        invertor.slit_height = self.slit_height
252        invertor.slit_width = self.slit_width
253
254        invertor.info = copy.deepcopy(self.info)
255
256        return invertor
257
258    def invert(self, nfunc=10, nr=20):
259        """
260        Perform inversion to P(r)
261
262        The problem is solved by posing the problem as  Ax = b,
263        where x is the set of coefficients we are looking for.
264
265        Npts is the number of points.
266
267        In the following i refers to the ith base function coefficient.
268        The matrix has its entries j in its first Npts rows set to ::
269
270            A[i][j] = (Fourier transformed base function for point j)
271
272        We them choose a number of r-points, n_r, to evaluate the second
273        derivative of P(r) at. This is used as our regularization term.
274        For a vector r of length n_r, the following n_r rows are set to ::
275
276            A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
277
278        The vector b has its first Npts entries set to ::
279
280            b[j] = (I(q) observed for point j)
281
282        The following n_r entries are set to zero.
283
284        The result is found by using scipy.linalg.basic.lstsq to invert
285        the matrix and find the coefficients x.
286
287        :param nfunc: number of base functions to use.
288        :param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
289        :return: c_out, c_cov - the coefficients with covariance matrix
290        """
291        # Reset the background value before proceeding
292        self.background = 0.0
293        return self.lstsq(nfunc, nr=nr)
294
295    def iq(self, out, q):
296        """
297        Function to call to evaluate the scattering intensity
298
299        :param args: c-parameters, and q
300        :return: I(q)
301
302        """
303        return Cinvertor.iq(self, out, q) + self.background
304
305    def invert_optimize(self, nfunc=10, nr=20):
306        """
307        Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
308
309        This probably produce more reliable results, but is much slower.
310        The minimization function is set to
311        sum_i[ (I_obs(q_i) - I_theo(q_i))/err**2 ] + alpha * reg_term,
312        where the reg_term is given by Svergun: it is the integral of
313        the square of the first derivative
314        of P(r), d(P(r))/dr, integrated over the full range of r.
315
316        :param nfunc: number of base functions to use.
317        :param nr: number of r points to evaluate the 2nd derivative at
318            for the reg. term.
319
320        :return: c_out, c_cov - the coefficients with covariance matrix
321
322        """
323        self.nfunc = nfunc
324        # First, check that the current data is valid
325        if self.is_valid() <= 0:
326            msg = "Invertor.invert: Data array are of different length"
327            raise RuntimeError(msg)
328
329        p = np.ones(nfunc)
330        t_0 = time.time()
331        out, cov_x, _, _, _ = optimize.leastsq(self.residuals, p, full_output=1)
332
333        # Compute chi^2
334        res = self.residuals(out)
335        chisqr = 0
336        for i in range(len(res)):
337            chisqr += res[i]
338
339        self.chi2 = chisqr
340
341        # Store computation time
342        self.elapsed = time.time() - t_0
343
344        if cov_x is None:
345            cov_x = np.ones([nfunc, nfunc])
346            cov_x *= math.fabs(chisqr)
347        return out, cov_x
348
349    def pr_fit(self, nfunc=5):
350        """
351        This is a direct fit to a given P(r). It assumes that the y data
352        is set to some P(r) distribution that we are trying to reproduce
353        with a set of base functions.
354
355        This method is provided as a test.
356        """
357        # First, check that the current data is valid
358        if self.is_valid() <= 0:
359            msg = "Invertor.invert: Data arrays are of different length"
360            raise RuntimeError(msg)
361
362        p = np.ones(nfunc)
363        t_0 = time.time()
364        out, cov_x, _, _, _ = optimize.leastsq(self.pr_residuals, p, full_output=1)
365
366        # Compute chi^2
367        res = self.pr_residuals(out)
368        chisqr = 0
369        for i in range(len(res)):
370            chisqr += res[i]
371
372        self.chisqr = chisqr
373
374        # Store computation time
375        self.elapsed = time.time() - t_0
376
377        return out, cov_x
378
379    def pr_err(self, c, c_cov, r):
380        """
381        Returns the value of P(r) for a given r, and base function
382        coefficients, with error.
383
384        :param c: base function coefficients
385        :param c_cov: covariance matrice of the base function coefficients
386        :param r: r-value to evaluate P(r) at
387
388        :return: P(r)
389
390        """
391        return self.get_pr_err(c, c_cov, r)
392
393    def _accept_q(self, q):
394        """
395        Check q-value against user-defined range
396        """
397        if self.q_min is not None and q < self.q_min:
398            return False
399        if self.q_max is not None and q > self.q_max:
400            return False
401        return True
402
403    def lstsq(self, nfunc=5, nr=20):
404        """
405        The problem is solved by posing the problem as  Ax = b,
406        where x is the set of coefficients we are looking for.
407
408        Npts is the number of points.
409
410        In the following i refers to the ith base function coefficient.
411        The matrix has its entries j in its first Npts rows set to ::
412
413            A[i][j] = (Fourier transformed base function for point j)
414
415        We them choose a number of r-points, n_r, to evaluate the second
416        derivative of P(r) at. This is used as our regularization term.
417        For a vector r of length n_r, the following n_r rows are set to ::
418
419            A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2,
420            evaluated at r[j])
421
422        The vector b has its first Npts entries set to ::
423
424            b[j] = (I(q) observed for point j)
425
426        The following n_r entries are set to zero.
427
428        The result is found by using scipy.linalg.basic.lstsq to invert
429        the matrix and find the coefficients x.
430
431        :param nfunc: number of base functions to use.
432        :param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
433
434        If the result does not allow us to compute the covariance matrix,
435        a matrix filled with zeros will be returned.
436
437        """
438        # Note: To make sure an array is contiguous:
439        # blah = np.ascontiguousarray(blah_original)
440        # ... before passing it to C
441
442        if self.is_valid() < 0:
443            msg = "Invertor: invalid data; incompatible data lengths."
444            raise RuntimeError(msg)
445
446        self.nfunc = nfunc
447        # a -- An M x N matrix.
448        # b -- An M x nrhs matrix or M vector.
449        npts = len(self.x)
450        nq = nr
451        sqrt_alpha = math.sqrt(math.fabs(self.alpha))
452        if sqrt_alpha < 0.0:
453            nq = 0
454
455        # If we need to fit the background, add a term
456        if self.has_bck == True:
457            nfunc_0 = nfunc
458            nfunc += 1
459
460        a = np.zeros([npts + nq, nfunc])
461        b = np.zeros(npts + nq)
462        err = np.zeros([nfunc, nfunc])
463
464        # Construct the a matrix and b vector that represent the problem
465        t_0 = time.time()
466        try:
467            self._get_matrix(nfunc, nq, a, b)
468        except:
469            raise RuntimeError("Invertor: could not invert I(Q)\n  %s" % sys.exc_value)
470
471        # Perform the inversion (least square fit)
472        c, chi2, _, _ = lstsq(a, b)
473        # Sanity check
474        try:
475            float(chi2)
476        except:
477            chi2 = -1.0
478        self.chi2 = chi2
479
480        inv_cov = np.zeros([nfunc, nfunc])
481        # Get the covariance matrix, defined as inv_cov = a_transposed * a
482        self._get_invcov_matrix(nfunc, nr, a, inv_cov)
483
484        # Compute the reg term size for the output
485        sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
486
487        if math.fabs(self.alpha) > 0:
488            new_alpha = sum_sig / (sum_reg / self.alpha)
489        else:
490            new_alpha = 0.0
491        self.suggested_alpha = new_alpha
492
493        try:
494            cov = np.linalg.pinv(inv_cov)
495            err = math.fabs(chi2 / float(npts - nfunc)) * cov
496        except:
497            # We were not able to estimate the errors
498            # Return an empty error matrix
499            logger.error(sys.exc_value)
500
501        # Keep a copy of the last output
502        if self.has_bck == False:
503            self.background = 0
504            self.out = c
505            self.cov = err
506        else:
507            self.background = c[0]
508
509            err_0 = np.zeros([nfunc, nfunc])
510            c_0 = np.zeros(nfunc)
511
512            for i in range(nfunc_0):
513                c_0[i] = c[i + 1]
514                for j in range(nfunc_0):
515                    err_0[i][j] = err[i + 1][j + 1]
516
517            self.out = c_0
518            self.cov = err_0
519
520        # Store computation time
521        self.elapsed = time.time() - t_0
522
523        return self.out, self.cov
524
525    def estimate_numterms(self, isquit_func=None):
526        """
527        Returns a reasonable guess for the
528        number of terms
529
530        :param isquit_func:
531          reference to thread function to call to check whether the computation needs to
532          be stopped.
533
534        :return: number of terms, alpha, message
535
536        """
537        from num_term import NTermEstimator
538        estimator = NTermEstimator(self.clone())
539        try:
540            return estimator.num_terms(isquit_func)
541        except:
542            # If we fail, estimate alpha and return the default
543            # number of terms
544            best_alpha, _, _ = self.estimate_alpha(self.nfunc)
545            logger.warning("Invertor.estimate_numterms: %s" % sys.exc_value)
546            return self.nfunc, best_alpha, "Could not estimate number of terms"
547
548    def estimate_alpha(self, nfunc):
549        """
550        Returns a reasonable guess for the
551        regularization constant alpha
552
553        :param nfunc: number of terms to use in the expansion.
554
555        :return: alpha, message, elapsed
556
557        where alpha is the estimate for alpha,
558        message is a message for the user,
559        elapsed is the computation time
560        """
561        #import time
562        try:
563            pr = self.clone()
564
565            # T_0 for computation time
566            starttime = time.time()
567            elapsed = 0
568
569            # If the current alpha is zero, try
570            # another value
571            if pr.alpha <= 0:
572                pr.alpha = 0.0001
573
574            # Perform inversion to find the largest alpha
575            out, _ = pr.invert(nfunc)
576            elapsed = time.time() - starttime
577            initial_alpha = pr.alpha
578            initial_peaks = pr.get_peaks(out)
579
580            # Try the inversion with the estimated alpha
581            pr.alpha = pr.suggested_alpha
582            out, _ = pr.invert(nfunc)
583
584            npeaks = pr.get_peaks(out)
585            # if more than one peak to start with
586            # just return the estimate
587            if npeaks > 1:
588                #message = "Your P(r) is not smooth,
589                #please check your inversion parameters"
590                message = None
591                return pr.suggested_alpha, message, elapsed
592            else:
593
594                # Look at smaller values
595                # We assume that for the suggested alpha, we have 1 peak
596                # if not, send a message to change parameters
597                alpha = pr.suggested_alpha
598                best_alpha = pr.suggested_alpha
599                found = False
600                for i in range(10):
601                    pr.alpha = (0.33) ** (i + 1) * alpha
602                    out, _ = pr.invert(nfunc)
603
604                    peaks = pr.get_peaks(out)
605                    if peaks > 1:
606                        found = True
607                        break
608                    best_alpha = pr.alpha
609
610                # If we didn't find a turning point for alpha and
611                # the initial alpha already had only one peak,
612                # just return that
613                if not found and initial_peaks == 1 and \
614                    initial_alpha < best_alpha:
615                    best_alpha = initial_alpha
616
617                # Check whether the size makes sense
618                message = ''
619
620                if not found:
621                    message = None
622                elif best_alpha >= 0.5 * pr.suggested_alpha:
623                    # best alpha is too big, return a
624                    # reasonable value
625                    message = "The estimated alpha for your system is too "
626                    message += "large. "
627                    message += "Try increasing your maximum distance."
628
629                return best_alpha, message, elapsed
630
631        except:
632            message = "Invertor.estimate_alpha: %s" % sys.exc_value
633            return 0, message, elapsed
634
635    def to_file(self, path, npts=100):
636        """
637        Save the state to a file that will be readable
638        by SliceView.
639
640        :param path: path of the file to write
641        :param npts: number of P(r) points to be written
642
643        """
644        file = open(path, 'w')
645        file.write("#d_max=%g\n" % self.d_max)
646        file.write("#nfunc=%g\n" % self.nfunc)
647        file.write("#alpha=%g\n" % self.alpha)
648        file.write("#chi2=%g\n" % self.chi2)
649        file.write("#elapsed=%g\n" % self.elapsed)
650        file.write("#qmin=%s\n" % str(self.q_min))
651        file.write("#qmax=%s\n" % str(self.q_max))
652        file.write("#slit_height=%g\n" % self.slit_height)
653        file.write("#slit_width=%g\n" % self.slit_width)
654        file.write("#background=%g\n" % self.background)
655        if self.has_bck == True:
656            file.write("#has_bck=1\n")
657        else:
658            file.write("#has_bck=0\n")
659        file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
660        if self.out is not None:
661            if len(self.out) == len(self.cov):
662                for i in range(len(self.out)):
663                    file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]),
664                                                   str(self.cov[i][i])))
665        file.write("<r>  <Pr>  <dPr>\n")
666        r = np.arange(0.0, self.d_max, self.d_max / npts)
667
668        for r_i in r:
669            (value, err) = self.pr_err(self.out, self.cov, r_i)
670            file.write("%g  %g  %g\n" % (r_i, value, err))
671
672        file.close()
673
674    def from_file(self, path):
675        """
676        Load the state of the Invertor from a file,
677        to be able to generate P(r) from a set of
678        parameters.
679
680        :param path: path of the file to load
681
682        """
683        #import os
684        #import re
685        if os.path.isfile(path):
686            try:
687                fd = open(path, 'r')
688
689                buff = fd.read()
690                lines = buff.split('\n')
691                for line in lines:
692                    if line.startswith('#d_max='):
693                        toks = line.split('=')
694                        self.d_max = float(toks[1])
695                    elif line.startswith('#nfunc='):
696                        toks = line.split('=')
697                        self.nfunc = int(toks[1])
698                        self.out = np.zeros(self.nfunc)
699                        self.cov = np.zeros([self.nfunc, self.nfunc])
700                    elif line.startswith('#alpha='):
701                        toks = line.split('=')
702                        self.alpha = float(toks[1])
703                    elif line.startswith('#chi2='):
704                        toks = line.split('=')
705                        self.chi2 = float(toks[1])
706                    elif line.startswith('#elapsed='):
707                        toks = line.split('=')
708                        self.elapsed = float(toks[1])
709                    elif line.startswith('#alpha_estimate='):
710                        toks = line.split('=')
711                        self.suggested_alpha = float(toks[1])
712                    elif line.startswith('#qmin='):
713                        toks = line.split('=')
714                        try:
715                            self.q_min = float(toks[1])
716                        except:
717                            self.q_min = None
718                    elif line.startswith('#qmax='):
719                        toks = line.split('=')
720                        try:
721                            self.q_max = float(toks[1])
722                        except:
723                            self.q_max = None
724                    elif line.startswith('#slit_height='):
725                        toks = line.split('=')
726                        self.slit_height = float(toks[1])
727                    elif line.startswith('#slit_width='):
728                        toks = line.split('=')
729                        self.slit_width = float(toks[1])
730                    elif line.startswith('#background='):
731                        toks = line.split('=')
732                        self.background = float(toks[1])
733                    elif line.startswith('#has_bck='):
734                        toks = line.split('=')
735                        if int(toks[1]) == 1:
736                            self.has_bck = True
737                        else:
738                            self.has_bck = False
739
740                    # Now read in the parameters
741                    elif line.startswith('#C_'):
742                        toks = line.split('=')
743                        p = re.compile('#C_([0-9]+)')
744                        m = p.search(toks[0])
745                        toks2 = toks[1].split('+-')
746                        i = int(m.group(1))
747                        self.out[i] = float(toks2[0])
748
749                        self.cov[i][i] = float(toks2[1])
750
751            except:
752                msg = "Invertor.from_file: corrupted file\n%s" % sys.exc_value
753                raise RuntimeError(msg)
754        else:
755            msg = "Invertor.from_file: '%s' is not a file" % str(path)
756            raise RuntimeError(msg)
Note: See TracBrowser for help on using the repository browser.