source: sasview/src/sas/sascalc/pr/invertor.py @ 4ba4b69

Last change on this file since 4ba4b69 was 2c60f304, checked in by ajj, 8 years ago

Fixing issue with dmax in P(r). Now logs value error rather than crashing.

  • Property mode set to 100644
File size: 25.4 KB
Line 
1# pylint: disable=invalid-name
2"""
3Module to perform P(r) inversion.
4The module contains the Invertor class.
5
6FIXME: The way the Invertor interacts with its C component should be cleaned up
7"""
8
9import numpy
10import sys
11import math
12import time
13import copy
14import os
15import re
16import logging
17from numpy.linalg import lstsq
18from scipy import optimize
19from sas.sascalc.pr.core.pr_inversion import Cinvertor
20
21def help():
22    """
23    Provide general online help text
24    Future work: extend this function to allow topic selection
25    """
26    info_txt = "The inversion approach is based on Moore, J. Appl. Cryst. "
27    info_txt += "(1980) 13, 168-175.\n\n"
28    info_txt += "P(r) is set to be equal to an expansion of base functions "
29    info_txt += "of the type "
30    info_txt += "phi_n(r) = 2*r*sin(pi*n*r/D_max). The coefficient of each "
31    info_txt += "base functions "
32    info_txt += "in the expansion is found by performing a least square fit "
33    info_txt += "with the "
34    info_txt += "following fit function:\n\n"
35    info_txt += "chi**2 = sum_i[ I_meas(q_i) - I_th(q_i) ]**2/error**2 +"
36    info_txt += "Reg_term\n\n"
37    info_txt += "where I_meas(q) is the measured scattering intensity and "
38    info_txt += "I_th(q) is "
39    info_txt += "the prediction from the Fourier transform of the P(r) "
40    info_txt += "expansion. "
41    info_txt += "The Reg_term term is a regularization term set to the second"
42    info_txt += " derivative "
43    info_txt += "d**2P(r)/dr**2 integrated over r. It is used to produce "
44    info_txt += "a smooth P(r) output.\n\n"
45    info_txt += "The following are user inputs:\n\n"
46    info_txt += "   - Number of terms: the number of base functions in the P(r)"
47    info_txt += " expansion.\n\n"
48    info_txt += "   - Regularization constant: a multiplicative constant "
49    info_txt += "to set the size of "
50    info_txt += "the regularization term.\n\n"
51    info_txt += "   - Maximum distance: the maximum distance between any "
52    info_txt += "two points in the system.\n"
53
54    return info_txt
55
56
57class Invertor(Cinvertor):
58    """
59    Invertor class to perform P(r) inversion
60
61    The problem is solved by posing the problem as  Ax = b,
62    where x is the set of coefficients we are looking for.
63
64    Npts is the number of points.
65
66    In the following i refers to the ith base function coefficient.
67    The matrix has its entries j in its first Npts rows set to ::
68
69        A[j][i] = (Fourier transformed base function for point j)
70
71    We them choose a number of r-points, n_r, to evaluate the second
72    derivative of P(r) at. This is used as our regularization term.
73    For a vector r of length n_r, the following n_r rows are set to ::
74
75        A[j+Npts][i] = (2nd derivative of P(r), d**2(P(r))/d(r)**2,
76        evaluated at r[j])
77
78    The vector b has its first Npts entries set to ::
79
80        b[j] = (I(q) observed for point j)
81
82    The following n_r entries are set to zero.
83
84    The result is found by using scipy.linalg.basic.lstsq to invert
85    the matrix and find the coefficients x.
86
87    Methods inherited from Cinvertor:
88
89    * ``get_peaks(pars)``: returns the number of P(r) peaks
90    * ``oscillations(pars)``: returns the oscillation parameters for the output P(r)
91    * ``get_positive(pars)``: returns the fraction of P(r) that is above zero
92    * ``get_pos_err(pars)``: returns the fraction of P(r) that is 1-sigma above zero
93    """
94    ## Chisqr of the last computation
95    chi2 = 0
96    ## Time elapsed for last computation
97    elapsed = 0
98    ## Alpha to get the reg term the same size as the signal
99    suggested_alpha = 0
100    ## Last number of base functions used
101    nfunc = 10
102    ## Last output values
103    out = None
104    ## Last errors on output values
105    cov = None
106    ## Background value
107    background = 0
108    ## Information dictionary for application use
109    info = {}
110
111    def __init__(self):
112        Cinvertor.__init__(self)
113
114    def __setstate__(self, state):
115        """
116        restore the state of invertor for pickle
117        """
118        (self.__dict__, self.alpha, self.d_max,
119         self.q_min, self.q_max,
120         self.x, self.y,
121         self.err, self.has_bck,
122         self.slit_height, self.slit_width) = state
123
124    def __reduce_ex__(self, proto):
125        """
126        Overwrite the __reduce_ex__
127        """
128
129        state = (self.__dict__,
130                 self.alpha, self.d_max,
131                 self.q_min, self.q_max,
132                 self.x, self.y,
133                 self.err, self.has_bck,
134                 self.slit_height, self.slit_width,
135                )
136        return (Invertor, tuple(), state, None, None)
137
138    def __setattr__(self, name, value):
139        """
140        Set the value of an attribute.
141        Access the parent class methods for
142        x, y, err, d_max, q_min, q_max and alpha
143        """
144        if   name == 'x':
145            if 0.0 in value:
146                msg = "Invertor: one of your q-values is zero. "
147                msg += "Delete that entry before proceeding"
148                raise ValueError, msg
149            return self.set_x(value)
150        elif name == 'y':
151            return self.set_y(value)
152        elif name == 'err':
153            value2 = abs(value)
154            return self.set_err(value2)
155        elif name == 'd_max':
156            if value <= 0.0:
157                msg = "Invertor: d_max must be greater than zero."
158                msg += "Correct that entry before proceeding"
159                raise ValueError, msg
160            return self.set_dmax(value)
161        elif name == 'q_min':
162            if value == None:
163                return self.set_qmin(-1.0)
164            return self.set_qmin(value)
165        elif name == 'q_max':
166            if value == None:
167                return self.set_qmax(-1.0)
168            return self.set_qmax(value)
169        elif name == 'alpha':
170            return self.set_alpha(value)
171        elif name == 'slit_height':
172            return self.set_slit_height(value)
173        elif name == 'slit_width':
174            return self.set_slit_width(value)
175        elif name == 'has_bck':
176            if value == True:
177                return self.set_has_bck(1)
178            elif value == False:
179                return self.set_has_bck(0)
180            else:
181                raise ValueError, "Invertor: has_bck can only be True or False"
182
183        return Cinvertor.__setattr__(self, name, value)
184
185    def __getattr__(self, name):
186        """
187        Return the value of an attribute
188        """
189        #import numpy
190        if name == 'x':
191            out = numpy.ones(self.get_nx())
192            self.get_x(out)
193            return out
194        elif name == 'y':
195            out = numpy.ones(self.get_ny())
196            self.get_y(out)
197            return out
198        elif name == 'err':
199            out = numpy.ones(self.get_nerr())
200            self.get_err(out)
201            return out
202        elif name == 'd_max':
203            return self.get_dmax()
204        elif name == 'q_min':
205            qmin = self.get_qmin()
206            if qmin < 0:
207                return None
208            return qmin
209        elif name == 'q_max':
210            qmax = self.get_qmax()
211            if qmax < 0:
212                return None
213            return qmax
214        elif name == 'alpha':
215            return self.get_alpha()
216        elif name == 'slit_height':
217            return self.get_slit_height()
218        elif name == 'slit_width':
219            return self.get_slit_width()
220        elif name == 'has_bck':
221            value = self.get_has_bck()
222            if value == 1:
223                return True
224            else:
225                return False
226        elif name in self.__dict__:
227            return self.__dict__[name]
228        return None
229
230    def clone(self):
231        """
232        Return a clone of this instance
233        """
234        #import copy
235
236        invertor = Invertor()
237        invertor.chi2 = self.chi2
238        invertor.elapsed = self.elapsed
239        invertor.nfunc = self.nfunc
240        invertor.alpha = self.alpha
241        invertor.d_max = self.d_max
242        invertor.q_min = self.q_min
243        invertor.q_max = self.q_max
244
245        invertor.x = self.x
246        invertor.y = self.y
247        invertor.err = self.err
248        invertor.has_bck = self.has_bck
249        invertor.slit_height = self.slit_height
250        invertor.slit_width = self.slit_width
251
252        invertor.info = copy.deepcopy(self.info)
253
254        return invertor
255
256    def invert(self, nfunc=10, nr=20):
257        """
258        Perform inversion to P(r)
259
260        The problem is solved by posing the problem as  Ax = b,
261        where x is the set of coefficients we are looking for.
262
263        Npts is the number of points.
264
265        In the following i refers to the ith base function coefficient.
266        The matrix has its entries j in its first Npts rows set to ::
267
268            A[i][j] = (Fourier transformed base function for point j)
269
270        We them choose a number of r-points, n_r, to evaluate the second
271        derivative of P(r) at. This is used as our regularization term.
272        For a vector r of length n_r, the following n_r rows are set to ::
273
274            A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
275
276        The vector b has its first Npts entries set to ::
277
278            b[j] = (I(q) observed for point j)
279
280        The following n_r entries are set to zero.
281
282        The result is found by using scipy.linalg.basic.lstsq to invert
283        the matrix and find the coefficients x.
284
285        :param nfunc: number of base functions to use.
286        :param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
287        :return: c_out, c_cov - the coefficients with covariance matrix
288        """
289        # Reset the background value before proceeding
290        self.background = 0.0
291        return self.lstsq(nfunc, nr=nr)
292
293    def iq(self, out, q):
294        """
295        Function to call to evaluate the scattering intensity
296
297        :param args: c-parameters, and q
298        :return: I(q)
299
300        """
301        return Cinvertor.iq(self, out, q) + self.background
302
303    def invert_optimize(self, nfunc=10, nr=20):
304        """
305        Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
306
307        This probably produce more reliable results, but is much slower.
308        The minimization function is set to
309        sum_i[ (I_obs(q_i) - I_theo(q_i))/err**2 ] + alpha * reg_term,
310        where the reg_term is given by Svergun: it is the integral of
311        the square of the first derivative
312        of P(r), d(P(r))/dr, integrated over the full range of r.
313
314        :param nfunc: number of base functions to use.
315        :param nr: number of r points to evaluate the 2nd derivative at
316            for the reg. term.
317
318        :return: c_out, c_cov - the coefficients with covariance matrix
319
320        """
321        self.nfunc = nfunc
322        # First, check that the current data is valid
323        if self.is_valid() <= 0:
324            msg = "Invertor.invert: Data array are of different length"
325            raise RuntimeError, msg
326
327        p = numpy.ones(nfunc)
328        t_0 = time.time()
329        out, cov_x, _, _, _ = optimize.leastsq(self.residuals, p, full_output=1)
330
331        # Compute chi^2
332        res = self.residuals(out)
333        chisqr = 0
334        for i in range(len(res)):
335            chisqr += res[i]
336
337        self.chi2 = chisqr
338
339        # Store computation time
340        self.elapsed = time.time() - t_0
341
342        if cov_x is None:
343            cov_x = numpy.ones([nfunc, nfunc])
344            cov_x *= math.fabs(chisqr)
345        return out, cov_x
346
347    def pr_fit(self, nfunc=5):
348        """
349        This is a direct fit to a given P(r). It assumes that the y data
350        is set to some P(r) distribution that we are trying to reproduce
351        with a set of base functions.
352
353        This method is provided as a test.
354        """
355        # First, check that the current data is valid
356        if self.is_valid() <= 0:
357            msg = "Invertor.invert: Data arrays are of different length"
358            raise RuntimeError, msg
359
360        p = numpy.ones(nfunc)
361        t_0 = time.time()
362        out, cov_x, _, _, _ = optimize.leastsq(self.pr_residuals, p, full_output=1)
363
364        # Compute chi^2
365        res = self.pr_residuals(out)
366        chisqr = 0
367        for i in range(len(res)):
368            chisqr += res[i]
369
370        self.chisqr = chisqr
371
372        # Store computation time
373        self.elapsed = time.time() - t_0
374
375        return out, cov_x
376
377    def pr_err(self, c, c_cov, r):
378        """
379        Returns the value of P(r) for a given r, and base function
380        coefficients, with error.
381
382        :param c: base function coefficients
383        :param c_cov: covariance matrice of the base function coefficients
384        :param r: r-value to evaluate P(r) at
385
386        :return: P(r)
387
388        """
389        return self.get_pr_err(c, c_cov, r)
390
391    def _accept_q(self, q):
392        """
393        Check q-value against user-defined range
394        """
395        if not self.q_min == None and q < self.q_min:
396            return False
397        if not self.q_max == None and q > self.q_max:
398            return False
399        return True
400
401    def lstsq(self, nfunc=5, nr=20):
402        """
403        The problem is solved by posing the problem as  Ax = b,
404        where x is the set of coefficients we are looking for.
405
406        Npts is the number of points.
407
408        In the following i refers to the ith base function coefficient.
409        The matrix has its entries j in its first Npts rows set to ::
410
411            A[i][j] = (Fourier transformed base function for point j)
412
413        We them choose a number of r-points, n_r, to evaluate the second
414        derivative of P(r) at. This is used as our regularization term.
415        For a vector r of length n_r, the following n_r rows are set to ::
416
417            A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2,
418            evaluated at r[j])
419
420        The vector b has its first Npts entries set to ::
421
422            b[j] = (I(q) observed for point j)
423
424        The following n_r entries are set to zero.
425
426        The result is found by using scipy.linalg.basic.lstsq to invert
427        the matrix and find the coefficients x.
428
429        :param nfunc: number of base functions to use.
430        :param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
431
432        If the result does not allow us to compute the covariance matrix,
433        a matrix filled with zeros will be returned.
434
435        """
436        # Note: To make sure an array is contiguous:
437        # blah = numpy.ascontiguousarray(blah_original)
438        # ... before passing it to C
439
440        if self.is_valid() < 0:
441            msg = "Invertor: invalid data; incompatible data lengths."
442            raise RuntimeError, msg
443
444        self.nfunc = nfunc
445        # a -- An M x N matrix.
446        # b -- An M x nrhs matrix or M vector.
447        npts = len(self.x)
448        nq = nr
449        sqrt_alpha = math.sqrt(math.fabs(self.alpha))
450        if sqrt_alpha < 0.0:
451            nq = 0
452
453        # If we need to fit the background, add a term
454        if self.has_bck == True:
455            nfunc_0 = nfunc
456            nfunc += 1
457
458        a = numpy.zeros([npts + nq, nfunc])
459        b = numpy.zeros(npts + nq)
460        err = numpy.zeros([nfunc, nfunc])
461
462        # Construct the a matrix and b vector that represent the problem
463        t_0 = time.time()
464        try:
465            self._get_matrix(nfunc, nq, a, b)
466        except:
467            raise RuntimeError, "Invertor: could not invert I(Q)\n  %s" % sys.exc_value
468
469        # Perform the inversion (least square fit)
470        c, chi2, _, _ = lstsq(a, b)
471        # Sanity check
472        try:
473            float(chi2)
474        except:
475            chi2 = -1.0
476        self.chi2 = chi2
477
478        inv_cov = numpy.zeros([nfunc, nfunc])
479        # Get the covariance matrix, defined as inv_cov = a_transposed * a
480        self._get_invcov_matrix(nfunc, nr, a, inv_cov)
481
482        # Compute the reg term size for the output
483        sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
484
485        if math.fabs(self.alpha) > 0:
486            new_alpha = sum_sig / (sum_reg / self.alpha)
487        else:
488            new_alpha = 0.0
489        self.suggested_alpha = new_alpha
490
491        try:
492            cov = numpy.linalg.pinv(inv_cov)
493            err = math.fabs(chi2 / float(npts - nfunc)) * cov
494        except:
495            # We were not able to estimate the errors
496            # Return an empty error matrix
497            logging.error(sys.exc_value)
498
499        # Keep a copy of the last output
500        if self.has_bck == False:
501            self.background = 0
502            self.out = c
503            self.cov = err
504        else:
505            self.background = c[0]
506
507            err_0 = numpy.zeros([nfunc, nfunc])
508            c_0 = numpy.zeros(nfunc)
509
510            for i in range(nfunc_0):
511                c_0[i] = c[i + 1]
512                for j in range(nfunc_0):
513                    err_0[i][j] = err[i + 1][j + 1]
514
515            self.out = c_0
516            self.cov = err_0
517
518        # Store computation time
519        self.elapsed = time.time() - t_0
520
521        return self.out, self.cov
522
523    def estimate_numterms(self, isquit_func=None):
524        """
525        Returns a reasonable guess for the
526        number of terms
527
528        :param isquit_func:
529          reference to thread function to call to check whether the computation needs to
530          be stopped.
531
532        :return: number of terms, alpha, message
533
534        """
535        from num_term import NTermEstimator
536        estimator = NTermEstimator(self.clone())
537        try:
538            return estimator.num_terms(isquit_func)
539        except:
540            # If we fail, estimate alpha and return the default
541            # number of terms
542            best_alpha, _, _ = self.estimate_alpha(self.nfunc)
543            logging.warning("Invertor.estimate_numterms: %s" % sys.exc_value)
544            return self.nfunc, best_alpha, "Could not estimate number of terms"
545
546    def estimate_alpha(self, nfunc):
547        """
548        Returns a reasonable guess for the
549        regularization constant alpha
550
551        :param nfunc: number of terms to use in the expansion.
552
553        :return: alpha, message, elapsed
554
555        where alpha is the estimate for alpha,
556        message is a message for the user,
557        elapsed is the computation time
558        """
559        #import time
560        try:
561            pr = self.clone()
562
563            # T_0 for computation time
564            starttime = time.time()
565            elapsed = 0
566
567            # If the current alpha is zero, try
568            # another value
569            if pr.alpha <= 0:
570                pr.alpha = 0.0001
571
572            # Perform inversion to find the largest alpha
573            out, _ = pr.invert(nfunc)
574            elapsed = time.time() - starttime
575            initial_alpha = pr.alpha
576            initial_peaks = pr.get_peaks(out)
577
578            # Try the inversion with the estimated alpha
579            pr.alpha = pr.suggested_alpha
580            out, _ = pr.invert(nfunc)
581
582            npeaks = pr.get_peaks(out)
583            # if more than one peak to start with
584            # just return the estimate
585            if npeaks > 1:
586                #message = "Your P(r) is not smooth,
587                #please check your inversion parameters"
588                message = None
589                return pr.suggested_alpha, message, elapsed
590            else:
591
592                # Look at smaller values
593                # We assume that for the suggested alpha, we have 1 peak
594                # if not, send a message to change parameters
595                alpha = pr.suggested_alpha
596                best_alpha = pr.suggested_alpha
597                found = False
598                for i in range(10):
599                    pr.alpha = (0.33) ** (i + 1) * alpha
600                    out, _ = pr.invert(nfunc)
601
602                    peaks = pr.get_peaks(out)
603                    if peaks > 1:
604                        found = True
605                        break
606                    best_alpha = pr.alpha
607
608                # If we didn't find a turning point for alpha and
609                # the initial alpha already had only one peak,
610                # just return that
611                if not found and initial_peaks == 1 and \
612                    initial_alpha < best_alpha:
613                    best_alpha = initial_alpha
614
615                # Check whether the size makes sense
616                message = ''
617
618                if not found:
619                    message = None
620                elif best_alpha >= 0.5 * pr.suggested_alpha:
621                    # best alpha is too big, return a
622                    # reasonable value
623                    message = "The estimated alpha for your system is too "
624                    message += "large. "
625                    message += "Try increasing your maximum distance."
626
627                return best_alpha, message, elapsed
628
629        except:
630            message = "Invertor.estimate_alpha: %s" % sys.exc_value
631            return 0, message, elapsed
632
633    def to_file(self, path, npts=100):
634        """
635        Save the state to a file that will be readable
636        by SliceView.
637
638        :param path: path of the file to write
639        :param npts: number of P(r) points to be written
640
641        """
642        file = open(path, 'w')
643        file.write("#d_max=%g\n" % self.d_max)
644        file.write("#nfunc=%g\n" % self.nfunc)
645        file.write("#alpha=%g\n" % self.alpha)
646        file.write("#chi2=%g\n" % self.chi2)
647        file.write("#elapsed=%g\n" % self.elapsed)
648        file.write("#qmin=%s\n" % str(self.q_min))
649        file.write("#qmax=%s\n" % str(self.q_max))
650        file.write("#slit_height=%g\n" % self.slit_height)
651        file.write("#slit_width=%g\n" % self.slit_width)
652        file.write("#background=%g\n" % self.background)
653        if self.has_bck == True:
654            file.write("#has_bck=1\n")
655        else:
656            file.write("#has_bck=0\n")
657        file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
658        if not self.out == None:
659            if len(self.out) == len(self.cov):
660                for i in range(len(self.out)):
661                    file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]),
662                                                   str(self.cov[i][i])))
663        file.write("<r>  <Pr>  <dPr>\n")
664        r = numpy.arange(0.0, self.d_max, self.d_max / npts)
665
666        for r_i in r:
667            (value, err) = self.pr_err(self.out, self.cov, r_i)
668            file.write("%g  %g  %g\n" % (r_i, value, err))
669
670        file.close()
671
672    def from_file(self, path):
673        """
674        Load the state of the Invertor from a file,
675        to be able to generate P(r) from a set of
676        parameters.
677
678        :param path: path of the file to load
679
680        """
681        #import os
682        #import re
683        if os.path.isfile(path):
684            try:
685                fd = open(path, 'r')
686
687                buff = fd.read()
688                lines = buff.split('\n')
689                for line in lines:
690                    if line.startswith('#d_max='):
691                        toks = line.split('=')
692                        self.d_max = float(toks[1])
693                    elif line.startswith('#nfunc='):
694                        toks = line.split('=')
695                        self.nfunc = int(toks[1])
696                        self.out = numpy.zeros(self.nfunc)
697                        self.cov = numpy.zeros([self.nfunc, self.nfunc])
698                    elif line.startswith('#alpha='):
699                        toks = line.split('=')
700                        self.alpha = float(toks[1])
701                    elif line.startswith('#chi2='):
702                        toks = line.split('=')
703                        self.chi2 = float(toks[1])
704                    elif line.startswith('#elapsed='):
705                        toks = line.split('=')
706                        self.elapsed = float(toks[1])
707                    elif line.startswith('#alpha_estimate='):
708                        toks = line.split('=')
709                        self.suggested_alpha = float(toks[1])
710                    elif line.startswith('#qmin='):
711                        toks = line.split('=')
712                        try:
713                            self.q_min = float(toks[1])
714                        except:
715                            self.q_min = None
716                    elif line.startswith('#qmax='):
717                        toks = line.split('=')
718                        try:
719                            self.q_max = float(toks[1])
720                        except:
721                            self.q_max = None
722                    elif line.startswith('#slit_height='):
723                        toks = line.split('=')
724                        self.slit_height = float(toks[1])
725                    elif line.startswith('#slit_width='):
726                        toks = line.split('=')
727                        self.slit_width = float(toks[1])
728                    elif line.startswith('#background='):
729                        toks = line.split('=')
730                        self.background = float(toks[1])
731                    elif line.startswith('#has_bck='):
732                        toks = line.split('=')
733                        if int(toks[1]) == 1:
734                            self.has_bck = True
735                        else:
736                            self.has_bck = False
737
738                    # Now read in the parameters
739                    elif line.startswith('#C_'):
740                        toks = line.split('=')
741                        p = re.compile('#C_([0-9]+)')
742                        m = p.search(toks[0])
743                        toks2 = toks[1].split('+-')
744                        i = int(m.group(1))
745                        self.out[i] = float(toks2[0])
746
747                        self.cov[i][i] = float(toks2[1])
748
749            except:
750                msg = "Invertor.from_file: corrupted file\n%s" % sys.exc_value
751                raise RuntimeError, msg
752        else:
753            msg = "Invertor.from_file: '%s' is not a file" % str(path)
754            raise RuntimeError, msg
Note: See TracBrowser for help on using the repository browser.