source: sasview/src/sas/sascalc/pr/invertor.py @ 7743c09

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalc
Last change on this file since 7743c09 was b699768, checked in by Piotr Rozyczko <piotr.rozyczko@…>, 9 years ago

Initial commit of the refactored SasCalc? module.

  • Property mode set to 100644
File size: 25.2 KB
Line 
1# pylint: disable=invalid-name
2"""
3Module to perform P(r) inversion.
4The module contains the Invertor class.
5
6FIXME: The way the Invertor interacts with its C component should be cleaned up
7"""
8
9import numpy
10import sys
11import math
12import time
13import copy
14import os
15import re
16import logging
17from numpy.linalg import lstsq
18from scipy import optimize
19from sas.sascalc.pr.core.pr_inversion import Cinvertor
20
21def help():
22    """
23    Provide general online help text
24    Future work: extend this function to allow topic selection
25    """
26    info_txt = "The inversion approach is based on Moore, J. Appl. Cryst. "
27    info_txt += "(1980) 13, 168-175.\n\n"
28    info_txt += "P(r) is set to be equal to an expansion of base functions "
29    info_txt += "of the type "
30    info_txt += "phi_n(r) = 2*r*sin(pi*n*r/D_max). The coefficient of each "
31    info_txt += "base functions "
32    info_txt += "in the expansion is found by performing a least square fit "
33    info_txt += "with the "
34    info_txt += "following fit function:\n\n"
35    info_txt += "chi**2 = sum_i[ I_meas(q_i) - I_th(q_i) ]**2/error**2 +"
36    info_txt += "Reg_term\n\n"
37    info_txt += "where I_meas(q) is the measured scattering intensity and "
38    info_txt += "I_th(q) is "
39    info_txt += "the prediction from the Fourier transform of the P(r) "
40    info_txt += "expansion. "
41    info_txt += "The Reg_term term is a regularization term set to the second"
42    info_txt += " derivative "
43    info_txt += "d**2P(r)/dr**2 integrated over r. It is used to produce "
44    info_txt += "a smooth P(r) output.\n\n"
45    info_txt += "The following are user inputs:\n\n"
46    info_txt += "   - Number of terms: the number of base functions in the P(r)"
47    info_txt += " expansion.\n\n"
48    info_txt += "   - Regularization constant: a multiplicative constant "
49    info_txt += "to set the size of "
50    info_txt += "the regularization term.\n\n"
51    info_txt += "   - Maximum distance: the maximum distance between any "
52    info_txt += "two points in the system.\n"
53
54    return info_txt
55
56
57class Invertor(Cinvertor):
58    """
59    Invertor class to perform P(r) inversion
60
61    The problem is solved by posing the problem as  Ax = b,
62    where x is the set of coefficients we are looking for.
63
64    Npts is the number of points.
65
66    In the following i refers to the ith base function coefficient.
67    The matrix has its entries j in its first Npts rows set to ::
68
69        A[j][i] = (Fourier transformed base function for point j)
70
71    We them choose a number of r-points, n_r, to evaluate the second
72    derivative of P(r) at. This is used as our regularization term.
73    For a vector r of length n_r, the following n_r rows are set to ::
74
75        A[j+Npts][i] = (2nd derivative of P(r), d**2(P(r))/d(r)**2,
76        evaluated at r[j])
77
78    The vector b has its first Npts entries set to ::
79
80        b[j] = (I(q) observed for point j)
81
82    The following n_r entries are set to zero.
83
84    The result is found by using scipy.linalg.basic.lstsq to invert
85    the matrix and find the coefficients x.
86
87    Methods inherited from Cinvertor:
88
89    * ``get_peaks(pars)``: returns the number of P(r) peaks
90    * ``oscillations(pars)``: returns the oscillation parameters for the output P(r)
91    * ``get_positive(pars)``: returns the fraction of P(r) that is above zero
92    * ``get_pos_err(pars)``: returns the fraction of P(r) that is 1-sigma above zero
93    """
94    ## Chisqr of the last computation
95    chi2 = 0
96    ## Time elapsed for last computation
97    elapsed = 0
98    ## Alpha to get the reg term the same size as the signal
99    suggested_alpha = 0
100    ## Last number of base functions used
101    nfunc = 10
102    ## Last output values
103    out = None
104    ## Last errors on output values
105    cov = None
106    ## Background value
107    background = 0
108    ## Information dictionary for application use
109    info = {}
110
111    def __init__(self):
112        Cinvertor.__init__(self)
113
114    def __setstate__(self, state):
115        """
116        restore the state of invertor for pickle
117        """
118        (self.__dict__, self.alpha, self.d_max,
119         self.q_min, self.q_max,
120         self.x, self.y,
121         self.err, self.has_bck,
122         self.slit_height, self.slit_width) = state
123
124    def __reduce_ex__(self, proto):
125        """
126        Overwrite the __reduce_ex__
127        """
128
129        state = (self.__dict__,
130                 self.alpha, self.d_max,
131                 self.q_min, self.q_max,
132                 self.x, self.y,
133                 self.err, self.has_bck,
134                 self.slit_height, self.slit_width,
135                )
136        return (Invertor, tuple(), state, None, None)
137
138    def __setattr__(self, name, value):
139        """
140        Set the value of an attribute.
141        Access the parent class methods for
142        x, y, err, d_max, q_min, q_max and alpha
143        """
144        if   name == 'x':
145            if 0.0 in value:
146                msg = "Invertor: one of your q-values is zero. "
147                msg += "Delete that entry before proceeding"
148                raise ValueError, msg
149            return self.set_x(value)
150        elif name == 'y':
151            return self.set_y(value)
152        elif name == 'err':
153            value2 = abs(value)
154            return self.set_err(value2)
155        elif name == 'd_max':
156            return self.set_dmax(value)
157        elif name == 'q_min':
158            if value == None:
159                return self.set_qmin(-1.0)
160            return self.set_qmin(value)
161        elif name == 'q_max':
162            if value == None:
163                return self.set_qmax(-1.0)
164            return self.set_qmax(value)
165        elif name == 'alpha':
166            return self.set_alpha(value)
167        elif name == 'slit_height':
168            return self.set_slit_height(value)
169        elif name == 'slit_width':
170            return self.set_slit_width(value)
171        elif name == 'has_bck':
172            if value == True:
173                return self.set_has_bck(1)
174            elif value == False:
175                return self.set_has_bck(0)
176            else:
177                raise ValueError, "Invertor: has_bck can only be True or False"
178
179        return Cinvertor.__setattr__(self, name, value)
180
181    def __getattr__(self, name):
182        """
183        Return the value of an attribute
184        """
185        #import numpy
186        if name == 'x':
187            out = numpy.ones(self.get_nx())
188            self.get_x(out)
189            return out
190        elif name == 'y':
191            out = numpy.ones(self.get_ny())
192            self.get_y(out)
193            return out
194        elif name == 'err':
195            out = numpy.ones(self.get_nerr())
196            self.get_err(out)
197            return out
198        elif name == 'd_max':
199            return self.get_dmax()
200        elif name == 'q_min':
201            qmin = self.get_qmin()
202            if qmin < 0:
203                return None
204            return qmin
205        elif name == 'q_max':
206            qmax = self.get_qmax()
207            if qmax < 0:
208                return None
209            return qmax
210        elif name == 'alpha':
211            return self.get_alpha()
212        elif name == 'slit_height':
213            return self.get_slit_height()
214        elif name == 'slit_width':
215            return self.get_slit_width()
216        elif name == 'has_bck':
217            value = self.get_has_bck()
218            if value == 1:
219                return True
220            else:
221                return False
222        elif name in self.__dict__:
223            return self.__dict__[name]
224        return None
225
226    def clone(self):
227        """
228        Return a clone of this instance
229        """
230        #import copy
231
232        invertor = Invertor()
233        invertor.chi2 = self.chi2
234        invertor.elapsed = self.elapsed
235        invertor.nfunc = self.nfunc
236        invertor.alpha = self.alpha
237        invertor.d_max = self.d_max
238        invertor.q_min = self.q_min
239        invertor.q_max = self.q_max
240
241        invertor.x = self.x
242        invertor.y = self.y
243        invertor.err = self.err
244        invertor.has_bck = self.has_bck
245        invertor.slit_height = self.slit_height
246        invertor.slit_width = self.slit_width
247
248        invertor.info = copy.deepcopy(self.info)
249
250        return invertor
251
252    def invert(self, nfunc=10, nr=20):
253        """
254        Perform inversion to P(r)
255
256        The problem is solved by posing the problem as  Ax = b,
257        where x is the set of coefficients we are looking for.
258
259        Npts is the number of points.
260
261        In the following i refers to the ith base function coefficient.
262        The matrix has its entries j in its first Npts rows set to ::
263
264            A[i][j] = (Fourier transformed base function for point j)
265
266        We them choose a number of r-points, n_r, to evaluate the second
267        derivative of P(r) at. This is used as our regularization term.
268        For a vector r of length n_r, the following n_r rows are set to ::
269
270            A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
271
272        The vector b has its first Npts entries set to ::
273
274            b[j] = (I(q) observed for point j)
275
276        The following n_r entries are set to zero.
277
278        The result is found by using scipy.linalg.basic.lstsq to invert
279        the matrix and find the coefficients x.
280
281        :param nfunc: number of base functions to use.
282        :param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
283        :return: c_out, c_cov - the coefficients with covariance matrix
284        """
285        # Reset the background value before proceeding
286        self.background = 0.0
287        return self.lstsq(nfunc, nr=nr)
288
289    def iq(self, out, q):
290        """
291        Function to call to evaluate the scattering intensity
292
293        :param args: c-parameters, and q
294        :return: I(q)
295
296        """
297        return Cinvertor.iq(self, out, q) + self.background
298
299    def invert_optimize(self, nfunc=10, nr=20):
300        """
301        Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
302
303        This probably produce more reliable results, but is much slower.
304        The minimization function is set to
305        sum_i[ (I_obs(q_i) - I_theo(q_i))/err**2 ] + alpha * reg_term,
306        where the reg_term is given by Svergun: it is the integral of
307        the square of the first derivative
308        of P(r), d(P(r))/dr, integrated over the full range of r.
309
310        :param nfunc: number of base functions to use.
311        :param nr: number of r points to evaluate the 2nd derivative at
312            for the reg. term.
313
314        :return: c_out, c_cov - the coefficients with covariance matrix
315
316        """
317        self.nfunc = nfunc
318        # First, check that the current data is valid
319        if self.is_valid() <= 0:
320            msg = "Invertor.invert: Data array are of different length"
321            raise RuntimeError, msg
322
323        p = numpy.ones(nfunc)
324        t_0 = time.time()
325        out, cov_x, _, _, _ = optimize.leastsq(self.residuals, p, full_output=1)
326
327        # Compute chi^2
328        res = self.residuals(out)
329        chisqr = 0
330        for i in range(len(res)):
331            chisqr += res[i]
332
333        self.chi2 = chisqr
334
335        # Store computation time
336        self.elapsed = time.time() - t_0
337
338        if cov_x is None:
339            cov_x = numpy.ones([nfunc, nfunc])
340            cov_x *= math.fabs(chisqr)
341        return out, cov_x
342
343    def pr_fit(self, nfunc=5):
344        """
345        This is a direct fit to a given P(r). It assumes that the y data
346        is set to some P(r) distribution that we are trying to reproduce
347        with a set of base functions.
348
349        This method is provided as a test.
350        """
351        # First, check that the current data is valid
352        if self.is_valid() <= 0:
353            msg = "Invertor.invert: Data arrays are of different length"
354            raise RuntimeError, msg
355
356        p = numpy.ones(nfunc)
357        t_0 = time.time()
358        out, cov_x, _, _, _ = optimize.leastsq(self.pr_residuals, p, full_output=1)
359
360        # Compute chi^2
361        res = self.pr_residuals(out)
362        chisqr = 0
363        for i in range(len(res)):
364            chisqr += res[i]
365
366        self.chisqr = chisqr
367
368        # Store computation time
369        self.elapsed = time.time() - t_0
370
371        return out, cov_x
372
373    def pr_err(self, c, c_cov, r):
374        """
375        Returns the value of P(r) for a given r, and base function
376        coefficients, with error.
377
378        :param c: base function coefficients
379        :param c_cov: covariance matrice of the base function coefficients
380        :param r: r-value to evaluate P(r) at
381
382        :return: P(r)
383
384        """
385        return self.get_pr_err(c, c_cov, r)
386
387    def _accept_q(self, q):
388        """
389        Check q-value against user-defined range
390        """
391        if not self.q_min == None and q < self.q_min:
392            return False
393        if not self.q_max == None and q > self.q_max:
394            return False
395        return True
396
397    def lstsq(self, nfunc=5, nr=20):
398        """
399        The problem is solved by posing the problem as  Ax = b,
400        where x is the set of coefficients we are looking for.
401
402        Npts is the number of points.
403
404        In the following i refers to the ith base function coefficient.
405        The matrix has its entries j in its first Npts rows set to ::
406
407            A[i][j] = (Fourier transformed base function for point j)
408
409        We them choose a number of r-points, n_r, to evaluate the second
410        derivative of P(r) at. This is used as our regularization term.
411        For a vector r of length n_r, the following n_r rows are set to ::
412
413            A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2,
414            evaluated at r[j])
415
416        The vector b has its first Npts entries set to ::
417
418            b[j] = (I(q) observed for point j)
419
420        The following n_r entries are set to zero.
421
422        The result is found by using scipy.linalg.basic.lstsq to invert
423        the matrix and find the coefficients x.
424
425        :param nfunc: number of base functions to use.
426        :param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
427
428        If the result does not allow us to compute the covariance matrix,
429        a matrix filled with zeros will be returned.
430
431        """
432        # Note: To make sure an array is contiguous:
433        # blah = numpy.ascontiguousarray(blah_original)
434        # ... before passing it to C
435
436        if self.is_valid() < 0:
437            msg = "Invertor: invalid data; incompatible data lengths."
438            raise RuntimeError, msg
439
440        self.nfunc = nfunc
441        # a -- An M x N matrix.
442        # b -- An M x nrhs matrix or M vector.
443        npts = len(self.x)
444        nq = nr
445        sqrt_alpha = math.sqrt(math.fabs(self.alpha))
446        if sqrt_alpha < 0.0:
447            nq = 0
448
449        # If we need to fit the background, add a term
450        if self.has_bck == True:
451            nfunc_0 = nfunc
452            nfunc += 1
453
454        a = numpy.zeros([npts + nq, nfunc])
455        b = numpy.zeros(npts + nq)
456        err = numpy.zeros([nfunc, nfunc])
457
458        # Construct the a matrix and b vector that represent the problem
459        t_0 = time.time()
460        try:
461            self._get_matrix(nfunc, nq, a, b)
462        except:
463            raise RuntimeError, "Invertor: could not invert I(Q)\n  %s" % sys.exc_value
464
465        # Perform the inversion (least square fit)
466        c, chi2, _, _ = lstsq(a, b)
467        # Sanity check
468        try:
469            float(chi2)
470        except:
471            chi2 = -1.0
472        self.chi2 = chi2
473
474        inv_cov = numpy.zeros([nfunc, nfunc])
475        # Get the covariance matrix, defined as inv_cov = a_transposed * a
476        self._get_invcov_matrix(nfunc, nr, a, inv_cov)
477
478        # Compute the reg term size for the output
479        sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
480
481        if math.fabs(self.alpha) > 0:
482            new_alpha = sum_sig / (sum_reg / self.alpha)
483        else:
484            new_alpha = 0.0
485        self.suggested_alpha = new_alpha
486
487        try:
488            cov = numpy.linalg.pinv(inv_cov)
489            err = math.fabs(chi2 / float(npts - nfunc)) * cov
490        except:
491            # We were not able to estimate the errors
492            # Return an empty error matrix
493            logging.error(sys.exc_value)
494
495        # Keep a copy of the last output
496        if self.has_bck == False:
497            self.background = 0
498            self.out = c
499            self.cov = err
500        else:
501            self.background = c[0]
502
503            err_0 = numpy.zeros([nfunc, nfunc])
504            c_0 = numpy.zeros(nfunc)
505
506            for i in range(nfunc_0):
507                c_0[i] = c[i + 1]
508                for j in range(nfunc_0):
509                    err_0[i][j] = err[i + 1][j + 1]
510
511            self.out = c_0
512            self.cov = err_0
513
514        # Store computation time
515        self.elapsed = time.time() - t_0
516
517        return self.out, self.cov
518
519    def estimate_numterms(self, isquit_func=None):
520        """
521        Returns a reasonable guess for the
522        number of terms
523
524        :param isquit_func:
525          reference to thread function to call to check whether the computation needs to
526          be stopped.
527
528        :return: number of terms, alpha, message
529
530        """
531        from num_term import NTermEstimator
532        estimator = NTermEstimator(self.clone())
533        try:
534            return estimator.num_terms(isquit_func)
535        except:
536            # If we fail, estimate alpha and return the default
537            # number of terms
538            best_alpha, _, _ = self.estimate_alpha(self.nfunc)
539            logging.warning("Invertor.estimate_numterms: %s" % sys.exc_value)
540            return self.nfunc, best_alpha, "Could not estimate number of terms"
541
542    def estimate_alpha(self, nfunc):
543        """
544        Returns a reasonable guess for the
545        regularization constant alpha
546
547        :param nfunc: number of terms to use in the expansion.
548
549        :return: alpha, message, elapsed
550
551        where alpha is the estimate for alpha,
552        message is a message for the user,
553        elapsed is the computation time
554        """
555        #import time
556        try:
557            pr = self.clone()
558
559            # T_0 for computation time
560            starttime = time.time()
561            elapsed = 0
562
563            # If the current alpha is zero, try
564            # another value
565            if pr.alpha <= 0:
566                pr.alpha = 0.0001
567
568            # Perform inversion to find the largest alpha
569            out, _ = pr.invert(nfunc)
570            elapsed = time.time() - starttime
571            initial_alpha = pr.alpha
572            initial_peaks = pr.get_peaks(out)
573
574            # Try the inversion with the estimated alpha
575            pr.alpha = pr.suggested_alpha
576            out, _ = pr.invert(nfunc)
577
578            npeaks = pr.get_peaks(out)
579            # if more than one peak to start with
580            # just return the estimate
581            if npeaks > 1:
582                #message = "Your P(r) is not smooth,
583                #please check your inversion parameters"
584                message = None
585                return pr.suggested_alpha, message, elapsed
586            else:
587
588                # Look at smaller values
589                # We assume that for the suggested alpha, we have 1 peak
590                # if not, send a message to change parameters
591                alpha = pr.suggested_alpha
592                best_alpha = pr.suggested_alpha
593                found = False
594                for i in range(10):
595                    pr.alpha = (0.33) ** (i + 1) * alpha
596                    out, _ = pr.invert(nfunc)
597
598                    peaks = pr.get_peaks(out)
599                    if peaks > 1:
600                        found = True
601                        break
602                    best_alpha = pr.alpha
603
604                # If we didn't find a turning point for alpha and
605                # the initial alpha already had only one peak,
606                # just return that
607                if not found and initial_peaks == 1 and \
608                    initial_alpha < best_alpha:
609                    best_alpha = initial_alpha
610
611                # Check whether the size makes sense
612                message = ''
613
614                if not found:
615                    message = None
616                elif best_alpha >= 0.5 * pr.suggested_alpha:
617                    # best alpha is too big, return a
618                    # reasonable value
619                    message = "The estimated alpha for your system is too "
620                    message += "large. "
621                    message += "Try increasing your maximum distance."
622
623                return best_alpha, message, elapsed
624
625        except:
626            message = "Invertor.estimate_alpha: %s" % sys.exc_value
627            return 0, message, elapsed
628
629    def to_file(self, path, npts=100):
630        """
631        Save the state to a file that will be readable
632        by SliceView.
633
634        :param path: path of the file to write
635        :param npts: number of P(r) points to be written
636
637        """
638        file = open(path, 'w')
639        file.write("#d_max=%g\n" % self.d_max)
640        file.write("#nfunc=%g\n" % self.nfunc)
641        file.write("#alpha=%g\n" % self.alpha)
642        file.write("#chi2=%g\n" % self.chi2)
643        file.write("#elapsed=%g\n" % self.elapsed)
644        file.write("#qmin=%s\n" % str(self.q_min))
645        file.write("#qmax=%s\n" % str(self.q_max))
646        file.write("#slit_height=%g\n" % self.slit_height)
647        file.write("#slit_width=%g\n" % self.slit_width)
648        file.write("#background=%g\n" % self.background)
649        if self.has_bck == True:
650            file.write("#has_bck=1\n")
651        else:
652            file.write("#has_bck=0\n")
653        file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
654        if not self.out == None:
655            if len(self.out) == len(self.cov):
656                for i in range(len(self.out)):
657                    file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]),
658                                                   str(self.cov[i][i])))
659        file.write("<r>  <Pr>  <dPr>\n")
660        r = numpy.arange(0.0, self.d_max, self.d_max / npts)
661
662        for r_i in r:
663            (value, err) = self.pr_err(self.out, self.cov, r_i)
664            file.write("%g  %g  %g\n" % (r_i, value, err))
665
666        file.close()
667
668    def from_file(self, path):
669        """
670        Load the state of the Invertor from a file,
671        to be able to generate P(r) from a set of
672        parameters.
673
674        :param path: path of the file to load
675
676        """
677        #import os
678        #import re
679        if os.path.isfile(path):
680            try:
681                fd = open(path, 'r')
682
683                buff = fd.read()
684                lines = buff.split('\n')
685                for line in lines:
686                    if line.startswith('#d_max='):
687                        toks = line.split('=')
688                        self.d_max = float(toks[1])
689                    elif line.startswith('#nfunc='):
690                        toks = line.split('=')
691                        self.nfunc = int(toks[1])
692                        self.out = numpy.zeros(self.nfunc)
693                        self.cov = numpy.zeros([self.nfunc, self.nfunc])
694                    elif line.startswith('#alpha='):
695                        toks = line.split('=')
696                        self.alpha = float(toks[1])
697                    elif line.startswith('#chi2='):
698                        toks = line.split('=')
699                        self.chi2 = float(toks[1])
700                    elif line.startswith('#elapsed='):
701                        toks = line.split('=')
702                        self.elapsed = float(toks[1])
703                    elif line.startswith('#alpha_estimate='):
704                        toks = line.split('=')
705                        self.suggested_alpha = float(toks[1])
706                    elif line.startswith('#qmin='):
707                        toks = line.split('=')
708                        try:
709                            self.q_min = float(toks[1])
710                        except:
711                            self.q_min = None
712                    elif line.startswith('#qmax='):
713                        toks = line.split('=')
714                        try:
715                            self.q_max = float(toks[1])
716                        except:
717                            self.q_max = None
718                    elif line.startswith('#slit_height='):
719                        toks = line.split('=')
720                        self.slit_height = float(toks[1])
721                    elif line.startswith('#slit_width='):
722                        toks = line.split('=')
723                        self.slit_width = float(toks[1])
724                    elif line.startswith('#background='):
725                        toks = line.split('=')
726                        self.background = float(toks[1])
727                    elif line.startswith('#has_bck='):
728                        toks = line.split('=')
729                        if int(toks[1]) == 1:
730                            self.has_bck = True
731                        else:
732                            self.has_bck = False
733
734                    # Now read in the parameters
735                    elif line.startswith('#C_'):
736                        toks = line.split('=')
737                        p = re.compile('#C_([0-9]+)')
738                        m = p.search(toks[0])
739                        toks2 = toks[1].split('+-')
740                        i = int(m.group(1))
741                        self.out[i] = float(toks2[0])
742
743                        self.cov[i][i] = float(toks2[1])
744
745            except:
746                msg = "Invertor.from_file: corrupted file\n%s" % sys.exc_value
747                raise RuntimeError, msg
748        else:
749            msg = "Invertor.from_file: '%s' is not a file" % str(path)
750            raise RuntimeError, msg
Note: See TracBrowser for help on using the repository browser.