source: sasview/src/sas/sascalc/pr/invertor.py @ 9319cb0

magnetic_scatt, release-4.2.2, ticket-1009, ticket-1094-headless, ticket-1242-2d-resolution, ticket-1243, ticket-1249, unittest-saveload
Last change on this file since 9319cb0 was 2469df7, checked in by Paul Kienzle <pkienzle@…>, 7 years ago

lint: update 'if x==True/False?' to 'if x/not x:'

  • Property mode set to 100644
File size: 25.3 KB
Line 
1# pylint: disable=invalid-name
2"""
3Module to perform P(r) inversion.
4The module contains the Invertor class.
5
6FIXME: The way the Invertor interacts with its C component should be cleaned up
7"""
8
9import numpy as np
10import sys
11import math
12import time
13import copy
14import os
15import re
16import logging
17from numpy.linalg import lstsq
18from scipy import optimize
19from sas.sascalc.pr.core.pr_inversion import Cinvertor
20
21logger = logging.getLogger(__name__)
22
def help():
    """
    Return the general online help text for the P(r) inversion.

    The text describes the inversion approach, the fit function and the
    three user inputs (number of terms, regularization constant and
    maximum distance).
    Future work: extend this function to allow topic selection

    :return: the help text as a single string
    """
    fragments = (
        "The inversion approach is based on Moore, J. Appl. Cryst. ",
        "(1980) 13, 168-175.\n\n",
        "P(r) is set to be equal to an expansion of base functions ",
        "of the type ",
        "phi_n(r) = 2*r*sin(pi*n*r/D_max). The coefficient of each ",
        "base functions ",
        "in the expansion is found by performing a least square fit ",
        "with the ",
        "following fit function:\n\n",
        "chi**2 = sum_i[ I_meas(q_i) - I_th(q_i) ]**2/error**2 +",
        "Reg_term\n\n",
        "where I_meas(q) is the measured scattering intensity and ",
        "I_th(q) is ",
        "the prediction from the Fourier transform of the P(r) ",
        "expansion. ",
        "The Reg_term term is a regularization term set to the second",
        " derivative ",
        "d**2P(r)/dr**2 integrated over r. It is used to produce ",
        "a smooth P(r) output.\n\n",
        "The following are user inputs:\n\n",
        "   - Number of terms: the number of base functions in the P(r)",
        " expansion.\n\n",
        "   - Regularization constant: a multiplicative constant ",
        "to set the size of ",
        "the regularization term.\n\n",
        "   - Maximum distance: the maximum distance between any ",
        "two points in the system.\n",
    )
    return "".join(fragments)
57
58
class Invertor(Cinvertor):
    """
    Invertor class to perform P(r) inversion

    The problem is solved by posing the problem as  Ax = b,
    where x is the set of coefficients we are looking for.

    Npts is the number of points.

    In the following i refers to the ith base function coefficient.
    The matrix has its entries j in its first Npts rows set to ::

        A[j][i] = (Fourier transformed base function for point j)

    We then choose a number of r-points, n_r, to evaluate the second
    derivative of P(r) at. This is used as our regularization term.
    For a vector r of length n_r, the following n_r rows are set to ::

        A[j+Npts][i] = (2nd derivative of P(r), d**2(P(r))/d(r)**2,
        evaluated at r[j])

    The vector b has its first Npts entries set to ::

        b[j] = (I(q) observed for point j)

    The following n_r entries are set to zero.

    The result is found by using numpy.linalg.lstsq to solve the
    least-squares problem and find the coefficients x.

    Methods inherited from Cinvertor:

    * ``get_peaks(pars)``: returns the number of P(r) peaks
    * ``oscillations(pars)``: returns the oscillation parameters for the output P(r)
    * ``get_positive(pars)``: returns the fraction of P(r) that is above zero
    * ``get_pos_err(pars)``: returns the fraction of P(r) that is 1-sigma above zero
    """
    ## Chisqr of the last computation
    chi2 = 0
    ## Time elapsed for last computation
    ## (difference of two time.time() readings, i.e. seconds)
    elapsed = 0
    ## Alpha to get the reg term the same size as the signal
    suggested_alpha = 0
    ## Last number of base functions used
    nfunc = 10
    ## Last output values (coefficients of the base functions)
    out = None
    ## Last errors on output values (matrix indexed as cov[i][j])
    cov = None
    ## Background value
    background = 0
    ## Information dictionary for application use
    ## NOTE: defined at class level, so this single dict object is shared
    ## by every instance that has not assigned its own ``info``.
    info = {}
112
113    def __init__(self):
114        Cinvertor.__init__(self)
115
116    def __setstate__(self, state):
117        """
118        restore the state of invertor for pickle
119        """
120        (self.__dict__, self.alpha, self.d_max,
121         self.q_min, self.q_max,
122         self.x, self.y,
123         self.err, self.est_bck,
124         self.slit_height, self.slit_width) = state
125
126    def __reduce_ex__(self, proto):
127        """
128        Overwrite the __reduce_ex__
129        """
130
131        state = (self.__dict__,
132                 self.alpha, self.d_max,
133                 self.q_min, self.q_max,
134                 self.x, self.y,
135                 self.err, self.est_bck,
136                 self.slit_height, self.slit_width,
137                )
138        return (Invertor, tuple(), state, None, None)
139
140    def __setattr__(self, name, value):
141        """
142        Set the value of an attribute.
143        Access the parent class methods for
144        x, y, err, d_max, q_min, q_max and alpha
145        """
146        if   name == 'x':
147            if 0.0 in value:
148                msg = "Invertor: one of your q-values is zero. "
149                msg += "Delete that entry before proceeding"
150                raise ValueError(msg)
151            return self.set_x(value)
152        elif name == 'y':
153            return self.set_y(value)
154        elif name == 'err':
155            value2 = abs(value)
156            return self.set_err(value2)
157        elif name == 'd_max':
158            if value <= 0.0:
159                msg = "Invertor: d_max must be greater than zero."
160                msg += "Correct that entry before proceeding"
161                raise ValueError(msg)
162            return self.set_dmax(value)
163        elif name == 'q_min':
164            if value is None:
165                return self.set_qmin(-1.0)
166            return self.set_qmin(value)
167        elif name == 'q_max':
168            if value is None:
169                return self.set_qmax(-1.0)
170            return self.set_qmax(value)
171        elif name == 'alpha':
172            return self.set_alpha(value)
173        elif name == 'slit_height':
174            return self.set_slit_height(value)
175        elif name == 'slit_width':
176            return self.set_slit_width(value)
177        elif name == 'est_bck':
178            if value == True:
179                return self.set_est_bck(1)
180            elif value == False:
181                return self.set_est_bck(0)
182            else:
183                raise ValueError("Invertor: est_bck can only be True or False")
184
185        return Cinvertor.__setattr__(self, name, value)
186
187    def __getattr__(self, name):
188        """
189        Return the value of an attribute
190        """
191        #import numpy
192        if name == 'x':
193            out = np.ones(self.get_nx())
194            self.get_x(out)
195            return out
196        elif name == 'y':
197            out = np.ones(self.get_ny())
198            self.get_y(out)
199            return out
200        elif name == 'err':
201            out = np.ones(self.get_nerr())
202            self.get_err(out)
203            return out
204        elif name == 'd_max':
205            return self.get_dmax()
206        elif name == 'q_min':
207            qmin = self.get_qmin()
208            if qmin < 0:
209                return None
210            return qmin
211        elif name == 'q_max':
212            qmax = self.get_qmax()
213            if qmax < 0:
214                return None
215            return qmax
216        elif name == 'alpha':
217            return self.get_alpha()
218        elif name == 'slit_height':
219            return self.get_slit_height()
220        elif name == 'slit_width':
221            return self.get_slit_width()
222        elif name == 'est_bck':
223            value = self.get_est_bck()
224            return value == 1
225        elif name in self.__dict__:
226            return self.__dict__[name]
227        return None
228
229    def clone(self):
230        """
231        Return a clone of this instance
232        """
233        #import copy
234
235        invertor = Invertor()
236        invertor.chi2 = self.chi2
237        invertor.elapsed = self.elapsed
238        invertor.nfunc = self.nfunc
239        invertor.alpha = self.alpha
240        invertor.d_max = self.d_max
241        invertor.q_min = self.q_min
242        invertor.q_max = self.q_max
243
244        invertor.x = self.x
245        invertor.y = self.y
246        invertor.err = self.err
247        invertor.est_bck = self.est_bck
248        invertor.background = self.background
249        invertor.slit_height = self.slit_height
250        invertor.slit_width = self.slit_width
251
252        invertor.info = copy.deepcopy(self.info)
253
254        return invertor
255
256    def invert(self, nfunc=10, nr=20):
257        """
258        Perform inversion to P(r)
259
260        The problem is solved by posing the problem as  Ax = b,
261        where x is the set of coefficients we are looking for.
262
263        Npts is the number of points.
264
265        In the following i refers to the ith base function coefficient.
266        The matrix has its entries j in its first Npts rows set to ::
267
268            A[i][j] = (Fourier transformed base function for point j)
269
270        We them choose a number of r-points, n_r, to evaluate the second
271        derivative of P(r) at. This is used as our regularization term.
272        For a vector r of length n_r, the following n_r rows are set to ::
273
274            A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
275
276        The vector b has its first Npts entries set to ::
277
278            b[j] = (I(q) observed for point j)
279
280        The following n_r entries are set to zero.
281
282        The result is found by using scipy.linalg.basic.lstsq to invert
283        the matrix and find the coefficients x.
284
285        :param nfunc: number of base functions to use.
286        :param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
287        :return: c_out, c_cov - the coefficients with covariance matrix
288        """
289        # Reset the background value before proceeding
290        # self.background = 0.0
291        if not self.est_bck:
292            self.y -= self.background
293        out, cov = self.lstsq(nfunc, nr=nr)
294        if not self.est_bck:
295            self.y += self.background
296        return out, cov
297
298    def iq(self, out, q):
299        """
300        Function to call to evaluate the scattering intensity
301
302        :param args: c-parameters, and q
303        :return: I(q)
304
305        """
306        return Cinvertor.iq(self, out, q) + self.background
307
308    def invert_optimize(self, nfunc=10, nr=20):
309        """
310        Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
311
312        This probably produce more reliable results, but is much slower.
313        The minimization function is set to
314        sum_i[ (I_obs(q_i) - I_theo(q_i))/err**2 ] + alpha * reg_term,
315        where the reg_term is given by Svergun: it is the integral of
316        the square of the first derivative
317        of P(r), d(P(r))/dr, integrated over the full range of r.
318
319        :param nfunc: number of base functions to use.
320        :param nr: number of r points to evaluate the 2nd derivative at
321            for the reg. term.
322
323        :return: c_out, c_cov - the coefficients with covariance matrix
324
325        """
326        self.nfunc = nfunc
327        # First, check that the current data is valid
328        if self.is_valid() <= 0:
329            msg = "Invertor.invert: Data array are of different length"
330            raise RuntimeError(msg)
331
332        p = np.ones(nfunc)
333        t_0 = time.time()
334        out, cov_x, _, _, _ = optimize.leastsq(self.residuals, p, full_output=1)
335
336        # Compute chi^2
337        res = self.residuals(out)
338        chisqr = 0
339        for i in range(len(res)):
340            chisqr += res[i]
341
342        self.chi2 = chisqr
343
344        # Store computation time
345        self.elapsed = time.time() - t_0
346
347        if cov_x is None:
348            cov_x = np.ones([nfunc, nfunc])
349            cov_x *= math.fabs(chisqr)
350        return out, cov_x
351
352    def pr_fit(self, nfunc=5):
353        """
354        This is a direct fit to a given P(r). It assumes that the y data
355        is set to some P(r) distribution that we are trying to reproduce
356        with a set of base functions.
357
358        This method is provided as a test.
359        """
360        # First, check that the current data is valid
361        if self.is_valid() <= 0:
362            msg = "Invertor.invert: Data arrays are of different length"
363            raise RuntimeError(msg)
364
365        p = np.ones(nfunc)
366        t_0 = time.time()
367        out, cov_x, _, _, _ = optimize.leastsq(self.pr_residuals, p, full_output=1)
368
369        # Compute chi^2
370        res = self.pr_residuals(out)
371        chisqr = 0
372        for i in range(len(res)):
373            chisqr += res[i]
374
375        self.chisqr = chisqr
376
377        # Store computation time
378        self.elapsed = time.time() - t_0
379
380        return out, cov_x
381
382    def pr_err(self, c, c_cov, r):
383        """
384        Returns the value of P(r) for a given r, and base function
385        coefficients, with error.
386
387        :param c: base function coefficients
388        :param c_cov: covariance matrice of the base function coefficients
389        :param r: r-value to evaluate P(r) at
390
391        :return: P(r)
392
393        """
394        return self.get_pr_err(c, c_cov, r)
395
396    def _accept_q(self, q):
397        """
398        Check q-value against user-defined range
399        """
400        if self.q_min is not None and q < self.q_min:
401            return False
402        if self.q_max is not None and q > self.q_max:
403            return False
404        return True
405
406    def lstsq(self, nfunc=5, nr=20):
407        """
408        The problem is solved by posing the problem as  Ax = b,
409        where x is the set of coefficients we are looking for.
410
411        Npts is the number of points.
412
413        In the following i refers to the ith base function coefficient.
414        The matrix has its entries j in its first Npts rows set to ::
415
416            A[i][j] = (Fourier transformed base function for point j)
417
418        We them choose a number of r-points, n_r, to evaluate the second
419        derivative of P(r) at. This is used as our regularization term.
420        For a vector r of length n_r, the following n_r rows are set to ::
421
422            A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2,
423            evaluated at r[j])
424
425        The vector b has its first Npts entries set to ::
426
427            b[j] = (I(q) observed for point j)
428
429        The following n_r entries are set to zero.
430
431        The result is found by using scipy.linalg.basic.lstsq to invert
432        the matrix and find the coefficients x.
433
434        :param nfunc: number of base functions to use.
435        :param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
436
437        If the result does not allow us to compute the covariance matrix,
438        a matrix filled with zeros will be returned.
439
440        """
441        # Note: To make sure an array is contiguous:
442        # blah = np.ascontiguousarray(blah_original)
443        # ... before passing it to C
444
445        if self.is_valid() < 0:
446            msg = "Invertor: invalid data; incompatible data lengths."
447            raise RuntimeError(msg)
448
449        self.nfunc = nfunc
450        # a -- An M x N matrix.
451        # b -- An M x nrhs matrix or M vector.
452        npts = len(self.x)
453        nq = nr
454        sqrt_alpha = math.sqrt(math.fabs(self.alpha))
455        if sqrt_alpha < 0.0:
456            nq = 0
457
458        # If we need to fit the background, add a term
459        if self.est_bck:
460            nfunc_0 = nfunc
461            nfunc += 1
462
463        a = np.zeros([npts + nq, nfunc])
464        b = np.zeros(npts + nq)
465        err = np.zeros([nfunc, nfunc])
466
467        # Construct the a matrix and b vector that represent the problem
468        t_0 = time.time()
469        try:
470            self._get_matrix(nfunc, nq, a, b)
471        except Exception as exc:
472            raise RuntimeError("Invertor: could not invert I(Q)\n  %s" % str(exc))
473
474        # Perform the inversion (least square fit)
475        c, chi2, _, _ = lstsq(a, b)
476        # Sanity check
477        try:
478            float(chi2)
479        except:
480            chi2 = -1.0
481        self.chi2 = chi2
482
483        inv_cov = np.zeros([nfunc, nfunc])
484        # Get the covariance matrix, defined as inv_cov = a_transposed * a
485        self._get_invcov_matrix(nfunc, nr, a, inv_cov)
486
487        # Compute the reg term size for the output
488        sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
489
490        if math.fabs(self.alpha) > 0:
491            new_alpha = sum_sig / (sum_reg / self.alpha)
492        else:
493            new_alpha = 0.0
494        self.suggested_alpha = new_alpha
495
496        try:
497            cov = np.linalg.pinv(inv_cov)
498            err = math.fabs(chi2 / float(npts - nfunc)) * cov
499        except:
500            # We were not able to estimate the errors
501            # Return an empty error matrix
502            logger.error(sys.exc_value)
503
504        # Keep a copy of the last output
505        if not self.est_bck:
506            self.out = c
507            self.cov = err
508        else:
509            self.background = c[0]
510
511            err_0 = np.zeros([nfunc, nfunc])
512            c_0 = np.zeros(nfunc)
513
514            for i in range(nfunc_0):
515                c_0[i] = c[i + 1]
516                for j in range(nfunc_0):
517                    err_0[i][j] = err[i + 1][j + 1]
518
519            self.out = c_0
520            self.cov = err_0
521
522        # Store computation time
523        self.elapsed = time.time() - t_0
524
525        return self.out, self.cov
526
527    def estimate_numterms(self, isquit_func=None):
528        """
529        Returns a reasonable guess for the
530        number of terms
531
532        :param isquit_func:
533          reference to thread function to call to check whether the computation needs to
534          be stopped.
535
536        :return: number of terms, alpha, message
537
538        """
539        from num_term import NTermEstimator
540        estimator = NTermEstimator(self.clone())
541        try:
542            return estimator.num_terms(isquit_func)
543        except:
544            # If we fail, estimate alpha and return the default
545            # number of terms
546            best_alpha, _, _ = self.estimate_alpha(self.nfunc)
547            logger.warning("Invertor.estimate_numterms: %s" % sys.exc_value)
548            return self.nfunc, best_alpha, "Could not estimate number of terms"
549
550    def estimate_alpha(self, nfunc):
551        """
552        Returns a reasonable guess for the
553        regularization constant alpha
554
555        :param nfunc: number of terms to use in the expansion.
556
557        :return: alpha, message, elapsed
558
559        where alpha is the estimate for alpha,
560        message is a message for the user,
561        elapsed is the computation time
562        """
563        #import time
564        try:
565            pr = self.clone()
566
567            # T_0 for computation time
568            starttime = time.time()
569            elapsed = 0
570
571            # If the current alpha is zero, try
572            # another value
573            if pr.alpha <= 0:
574                pr.alpha = 0.0001
575
576            # Perform inversion to find the largest alpha
577            out, _ = pr.invert(nfunc)
578            elapsed = time.time() - starttime
579            initial_alpha = pr.alpha
580            initial_peaks = pr.get_peaks(out)
581
582            # Try the inversion with the estimated alpha
583            pr.alpha = pr.suggested_alpha
584            out, _ = pr.invert(nfunc)
585
586            npeaks = pr.get_peaks(out)
587            # if more than one peak to start with
588            # just return the estimate
589            if npeaks > 1:
590                #message = "Your P(r) is not smooth,
591                #please check your inversion parameters"
592                message = None
593                return pr.suggested_alpha, message, elapsed
594            else:
595
596                # Look at smaller values
597                # We assume that for the suggested alpha, we have 1 peak
598                # if not, send a message to change parameters
599                alpha = pr.suggested_alpha
600                best_alpha = pr.suggested_alpha
601                found = False
602                for i in range(10):
603                    pr.alpha = (0.33) ** (i + 1) * alpha
604                    out, _ = pr.invert(nfunc)
605
606                    peaks = pr.get_peaks(out)
607                    if peaks > 1:
608                        found = True
609                        break
610                    best_alpha = pr.alpha
611
612                # If we didn't find a turning point for alpha and
613                # the initial alpha already had only one peak,
614                # just return that
615                if not found and initial_peaks == 1 and \
616                    initial_alpha < best_alpha:
617                    best_alpha = initial_alpha
618
619                # Check whether the size makes sense
620                message = ''
621
622                if not found:
623                    message = None
624                elif best_alpha >= 0.5 * pr.suggested_alpha:
625                    # best alpha is too big, return a
626                    # reasonable value
627                    message = "The estimated alpha for your system is too "
628                    message += "large. "
629                    message += "Try increasing your maximum distance."
630
631                return best_alpha, message, elapsed
632
633        except:
634            message = "Invertor.estimate_alpha: %s" % sys.exc_value
635            return 0, message, elapsed
636
637    def to_file(self, path, npts=100):
638        """
639        Save the state to a file that will be readable
640        by SliceView.
641
642        :param path: path of the file to write
643        :param npts: number of P(r) points to be written
644
645        """
646        file = open(path, 'w')
647        file.write("#d_max=%g\n" % self.d_max)
648        file.write("#nfunc=%g\n" % self.nfunc)
649        file.write("#alpha=%g\n" % self.alpha)
650        file.write("#chi2=%g\n" % self.chi2)
651        file.write("#elapsed=%g\n" % self.elapsed)
652        file.write("#qmin=%s\n" % str(self.q_min))
653        file.write("#qmax=%s\n" % str(self.q_max))
654        file.write("#slit_height=%g\n" % self.slit_height)
655        file.write("#slit_width=%g\n" % self.slit_width)
656        file.write("#background=%g\n" % self.background)
657        if self.est_bck:
658            file.write("#has_bck=1\n")
659        else:
660            file.write("#has_bck=0\n")
661        file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
662        if self.out is not None:
663            if len(self.out) == len(self.cov):
664                for i in range(len(self.out)):
665                    file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]),
666                                                   str(self.cov[i][i])))
667        file.write("<r>  <Pr>  <dPr>\n")
668        r = np.arange(0.0, self.d_max, self.d_max / npts)
669
670        for r_i in r:
671            (value, err) = self.pr_err(self.out, self.cov, r_i)
672            file.write("%g  %g  %g\n" % (r_i, value, err))
673
674        file.close()
675
676    def from_file(self, path):
677        """
678        Load the state of the Invertor from a file,
679        to be able to generate P(r) from a set of
680        parameters.
681
682        :param path: path of the file to load
683
684        """
685        #import os
686        #import re
687        if os.path.isfile(path):
688            try:
689                fd = open(path, 'r')
690
691                buff = fd.read()
692                lines = buff.split('\n')
693                for line in lines:
694                    if line.startswith('#d_max='):
695                        toks = line.split('=')
696                        self.d_max = float(toks[1])
697                    elif line.startswith('#nfunc='):
698                        toks = line.split('=')
699                        self.nfunc = int(toks[1])
700                        self.out = np.zeros(self.nfunc)
701                        self.cov = np.zeros([self.nfunc, self.nfunc])
702                    elif line.startswith('#alpha='):
703                        toks = line.split('=')
704                        self.alpha = float(toks[1])
705                    elif line.startswith('#chi2='):
706                        toks = line.split('=')
707                        self.chi2 = float(toks[1])
708                    elif line.startswith('#elapsed='):
709                        toks = line.split('=')
710                        self.elapsed = float(toks[1])
711                    elif line.startswith('#alpha_estimate='):
712                        toks = line.split('=')
713                        self.suggested_alpha = float(toks[1])
714                    elif line.startswith('#qmin='):
715                        toks = line.split('=')
716                        try:
717                            self.q_min = float(toks[1])
718                        except:
719                            self.q_min = None
720                    elif line.startswith('#qmax='):
721                        toks = line.split('=')
722                        try:
723                            self.q_max = float(toks[1])
724                        except:
725                            self.q_max = None
726                    elif line.startswith('#slit_height='):
727                        toks = line.split('=')
728                        self.slit_height = float(toks[1])
729                    elif line.startswith('#slit_width='):
730                        toks = line.split('=')
731                        self.slit_width = float(toks[1])
732                    elif line.startswith('#background='):
733                        toks = line.split('=')
734                        self.background = float(toks[1])
735                    elif line.startswith('#has_bck='):
736                        toks = line.split('=')
737                        self.est_bck = int(toks[1]) == 1
738
739                    # Now read in the parameters
740                    elif line.startswith('#C_'):
741                        toks = line.split('=')
742                        p = re.compile('#C_([0-9]+)')
743                        m = p.search(toks[0])
744                        toks2 = toks[1].split('+-')
745                        i = int(m.group(1))
746                        self.out[i] = float(toks2[0])
747
748                        self.cov[i][i] = float(toks2[1])
749
750            except:
751                msg = "Invertor.from_file: corrupted file\n%s" % sys.exc_value
752                raise RuntimeError(msg)
753        else:
754            msg = "Invertor.from_file: '%s' is not a file" % str(path)
755            raise RuntimeError(msg)
Note: See TracBrowser for help on using the repository browser.