source: sasview/src/sas/sascalc/pr/invertor.py @ 6b43c58

Last change on this file since 6b43c58 was d04ac05, checked in by Paul Kienzle <pkienzle@…>, 7 years ago

add python 3 support for C modules

  • Property mode set to 100644
File size: 25.5 KB
Line 
1# pylint: disable=invalid-name
2"""
3Module to perform P(r) inversion.
4The module contains the Invertor class.
5
6FIXME: The way the Invertor interacts with its C component should be cleaned up
7"""
8
9import numpy as np
10import sys
11import math
12import time
13import copy
14import os
15import re
16import logging
17from numpy.linalg import lstsq
18from scipy import optimize
19from sas.sascalc.pr.core.pr_inversion import Cinvertor
20
21logger = logging.getLogger(__name__)
22
23def help():
24    """
25    Provide general online help text
26    Future work: extend this function to allow topic selection
27    """
28    info_txt = "The inversion approach is based on Moore, J. Appl. Cryst. "
29    info_txt += "(1980) 13, 168-175.\n\n"
30    info_txt += "P(r) is set to be equal to an expansion of base functions "
31    info_txt += "of the type "
32    info_txt += "phi_n(r) = 2*r*sin(pi*n*r/D_max). The coefficient of each "
33    info_txt += "base functions "
34    info_txt += "in the expansion is found by performing a least square fit "
35    info_txt += "with the "
36    info_txt += "following fit function:\n\n"
37    info_txt += "chi**2 = sum_i[ I_meas(q_i) - I_th(q_i) ]**2/error**2 +"
38    info_txt += "Reg_term\n\n"
39    info_txt += "where I_meas(q) is the measured scattering intensity and "
40    info_txt += "I_th(q) is "
41    info_txt += "the prediction from the Fourier transform of the P(r) "
42    info_txt += "expansion. "
43    info_txt += "The Reg_term term is a regularization term set to the second"
44    info_txt += " derivative "
45    info_txt += "d**2P(r)/dr**2 integrated over r. It is used to produce "
46    info_txt += "a smooth P(r) output.\n\n"
47    info_txt += "The following are user inputs:\n\n"
48    info_txt += "   - Number of terms: the number of base functions in the P(r)"
49    info_txt += " expansion.\n\n"
50    info_txt += "   - Regularization constant: a multiplicative constant "
51    info_txt += "to set the size of "
52    info_txt += "the regularization term.\n\n"
53    info_txt += "   - Maximum distance: the maximum distance between any "
54    info_txt += "two points in the system.\n"
55
56    return info_txt
57
58
59class Invertor(Cinvertor):
60    """
61    Invertor class to perform P(r) inversion
62
63    The problem is solved by posing the problem as  Ax = b,
64    where x is the set of coefficients we are looking for.
65
66    Npts is the number of points.
67
68    In the following i refers to the ith base function coefficient.
69    The matrix has its entries j in its first Npts rows set to ::
70
71        A[j][i] = (Fourier transformed base function for point j)
72
73    We them choose a number of r-points, n_r, to evaluate the second
74    derivative of P(r) at. This is used as our regularization term.
75    For a vector r of length n_r, the following n_r rows are set to ::
76
77        A[j+Npts][i] = (2nd derivative of P(r), d**2(P(r))/d(r)**2,
78        evaluated at r[j])
79
80    The vector b has its first Npts entries set to ::
81
82        b[j] = (I(q) observed for point j)
83
84    The following n_r entries are set to zero.
85
86    The result is found by using scipy.linalg.basic.lstsq to invert
87    the matrix and find the coefficients x.
88
89    Methods inherited from Cinvertor:
90
91    * ``get_peaks(pars)``: returns the number of P(r) peaks
92    * ``oscillations(pars)``: returns the oscillation parameters for the output P(r)
93    * ``get_positive(pars)``: returns the fraction of P(r) that is above zero
94    * ``get_pos_err(pars)``: returns the fraction of P(r) that is 1-sigma above zero
95    """
96    ## Chisqr of the last computation
97    chi2 = 0
98    ## Time elapsed for last computation
99    elapsed = 0
100    ## Alpha to get the reg term the same size as the signal
101    suggested_alpha = 0
102    ## Last number of base functions used
103    nfunc = 10
104    ## Last output values
105    out = None
106    ## Last errors on output values
107    cov = None
108    ## Background value
109    background = 0
110    ## Information dictionary for application use
111    info = {}
112
113    def __init__(self):
114        Cinvertor.__init__(self)
115
116    def __setstate__(self, state):
117        """
118        restore the state of invertor for pickle
119        """
120        (self.__dict__, self.alpha, self.d_max,
121         self.q_min, self.q_max,
122         self.x, self.y,
123         self.err, self.est_bck,
124         self.slit_height, self.slit_width) = state
125
126    def __reduce_ex__(self, proto):
127        """
128        Overwrite the __reduce_ex__
129        """
130
131        state = (self.__dict__,
132                 self.alpha, self.d_max,
133                 self.q_min, self.q_max,
134                 self.x, self.y,
135                 self.err, self.est_bck,
136                 self.slit_height, self.slit_width,
137                )
138        return (Invertor, tuple(), state, None, None)
139
140    def __setattr__(self, name, value):
141        """
142        Set the value of an attribute.
143        Access the parent class methods for
144        x, y, err, d_max, q_min, q_max and alpha
145        """
146        if   name == 'x':
147            if 0.0 in value:
148                msg = "Invertor: one of your q-values is zero. "
149                msg += "Delete that entry before proceeding"
150                raise ValueError(msg)
151            return self.set_x(value)
152        elif name == 'y':
153            return self.set_y(value)
154        elif name == 'err':
155            value2 = abs(value)
156            return self.set_err(value2)
157        elif name == 'd_max':
158            if value <= 0.0:
159                msg = "Invertor: d_max must be greater than zero."
160                msg += "Correct that entry before proceeding"
161                raise ValueError(msg)
162            return self.set_dmax(value)
163        elif name == 'q_min':
164            if value is None:
165                return self.set_qmin(-1.0)
166            return self.set_qmin(value)
167        elif name == 'q_max':
168            if value is None:
169                return self.set_qmax(-1.0)
170            return self.set_qmax(value)
171        elif name == 'alpha':
172            return self.set_alpha(value)
173        elif name == 'slit_height':
174            return self.set_slit_height(value)
175        elif name == 'slit_width':
176            return self.set_slit_width(value)
177        elif name == 'est_bck':
178            if value == True:
179                return self.set_est_bck(1)
180            elif value == False:
181                return self.set_est_bck(0)
182            else:
183                raise ValueError("Invertor: est_bck can only be True or False")
184
185        return Cinvertor.__setattr__(self, name, value)
186
187    def __getattr__(self, name):
188        """
189        Return the value of an attribute
190        """
191        #import numpy
192        if name == 'x':
193            out = np.ones(self.get_nx())
194            self.get_x(out)
195            return out
196        elif name == 'y':
197            out = np.ones(self.get_ny())
198            self.get_y(out)
199            return out
200        elif name == 'err':
201            out = np.ones(self.get_nerr())
202            self.get_err(out)
203            return out
204        elif name == 'd_max':
205            return self.get_dmax()
206        elif name == 'q_min':
207            qmin = self.get_qmin()
208            if qmin < 0:
209                return None
210            return qmin
211        elif name == 'q_max':
212            qmax = self.get_qmax()
213            if qmax < 0:
214                return None
215            return qmax
216        elif name == 'alpha':
217            return self.get_alpha()
218        elif name == 'slit_height':
219            return self.get_slit_height()
220        elif name == 'slit_width':
221            return self.get_slit_width()
222        elif name == 'est_bck':
223            value = self.get_est_bck()
224            if value == 1:
225                return True
226            else:
227                return False
228        elif name in self.__dict__:
229            return self.__dict__[name]
230        return None
231
232    def clone(self):
233        """
234        Return a clone of this instance
235        """
236        #import copy
237
238        invertor = Invertor()
239        invertor.chi2 = self.chi2
240        invertor.elapsed = self.elapsed
241        invertor.nfunc = self.nfunc
242        invertor.alpha = self.alpha
243        invertor.d_max = self.d_max
244        invertor.q_min = self.q_min
245        invertor.q_max = self.q_max
246
247        invertor.x = self.x
248        invertor.y = self.y
249        invertor.err = self.err
250        invertor.est_bck = self.est_bck
251        invertor.background = self.background
252        invertor.slit_height = self.slit_height
253        invertor.slit_width = self.slit_width
254
255        invertor.info = copy.deepcopy(self.info)
256
257        return invertor
258
259    def invert(self, nfunc=10, nr=20):
260        """
261        Perform inversion to P(r)
262
263        The problem is solved by posing the problem as  Ax = b,
264        where x is the set of coefficients we are looking for.
265
266        Npts is the number of points.
267
268        In the following i refers to the ith base function coefficient.
269        The matrix has its entries j in its first Npts rows set to ::
270
271            A[i][j] = (Fourier transformed base function for point j)
272
273        We them choose a number of r-points, n_r, to evaluate the second
274        derivative of P(r) at. This is used as our regularization term.
275        For a vector r of length n_r, the following n_r rows are set to ::
276
277            A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
278
279        The vector b has its first Npts entries set to ::
280
281            b[j] = (I(q) observed for point j)
282
283        The following n_r entries are set to zero.
284
285        The result is found by using scipy.linalg.basic.lstsq to invert
286        the matrix and find the coefficients x.
287
288        :param nfunc: number of base functions to use.
289        :param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
290        :return: c_out, c_cov - the coefficients with covariance matrix
291        """
292        # Reset the background value before proceeding
293        # self.background = 0.0
294        if not self.est_bck:
295            self.y -= self.background
296        out, cov = self.lstsq(nfunc, nr=nr)
297        if not self.est_bck:
298            self.y += self.background
299        return out, cov
300
301    def iq(self, out, q):
302        """
303        Function to call to evaluate the scattering intensity
304
305        :param args: c-parameters, and q
306        :return: I(q)
307
308        """
309        return Cinvertor.iq(self, out, q) + self.background
310
311    def invert_optimize(self, nfunc=10, nr=20):
312        """
313        Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
314
315        This probably produce more reliable results, but is much slower.
316        The minimization function is set to
317        sum_i[ (I_obs(q_i) - I_theo(q_i))/err**2 ] + alpha * reg_term,
318        where the reg_term is given by Svergun: it is the integral of
319        the square of the first derivative
320        of P(r), d(P(r))/dr, integrated over the full range of r.
321
322        :param nfunc: number of base functions to use.
323        :param nr: number of r points to evaluate the 2nd derivative at
324            for the reg. term.
325
326        :return: c_out, c_cov - the coefficients with covariance matrix
327
328        """
329        self.nfunc = nfunc
330        # First, check that the current data is valid
331        if self.is_valid() <= 0:
332            msg = "Invertor.invert: Data array are of different length"
333            raise RuntimeError(msg)
334
335        p = np.ones(nfunc)
336        t_0 = time.time()
337        out, cov_x, _, _, _ = optimize.leastsq(self.residuals, p, full_output=1)
338
339        # Compute chi^2
340        res = self.residuals(out)
341        chisqr = 0
342        for i in range(len(res)):
343            chisqr += res[i]
344
345        self.chi2 = chisqr
346
347        # Store computation time
348        self.elapsed = time.time() - t_0
349
350        if cov_x is None:
351            cov_x = np.ones([nfunc, nfunc])
352            cov_x *= math.fabs(chisqr)
353        return out, cov_x
354
355    def pr_fit(self, nfunc=5):
356        """
357        This is a direct fit to a given P(r). It assumes that the y data
358        is set to some P(r) distribution that we are trying to reproduce
359        with a set of base functions.
360
361        This method is provided as a test.
362        """
363        # First, check that the current data is valid
364        if self.is_valid() <= 0:
365            msg = "Invertor.invert: Data arrays are of different length"
366            raise RuntimeError(msg)
367
368        p = np.ones(nfunc)
369        t_0 = time.time()
370        out, cov_x, _, _, _ = optimize.leastsq(self.pr_residuals, p, full_output=1)
371
372        # Compute chi^2
373        res = self.pr_residuals(out)
374        chisqr = 0
375        for i in range(len(res)):
376            chisqr += res[i]
377
378        self.chisqr = chisqr
379
380        # Store computation time
381        self.elapsed = time.time() - t_0
382
383        return out, cov_x
384
385    def pr_err(self, c, c_cov, r):
386        """
387        Returns the value of P(r) for a given r, and base function
388        coefficients, with error.
389
390        :param c: base function coefficients
391        :param c_cov: covariance matrice of the base function coefficients
392        :param r: r-value to evaluate P(r) at
393
394        :return: P(r)
395
396        """
397        return self.get_pr_err(c, c_cov, r)
398
399    def _accept_q(self, q):
400        """
401        Check q-value against user-defined range
402        """
403        if self.q_min is not None and q < self.q_min:
404            return False
405        if self.q_max is not None and q > self.q_max:
406            return False
407        return True
408
409    def lstsq(self, nfunc=5, nr=20):
410        """
411        The problem is solved by posing the problem as  Ax = b,
412        where x is the set of coefficients we are looking for.
413
414        Npts is the number of points.
415
416        In the following i refers to the ith base function coefficient.
417        The matrix has its entries j in its first Npts rows set to ::
418
419            A[i][j] = (Fourier transformed base function for point j)
420
421        We them choose a number of r-points, n_r, to evaluate the second
422        derivative of P(r) at. This is used as our regularization term.
423        For a vector r of length n_r, the following n_r rows are set to ::
424
425            A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2,
426            evaluated at r[j])
427
428        The vector b has its first Npts entries set to ::
429
430            b[j] = (I(q) observed for point j)
431
432        The following n_r entries are set to zero.
433
434        The result is found by using scipy.linalg.basic.lstsq to invert
435        the matrix and find the coefficients x.
436
437        :param nfunc: number of base functions to use.
438        :param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
439
440        If the result does not allow us to compute the covariance matrix,
441        a matrix filled with zeros will be returned.
442
443        """
444        # Note: To make sure an array is contiguous:
445        # blah = np.ascontiguousarray(blah_original)
446        # ... before passing it to C
447
448        if self.is_valid() < 0:
449            msg = "Invertor: invalid data; incompatible data lengths."
450            raise RuntimeError(msg)
451
452        self.nfunc = nfunc
453        # a -- An M x N matrix.
454        # b -- An M x nrhs matrix or M vector.
455        npts = len(self.x)
456        nq = nr
457        sqrt_alpha = math.sqrt(math.fabs(self.alpha))
458        if sqrt_alpha < 0.0:
459            nq = 0
460
461        # If we need to fit the background, add a term
462        if self.est_bck == True:
463            nfunc_0 = nfunc
464            nfunc += 1
465
466        a = np.zeros([npts + nq, nfunc])
467        b = np.zeros(npts + nq)
468        err = np.zeros([nfunc, nfunc])
469
470        # Construct the a matrix and b vector that represent the problem
471        t_0 = time.time()
472        try:
473            self._get_matrix(nfunc, nq, a, b)
474        except Exception as exc:
475            raise RuntimeError("Invertor: could not invert I(Q)\n  %s" % str(exc))
476
477        # Perform the inversion (least square fit)
478        c, chi2, _, _ = lstsq(a, b)
479        # Sanity check
480        try:
481            float(chi2)
482        except:
483            chi2 = -1.0
484        self.chi2 = chi2
485
486        inv_cov = np.zeros([nfunc, nfunc])
487        # Get the covariance matrix, defined as inv_cov = a_transposed * a
488        self._get_invcov_matrix(nfunc, nr, a, inv_cov)
489
490        # Compute the reg term size for the output
491        sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
492
493        if math.fabs(self.alpha) > 0:
494            new_alpha = sum_sig / (sum_reg / self.alpha)
495        else:
496            new_alpha = 0.0
497        self.suggested_alpha = new_alpha
498
499        try:
500            cov = np.linalg.pinv(inv_cov)
501            err = math.fabs(chi2 / float(npts - nfunc)) * cov
502        except:
503            # We were not able to estimate the errors
504            # Return an empty error matrix
505            logger.error(sys.exc_value)
506
507        # Keep a copy of the last output
508        if self.est_bck == False:
509            self.out = c
510            self.cov = err
511        else:
512            self.background = c[0]
513
514            err_0 = np.zeros([nfunc, nfunc])
515            c_0 = np.zeros(nfunc)
516
517            for i in range(nfunc_0):
518                c_0[i] = c[i + 1]
519                for j in range(nfunc_0):
520                    err_0[i][j] = err[i + 1][j + 1]
521
522            self.out = c_0
523            self.cov = err_0
524
525        # Store computation time
526        self.elapsed = time.time() - t_0
527
528        return self.out, self.cov
529
530    def estimate_numterms(self, isquit_func=None):
531        """
532        Returns a reasonable guess for the
533        number of terms
534
535        :param isquit_func:
536          reference to thread function to call to check whether the computation needs to
537          be stopped.
538
539        :return: number of terms, alpha, message
540
541        """
542        from num_term import NTermEstimator
543        estimator = NTermEstimator(self.clone())
544        try:
545            return estimator.num_terms(isquit_func)
546        except:
547            # If we fail, estimate alpha and return the default
548            # number of terms
549            best_alpha, _, _ = self.estimate_alpha(self.nfunc)
550            logger.warning("Invertor.estimate_numterms: %s" % sys.exc_value)
551            return self.nfunc, best_alpha, "Could not estimate number of terms"
552
553    def estimate_alpha(self, nfunc):
554        """
555        Returns a reasonable guess for the
556        regularization constant alpha
557
558        :param nfunc: number of terms to use in the expansion.
559
560        :return: alpha, message, elapsed
561
562        where alpha is the estimate for alpha,
563        message is a message for the user,
564        elapsed is the computation time
565        """
566        #import time
567        try:
568            pr = self.clone()
569
570            # T_0 for computation time
571            starttime = time.time()
572            elapsed = 0
573
574            # If the current alpha is zero, try
575            # another value
576            if pr.alpha <= 0:
577                pr.alpha = 0.0001
578
579            # Perform inversion to find the largest alpha
580            out, _ = pr.invert(nfunc)
581            elapsed = time.time() - starttime
582            initial_alpha = pr.alpha
583            initial_peaks = pr.get_peaks(out)
584
585            # Try the inversion with the estimated alpha
586            pr.alpha = pr.suggested_alpha
587            out, _ = pr.invert(nfunc)
588
589            npeaks = pr.get_peaks(out)
590            # if more than one peak to start with
591            # just return the estimate
592            if npeaks > 1:
593                #message = "Your P(r) is not smooth,
594                #please check your inversion parameters"
595                message = None
596                return pr.suggested_alpha, message, elapsed
597            else:
598
599                # Look at smaller values
600                # We assume that for the suggested alpha, we have 1 peak
601                # if not, send a message to change parameters
602                alpha = pr.suggested_alpha
603                best_alpha = pr.suggested_alpha
604                found = False
605                for i in range(10):
606                    pr.alpha = (0.33) ** (i + 1) * alpha
607                    out, _ = pr.invert(nfunc)
608
609                    peaks = pr.get_peaks(out)
610                    if peaks > 1:
611                        found = True
612                        break
613                    best_alpha = pr.alpha
614
615                # If we didn't find a turning point for alpha and
616                # the initial alpha already had only one peak,
617                # just return that
618                if not found and initial_peaks == 1 and \
619                    initial_alpha < best_alpha:
620                    best_alpha = initial_alpha
621
622                # Check whether the size makes sense
623                message = ''
624
625                if not found:
626                    message = None
627                elif best_alpha >= 0.5 * pr.suggested_alpha:
628                    # best alpha is too big, return a
629                    # reasonable value
630                    message = "The estimated alpha for your system is too "
631                    message += "large. "
632                    message += "Try increasing your maximum distance."
633
634                return best_alpha, message, elapsed
635
636        except:
637            message = "Invertor.estimate_alpha: %s" % sys.exc_value
638            return 0, message, elapsed
639
640    def to_file(self, path, npts=100):
641        """
642        Save the state to a file that will be readable
643        by SliceView.
644
645        :param path: path of the file to write
646        :param npts: number of P(r) points to be written
647
648        """
649        file = open(path, 'w')
650        file.write("#d_max=%g\n" % self.d_max)
651        file.write("#nfunc=%g\n" % self.nfunc)
652        file.write("#alpha=%g\n" % self.alpha)
653        file.write("#chi2=%g\n" % self.chi2)
654        file.write("#elapsed=%g\n" % self.elapsed)
655        file.write("#qmin=%s\n" % str(self.q_min))
656        file.write("#qmax=%s\n" % str(self.q_max))
657        file.write("#slit_height=%g\n" % self.slit_height)
658        file.write("#slit_width=%g\n" % self.slit_width)
659        file.write("#background=%g\n" % self.background)
660        if self.est_bck == True:
661            file.write("#has_bck=1\n")
662        else:
663            file.write("#has_bck=0\n")
664        file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
665        if self.out is not None:
666            if len(self.out) == len(self.cov):
667                for i in range(len(self.out)):
668                    file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]),
669                                                   str(self.cov[i][i])))
670        file.write("<r>  <Pr>  <dPr>\n")
671        r = np.arange(0.0, self.d_max, self.d_max / npts)
672
673        for r_i in r:
674            (value, err) = self.pr_err(self.out, self.cov, r_i)
675            file.write("%g  %g  %g\n" % (r_i, value, err))
676
677        file.close()
678
679    def from_file(self, path):
680        """
681        Load the state of the Invertor from a file,
682        to be able to generate P(r) from a set of
683        parameters.
684
685        :param path: path of the file to load
686
687        """
688        #import os
689        #import re
690        if os.path.isfile(path):
691            try:
692                fd = open(path, 'r')
693
694                buff = fd.read()
695                lines = buff.split('\n')
696                for line in lines:
697                    if line.startswith('#d_max='):
698                        toks = line.split('=')
699                        self.d_max = float(toks[1])
700                    elif line.startswith('#nfunc='):
701                        toks = line.split('=')
702                        self.nfunc = int(toks[1])
703                        self.out = np.zeros(self.nfunc)
704                        self.cov = np.zeros([self.nfunc, self.nfunc])
705                    elif line.startswith('#alpha='):
706                        toks = line.split('=')
707                        self.alpha = float(toks[1])
708                    elif line.startswith('#chi2='):
709                        toks = line.split('=')
710                        self.chi2 = float(toks[1])
711                    elif line.startswith('#elapsed='):
712                        toks = line.split('=')
713                        self.elapsed = float(toks[1])
714                    elif line.startswith('#alpha_estimate='):
715                        toks = line.split('=')
716                        self.suggested_alpha = float(toks[1])
717                    elif line.startswith('#qmin='):
718                        toks = line.split('=')
719                        try:
720                            self.q_min = float(toks[1])
721                        except:
722                            self.q_min = None
723                    elif line.startswith('#qmax='):
724                        toks = line.split('=')
725                        try:
726                            self.q_max = float(toks[1])
727                        except:
728                            self.q_max = None
729                    elif line.startswith('#slit_height='):
730                        toks = line.split('=')
731                        self.slit_height = float(toks[1])
732                    elif line.startswith('#slit_width='):
733                        toks = line.split('=')
734                        self.slit_width = float(toks[1])
735                    elif line.startswith('#background='):
736                        toks = line.split('=')
737                        self.background = float(toks[1])
738                    elif line.startswith('#has_bck='):
739                        toks = line.split('=')
740                        if int(toks[1]) == 1:
741                            self.est_bck = True
742                        else:
743                            self.est_bck = False
744
745                    # Now read in the parameters
746                    elif line.startswith('#C_'):
747                        toks = line.split('=')
748                        p = re.compile('#C_([0-9]+)')
749                        m = p.search(toks[0])
750                        toks2 = toks[1].split('+-')
751                        i = int(m.group(1))
752                        self.out[i] = float(toks2[0])
753
754                        self.cov[i][i] = float(toks2[1])
755
756            except:
757                msg = "Invertor.from_file: corrupted file\n%s" % sys.exc_value
758                raise RuntimeError(msg)
759        else:
760            msg = "Invertor.from_file: '%s' is not a file" % str(path)
761            raise RuntimeError(msg)
Note: See TracBrowser for help on using the repository browser.