source: sasview/src/sas/sascalc/pr/invertor.py @ 0e0c645

ticket-1243
Last change on this file since 0e0c645 was 57e48ca, checked in by Paul Kienzle <pkienzle@…>, 6 years ago

fix rcond requires float error in pr

  • Property mode set to 100644
File size: 25.4 KB
Line 
1# pylint: disable=invalid-name
2"""
3Module to perform P(r) inversion.
4The module contains the Invertor class.
5
6FIXME: The way the Invertor interacts with its C component should be cleaned up
7"""
8from __future__ import division
9
10import numpy as np
11import sys
12import math
13import time
14import copy
15import os
16import re
17import logging
18from numpy.linalg import lstsq
19from scipy import optimize
20from sas.sascalc.pr._pr_inversion import Cinvertor
21
22logger = logging.getLogger(__name__)
23
def help():
    """
    Provide the general online help text for the P(r) inversion.

    The text names the reference for the method (Moore, 1980), shows the
    fit function and lists the three user inputs.  Topic selection is not
    supported: the same text is always returned.

    :return: the help text as a single string
    """
    sections = (
        # Reference for the method
        "The inversion approach is based on Moore, J. Appl. Cryst. "
        "(1980) 13, 168-175.\n\n",
        # Expansion in base functions
        "P(r) is set to be equal to an expansion of base functions "
        "of the type "
        "phi_n(r) = 2*r*sin(pi*n*r/D_max). The coefficient of each "
        "base functions "
        "in the expansion is found by performing a least square fit "
        "with the "
        "following fit function:\n\n",
        # Fit function
        "chi**2 = sum_i[ I_meas(q_i) - I_th(q_i) ]**2/error**2 +"
        "Reg_term\n\n",
        # Meaning of the terms of the fit function
        "where I_meas(q) is the measured scattering intensity and "
        "I_th(q) is "
        "the prediction from the Fourier transform of the P(r) "
        "expansion. "
        "The Reg_term term is a regularization term set to the second"
        " derivative "
        "d**2P(r)/dr**2 integrated over r. It is used to produce "
        "a smooth P(r) output.\n\n",
        # User inputs
        "The following are user inputs:\n\n",
        "   - Number of terms: the number of base functions in the P(r)"
        " expansion.\n\n",
        "   - Regularization constant: a multiplicative constant "
        "to set the size of "
        "the regularization term.\n\n",
        "   - Maximum distance: the maximum distance between any "
        "two points in the system.\n",
    )
    return "".join(sections)
58
59
class Invertor(Cinvertor):
    """
    Invertor class to perform P(r) inversion

    The problem is solved by posing the problem as  Ax = b,
    where x is the set of coefficients we are looking for.

    Npts is the number of points.

    In the following i refers to the ith base function coefficient.
    The matrix has its entries j in its first Npts rows set to ::

        A[j][i] = (Fourier transformed base function for point j)

    We then choose a number of r-points, n_r, to evaluate the second
    derivative of P(r) at. This is used as our regularization term.
    For a vector r of length n_r, the following n_r rows are set to ::

        A[j+Npts][i] = (2nd derivative of P(r), d**2(P(r))/d(r)**2,
        evaluated at r[j])

    The vector b has its first Npts entries set to ::

        b[j] = (I(q) observed for point j)

    The following n_r entries are set to zero.

    The result is found by using numpy.linalg.lstsq to solve the
    least-squares problem and find the coefficients x.

    The data attributes ``x``, ``y``, ``err``, ``d_max``, ``q_min``,
    ``q_max``, ``alpha``, ``slit_height``, ``slit_width`` and ``est_bck``
    are not stored on the Python instance: ``__setattr__`` and
    ``__getattr__`` forward them to the Cinvertor setters and getters.

    Methods inherited from Cinvertor:

    * ``get_peaks(pars)``: returns the number of P(r) peaks
    * ``oscillations(pars)``: returns the oscillation parameters for the output P(r)
    * ``get_positive(pars)``: returns the fraction of P(r) that is above zero
    * ``get_pos_err(pars)``: returns the fraction of P(r) that is 1-sigma above zero
    """
    ## Chi-squared of the last computation
    chi2 = 0
    ## Time elapsed for the last computation (difference of time.time() values)
    elapsed = 0
    ## Alpha to get the reg term the same size as the signal
    suggested_alpha = 0
    ## Last number of base functions used
    nfunc = 10
    ## Last output values (base function coefficients), None until a fit is run
    out = None
    ## Last errors on output values (covariance matrix), None until a fit is run
    cov = None
    ## Background value
    background = 0
    ## Information dictionary for application use.
    ## NOTE: this is a class-level dict, shared by every instance that has
    ## not been given its own ``info`` (``clone`` assigns a deep copy).
    info = {}
113
114    def __init__(self):
115        Cinvertor.__init__(self)
116
117    def __setstate__(self, state):
118        """
119        restore the state of invertor for pickle
120        """
121        (self.__dict__, self.alpha, self.d_max,
122         self.q_min, self.q_max,
123         self.x, self.y,
124         self.err, self.est_bck,
125         self.slit_height, self.slit_width) = state
126
127    def __reduce_ex__(self, proto):
128        """
129        Overwrite the __reduce_ex__
130        """
131
132        state = (self.__dict__,
133                 self.alpha, self.d_max,
134                 self.q_min, self.q_max,
135                 self.x, self.y,
136                 self.err, self.est_bck,
137                 self.slit_height, self.slit_width,
138                )
139        return (Invertor, tuple(), state, None, None)
140
141    def __setattr__(self, name, value):
142        """
143        Set the value of an attribute.
144        Access the parent class methods for
145        x, y, err, d_max, q_min, q_max and alpha
146        """
147        if name == 'x':
148            if 0.0 in value:
149                msg = "Invertor: one of your q-values is zero. "
150                msg += "Delete that entry before proceeding"
151                raise ValueError(msg)
152            return self.set_x(value)
153        elif name == 'y':
154            return self.set_y(value)
155        elif name == 'err':
156            value2 = abs(value)
157            return self.set_err(value2)
158        elif name == 'd_max':
159            if value <= 0.0:
160                msg = "Invertor: d_max must be greater than zero."
161                msg += "Correct that entry before proceeding"
162                raise ValueError(msg)
163            return self.set_dmax(value)
164        elif name == 'q_min':
165            if value is None:
166                return self.set_qmin(-1.0)
167            return self.set_qmin(value)
168        elif name == 'q_max':
169            if value is None:
170                return self.set_qmax(-1.0)
171            return self.set_qmax(value)
172        elif name == 'alpha':
173            return self.set_alpha(value)
174        elif name == 'slit_height':
175            return self.set_slit_height(value)
176        elif name == 'slit_width':
177            return self.set_slit_width(value)
178        elif name == 'est_bck':
179            if value == True:
180                return self.set_est_bck(1)
181            elif value == False:
182                return self.set_est_bck(0)
183            else:
184                raise ValueError("Invertor: est_bck can only be True or False")
185
186        return Cinvertor.__setattr__(self, name, value)
187
188    def __getattr__(self, name):
189        """
190        Return the value of an attribute
191        """
192        #import numpy
193        if name == 'x':
194            out = np.ones(self.get_nx())
195            self.get_x(out)
196            return out
197        elif name == 'y':
198            out = np.ones(self.get_ny())
199            self.get_y(out)
200            return out
201        elif name == 'err':
202            out = np.ones(self.get_nerr())
203            self.get_err(out)
204            return out
205        elif name == 'd_max':
206            return self.get_dmax()
207        elif name == 'q_min':
208            qmin = self.get_qmin()
209            if qmin < 0:
210                return None
211            return qmin
212        elif name == 'q_max':
213            qmax = self.get_qmax()
214            if qmax < 0:
215                return None
216            return qmax
217        elif name == 'alpha':
218            return self.get_alpha()
219        elif name == 'slit_height':
220            return self.get_slit_height()
221        elif name == 'slit_width':
222            return self.get_slit_width()
223        elif name == 'est_bck':
224            value = self.get_est_bck()
225            return value == 1
226        elif name in self.__dict__:
227            return self.__dict__[name]
228        return None
229
230    def clone(self):
231        """
232        Return a clone of this instance
233        """
234        #import copy
235
236        invertor = Invertor()
237        invertor.chi2 = self.chi2
238        invertor.elapsed = self.elapsed
239        invertor.nfunc = self.nfunc
240        invertor.alpha = self.alpha
241        invertor.d_max = self.d_max
242        invertor.q_min = self.q_min
243        invertor.q_max = self.q_max
244
245        invertor.x = self.x
246        invertor.y = self.y
247        invertor.err = self.err
248        invertor.est_bck = self.est_bck
249        invertor.background = self.background
250        invertor.slit_height = self.slit_height
251        invertor.slit_width = self.slit_width
252
253        invertor.info = copy.deepcopy(self.info)
254
255        return invertor
256
257    def invert(self, nfunc=10, nr=20):
258        """
259        Perform inversion to P(r)
260
261        The problem is solved by posing the problem as  Ax = b,
262        where x is the set of coefficients we are looking for.
263
264        Npts is the number of points.
265
266        In the following i refers to the ith base function coefficient.
267        The matrix has its entries j in its first Npts rows set to ::
268
269            A[i][j] = (Fourier transformed base function for point j)
270
271        We then choose a number of r-points, n_r, to evaluate the second
272        derivative of P(r) at. This is used as our regularization term.
273        For a vector r of length n_r, the following n_r rows are set to ::
274
275            A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
276
277        The vector b has its first Npts entries set to ::
278
279            b[j] = (I(q) observed for point j)
280
281        The following n_r entries are set to zero.
282
283        The result is found by using scipy.linalg.basic.lstsq to invert
284        the matrix and find the coefficients x.
285
286        :param nfunc: number of base functions to use.
287        :param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
288        :return: c_out, c_cov - the coefficients with covariance matrix
289        """
290        # Reset the background value before proceeding
291        # self.background = 0.0
292        if not self.est_bck:
293            self.y -= self.background
294        out, cov = self.lstsq(nfunc, nr=nr)
295        if not self.est_bck:
296            self.y += self.background
297        return out, cov
298
299    def iq(self, out, q):
300        """
301        Function to call to evaluate the scattering intensity
302
303        :param args: c-parameters, and q
304        :return: I(q)
305
306        """
307        return Cinvertor.iq(self, out, q) + self.background
308
309    def invert_optimize(self, nfunc=10, nr=20):
310        """
311        Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
312
313        This probably produce more reliable results, but is much slower.
314        The minimization function is set to
315        sum_i[ (I_obs(q_i) - I_theo(q_i))/err**2 ] + alpha * reg_term,
316        where the reg_term is given by Svergun: it is the integral of
317        the square of the first derivative
318        of P(r), d(P(r))/dr, integrated over the full range of r.
319
320        :param nfunc: number of base functions to use.
321        :param nr: number of r points to evaluate the 2nd derivative at
322            for the reg. term.
323
324        :return: c_out, c_cov - the coefficients with covariance matrix
325
326        """
327        self.nfunc = nfunc
328        # First, check that the current data is valid
329        if self.is_valid() <= 0:
330            msg = "Invertor.invert: Data array are of different length"
331            raise RuntimeError(msg)
332
333        p = np.ones(nfunc)
334        t_0 = time.time()
335        out, cov_x, _, _, _ = optimize.leastsq(self.residuals, p, full_output=1)
336
337        # Compute chi^2
338        res = self.residuals(out)
339        chisqr = 0
340        for i in range(len(res)):
341            chisqr += res[i]
342
343        self.chi2 = chisqr
344
345        # Store computation time
346        self.elapsed = time.time() - t_0
347
348        if cov_x is None:
349            cov_x = np.ones([nfunc, nfunc])
350            cov_x *= math.fabs(chisqr)
351        return out, cov_x
352
353    def pr_fit(self, nfunc=5):
354        """
355        This is a direct fit to a given P(r). It assumes that the y data
356        is set to some P(r) distribution that we are trying to reproduce
357        with a set of base functions.
358
359        This method is provided as a test.
360        """
361        # First, check that the current data is valid
362        if self.is_valid() <= 0:
363            msg = "Invertor.invert: Data arrays are of different length"
364            raise RuntimeError(msg)
365
366        p = np.ones(nfunc)
367        t_0 = time.time()
368        out, cov_x, _, _, _ = optimize.leastsq(self.pr_residuals, p, full_output=1)
369
370        # Compute chi^2
371        res = self.pr_residuals(out)
372        chisqr = 0
373        for i in range(len(res)):
374            chisqr += res[i]
375
376        self.chisqr = chisqr
377
378        # Store computation time
379        self.elapsed = time.time() - t_0
380
381        return out, cov_x
382
383    def pr_err(self, c, c_cov, r):
384        """
385        Returns the value of P(r) for a given r, and base function
386        coefficients, with error.
387
388        :param c: base function coefficients
389        :param c_cov: covariance matrice of the base function coefficients
390        :param r: r-value to evaluate P(r) at
391
392        :return: P(r)
393
394        """
395        return self.get_pr_err(c, c_cov, r)
396
397    def _accept_q(self, q):
398        """
399        Check q-value against user-defined range
400        """
401        if self.q_min is not None and q < self.q_min:
402            return False
403        if self.q_max is not None and q > self.q_max:
404            return False
405        return True
406
407    def lstsq(self, nfunc=5, nr=20):
408        """
409        The problem is solved by posing the problem as  Ax = b,
410        where x is the set of coefficients we are looking for.
411
412        Npts is the number of points.
413
414        In the following i refers to the ith base function coefficient.
415        The matrix has its entries j in its first Npts rows set to ::
416
417            A[i][j] = (Fourier transformed base function for point j)
418
419        We then choose a number of r-points, n_r, to evaluate the second
420        derivative of P(r) at. This is used as our regularization term.
421        For a vector r of length n_r, the following n_r rows are set to ::
422
423            A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2,
424            evaluated at r[j])
425
426        The vector b has its first Npts entries set to ::
427
428            b[j] = (I(q) observed for point j)
429
430        The following n_r entries are set to zero.
431
432        The result is found by using scipy.linalg.basic.lstsq to invert
433        the matrix and find the coefficients x.
434
435        :param nfunc: number of base functions to use.
436        :param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
437
438        If the result does not allow us to compute the covariance matrix,
439        a matrix filled with zeros will be returned.
440
441        """
442        # Note: To make sure an array is contiguous:
443        # blah = np.ascontiguousarray(blah_original)
444        # ... before passing it to C
445
446        if self.is_valid() < 0:
447            msg = "Invertor: invalid data; incompatible data lengths."
448            raise RuntimeError(msg)
449
450        self.nfunc = nfunc
451        # a -- An M x N matrix.
452        # b -- An M x nrhs matrix or M vector.
453        npts = len(self.x)
454        nq = nr
455        sqrt_alpha = math.sqrt(math.fabs(self.alpha))
456        if sqrt_alpha < 0.0:
457            nq = 0
458
459        # If we need to fit the background, add a term
460        if self.est_bck:
461            nfunc_0 = nfunc
462            nfunc += 1
463
464        a = np.zeros([npts + nq, nfunc])
465        b = np.zeros(npts + nq)
466        err = np.zeros([nfunc, nfunc])
467
468        # Construct the a matrix and b vector that represent the problem
469        t_0 = time.time()
470        try:
471            self._get_matrix(nfunc, nq, a, b)
472        except Exception as exc:
473            raise RuntimeError("Invertor: could not invert I(Q)\n  %s" % str(exc))
474
475        # Perform the inversion (least square fit)
476        c, chi2, _, _ = lstsq(a, b, rcond=-1)
477        # Sanity check
478        try:
479            float(chi2)
480        except:
481            chi2 = -1.0
482        self.chi2 = chi2
483
484        inv_cov = np.zeros([nfunc, nfunc])
485        # Get the covariance matrix, defined as inv_cov = a_transposed * a
486        self._get_invcov_matrix(nfunc, nr, a, inv_cov)
487
488        # Compute the reg term size for the output
489        sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
490
491        if math.fabs(self.alpha) > 0:
492            new_alpha = sum_sig / (sum_reg / self.alpha)
493        else:
494            new_alpha = 0.0
495        self.suggested_alpha = new_alpha
496
497        try:
498            cov = np.linalg.pinv(inv_cov)
499            err = math.fabs(chi2 / (npts - nfunc)) * cov
500        except Exception as exc:
501            # We were not able to estimate the errors
502            # Return an empty error matrix
503            logger.error(exc)
504
505        # Keep a copy of the last output
506        if not self.est_bck:
507            self.out = c
508            self.cov = err
509        else:
510            self.background = c[0]
511
512            err_0 = np.zeros([nfunc, nfunc])
513            c_0 = np.zeros(nfunc)
514
515            for i in range(nfunc_0):
516                c_0[i] = c[i + 1]
517                for j in range(nfunc_0):
518                    err_0[i][j] = err[i + 1][j + 1]
519
520            self.out = c_0
521            self.cov = err_0
522
523        # Store computation time
524        self.elapsed = time.time() - t_0
525
526        return self.out, self.cov
527
528    def estimate_numterms(self, isquit_func=None):
529        """
530        Returns a reasonable guess for the
531        number of terms
532
533        :param isquit_func:
534          reference to thread function to call to check whether the computation needs to
535          be stopped.
536
537        :return: number of terms, alpha, message
538
539        """
540        from .num_term import NTermEstimator
541        estimator = NTermEstimator(self.clone())
542        try:
543            return estimator.num_terms(isquit_func)
544        except Exception as exc:
545            # If we fail, estimate alpha and return the default
546            # number of terms
547            best_alpha, _, _ = self.estimate_alpha(self.nfunc)
548            logger.warning("Invertor.estimate_numterms: %s" % exc)
549            return self.nfunc, best_alpha, "Could not estimate number of terms"
550
551    def estimate_alpha(self, nfunc):
552        """
553        Returns a reasonable guess for the
554        regularization constant alpha
555
556        :param nfunc: number of terms to use in the expansion.
557
558        :return: alpha, message, elapsed
559
560        where alpha is the estimate for alpha,
561        message is a message for the user,
562        elapsed is the computation time
563        """
564        #import time
565        try:
566            pr = self.clone()
567
568            # T_0 for computation time
569            starttime = time.time()
570            elapsed = 0
571
572            # If the current alpha is zero, try
573            # another value
574            if pr.alpha <= 0:
575                pr.alpha = 0.0001
576
577            # Perform inversion to find the largest alpha
578            out, _ = pr.invert(nfunc)
579            elapsed = time.time() - starttime
580            initial_alpha = pr.alpha
581            initial_peaks = pr.get_peaks(out)
582
583            # Try the inversion with the estimated alpha
584            pr.alpha = pr.suggested_alpha
585            out, _ = pr.invert(nfunc)
586
587            npeaks = pr.get_peaks(out)
588            # if more than one peak to start with
589            # just return the estimate
590            if npeaks > 1:
591                #message = "Your P(r) is not smooth,
592                #please check your inversion parameters"
593                message = None
594                return pr.suggested_alpha, message, elapsed
595            else:
596
597                # Look at smaller values
598                # We assume that for the suggested alpha, we have 1 peak
599                # if not, send a message to change parameters
600                alpha = pr.suggested_alpha
601                best_alpha = pr.suggested_alpha
602                found = False
603                for i in range(10):
604                    pr.alpha = (0.33) ** (i + 1) * alpha
605                    out, _ = pr.invert(nfunc)
606
607                    peaks = pr.get_peaks(out)
608                    if peaks > 1:
609                        found = True
610                        break
611                    best_alpha = pr.alpha
612
613                # If we didn't find a turning point for alpha and
614                # the initial alpha already had only one peak,
615                # just return that
616                if not found and initial_peaks == 1 and \
617                    initial_alpha < best_alpha:
618                    best_alpha = initial_alpha
619
620                # Check whether the size makes sense
621                message = ''
622
623                if not found:
624                    message = None
625                elif best_alpha >= 0.5 * pr.suggested_alpha:
626                    # best alpha is too big, return a
627                    # reasonable value
628                    message = "The estimated alpha for your system is too "
629                    message += "large. "
630                    message += "Try increasing your maximum distance."
631
632                return best_alpha, message, elapsed
633
634        except Exception as exc:
635            message = "Invertor.estimate_alpha: %s" % exc
636            return 0, message, elapsed
637
638    def to_file(self, path, npts=100):
639        """
640        Save the state to a file that will be readable
641        by SliceView.
642
643        :param path: path of the file to write
644        :param npts: number of P(r) points to be written
645
646        """
647        file = open(path, 'w')
648        file.write("#d_max=%g\n" % self.d_max)
649        file.write("#nfunc=%g\n" % self.nfunc)
650        file.write("#alpha=%g\n" % self.alpha)
651        file.write("#chi2=%g\n" % self.chi2)
652        file.write("#elapsed=%g\n" % self.elapsed)
653        file.write("#qmin=%s\n" % str(self.q_min))
654        file.write("#qmax=%s\n" % str(self.q_max))
655        file.write("#slit_height=%g\n" % self.slit_height)
656        file.write("#slit_width=%g\n" % self.slit_width)
657        file.write("#background=%g\n" % self.background)
658        if self.est_bck:
659            file.write("#has_bck=1\n")
660        else:
661            file.write("#has_bck=0\n")
662        file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
663        if self.out is not None:
664            if len(self.out) == len(self.cov):
665                for i in range(len(self.out)):
666                    file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]),
667                                                   str(self.cov[i][i])))
668        file.write("<r>  <Pr>  <dPr>\n")
669        r = np.arange(0.0, self.d_max, self.d_max / npts)
670
671        for r_i in r:
672            (value, err) = self.pr_err(self.out, self.cov, r_i)
673            file.write("%g  %g  %g\n" % (r_i, value, err))
674
675        file.close()
676
677    def from_file(self, path):
678        """
679        Load the state of the Invertor from a file,
680        to be able to generate P(r) from a set of
681        parameters.
682
683        :param path: path of the file to load
684
685        """
686        #import os
687        #import re
688        if os.path.isfile(path):
689            try:
690                fd = open(path, 'r')
691
692                buff = fd.read()
693                lines = buff.split('\n')
694                for line in lines:
695                    if line.startswith('#d_max='):
696                        toks = line.split('=')
697                        self.d_max = float(toks[1])
698                    elif line.startswith('#nfunc='):
699                        toks = line.split('=')
700                        self.nfunc = int(toks[1])
701                        self.out = np.zeros(self.nfunc)
702                        self.cov = np.zeros([self.nfunc, self.nfunc])
703                    elif line.startswith('#alpha='):
704                        toks = line.split('=')
705                        self.alpha = float(toks[1])
706                    elif line.startswith('#chi2='):
707                        toks = line.split('=')
708                        self.chi2 = float(toks[1])
709                    elif line.startswith('#elapsed='):
710                        toks = line.split('=')
711                        self.elapsed = float(toks[1])
712                    elif line.startswith('#alpha_estimate='):
713                        toks = line.split('=')
714                        self.suggested_alpha = float(toks[1])
715                    elif line.startswith('#qmin='):
716                        toks = line.split('=')
717                        try:
718                            self.q_min = float(toks[1])
719                        except:
720                            self.q_min = None
721                    elif line.startswith('#qmax='):
722                        toks = line.split('=')
723                        try:
724                            self.q_max = float(toks[1])
725                        except:
726                            self.q_max = None
727                    elif line.startswith('#slit_height='):
728                        toks = line.split('=')
729                        self.slit_height = float(toks[1])
730                    elif line.startswith('#slit_width='):
731                        toks = line.split('=')
732                        self.slit_width = float(toks[1])
733                    elif line.startswith('#background='):
734                        toks = line.split('=')
735                        self.background = float(toks[1])
736                    elif line.startswith('#has_bck='):
737                        toks = line.split('=')
738                        self.est_bck = int(toks[1]) == 1
739
740                    # Now read in the parameters
741                    elif line.startswith('#C_'):
742                        toks = line.split('=')
743                        p = re.compile('#C_([0-9]+)')
744                        m = p.search(toks[0])
745                        toks2 = toks[1].split('+-')
746                        i = int(m.group(1))
747                        self.out[i] = float(toks2[0])
748
749                        self.cov[i][i] = float(toks2[1])
750
751            except Exception as exc:
752                msg = "Invertor.from_file: corrupted file\n%s" % exc
753                raise RuntimeError(msg)
754        else:
755            msg = "Invertor.from_file: '%s' is not a file" % str(path)
756            raise RuntimeError(msg)
Note: See TracBrowser for help on using the repository browser.