source: sasview/src/sas/sascalc/pr/invertor.py @ 0dde203

ticket-1249
Last change on this file since 0dde203 was 7af652d, checked in by GitHub <noreply@…>, 6 years ago

Merge branch 'master' into py37-sascalc

  • Property mode set to 100644
File size: 25.5 KB
Line 
1# pylint: disable=invalid-name
2"""
3Module to perform P(r) inversion.
4The module contains the Invertor class.
5
6FIXME: The way the Invertor interacts with its C component should be cleaned up
7"""
8from __future__ import division
9
10import numpy as np
11import sys
12import math
13import time
14import copy
15import os
16import re
17import logging
18from numpy.linalg import lstsq
19from scipy import optimize
20from sas.sascalc.pr._pr_inversion import Cinvertor
21
22logger = logging.getLogger(__name__)
23
24def help():
25    """
26    Provide general online help text
27    Future work: extend this function to allow topic selection
28    """
29    info_txt = "The inversion approach is based on Moore, J. Appl. Cryst. "
30    info_txt += "(1980) 13, 168-175.\n\n"
31    info_txt += "P(r) is set to be equal to an expansion of base functions "
32    info_txt += "of the type "
33    info_txt += "phi_n(r) = 2*r*sin(pi*n*r/D_max). The coefficient of each "
34    info_txt += "base functions "
35    info_txt += "in the expansion is found by performing a least square fit "
36    info_txt += "with the "
37    info_txt += "following fit function:\n\n"
38    info_txt += "chi**2 = sum_i[ I_meas(q_i) - I_th(q_i) ]**2/error**2 +"
39    info_txt += "Reg_term\n\n"
40    info_txt += "where I_meas(q) is the measured scattering intensity and "
41    info_txt += "I_th(q) is "
42    info_txt += "the prediction from the Fourier transform of the P(r) "
43    info_txt += "expansion. "
44    info_txt += "The Reg_term term is a regularization term set to the second"
45    info_txt += " derivative "
46    info_txt += "d**2P(r)/dr**2 integrated over r. It is used to produce "
47    info_txt += "a smooth P(r) output.\n\n"
48    info_txt += "The following are user inputs:\n\n"
49    info_txt += "   - Number of terms: the number of base functions in the P(r)"
50    info_txt += " expansion.\n\n"
51    info_txt += "   - Regularization constant: a multiplicative constant "
52    info_txt += "to set the size of "
53    info_txt += "the regularization term.\n\n"
54    info_txt += "   - Maximum distance: the maximum distance between any "
55    info_txt += "two points in the system.\n"
56
57    return info_txt
58
59
60class Invertor(Cinvertor):
61    """
62    Invertor class to perform P(r) inversion
63
64    The problem is solved by posing the problem as  Ax = b,
65    where x is the set of coefficients we are looking for.
66
67    Npts is the number of points.
68
69    In the following i refers to the ith base function coefficient.
70    The matrix has its entries j in its first Npts rows set to ::
71
72        A[j][i] = (Fourier transformed base function for point j)
73
74    We then choose a number of r-points, n_r, to evaluate the second
75    derivative of P(r) at. This is used as our regularization term.
76    For a vector r of length n_r, the following n_r rows are set to ::
77
78        A[j+Npts][i] = (2nd derivative of P(r), d**2(P(r))/d(r)**2,
79        evaluated at r[j])
80
81    The vector b has its first Npts entries set to ::
82
83        b[j] = (I(q) observed for point j)
84
85    The following n_r entries are set to zero.
86
87    The result is found by using scipy.linalg.basic.lstsq to invert
88    the matrix and find the coefficients x.
89
90    Methods inherited from Cinvertor:
91
92    * ``get_peaks(pars)``: returns the number of P(r) peaks
93    * ``oscillations(pars)``: returns the oscillation parameters for the output P(r)
94    * ``get_positive(pars)``: returns the fraction of P(r) that is above zero
95    * ``get_pos_err(pars)``: returns the fraction of P(r) that is 1-sigma above zero
96    """
97    ## Chisqr of the last computation
98    chi2 = 0
99    ## Time elapsed for last computation
100    elapsed = 0
101    ## Alpha to get the reg term the same size as the signal
102    suggested_alpha = 0
103    ## Last number of base functions used
104    nfunc = 10
105    ## Last output values
106    out = None
107    ## Last errors on output values
108    cov = None
109    ## Background value
110    background = 0
111    ## Information dictionary for application use
112    info = {}
113
114    def __init__(self):
115        Cinvertor.__init__(self)
116
117    def __setstate__(self, state):
118        """
119        restore the state of invertor for pickle
120        """
121        (self.__dict__, self.alpha, self.d_max,
122         self.q_min, self.q_max,
123         self.x, self.y,
124         self.err, self.est_bck,
125         self.slit_height, self.slit_width) = state
126
127    def __reduce_ex__(self, proto):
128        """
129        Overwrite the __reduce_ex__
130        """
131
132        state = (self.__dict__,
133                 self.alpha, self.d_max,
134                 self.q_min, self.q_max,
135                 self.x, self.y,
136                 self.err, self.est_bck,
137                 self.slit_height, self.slit_width,
138                )
139        return (Invertor, tuple(), state, None, None)
140
141    def __setattr__(self, name, value):
142        """
143        Set the value of an attribute.
144        Access the parent class methods for
145        x, y, err, d_max, q_min, q_max and alpha
146        """
147        if name == 'x':
148            if 0.0 in value:
149                msg = "Invertor: one of your q-values is zero. "
150                msg += "Delete that entry before proceeding"
151                raise ValueError(msg)
152            return self.set_x(value)
153        elif name == 'y':
154            return self.set_y(value)
155        elif name == 'err':
156            value2 = abs(value)
157            return self.set_err(value2)
158        elif name == 'd_max':
159            if value <= 0.0:
160                msg = "Invertor: d_max must be greater than zero."
161                msg += "Correct that entry before proceeding"
162                raise ValueError(msg)
163            return self.set_dmax(value)
164        elif name == 'q_min':
165            if value is None:
166                return self.set_qmin(-1.0)
167            return self.set_qmin(value)
168        elif name == 'q_max':
169            if value is None:
170                return self.set_qmax(-1.0)
171            return self.set_qmax(value)
172        elif name == 'alpha':
173            return self.set_alpha(value)
174        elif name == 'slit_height':
175            return self.set_slit_height(value)
176        elif name == 'slit_width':
177            return self.set_slit_width(value)
178        elif name == 'est_bck':
179            if value == True:
180                return self.set_est_bck(1)
181            elif value == False:
182                return self.set_est_bck(0)
183            else:
184                raise ValueError("Invertor: est_bck can only be True or False")
185
186        return Cinvertor.__setattr__(self, name, value)
187
188    def __getattr__(self, name):
189        """
190        Return the value of an attribute
191        """
192        #import numpy
193        if name == 'x':
194            out = np.ones(self.get_nx())
195            self.get_x(out)
196            return out
197        elif name == 'y':
198            out = np.ones(self.get_ny())
199            self.get_y(out)
200            return out
201        elif name == 'err':
202            out = np.ones(self.get_nerr())
203            self.get_err(out)
204            return out
205        elif name == 'd_max':
206            return self.get_dmax()
207        elif name == 'q_min':
208            qmin = self.get_qmin()
209            if qmin < 0:
210                return None
211            return qmin
212        elif name == 'q_max':
213            qmax = self.get_qmax()
214            if qmax < 0:
215                return None
216            return qmax
217        elif name == 'alpha':
218            return self.get_alpha()
219        elif name == 'slit_height':
220            return self.get_slit_height()
221        elif name == 'slit_width':
222            return self.get_slit_width()
223        elif name == 'est_bck':
224            value = self.get_est_bck()
225            return value == 1
226        elif name in self.__dict__:
227            return self.__dict__[name]
228        return None
229
230    def clone(self):
231        """
232        Return a clone of this instance
233        """
234        #import copy
235
236        invertor = Invertor()
237        invertor.chi2 = self.chi2
238        invertor.elapsed = self.elapsed
239        invertor.nfunc = self.nfunc
240        invertor.alpha = self.alpha
241        invertor.d_max = self.d_max
242        invertor.q_min = self.q_min
243        invertor.q_max = self.q_max
244
245        invertor.x = self.x
246        invertor.y = self.y
247        invertor.err = self.err
248        invertor.est_bck = self.est_bck
249        invertor.background = self.background
250        invertor.slit_height = self.slit_height
251        invertor.slit_width = self.slit_width
252
253        invertor.info = copy.deepcopy(self.info)
254
255        return invertor
256
257    def invert(self, nfunc=10, nr=20):
258        """
259        Perform inversion to P(r)
260
261        The problem is solved by posing the problem as  Ax = b,
262        where x is the set of coefficients we are looking for.
263
264        Npts is the number of points.
265
266        In the following i refers to the ith base function coefficient.
267        The matrix has its entries j in its first Npts rows set to ::
268
269            A[i][j] = (Fourier transformed base function for point j)
270
271        We then choose a number of r-points, n_r, to evaluate the second
272        derivative of P(r) at. This is used as our regularization term.
273        For a vector r of length n_r, the following n_r rows are set to ::
274
275            A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
276
277        The vector b has its first Npts entries set to ::
278
279            b[j] = (I(q) observed for point j)
280
281        The following n_r entries are set to zero.
282
283        The result is found by using scipy.linalg.basic.lstsq to invert
284        the matrix and find the coefficients x.
285
286        :param nfunc: number of base functions to use.
287        :param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
288        :return: c_out, c_cov - the coefficients with covariance matrix
289        """
290        # Reset the background value before proceeding
291        # self.background = 0.0
292        if not self.est_bck:
293            self.y -= self.background
294        out, cov = self.lstsq(nfunc, nr=nr)
295        if not self.est_bck:
296            self.y += self.background
297        return out, cov
298
299    def iq(self, out, q):
300        """
301        Function to call to evaluate the scattering intensity
302
303        :param args: c-parameters, and q
304        :return: I(q)
305
306        """
307        return Cinvertor.iq(self, out, q) + self.background
308
309    def invert_optimize(self, nfunc=10, nr=20):
310        """
311        Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
312
313        This probably produce more reliable results, but is much slower.
314        The minimization function is set to
315        sum_i[ (I_obs(q_i) - I_theo(q_i))/err**2 ] + alpha * reg_term,
316        where the reg_term is given by Svergun: it is the integral of
317        the square of the first derivative
318        of P(r), d(P(r))/dr, integrated over the full range of r.
319
320        :param nfunc: number of base functions to use.
321        :param nr: number of r points to evaluate the 2nd derivative at
322            for the reg. term.
323
324        :return: c_out, c_cov - the coefficients with covariance matrix
325
326        """
327        self.nfunc = nfunc
328        # First, check that the current data is valid
329        if self.is_valid() <= 0:
330            msg = "Invertor.invert: Data array are of different length"
331            raise RuntimeError(msg)
332
333        p = np.ones(nfunc)
334        t_0 = time.time()
335        out, cov_x, _, _, _ = optimize.leastsq(self.residuals, p, full_output=1)
336
337        # Compute chi^2
338        res = self.residuals(out)
339        chisqr = 0
340        for i in range(len(res)):
341            chisqr += res[i]
342
343        self.chi2 = chisqr
344
345        # Store computation time
346        self.elapsed = time.time() - t_0
347
348        if cov_x is None:
349            cov_x = np.ones([nfunc, nfunc])
350            cov_x *= math.fabs(chisqr)
351        return out, cov_x
352
353    def pr_fit(self, nfunc=5):
354        """
355        This is a direct fit to a given P(r). It assumes that the y data
356        is set to some P(r) distribution that we are trying to reproduce
357        with a set of base functions.
358
359        This method is provided as a test.
360        """
361        # First, check that the current data is valid
362        if self.is_valid() <= 0:
363            msg = "Invertor.invert: Data arrays are of different length"
364            raise RuntimeError(msg)
365
366        p = np.ones(nfunc)
367        t_0 = time.time()
368        out, cov_x, _, _, _ = optimize.leastsq(self.pr_residuals, p, full_output=1)
369
370        # Compute chi^2
371        res = self.pr_residuals(out)
372        chisqr = 0
373        for i in range(len(res)):
374            chisqr += res[i]
375
376        self.chisqr = chisqr
377
378        # Store computation time
379        self.elapsed = time.time() - t_0
380
381        return out, cov_x
382
383    def pr_err(self, c, c_cov, r):
384        """
385        Returns the value of P(r) for a given r, and base function
386        coefficients, with error.
387
388        :param c: base function coefficients
389        :param c_cov: covariance matrice of the base function coefficients
390        :param r: r-value to evaluate P(r) at
391
392        :return: P(r)
393
394        """
395        return self.get_pr_err(c, c_cov, r)
396
397    def _accept_q(self, q):
398        """
399        Check q-value against user-defined range
400        """
401        if self.q_min is not None and q < self.q_min:
402            return False
403        if self.q_max is not None and q > self.q_max:
404            return False
405        return True
406
407    def lstsq(self, nfunc=5, nr=20):
408        """
409        The problem is solved by posing the problem as  Ax = b,
410        where x is the set of coefficients we are looking for.
411
412        Npts is the number of points.
413
414        In the following i refers to the ith base function coefficient.
415        The matrix has its entries j in its first Npts rows set to ::
416
417            A[i][j] = (Fourier transformed base function for point j)
418
419        We then choose a number of r-points, n_r, to evaluate the second
420        derivative of P(r) at. This is used as our regularization term.
421        For a vector r of length n_r, the following n_r rows are set to ::
422
423            A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2,
424            evaluated at r[j])
425
426        The vector b has its first Npts entries set to ::
427
428            b[j] = (I(q) observed for point j)
429
430        The following n_r entries are set to zero.
431
432        The result is found by using scipy.linalg.basic.lstsq to invert
433        the matrix and find the coefficients x.
434
435        :param nfunc: number of base functions to use.
436        :param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
437
438        If the result does not allow us to compute the covariance matrix,
439        a matrix filled with zeros will be returned.
440
441        """
442        # Note: To make sure an array is contiguous:
443        # blah = np.ascontiguousarray(blah_original)
444        # ... before passing it to C
445
446        if self.is_valid() < 0:
447            msg = "Invertor: invalid data; incompatible data lengths."
448            raise RuntimeError(msg)
449
450        self.nfunc = nfunc
451        # a -- An M x N matrix.
452        # b -- An M x nrhs matrix or M vector.
453        npts = len(self.x)
454        nq = nr
455        sqrt_alpha = math.sqrt(math.fabs(self.alpha))
456        if sqrt_alpha < 0.0:
457            nq = 0
458
459        # If we need to fit the background, add a term
460        if self.est_bck:
461            nfunc_0 = nfunc
462            nfunc += 1
463
464        a = np.zeros([npts + nq, nfunc])
465        b = np.zeros(npts + nq)
466        err = np.zeros([nfunc, nfunc])
467
468        # Construct the a matrix and b vector that represent the problem
469        t_0 = time.time()
470        try:
471            self._get_matrix(nfunc, nq, a, b)
472        except Exception as exc:
473            raise RuntimeError("Invertor: could not invert I(Q)\n  %s" % str(exc))
474
475        # Perform the inversion (least square fit)
476        # CRUFT: numpy>=1.14.0 allows rcond=None for the following default
477        rcond = np.finfo(float).eps * max(a.shape)
478        c, chi2, _, _ = lstsq(a, b, rcond=rcond)
479        # Sanity check
480        try:
481            float(chi2)
482        except:
483            chi2 = -1.0
484        self.chi2 = chi2
485
486        inv_cov = np.zeros([nfunc, nfunc])
487        # Get the covariance matrix, defined as inv_cov = a_transposed * a
488        self._get_invcov_matrix(nfunc, nr, a, inv_cov)
489
490        # Compute the reg term size for the output
491        sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
492
493        if math.fabs(self.alpha) > 0:
494            new_alpha = sum_sig / (sum_reg / self.alpha)
495        else:
496            new_alpha = 0.0
497        self.suggested_alpha = new_alpha
498
499        try:
500            cov = np.linalg.pinv(inv_cov)
501            err = math.fabs(chi2 / (npts - nfunc)) * cov
502        except Exception as exc:
503            # We were not able to estimate the errors
504            # Return an empty error matrix
505            logger.error(exc)
506
507        # Keep a copy of the last output
508        if not self.est_bck:
509            self.out = c
510            self.cov = err
511        else:
512            self.background = c[0]
513
514            err_0 = np.zeros([nfunc, nfunc])
515            c_0 = np.zeros(nfunc)
516
517            for i in range(nfunc_0):
518                c_0[i] = c[i + 1]
519                for j in range(nfunc_0):
520                    err_0[i][j] = err[i + 1][j + 1]
521
522            self.out = c_0
523            self.cov = err_0
524
525        # Store computation time
526        self.elapsed = time.time() - t_0
527
528        return self.out, self.cov
529
530    def estimate_numterms(self, isquit_func=None):
531        """
532        Returns a reasonable guess for the
533        number of terms
534
535        :param isquit_func:
536          reference to thread function to call to check whether the computation needs to
537          be stopped.
538
539        :return: number of terms, alpha, message
540
541        """
542        from .num_term import NTermEstimator
543        estimator = NTermEstimator(self.clone())
544        try:
545            return estimator.num_terms(isquit_func)
546        except Exception as exc:
547            # If we fail, estimate alpha and return the default
548            # number of terms
549            best_alpha, _, _ = self.estimate_alpha(self.nfunc)
550            logger.warning("Invertor.estimate_numterms: %s" % exc)
551            return self.nfunc, best_alpha, "Could not estimate number of terms"
552
553    def estimate_alpha(self, nfunc):
554        """
555        Returns a reasonable guess for the
556        regularization constant alpha
557
558        :param nfunc: number of terms to use in the expansion.
559
560        :return: alpha, message, elapsed
561
562        where alpha is the estimate for alpha,
563        message is a message for the user,
564        elapsed is the computation time
565        """
566        #import time
567        try:
568            pr = self.clone()
569
570            # T_0 for computation time
571            starttime = time.time()
572            elapsed = 0
573
574            # If the current alpha is zero, try
575            # another value
576            if pr.alpha <= 0:
577                pr.alpha = 0.0001
578
579            # Perform inversion to find the largest alpha
580            out, _ = pr.invert(nfunc)
581            elapsed = time.time() - starttime
582            initial_alpha = pr.alpha
583            initial_peaks = pr.get_peaks(out)
584
585            # Try the inversion with the estimated alpha
586            pr.alpha = pr.suggested_alpha
587            out, _ = pr.invert(nfunc)
588
589            npeaks = pr.get_peaks(out)
590            # if more than one peak to start with
591            # just return the estimate
592            if npeaks > 1:
593                #message = "Your P(r) is not smooth,
594                #please check your inversion parameters"
595                message = None
596                return pr.suggested_alpha, message, elapsed
597            else:
598
599                # Look at smaller values
600                # We assume that for the suggested alpha, we have 1 peak
601                # if not, send a message to change parameters
602                alpha = pr.suggested_alpha
603                best_alpha = pr.suggested_alpha
604                found = False
605                for i in range(10):
606                    pr.alpha = (0.33) ** (i + 1) * alpha
607                    out, _ = pr.invert(nfunc)
608
609                    peaks = pr.get_peaks(out)
610                    if peaks > 1:
611                        found = True
612                        break
613                    best_alpha = pr.alpha
614
615                # If we didn't find a turning point for alpha and
616                # the initial alpha already had only one peak,
617                # just return that
618                if not found and initial_peaks == 1 and \
619                    initial_alpha < best_alpha:
620                    best_alpha = initial_alpha
621
622                # Check whether the size makes sense
623                message = ''
624
625                if not found:
626                    message = None
627                elif best_alpha >= 0.5 * pr.suggested_alpha:
628                    # best alpha is too big, return a
629                    # reasonable value
630                    message = "The estimated alpha for your system is too "
631                    message += "large. "
632                    message += "Try increasing your maximum distance."
633
634                return best_alpha, message, elapsed
635
636        except Exception as exc:
637            message = "Invertor.estimate_alpha: %s" % exc
638            return 0, message, elapsed
639
640    def to_file(self, path, npts=100):
641        """
642        Save the state to a file that will be readable
643        by SliceView.
644
645        :param path: path of the file to write
646        :param npts: number of P(r) points to be written
647
648        """
649        file = open(path, 'w')
650        file.write("#d_max=%g\n" % self.d_max)
651        file.write("#nfunc=%g\n" % self.nfunc)
652        file.write("#alpha=%g\n" % self.alpha)
653        file.write("#chi2=%g\n" % self.chi2)
654        file.write("#elapsed=%g\n" % self.elapsed)
655        file.write("#qmin=%s\n" % str(self.q_min))
656        file.write("#qmax=%s\n" % str(self.q_max))
657        file.write("#slit_height=%g\n" % self.slit_height)
658        file.write("#slit_width=%g\n" % self.slit_width)
659        file.write("#background=%g\n" % self.background)
660        if self.est_bck:
661            file.write("#has_bck=1\n")
662        else:
663            file.write("#has_bck=0\n")
664        file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
665        if self.out is not None:
666            if len(self.out) == len(self.cov):
667                for i in range(len(self.out)):
668                    file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]),
669                                                   str(self.cov[i][i])))
670        file.write("<r>  <Pr>  <dPr>\n")
671        r = np.arange(0.0, self.d_max, self.d_max / npts)
672
673        for r_i in r:
674            (value, err) = self.pr_err(self.out, self.cov, r_i)
675            file.write("%g  %g  %g\n" % (r_i, value, err))
676
677        file.close()
678
679    def from_file(self, path):
680        """
681        Load the state of the Invertor from a file,
682        to be able to generate P(r) from a set of
683        parameters.
684
685        :param path: path of the file to load
686
687        """
688        #import os
689        #import re
690        if os.path.isfile(path):
691            try:
692                fd = open(path, 'r')
693
694                buff = fd.read()
695                lines = buff.split('\n')
696                for line in lines:
697                    if line.startswith('#d_max='):
698                        toks = line.split('=')
699                        self.d_max = float(toks[1])
700                    elif line.startswith('#nfunc='):
701                        toks = line.split('=')
702                        self.nfunc = int(toks[1])
703                        self.out = np.zeros(self.nfunc)
704                        self.cov = np.zeros([self.nfunc, self.nfunc])
705                    elif line.startswith('#alpha='):
706                        toks = line.split('=')
707                        self.alpha = float(toks[1])
708                    elif line.startswith('#chi2='):
709                        toks = line.split('=')
710                        self.chi2 = float(toks[1])
711                    elif line.startswith('#elapsed='):
712                        toks = line.split('=')
713                        self.elapsed = float(toks[1])
714                    elif line.startswith('#alpha_estimate='):
715                        toks = line.split('=')
716                        self.suggested_alpha = float(toks[1])
717                    elif line.startswith('#qmin='):
718                        toks = line.split('=')
719                        try:
720                            self.q_min = float(toks[1])
721                        except:
722                            self.q_min = None
723                    elif line.startswith('#qmax='):
724                        toks = line.split('=')
725                        try:
726                            self.q_max = float(toks[1])
727                        except:
728                            self.q_max = None
729                    elif line.startswith('#slit_height='):
730                        toks = line.split('=')
731                        self.slit_height = float(toks[1])
732                    elif line.startswith('#slit_width='):
733                        toks = line.split('=')
734                        self.slit_width = float(toks[1])
735                    elif line.startswith('#background='):
736                        toks = line.split('=')
737                        self.background = float(toks[1])
738                    elif line.startswith('#has_bck='):
739                        toks = line.split('=')
740                        self.est_bck = int(toks[1]) == 1
741
742                    # Now read in the parameters
743                    elif line.startswith('#C_'):
744                        toks = line.split('=')
745                        p = re.compile('#C_([0-9]+)')
746                        m = p.search(toks[0])
747                        toks2 = toks[1].split('+-')
748                        i = int(m.group(1))
749                        self.out[i] = float(toks2[0])
750
751                        self.cov[i][i] = float(toks2[1])
752
753            except Exception as exc:
754                msg = "Invertor.from_file: corrupted file\n%s" % exc
755                raise RuntimeError(msg)
756        else:
757            msg = "Invertor.from_file: '%s' is not a file" % str(path)
758            raise RuntimeError(msg)
Note: See TracBrowser for help on using the repository browser.