source: sasview/pr_inversion/invertor.py @ 97dea6c

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalccostrafo411magnetic_scattrelease-4.1.1release-4.1.2release-4.2.2release_4.0.1ticket-1009ticket-1094-headlessticket-1242-2d-resolutionticket-1243ticket-1249ticket885unittest-saveload
Last change on this file since 97dea6c was d84a90c, checked in by Gervaise Alina <gervyh@…>, 15 years ago

working on documentation

  • Property mode set to 100644
File size: 25.2 KB
Line 
1"""
2Module to perform P(r) inversion.
3The module contains the Invertor class.
4"""
5from sans.pr.core.pr_inversion import Cinvertor
6import numpy
7import sys
8import math, time
9from numpy.linalg import lstsq
10
11def help():
12    """
13    Provide general online help text
14    Future work: extend this function to allow topic selection
15    """
16    info_txt  = "The inversion approach is based on Moore, J. Appl. Cryst. (1980) 13, 168-175.\n\n"
17    info_txt += "P(r) is set to be equal to an expansion of base functions of the type "
18    info_txt += "phi_n(r) = 2*r*sin(pi*n*r/D_max). The coefficient of each base functions "
19    info_txt += "in the expansion is found by performing a least square fit with the "
20    info_txt += "following fit function:\n\n"
21    info_txt += "chi**2 = sum_i[ I_meas(q_i) - I_th(q_i) ]**2/error**2 + Reg_term\n\n"
22    info_txt += "where I_meas(q) is the measured scattering intensity and I_th(q) is "
23    info_txt += "the prediction from the Fourier transform of the P(r) expansion. "
24    info_txt += "The Reg_term term is a regularization term set to the second derivative "
25    info_txt += "d**2P(r)/dr**2 integrated over r. It is used to produce a smooth P(r) output.\n\n"
26    info_txt += "The following are user inputs:\n\n"
27    info_txt += "   - Number of terms: the number of base functions in the P(r) expansion.\n\n"
28    info_txt += "   - Regularization constant: a multiplicative constant to set the size of "
29    info_txt += "the regularization term.\n\n"
30    info_txt += "   - Maximum distance: the maximum distance between any two points in the system.\n"
31     
32    return info_txt
33   
34
35class Invertor(Cinvertor):
36    """
37    Invertor class to perform P(r) inversion
38   
39    The problem is solved by posing the problem as  Ax = b,
40    where x is the set of coefficients we are looking for.
41   
42    Npts is the number of points.
43   
44    In the following i refers to the ith base function coefficient.
45    The matrix has its entries j in its first Npts rows set to
46        A[j][i] = (Fourier transformed base function for point j)
47       
48    We them choose a number of r-points, n_r, to evaluate the second
49    derivative of P(r) at. This is used as our regularization term.
50    For a vector r of length n_r, the following n_r rows are set to
51        A[j+Npts][i] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
52       
53    The vector b has its first Npts entries set to
54        b[j] = (I(q) observed for point j)
55       
56    The following n_r entries are set to zero.
57   
58    The result is found by using scipy.linalg.basic.lstsq to invert
59    the matrix and find the coefficients x.
60   
61    Methods inherited from Cinvertor:
62    - get_peaks(pars): returns the number of P(r) peaks
63    - oscillations(pars): returns the oscillation parameters for the output P(r)
64    - get_positive(pars): returns the fraction of P(r) that is above zero
65    - get_pos_err(pars): returns the fraction of P(r) that is 1-sigma above zero
66    """
67    ## Chisqr of the last computation
68    chi2  = 0
69    ## Time elapsed for last computation
70    elapsed = 0
71    ## Alpha to get the reg term the same size as the signal
72    suggested_alpha = 0
73    ## Last number of base functions used
74    nfunc = 10
75    ## Last output values
76    out = None
77    ## Last errors on output values
78    cov = None
79    ## Background value
80    background = 0
81    ## Information dictionary for application use
82    info = {}
83   
84   
85    def __init__(self):
86        Cinvertor.__init__(self)
87       
88    def __setattr__(self, name, value):
89        """
90        Set the value of an attribute.
91        Access the parent class methods for
92        x, y, err, d_max, q_min, q_max and alpha
93        """
94        if   name=='x':
95            if 0.0 in value:
96                raise ValueError, "Invertor: one of your q-values is zero. Delete that entry before proceeding"
97            return self.set_x(value)
98        elif name=='y':
99            return self.set_y(value)
100        elif name=='err':
101            value2 = abs(value)
102            return self.set_err(value2)
103        elif name=='d_max':
104            return self.set_dmax(value)
105        elif name=='q_min':
106            if value==None:
107                return self.set_qmin(-1.0)
108            return self.set_qmin(value)
109        elif name=='q_max':
110            if value==None:
111                return self.set_qmax(-1.0)
112            return self.set_qmax(value)
113        elif name=='alpha':
114            return self.set_alpha(value)
115        elif name=='slit_height':
116            return self.set_slit_height(value)
117        elif name=='slit_width':
118            return self.set_slit_width(value)
119        elif name=='has_bck':
120            if value==True:
121                return self.set_has_bck(1)
122            elif value==False:
123                return self.set_has_bck(0)
124            else:
125                raise ValueError, "Invertor: has_bck can only be True or False"
126           
127        return Cinvertor.__setattr__(self, name, value)
128   
129    def __getattr__(self, name):
130        """
131        Return the value of an attribute
132        """
133        import numpy
134        if   name=='x':
135            out = numpy.ones(self.get_nx())
136            self.get_x(out)
137            return out
138        elif name=='y':
139            out = numpy.ones(self.get_ny())
140            self.get_y(out)
141            return out
142        elif name=='err':
143            out = numpy.ones(self.get_nerr())
144            self.get_err(out)
145            return out
146        elif name=='d_max':
147            return self.get_dmax()
148        elif name=='q_min':
149            qmin = self.get_qmin()
150            if qmin<0:
151                return None
152            return qmin
153        elif name=='q_max':
154            qmax = self.get_qmax()
155            if qmax<0:
156                return None
157            return qmax
158        elif name=='alpha':
159            return self.get_alpha()
160        elif name=='slit_height':
161            return self.get_slit_height()
162        elif name=='slit_width':
163            return self.get_slit_width()
164        elif name=='has_bck':
165            value = self.get_has_bck()
166            if value==1:
167                return True
168            else:
169                return False
170        elif name in self.__dict__:
171            return self.__dict__[name]
172        return None
173   
174    def clone(self):
175        """
176        Return a clone of this instance
177        """
178        import copy
179       
180        invertor = Invertor()
181        invertor.chi2    = self.chi2
182        invertor.elapsed = self.elapsed
183        invertor.nfunc   = self.nfunc
184        invertor.alpha   = self.alpha
185        invertor.d_max   = self.d_max
186        invertor.q_min   = self.q_min
187        invertor.q_max   = self.q_max
188       
189        invertor.x = self.x
190        invertor.y = self.y
191        invertor.err = self.err
192        invertor.has_bck = self.has_bck
193        invertor.slit_height = self.slit_height
194        invertor.slit_width  = self.slit_width
195       
196        invertor.info = copy.deepcopy(self.info)
197       
198        return invertor
199   
200    def invert(self, nfunc=10, nr=20):
201        """
202        Perform inversion to P(r)
203       
204        The problem is solved by posing the problem as  Ax = b,
205        where x is the set of coefficients we are looking for.
206       
207        Npts is the number of points.
208       
209        In the following i refers to the ith base function coefficient.
210        The matrix has its entries j in its first Npts rows set to
211            A[i][j] = (Fourier transformed base function for point j)
212           
213        We them choose a number of r-points, n_r, to evaluate the second
214        derivative of P(r) at. This is used as our regularization term.
215        For a vector r of length n_r, the following n_r rows are set to
216            A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
217           
218        The vector b has its first Npts entries set to
219            b[j] = (I(q) observed for point j)
220           
221        The following n_r entries are set to zero.
222       
223        The result is found by using scipy.linalg.basic.lstsq to invert
224        the matrix and find the coefficients x.
225       
226        :param nfunc: number of base functions to use.
227        :param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
228        :return: c_out, c_cov - the coefficients with covariance matrix
229       
230        """
231        # Reset the background value before proceeding
232        self.background = 0.0
233        return self.lstsq(nfunc, nr=nr)
234   
235    def iq(self, out, q):
236        """
237        Function to call to evaluate the scattering intensity
238       
239        :param args: c-parameters, and q
240        :return: I(q)
241       
242        """
243        return Cinvertor.iq(self, out, q)+self.background
244   
245    def invert_optimize(self, nfunc=10, nr=20):
246        """
247        Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
248       
249        This probably produce more reliable results, but is much slower.
250        The minimization function is set to sum_i[ (I_obs(q_i) - I_theo(q_i))/err**2 ] + alpha * reg_term,
251        where the reg_term is given by Svergun: it is the integral of the square of the first derivative
252        of P(r), d(P(r))/dr, integrated over the full range of r.
253       
254        :param nfunc: number of base functions to use.
255        :param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
256       
257        :return: c_out, c_cov - the coefficients with covariance matrix
258       
259        """
260       
261        from scipy import optimize
262        import time
263       
264        self.nfunc = nfunc
265        # First, check that the current data is valid
266        if self.is_valid()<=0:
267            raise RuntimeError, "Invertor.invert: Data array are of different length"
268       
269        p = numpy.ones(nfunc)
270        t_0 = time.time()
271        out, cov_x, info, mesg, success = optimize.leastsq(self.residuals, p, full_output=1, warning=True)
272       
273        # Compute chi^2
274        res = self.residuals(out)
275        chisqr = 0
276        for i in range(len(res)):
277            chisqr += res[i]
278       
279        self.chi2 = chisqr
280
281        # Store computation time
282        self.elapsed = time.time() - t_0
283       
284        return out, cov_x
285   
286    def pr_fit(self, nfunc=5):
287        """
288        This is a direct fit to a given P(r). It assumes that the y data
289        is set to some P(r) distribution that we are trying to reproduce
290        with a set of base functions.
291       
292        This method is provided as a test.
293        """
294        from scipy import optimize
295       
296        # First, check that the current data is valid
297        if self.is_valid()<=0:
298            raise RuntimeError, "Invertor.invert: Data arrays are of different length"
299       
300        p = numpy.ones(nfunc)
301        t_0 = time.time()
302        out, cov_x, info, mesg, success = optimize.leastsq(self.pr_residuals, p, full_output=1, warning=True)
303       
304        # Compute chi^2
305        res = self.pr_residuals(out)
306        chisqr = 0
307        for i in range(len(res)):
308            chisqr += res[i]
309       
310        self.chisqr = chisqr
311       
312        # Store computation time
313        self.elapsed = time.time() - t_0
314
315        return out, cov_x
316   
317    def pr_err(self, c, c_cov, r):
318        """   
319        Returns the value of P(r) for a given r, and base function
320        coefficients, with error.
321       
322        :param c: base function coefficients
323        :param c_cov: covariance matrice of the base function coefficients
324        :param r: r-value to evaluate P(r) at
325       
326        :return: P(r)
327       
328        """
329        return self.get_pr_err(c, c_cov, r)
330       
331    def _accept_q(self, q):
332        """
333        Check q-value against user-defined range
334        """
335        if not self.q_min==None and q<self.q_min:
336            return False
337        if not self.q_max==None and q>self.q_max:
338            return False
339        return True
340       
341    def lstsq(self, nfunc=5, nr=20):
342        """
343        The problem is solved by posing the problem as  Ax = b,
344        where x is the set of coefficients we are looking for.
345       
346        Npts is the number of points.
347       
348        In the following i refers to the ith base function coefficient.
349        The matrix has its entries j in its first Npts rows set to
350            A[i][j] = (Fourier transformed base function for point j)
351           
352        We them choose a number of r-points, n_r, to evaluate the second
353        derivative of P(r) at. This is used as our regularization term.
354        For a vector r of length n_r, the following n_r rows are set to
355            A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
356           
357        The vector b has its first Npts entries set to
358            b[j] = (I(q) observed for point j)
359           
360        The following n_r entries are set to zero.
361       
362        The result is found by using scipy.linalg.basic.lstsq to invert
363        the matrix and find the coefficients x.
364       
365        :param nfunc: number of base functions to use.
366        :param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
367
368        If the result does not allow us to compute the covariance matrix,
369        a matrix filled with zeros will be returned.
370
371        """
372        # Note: To make sure an array is contiguous:
373        # blah = numpy.ascontiguousarray(blah_original)
374        # ... before passing it to C
375       
376        if self.is_valid()<0:
377            raise RuntimeError, "Invertor: invalid data; incompatible data lengths."
378       
379        self.nfunc = nfunc
380        # a -- An M x N matrix.
381        # b -- An M x nrhs matrix or M vector.
382        npts = len(self.x)
383        nq   = nr
384        sqrt_alpha = math.sqrt(math.fabs(self.alpha))
385        if sqrt_alpha<0.0:
386            nq = 0
387
388        # If we need to fit the background, add a term
389        if self.has_bck==True:
390            nfunc_0 = nfunc
391            nfunc += 1
392
393        a = numpy.zeros([npts+nq, nfunc])
394        b = numpy.zeros(npts+nq)
395        err = numpy.zeros([nfunc, nfunc])
396       
397        # Construct the a matrix and b vector that represent the problem
398        t_0 = time.time()
399        self._get_matrix(nfunc, nq, a, b)
400             
401        # Perform the inversion (least square fit)
402        c, chi2, rank, n = lstsq(a, b)
403        # Sanity check
404        try:
405            float(chi2)
406        except:
407            chi2 = -1.0
408        self.chi2 = chi2
409               
410        inv_cov = numpy.zeros([nfunc,nfunc])
411        # Get the covariance matrix, defined as inv_cov = a_transposed * a
412        self._get_invcov_matrix(nfunc, nr, a, inv_cov)
413                   
414        # Compute the reg term size for the output
415        sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
416                   
417        if math.fabs(self.alpha)>0:
418            new_alpha = sum_sig/(sum_reg/self.alpha)
419        else:
420            new_alpha = 0.0
421        self.suggested_alpha = new_alpha
422       
423        try:
424            cov = numpy.linalg.pinv(inv_cov)
425            err = math.fabs(chi2/float(npts-nfunc)) * cov
426        except:
427            # We were not able to estimate the errors
428            # Return an empty error matrix
429            pass
430           
431        # Keep a copy of the last output
432        if self.has_bck==False:
433            self.background = 0
434            self.out = c
435            self.cov = err
436        else:
437            self.background = c[0]
438           
439            err_0 = numpy.zeros([nfunc, nfunc])
440            c_0 = numpy.zeros(nfunc)
441           
442            for i in range(nfunc_0):
443                c_0[i] = c[i+1]
444                for j in range(nfunc_0):
445                    err_0[i][j] = err[i+1][j+1]
446                   
447            self.out = c_0
448            self.cov = err_0
449           
450        return self.out, self.cov
451       
452    def estimate_numterms(self, isquit_func=None):
453        """
454        Returns a reasonable guess for the
455        number of terms
456       
457        :param isquit_func: reference to thread function to call to
458                            check whether the computation needs to
459                            be stopped.
460       
461        :return: number of terms, alpha, message
462       
463        """
464        from num_term import Num_terms
465        estimator = Num_terms(self.clone())
466        try:
467            return estimator.num_terms(isquit_func)
468        except:
469            # If we fail, estimate alpha and return the default
470            # number of terms
471            best_alpha, message, elapsed =self.estimate_alpha(self.nfunc)
472            return self.nfunc, best_alpha, "Could not estimate number of terms"
473                   
474    def estimate_alpha(self, nfunc):
475        """
476        Returns a reasonable guess for the
477        regularization constant alpha
478       
479        :param nfunc: number of terms to use in the expansion.
480       
481        :return: alpha, message, elapsed
482       
483        where alpha is the estimate for alpha,
484        message is a message for the user,
485        elapsed is the computation time
486        """
487        import time
488        try:           
489            pr = self.clone()
490           
491            # T_0 for computation time
492            starttime = time.time()
493            elapsed = 0
494           
495            # If the current alpha is zero, try
496            # another value
497            if pr.alpha<=0:
498                pr.alpha = 0.0001
499                 
500            # Perform inversion to find the largest alpha
501            out, cov = pr.invert(nfunc)
502            elapsed = time.time()-starttime
503            initial_alpha = pr.alpha
504            initial_peaks = pr.get_peaks(out)
505   
506            # Try the inversion with the estimated alpha
507            pr.alpha = pr.suggested_alpha
508            out, cov = pr.invert(nfunc)
509   
510            npeaks = pr.get_peaks(out)
511            # if more than one peak to start with
512            # just return the estimate
513            if npeaks>1:
514                #message = "Your P(r) is not smooth, please check your inversion parameters"
515                message = None
516                return pr.suggested_alpha, message, elapsed
517            else:
518               
519                # Look at smaller values
520                # We assume that for the suggested alpha, we have 1 peak
521                # if not, send a message to change parameters
522                alpha = pr.suggested_alpha
523                best_alpha = pr.suggested_alpha
524                found = False
525                for i in range(10):
526                    pr.alpha = (0.33)**(i+1)*alpha
527                    out, cov = pr.invert(nfunc)
528                   
529                    peaks = pr.get_peaks(out)
530                    if peaks>1:
531                        found = True
532                        break
533                    best_alpha = pr.alpha
534                   
535                # If we didn't find a turning point for alpha and
536                # the initial alpha already had only one peak,
537                # just return that
538                if not found and initial_peaks==1 and initial_alpha<best_alpha:
539                    best_alpha = initial_alpha
540                   
541                # Check whether the size makes sense
542                message=''
543               
544                if not found:
545                    message = None
546                elif best_alpha>=0.5*pr.suggested_alpha:
547                    # best alpha is too big, return a
548                    # reasonable value
549                    message  = "The estimated alpha for your system is too large. "
550                    message += "Try increasing your maximum distance."
551               
552                return best_alpha, message, elapsed
553   
554        except:
555            message = "Invertor.estimate_alpha: %s" % sys.exc_value
556            return 0, message, elapsed
557   
558       
559    def to_file(self, path, npts=100):
560        """
561        Save the state to a file that will be readable
562        by SliceView.
563       
564        :param path: path of the file to write
565        :param npts: number of P(r) points to be written
566       
567        """
568        file = open(path, 'w')
569        file.write("#d_max=%g\n" % self.d_max)
570        file.write("#nfunc=%g\n" % self.nfunc)
571        file.write("#alpha=%g\n" % self.alpha)
572        file.write("#chi2=%g\n" % self.chi2)
573        file.write("#elapsed=%g\n" % self.elapsed)
574        file.write("#qmin=%s\n" % str(self.q_min))
575        file.write("#qmax=%s\n" % str(self.q_max))
576        file.write("#slit_height=%g\n" % self.slit_height)
577        file.write("#slit_width=%g\n" % self.slit_width)
578        file.write("#background=%g\n" % self.background)
579        if self.has_bck==True:
580            file.write("#has_bck=1\n")
581        else:
582            file.write("#has_bck=0\n")
583        file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
584        if not self.out==None:
585            if len(self.out)==len(self.cov):
586                for i in range(len(self.out)):
587                    file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]), str(self.cov[i][i])))
588        file.write("<r>  <Pr>  <dPr>\n")
589        r = numpy.arange(0.0, self.d_max, self.d_max/npts)
590       
591        for r_i in r:
592            (value, err) = self.pr_err(self.out, self.cov, r_i)
593            file.write("%g  %g  %g\n" % (r_i, value, err))
594   
595        file.close()
596     
597       
598    def from_file(self, path):
599        """
600        Load the state of the Invertor from a file,
601        to be able to generate P(r) from a set of
602        parameters.
603       
604        :param path: path of the file to load
605       
606        """
607        import os
608        import re
609        if os.path.isfile(path):
610            try:
611                fd = open(path, 'r')
612               
613                buff    = fd.read()
614                lines   = buff.split('\n')
615                for line in lines:
616                    if line.startswith('#d_max='):
617                        toks = line.split('=')
618                        self.d_max = float(toks[1])
619                    elif line.startswith('#nfunc='):
620                        toks = line.split('=')
621                        self.nfunc = int(toks[1])
622                        self.out = numpy.zeros(self.nfunc)
623                        self.cov = numpy.zeros([self.nfunc, self.nfunc])
624                    elif line.startswith('#alpha='):
625                        toks = line.split('=')
626                        self.alpha = float(toks[1])
627                    elif line.startswith('#chi2='):
628                        toks = line.split('=')
629                        self.chi2 = float(toks[1])
630                    elif line.startswith('#elapsed='):
631                        toks = line.split('=')
632                        self.elapsed = float(toks[1])
633                    elif line.startswith('#alpha_estimate='):
634                        toks = line.split('=')
635                        self.suggested_alpha = float(toks[1])
636                    elif line.startswith('#qmin='):
637                        toks = line.split('=')
638                        try:
639                            self.q_min = float(toks[1])
640                        except:
641                            self.q_min = None
642                    elif line.startswith('#qmax='):
643                        toks = line.split('=')
644                        try:
645                            self.q_max = float(toks[1])
646                        except:
647                            self.q_max = None
648                    elif line.startswith('#slit_height='):
649                        toks = line.split('=')
650                        self.slit_height = float(toks[1])
651                    elif line.startswith('#slit_width='):
652                        toks = line.split('=')
653                        self.slit_width = float(toks[1])
654                    elif line.startswith('#background='):
655                        toks = line.split('=')
656                        self.background = float(toks[1])
657                    elif line.startswith('#has_bck='):
658                        toks = line.split('=')
659                        if int(toks[1])==1:
660                            self.has_bck=True
661                        else:
662                            self.has_bck=False
663           
664                    # Now read in the parameters
665                    elif line.startswith('#C_'):
666                        toks = line.split('=')
667                        p = re.compile('#C_([0-9]+)')
668                        m = p.search(toks[0])
669                        toks2 = toks[1].split('+-')
670                        i = int(m.group(1))
671                        self.out[i] = float(toks2[0])
672                       
673                        self.cov[i][i] = float(toks2[1])                       
674           
675            except:
676                raise RuntimeError, "Invertor.from_file: corrupted file\n%s" % sys.exc_value
677        else:
678            raise RuntimeError, "Invertor.from_file: '%s' is not a file" % str(path) 
679       
680       
681   
682   
683if __name__ == "__main__":
684    o = Invertor()
685
686   
687   
688   
689   
Note: See TracBrowser for help on using the repository browser.