source: sasview/pr_inversion/invertor.py @ b3de3a45

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalccostrafo411magnetic_scattrelease-4.1.1release-4.1.2release-4.2.2release_4.0.1ticket-1009ticket-1094-headlessticket-1242-2d-resolutionticket-1243ticket-1249ticket885unittest-saveload
Last change on this file since b3de3a45 was 3873fd2, checked in by Mathieu Doucet <doucetm@…>, 15 years ago

pr_inversion: add empty information dictionary to be used by applications to pass additional information to be carried by the Invertor.

  • Property mode set to 100644
File size: 25.6 KB
Line 
1"""
2    Module to perform P(r) inversion.
3    The module contains the Invertor class.
4"""
5from sans.pr.core.pr_inversion import Cinvertor
6import numpy
7import sys
8import math, time
9from scipy.linalg.basic import lstsq
10
11def help():
12    """
13        Provide general online help text
14        Future work: extend this function to allow topic selection
15    """
16    info_txt  = "The inversion approach is based on Moore, J. Appl. Cryst. (1980) 13, 168-175.\n\n"
17    info_txt += "P(r) is set to be equal to an expansion of base functions of the type "
18    info_txt += "phi_n(r) = 2*r*sin(pi*n*r/D_max). The coefficient of each base functions "
19    info_txt += "in the expansion is found by performing a least square fit with the "
20    info_txt += "following fit function:\n\n"
21    info_txt += "chi**2 = sum_i[ I_meas(q_i) - I_th(q_i) ]**2/error**2 + Reg_term\n\n"
22    info_txt += "where I_meas(q) is the measured scattering intensity and I_th(q) is "
23    info_txt += "the prediction from the Fourier transform of the P(r) expansion. "
24    info_txt += "The Reg_term term is a regularization term set to the second derivative "
25    info_txt += "d**2P(r)/dr**2 integrated over r. It is used to produce a smooth P(r) output.\n\n"
26    info_txt += "The following are user inputs:\n\n"
27    info_txt += "   - Number of terms: the number of base functions in the P(r) expansion.\n\n"
28    info_txt += "   - Regularization constant: a multiplicative constant to set the size of "
29    info_txt += "the regularization term.\n\n"
30    info_txt += "   - Maximum distance: the maximum distance between any two points in the system.\n"
31     
32    return info_txt
33   
34
35class Invertor(Cinvertor):
36    """
37        Invertor class to perform P(r) inversion
38       
39        The problem is solved by posing the problem as  Ax = b,
40        where x is the set of coefficients we are looking for.
41       
42        Npts is the number of points.
43       
44        In the following i refers to the ith base function coefficient.
45        The matrix has its entries j in its first Npts rows set to
46            A[j][i] = (Fourier transformed base function for point j)
47           
48        We them choose a number of r-points, n_r, to evaluate the second
49        derivative of P(r) at. This is used as our regularization term.
50        For a vector r of length n_r, the following n_r rows are set to
51            A[j+Npts][i] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
52           
53        The vector b has its first Npts entries set to
54            b[j] = (I(q) observed for point j)
55           
56        The following n_r entries are set to zero.
57       
58        The result is found by using scipy.linalg.basic.lstsq to invert
59        the matrix and find the coefficients x.
60       
61        Methods inherited from Cinvertor:
62        - get_peaks(pars): returns the number of P(r) peaks
63        - oscillations(pars): returns the oscillation parameters for the output P(r)
64        - get_positive(pars): returns the fraction of P(r) that is above zero
65        - get_pos_err(pars): returns the fraction of P(r) that is 1-sigma above zero
66    """
67    ## Chisqr of the last computation
68    chi2  = 0
69    ## Time elapsed for last computation
70    elapsed = 0
71    ## Alpha to get the reg term the same size as the signal
72    suggested_alpha = 0
73    ## Last number of base functions used
74    nfunc = 10
75    ## Last output values
76    out = None
77    ## Last errors on output values
78    cov = None
79    ## Background value
80    background = 0
81    ## Information dictionary for application use
82    info = {}
83   
84   
85    def __init__(self):
86        Cinvertor.__init__(self)
87       
88    def __setattr__(self, name, value):
89        """
90            Set the value of an attribute.
91            Access the parent class methods for
92            x, y, err, d_max, q_min, q_max and alpha
93        """
94        if   name=='x':
95            if 0.0 in value:
96                raise ValueError, "Invertor: one of your q-values is zero. Delete that entry before proceeding"
97            return self.set_x(value)
98        elif name=='y':
99            return self.set_y(value)
100        elif name=='err':
101            value2 = abs(value)
102            return self.set_err(value2)
103        elif name=='d_max':
104            return self.set_dmax(value)
105        elif name=='q_min':
106            if value==None:
107                return self.set_qmin(-1.0)
108            return self.set_qmin(value)
109        elif name=='q_max':
110            if value==None:
111                return self.set_qmax(-1.0)
112            return self.set_qmax(value)
113        elif name=='alpha':
114            return self.set_alpha(value)
115        elif name=='slit_height':
116            return self.set_slit_height(value)
117        elif name=='slit_width':
118            return self.set_slit_width(value)
119        elif name=='has_bck':
120            if value==True:
121                return self.set_has_bck(1)
122            elif value==False:
123                return self.set_has_bck(0)
124            else:
125                raise ValueError, "Invertor: has_bck can only be True or False"
126           
127        return Cinvertor.__setattr__(self, name, value)
128   
129    def __getattr__(self, name):
130        """
131           Return the value of an attribute
132        """
133        import numpy
134        if   name=='x':
135            out = numpy.ones(self.get_nx())
136            self.get_x(out)
137            return out
138        elif name=='y':
139            out = numpy.ones(self.get_ny())
140            self.get_y(out)
141            return out
142        elif name=='err':
143            out = numpy.ones(self.get_nerr())
144            self.get_err(out)
145            return out
146        elif name=='d_max':
147            return self.get_dmax()
148        elif name=='q_min':
149            qmin = self.get_qmin()
150            if qmin<0:
151                return None
152            return qmin
153        elif name=='q_max':
154            qmax = self.get_qmax()
155            if qmax<0:
156                return None
157            return qmax
158        elif name=='alpha':
159            return self.get_alpha()
160        elif name=='slit_height':
161            return self.get_slit_height()
162        elif name=='slit_width':
163            return self.get_slit_width()
164        elif name=='has_bck':
165            value = self.get_has_bck()
166            if value==1:
167                return True
168            else:
169                return False
170        elif name in self.__dict__:
171            return self.__dict__[name]
172        return None
173   
174    def clone(self):
175        """
176            Return a clone of this instance
177        """
178        import copy
179       
180        invertor = Invertor()
181        invertor.chi2    = self.chi2
182        invertor.elapsed = self.elapsed
183        invertor.nfunc   = self.nfunc
184        invertor.alpha   = self.alpha
185        invertor.d_max   = self.d_max
186        invertor.q_min   = self.q_min
187        invertor.q_max   = self.q_max
188       
189        invertor.x = self.x
190        invertor.y = self.y
191        invertor.err = self.err
192        invertor.has_bck = self.has_bck
193        invertor.slit_height = self.slit_height
194        invertor.slit_width  = self.slit_width
195       
196        invertor.info = copy.deepcopy(self.info)
197       
198        return invertor
199   
200    def invert(self, nfunc=10, nr=20):
201        """
202            Perform inversion to P(r)
203           
204            The problem is solved by posing the problem as  Ax = b,
205            where x is the set of coefficients we are looking for.
206           
207            Npts is the number of points.
208           
209            In the following i refers to the ith base function coefficient.
210            The matrix has its entries j in its first Npts rows set to
211                A[i][j] = (Fourier transformed base function for point j)
212               
213            We them choose a number of r-points, n_r, to evaluate the second
214            derivative of P(r) at. This is used as our regularization term.
215            For a vector r of length n_r, the following n_r rows are set to
216                A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
217               
218            The vector b has its first Npts entries set to
219                b[j] = (I(q) observed for point j)
220               
221            The following n_r entries are set to zero.
222           
223            The result is found by using scipy.linalg.basic.lstsq to invert
224            the matrix and find the coefficients x.
225           
226            @param nfunc: number of base functions to use.
227            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
228            @return: c_out, c_cov - the coefficients with covariance matrix
229        """
230        # Reset the background value before proceeding
231        self.background = 0.0
232        return self.lstsq(nfunc, nr=nr)
233   
234    def iq(self, out, q):
235        """
236            Function to call to evaluate the scattering intensity
237            @param args: c-parameters, and q
238            @return: I(q)
239        """
240        return Cinvertor.iq(self, out, q)+self.background
241   
242    def invert_optimize(self, nfunc=10, nr=20):
243        """
244            Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
245           
246            This probably produce more reliable results, but is much slower.
247            The minimization function is set to sum_i[ (I_obs(q_i) - I_theo(q_i))/err**2 ] + alpha * reg_term,
248            where the reg_term is given by Svergun: it is the integral of the square of the first derivative
249            of P(r), d(P(r))/dr, integrated over the full range of r.
250           
251            @param nfunc: number of base functions to use.
252            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
253            @return: c_out, c_cov - the coefficients with covariance matrix
254        """
255       
256        from scipy import optimize
257        import time
258       
259        self.nfunc = nfunc
260        # First, check that the current data is valid
261        if self.is_valid()<=0:
262            raise RuntimeError, "Invertor.invert: Data array are of different length"
263       
264        p = numpy.ones(nfunc)
265        t_0 = time.time()
266        out, cov_x, info, mesg, success = optimize.leastsq(self.residuals, p, full_output=1, warning=True)
267       
268        # Compute chi^2
269        res = self.residuals(out)
270        chisqr = 0
271        for i in range(len(res)):
272            chisqr += res[i]
273       
274        self.chi2 = chisqr
275
276        # Store computation time
277        self.elapsed = time.time() - t_0
278       
279        return out, cov_x
280   
281    def pr_fit(self, nfunc=5):
282        """
283            This is a direct fit to a given P(r). It assumes that the y data
284            is set to some P(r) distribution that we are trying to reproduce
285            with a set of base functions.
286           
287            This method is provided as a test.
288        """
289        from scipy import optimize
290       
291        # First, check that the current data is valid
292        if self.is_valid()<=0:
293            raise RuntimeError, "Invertor.invert: Data arrays are of different length"
294       
295        p = numpy.ones(nfunc)
296        t_0 = time.time()
297        out, cov_x, info, mesg, success = optimize.leastsq(self.pr_residuals, p, full_output=1, warning=True)
298       
299        # Compute chi^2
300        res = self.pr_residuals(out)
301        chisqr = 0
302        for i in range(len(res)):
303            chisqr += res[i]
304       
305        self.chisqr = chisqr
306       
307        # Store computation time
308        self.elapsed = time.time() - t_0
309
310        return out, cov_x
311   
312    def pr_err(self, c, c_cov, r):
313        """   
314            Returns the value of P(r) for a given r, and base function
315            coefficients, with error.
316           
317            @param c: base function coefficients
318            @param c_cov: covariance matrice of the base function coefficients
319            @param r: r-value to evaluate P(r) at
320            @return: P(r)
321        """
322        return self.get_pr_err(c, c_cov, r)
323       
324    def _accept_q(self, q):
325        """
326            Check q-value against user-defined range
327        """
328        if not self.q_min==None and q<self.q_min:
329            return False
330        if not self.q_max==None and q>self.q_max:
331            return False
332        return True
333       
334    def lstsq(self, nfunc=5, nr=20):
335        """
336            The problem is solved by posing the problem as  Ax = b,
337            where x is the set of coefficients we are looking for.
338           
339            Npts is the number of points.
340           
341            In the following i refers to the ith base function coefficient.
342            The matrix has its entries j in its first Npts rows set to
343                A[i][j] = (Fourier transformed base function for point j)
344               
345            We them choose a number of r-points, n_r, to evaluate the second
346            derivative of P(r) at. This is used as our regularization term.
347            For a vector r of length n_r, the following n_r rows are set to
348                A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
349               
350            The vector b has its first Npts entries set to
351                b[j] = (I(q) observed for point j)
352               
353            The following n_r entries are set to zero.
354           
355            The result is found by using scipy.linalg.basic.lstsq to invert
356            the matrix and find the coefficients x.
357           
358            @param nfunc: number of base functions to use.
359            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
360
361            If the result does not allow us to compute the covariance matrix,
362            a matrix filled with zeros will be returned.
363
364        """
365        # Note: To make sure an array is contiguous:
366        # blah = numpy.ascontiguousarray(blah_original)
367        # ... before passing it to C
368       
369        if self.is_valid()<0:
370            raise RuntimeError, "Invertor: invalid data; incompatible data lengths."
371       
372        self.nfunc = nfunc
373        # a -- An M x N matrix.
374        # b -- An M x nrhs matrix or M vector.
375        npts = len(self.x)
376        nq   = nr
377        sqrt_alpha = math.sqrt(math.fabs(self.alpha))
378        if sqrt_alpha<0.0:
379            nq = 0
380
381        # If we need to fit the background, add a term
382        if self.has_bck==True:
383            nfunc_0 = nfunc
384            nfunc += 1
385
386        a = numpy.zeros([npts+nq, nfunc])
387        b = numpy.zeros(npts+nq)
388        err = numpy.zeros([nfunc, nfunc])
389       
390        # Construct the a matrix and b vector that represent the problem
391        t_0 = time.time()
392        self._get_matrix(nfunc, nq, a, b)
393             
394        # Perform the inversion (least square fit)
395        c, chi2, rank, n = lstsq(a, b)
396        # Sanity check
397        try:
398            float(chi2)
399        except:
400            chi2 = -1.0
401        self.chi2 = chi2
402               
403        inv_cov = numpy.zeros([nfunc,nfunc])
404        # Get the covariance matrix, defined as inv_cov = a_transposed * a
405        self._get_invcov_matrix(nfunc, nr, a, inv_cov)
406                   
407        # Compute the reg term size for the output
408        sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
409                   
410        if math.fabs(self.alpha)>0:
411            new_alpha = sum_sig/(sum_reg/self.alpha)
412        else:
413            new_alpha = 0.0
414        self.suggested_alpha = new_alpha
415       
416        try:
417            cov = numpy.linalg.pinv(inv_cov)
418            err = math.fabs(chi2/float(npts-nfunc)) * cov
419        except:
420            # We were not able to estimate the errors
421            # Return an empty error matrix
422            pass
423           
424        # Keep a copy of the last output
425        if self.has_bck==False:
426            self.background = 0
427            self.out = c
428            self.cov = err
429        else:
430            self.background = c[0]
431           
432            err_0 = numpy.zeros([nfunc, nfunc])
433            c_0 = numpy.zeros(nfunc)
434           
435            for i in range(nfunc_0):
436                c_0[i] = c[i+1]
437                for j in range(nfunc_0):
438                    err_0[i][j] = err[i+1][j+1]
439                   
440            self.out = c_0
441            self.cov = err_0
442           
443        return self.out, self.cov
444       
445    def estimate_numterms(self, isquit_func=None):
446        """
447            Returns a reasonable guess for the
448            number of terms
449            @param isquit_func: reference to thread function to call to
450                                check whether the computation needs to
451                                be stopped.
452           
453            @return: number of terms, alpha, message
454        """
455        from num_term import Num_terms
456        estimator = Num_terms(self.clone())
457        try:
458            return estimator.num_terms(isquit_func)
459        except:
460            # If we fail, estimate alpha and return the default
461            # number of terms
462            best_alpha, message, elapsed =self.estimate_alpha(self.nfunc)
463            return self.nfunc, best_alpha, "Could not estimate number of terms"
464                   
465    def estimate_alpha(self, nfunc):
466        """
467            Returns a reasonable guess for the
468            regularization constant alpha
469           
470            @param nfunc: number of terms to use in the expansion.
471            @return: alpha, message, elapsed
472           
473            where alpha is the estimate for alpha,
474            message is a message for the user,
475            elapsed is the computation time
476        """
477        import time
478        try:           
479            pr = self.clone()
480           
481            # T_0 for computation time
482            starttime = time.time()
483            elapsed = 0
484           
485            # If the current alpha is zero, try
486            # another value
487            if pr.alpha<=0:
488                pr.alpha = 0.0001
489                 
490            # Perform inversion to find the largest alpha
491            out, cov = pr.invert(nfunc)
492            elapsed = time.time()-starttime
493            initial_alpha = pr.alpha
494            initial_peaks = pr.get_peaks(out)
495   
496            # Try the inversion with the estimated alpha
497            pr.alpha = pr.suggested_alpha
498            out, cov = pr.invert(nfunc)
499   
500            npeaks = pr.get_peaks(out)
501            # if more than one peak to start with
502            # just return the estimate
503            if npeaks>1:
504                #message = "Your P(r) is not smooth, please check your inversion parameters"
505                message = None
506                return pr.suggested_alpha, message, elapsed
507            else:
508               
509                # Look at smaller values
510                # We assume that for the suggested alpha, we have 1 peak
511                # if not, send a message to change parameters
512                alpha = pr.suggested_alpha
513                best_alpha = pr.suggested_alpha
514                found = False
515                for i in range(10):
516                    pr.alpha = (0.33)**(i+1)*alpha
517                    out, cov = pr.invert(nfunc)
518                   
519                    peaks = pr.get_peaks(out)
520                    if peaks>1:
521                        found = True
522                        break
523                    best_alpha = pr.alpha
524                   
525                # If we didn't find a turning point for alpha and
526                # the initial alpha already had only one peak,
527                # just return that
528                if not found and initial_peaks==1 and initial_alpha<best_alpha:
529                    best_alpha = initial_alpha
530                   
531                # Check whether the size makes sense
532                message=''
533               
534                if not found:
535                    message = "None"
536                elif best_alpha>=0.5*pr.suggested_alpha:
537                    # best alpha is too big, return a
538                    # reasonable value
539                    message  = "The estimated alpha for your system is too large. "
540                    message += "Try increasing your maximum distance."
541               
542                return best_alpha, message, elapsed
543   
544        except:
545            message = "Invertor.estimate_alpha: %s" % sys.exc_value
546            return 0, message, elapsed
547   
548       
549    def to_file(self, path, npts=100):
550        """
551            Save the state to a file that will be readable
552            by SliceView.
553            @param path: path of the file to write
554            @param npts: number of P(r) points to be written
555        """
556        import pylab
557       
558        file = open(path, 'w')
559        file.write("#d_max=%g\n" % self.d_max)
560        file.write("#nfunc=%g\n" % self.nfunc)
561        file.write("#alpha=%g\n" % self.alpha)
562        file.write("#chi2=%g\n" % self.chi2)
563        file.write("#elapsed=%g\n" % self.elapsed)
564        file.write("#qmin=%s\n" % str(self.q_min))
565        file.write("#qmax=%s\n" % str(self.q_max))
566        file.write("#slit_height=%g\n" % self.slit_height)
567        file.write("#slit_width=%g\n" % self.slit_width)
568        file.write("#background=%g\n" % self.background)
569        if self.has_bck==True:
570            file.write("#has_bck=1\n")
571        else:
572            file.write("#has_bck=0\n")
573        file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
574        if not self.out==None:
575            if len(self.out)==len(self.cov):
576                for i in range(len(self.out)):
577                    file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]), str(self.cov[i][i])))
578        file.write("<r>  <Pr>  <dPr>\n")
579        r = pylab.arange(0.0, self.d_max, self.d_max/npts)
580       
581        for r_i in r:
582            (value, err) = self.pr_err(self.out, self.cov, r_i)
583            file.write("%g  %g  %g\n" % (r_i, value, err))
584   
585        file.close()
586     
587       
588    def from_file(self, path):
589        """
590            Load the state of the Invertor from a file,
591            to be able to generate P(r) from a set of
592            parameters.
593            @param path: path of the file to load
594        """
595        import os
596        import re
597        if os.path.isfile(path):
598            try:
599                fd = open(path, 'r')
600               
601                buff    = fd.read()
602                lines   = buff.split('\n')
603                for line in lines:
604                    if line.startswith('#d_max='):
605                        toks = line.split('=')
606                        self.d_max = float(toks[1])
607                    elif line.startswith('#nfunc='):
608                        toks = line.split('=')
609                        self.nfunc = int(toks[1])
610                        self.out = numpy.zeros(self.nfunc)
611                        self.cov = numpy.zeros([self.nfunc, self.nfunc])
612                    elif line.startswith('#alpha='):
613                        toks = line.split('=')
614                        self.alpha = float(toks[1])
615                    elif line.startswith('#chi2='):
616                        toks = line.split('=')
617                        self.chi2 = float(toks[1])
618                    elif line.startswith('#elapsed='):
619                        toks = line.split('=')
620                        self.elapsed = float(toks[1])
621                    elif line.startswith('#alpha_estimate='):
622                        toks = line.split('=')
623                        self.suggested_alpha = float(toks[1])
624                    elif line.startswith('#qmin='):
625                        toks = line.split('=')
626                        try:
627                            self.q_min = float(toks[1])
628                        except:
629                            self.q_min = None
630                    elif line.startswith('#qmax='):
631                        toks = line.split('=')
632                        try:
633                            self.q_max = float(toks[1])
634                        except:
635                            self.q_max = None
636                    elif line.startswith('#slit_height='):
637                        toks = line.split('=')
638                        self.slit_height = float(toks[1])
639                    elif line.startswith('#slit_width='):
640                        toks = line.split('=')
641                        self.slit_width = float(toks[1])
642                    elif line.startswith('#background='):
643                        toks = line.split('=')
644                        self.background = float(toks[1])
645                    elif line.startswith('#has_bck='):
646                        toks = line.split('=')
647                        if int(toks[1])==1:
648                            self.has_bck=True
649                        else:
650                            self.has_bck=False
651           
652                    # Now read in the parameters
653                    elif line.startswith('#C_'):
654                        toks = line.split('=')
655                        p = re.compile('#C_([0-9]+)')
656                        m = p.search(toks[0])
657                        toks2 = toks[1].split('+-')
658                        i = int(m.group(1))
659                        self.out[i] = float(toks2[0])
660                       
661                        self.cov[i][i] = float(toks2[1])                       
662           
663            except:
664                raise RuntimeError, "Invertor.from_file: corrupted file\n%s" % sys.exc_value
665        else:
666            raise RuntimeError, "Invertor.from_file: '%s' is not a file" % str(path) 
667       
668       
669   
670   
671if __name__ == "__main__":
672    o = Invertor()
673
674   
675   
676   
677   
Note: See TracBrowser for help on using the repository browser.