source: sasview/pr_inversion/invertor.py @ 4cabee7

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalccostrafo411magnetic_scattrelease-4.1.1release-4.1.2release-4.2.2release_4.0.1ticket-1009ticket-1094-headlessticket-1242-2d-resolutionticket-1243ticket-1249ticket885unittest-saveload
Last change on this file since 4cabee7 was 97d69d9, checked in by Mathieu Doucet <doucetm@…>, 15 years ago

pr_inversion: get rid of unnecessary imports

  • Property mode set to 100644
File size: 25.6 KB
Line 
1"""
2    Module to perform P(r) inversion.
3    The module contains the Invertor class.
4"""
5from sans.pr.core.pr_inversion import Cinvertor
6import numpy
7import sys
8import math, time
9from numpy.linalg import lstsq
10
11def help():
12    """
13        Provide general online help text
14        Future work: extend this function to allow topic selection
15    """
16    info_txt  = "The inversion approach is based on Moore, J. Appl. Cryst. (1980) 13, 168-175.\n\n"
17    info_txt += "P(r) is set to be equal to an expansion of base functions of the type "
18    info_txt += "phi_n(r) = 2*r*sin(pi*n*r/D_max). The coefficient of each base functions "
19    info_txt += "in the expansion is found by performing a least square fit with the "
20    info_txt += "following fit function:\n\n"
21    info_txt += "chi**2 = sum_i[ I_meas(q_i) - I_th(q_i) ]**2/error**2 + Reg_term\n\n"
22    info_txt += "where I_meas(q) is the measured scattering intensity and I_th(q) is "
23    info_txt += "the prediction from the Fourier transform of the P(r) expansion. "
24    info_txt += "The Reg_term term is a regularization term set to the second derivative "
25    info_txt += "d**2P(r)/dr**2 integrated over r. It is used to produce a smooth P(r) output.\n\n"
26    info_txt += "The following are user inputs:\n\n"
27    info_txt += "   - Number of terms: the number of base functions in the P(r) expansion.\n\n"
28    info_txt += "   - Regularization constant: a multiplicative constant to set the size of "
29    info_txt += "the regularization term.\n\n"
30    info_txt += "   - Maximum distance: the maximum distance between any two points in the system.\n"
31     
32    return info_txt
33   
34
35class Invertor(Cinvertor):
36    """
37        Invertor class to perform P(r) inversion
38       
39        The problem is solved by posing the problem as  Ax = b,
40        where x is the set of coefficients we are looking for.
41       
42        Npts is the number of points.
43       
44        In the following i refers to the ith base function coefficient.
45        The matrix has its entries j in its first Npts rows set to
46            A[j][i] = (Fourier transformed base function for point j)
47           
48        We them choose a number of r-points, n_r, to evaluate the second
49        derivative of P(r) at. This is used as our regularization term.
50        For a vector r of length n_r, the following n_r rows are set to
51            A[j+Npts][i] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
52           
53        The vector b has its first Npts entries set to
54            b[j] = (I(q) observed for point j)
55           
56        The following n_r entries are set to zero.
57       
58        The result is found by using scipy.linalg.basic.lstsq to invert
59        the matrix and find the coefficients x.
60       
61        Methods inherited from Cinvertor:
62        - get_peaks(pars): returns the number of P(r) peaks
63        - oscillations(pars): returns the oscillation parameters for the output P(r)
64        - get_positive(pars): returns the fraction of P(r) that is above zero
65        - get_pos_err(pars): returns the fraction of P(r) that is 1-sigma above zero
66    """
67    ## Chisqr of the last computation
68    chi2  = 0
69    ## Time elapsed for last computation
70    elapsed = 0
71    ## Alpha to get the reg term the same size as the signal
72    suggested_alpha = 0
73    ## Last number of base functions used
74    nfunc = 10
75    ## Last output values
76    out = None
77    ## Last errors on output values
78    cov = None
79    ## Background value
80    background = 0
81    ## Information dictionary for application use
82    info = {}
83   
84   
85    def __init__(self):
86        Cinvertor.__init__(self)
87       
88    def __setattr__(self, name, value):
89        """
90            Set the value of an attribute.
91            Access the parent class methods for
92            x, y, err, d_max, q_min, q_max and alpha
93        """
94        if   name=='x':
95            if 0.0 in value:
96                raise ValueError, "Invertor: one of your q-values is zero. Delete that entry before proceeding"
97            return self.set_x(value)
98        elif name=='y':
99            return self.set_y(value)
100        elif name=='err':
101            value2 = abs(value)
102            return self.set_err(value2)
103        elif name=='d_max':
104            return self.set_dmax(value)
105        elif name=='q_min':
106            if value==None:
107                return self.set_qmin(-1.0)
108            return self.set_qmin(value)
109        elif name=='q_max':
110            if value==None:
111                return self.set_qmax(-1.0)
112            return self.set_qmax(value)
113        elif name=='alpha':
114            return self.set_alpha(value)
115        elif name=='slit_height':
116            return self.set_slit_height(value)
117        elif name=='slit_width':
118            return self.set_slit_width(value)
119        elif name=='has_bck':
120            if value==True:
121                return self.set_has_bck(1)
122            elif value==False:
123                return self.set_has_bck(0)
124            else:
125                raise ValueError, "Invertor: has_bck can only be True or False"
126           
127        return Cinvertor.__setattr__(self, name, value)
128   
129    def __getattr__(self, name):
130        """
131           Return the value of an attribute
132        """
133        import numpy
134        if   name=='x':
135            out = numpy.ones(self.get_nx())
136            self.get_x(out)
137            return out
138        elif name=='y':
139            out = numpy.ones(self.get_ny())
140            self.get_y(out)
141            return out
142        elif name=='err':
143            out = numpy.ones(self.get_nerr())
144            self.get_err(out)
145            return out
146        elif name=='d_max':
147            return self.get_dmax()
148        elif name=='q_min':
149            qmin = self.get_qmin()
150            if qmin<0:
151                return None
152            return qmin
153        elif name=='q_max':
154            qmax = self.get_qmax()
155            if qmax<0:
156                return None
157            return qmax
158        elif name=='alpha':
159            return self.get_alpha()
160        elif name=='slit_height':
161            return self.get_slit_height()
162        elif name=='slit_width':
163            return self.get_slit_width()
164        elif name=='has_bck':
165            value = self.get_has_bck()
166            if value==1:
167                return True
168            else:
169                return False
170        elif name in self.__dict__:
171            return self.__dict__[name]
172        return None
173   
174    def clone(self):
175        """
176            Return a clone of this instance
177        """
178        import copy
179       
180        invertor = Invertor()
181        invertor.chi2    = self.chi2
182        invertor.elapsed = self.elapsed
183        invertor.nfunc   = self.nfunc
184        invertor.alpha   = self.alpha
185        invertor.d_max   = self.d_max
186        invertor.q_min   = self.q_min
187        invertor.q_max   = self.q_max
188       
189        invertor.x = self.x
190        invertor.y = self.y
191        invertor.err = self.err
192        invertor.has_bck = self.has_bck
193        invertor.slit_height = self.slit_height
194        invertor.slit_width  = self.slit_width
195       
196        invertor.info = copy.deepcopy(self.info)
197       
198        return invertor
199   
200    def invert(self, nfunc=10, nr=20):
201        """
202            Perform inversion to P(r)
203           
204            The problem is solved by posing the problem as  Ax = b,
205            where x is the set of coefficients we are looking for.
206           
207            Npts is the number of points.
208           
209            In the following i refers to the ith base function coefficient.
210            The matrix has its entries j in its first Npts rows set to
211                A[i][j] = (Fourier transformed base function for point j)
212               
213            We them choose a number of r-points, n_r, to evaluate the second
214            derivative of P(r) at. This is used as our regularization term.
215            For a vector r of length n_r, the following n_r rows are set to
216                A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
217               
218            The vector b has its first Npts entries set to
219                b[j] = (I(q) observed for point j)
220               
221            The following n_r entries are set to zero.
222           
223            The result is found by using scipy.linalg.basic.lstsq to invert
224            the matrix and find the coefficients x.
225           
226            @param nfunc: number of base functions to use.
227            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
228            @return: c_out, c_cov - the coefficients with covariance matrix
229        """
230        # Reset the background value before proceeding
231        self.background = 0.0
232        return self.lstsq(nfunc, nr=nr)
233   
234    def iq(self, out, q):
235        """
236            Function to call to evaluate the scattering intensity
237            @param args: c-parameters, and q
238            @return: I(q)
239        """
240        return Cinvertor.iq(self, out, q)+self.background
241   
242    def invert_optimize(self, nfunc=10, nr=20):
243        """
244            Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
245           
246            This probably produce more reliable results, but is much slower.
247            The minimization function is set to sum_i[ (I_obs(q_i) - I_theo(q_i))/err**2 ] + alpha * reg_term,
248            where the reg_term is given by Svergun: it is the integral of the square of the first derivative
249            of P(r), d(P(r))/dr, integrated over the full range of r.
250           
251            @param nfunc: number of base functions to use.
252            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
253            @return: c_out, c_cov - the coefficients with covariance matrix
254        """
255       
256        from scipy import optimize
257        import time
258       
259        self.nfunc = nfunc
260        # First, check that the current data is valid
261        if self.is_valid()<=0:
262            raise RuntimeError, "Invertor.invert: Data array are of different length"
263       
264        p = numpy.ones(nfunc)
265        t_0 = time.time()
266        out, cov_x, info, mesg, success = optimize.leastsq(self.residuals, p, full_output=1, warning=True)
267       
268        # Compute chi^2
269        res = self.residuals(out)
270        chisqr = 0
271        for i in range(len(res)):
272            chisqr += res[i]
273       
274        self.chi2 = chisqr
275
276        # Store computation time
277        self.elapsed = time.time() - t_0
278       
279        return out, cov_x
280   
281    def pr_fit(self, nfunc=5):
282        """
283            This is a direct fit to a given P(r). It assumes that the y data
284            is set to some P(r) distribution that we are trying to reproduce
285            with a set of base functions.
286           
287            This method is provided as a test.
288        """
289        from scipy import optimize
290       
291        # First, check that the current data is valid
292        if self.is_valid()<=0:
293            raise RuntimeError, "Invertor.invert: Data arrays are of different length"
294       
295        p = numpy.ones(nfunc)
296        t_0 = time.time()
297        out, cov_x, info, mesg, success = optimize.leastsq(self.pr_residuals, p, full_output=1, warning=True)
298       
299        # Compute chi^2
300        res = self.pr_residuals(out)
301        chisqr = 0
302        for i in range(len(res)):
303            chisqr += res[i]
304       
305        self.chisqr = chisqr
306       
307        # Store computation time
308        self.elapsed = time.time() - t_0
309
310        return out, cov_x
311   
312    def pr_err(self, c, c_cov, r):
313        """   
314            Returns the value of P(r) for a given r, and base function
315            coefficients, with error.
316           
317            @param c: base function coefficients
318            @param c_cov: covariance matrice of the base function coefficients
319            @param r: r-value to evaluate P(r) at
320            @return: P(r)
321        """
322        return self.get_pr_err(c, c_cov, r)
323       
324    def _accept_q(self, q):
325        """
326            Check q-value against user-defined range
327        """
328        if not self.q_min==None and q<self.q_min:
329            return False
330        if not self.q_max==None and q>self.q_max:
331            return False
332        return True
333       
334    def lstsq(self, nfunc=5, nr=20):
335        """
336            The problem is solved by posing the problem as  Ax = b,
337            where x is the set of coefficients we are looking for.
338           
339            Npts is the number of points.
340           
341            In the following i refers to the ith base function coefficient.
342            The matrix has its entries j in its first Npts rows set to
343                A[i][j] = (Fourier transformed base function for point j)
344               
345            We them choose a number of r-points, n_r, to evaluate the second
346            derivative of P(r) at. This is used as our regularization term.
347            For a vector r of length n_r, the following n_r rows are set to
348                A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
349               
350            The vector b has its first Npts entries set to
351                b[j] = (I(q) observed for point j)
352               
353            The following n_r entries are set to zero.
354           
355            The result is found by using scipy.linalg.basic.lstsq to invert
356            the matrix and find the coefficients x.
357           
358            @param nfunc: number of base functions to use.
359            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
360
361            If the result does not allow us to compute the covariance matrix,
362            a matrix filled with zeros will be returned.
363
364        """
365        # Note: To make sure an array is contiguous:
366        # blah = numpy.ascontiguousarray(blah_original)
367        # ... before passing it to C
368       
369        if self.is_valid()<0:
370            raise RuntimeError, "Invertor: invalid data; incompatible data lengths."
371       
372        self.nfunc = nfunc
373        # a -- An M x N matrix.
374        # b -- An M x nrhs matrix or M vector.
375        npts = len(self.x)
376        nq   = nr
377        sqrt_alpha = math.sqrt(math.fabs(self.alpha))
378        if sqrt_alpha<0.0:
379            nq = 0
380
381        # If we need to fit the background, add a term
382        if self.has_bck==True:
383            nfunc_0 = nfunc
384            nfunc += 1
385
386        a = numpy.zeros([npts+nq, nfunc])
387        b = numpy.zeros(npts+nq)
388        err = numpy.zeros([nfunc, nfunc])
389       
390        # Construct the a matrix and b vector that represent the problem
391        t_0 = time.time()
392        self._get_matrix(nfunc, nq, a, b)
393             
394        # Perform the inversion (least square fit)
395        c, chi2, rank, n = lstsq(a, b)
396        # Sanity check
397        try:
398            float(chi2)
399        except:
400            chi2 = -1.0
401        self.chi2 = chi2
402               
403        inv_cov = numpy.zeros([nfunc,nfunc])
404        # Get the covariance matrix, defined as inv_cov = a_transposed * a
405        self._get_invcov_matrix(nfunc, nr, a, inv_cov)
406                   
407        # Compute the reg term size for the output
408        sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
409                   
410        if math.fabs(self.alpha)>0:
411            new_alpha = sum_sig/(sum_reg/self.alpha)
412        else:
413            new_alpha = 0.0
414        self.suggested_alpha = new_alpha
415       
416        try:
417            cov = numpy.linalg.pinv(inv_cov)
418            err = math.fabs(chi2/float(npts-nfunc)) * cov
419        except:
420            # We were not able to estimate the errors
421            # Return an empty error matrix
422            pass
423           
424        # Keep a copy of the last output
425        if self.has_bck==False:
426            self.background = 0
427            self.out = c
428            self.cov = err
429        else:
430            self.background = c[0]
431           
432            err_0 = numpy.zeros([nfunc, nfunc])
433            c_0 = numpy.zeros(nfunc)
434           
435            for i in range(nfunc_0):
436                c_0[i] = c[i+1]
437                for j in range(nfunc_0):
438                    err_0[i][j] = err[i+1][j+1]
439                   
440            self.out = c_0
441            self.cov = err_0
442           
443        return self.out, self.cov
444       
445    def estimate_numterms(self, isquit_func=None):
446        """
447            Returns a reasonable guess for the
448            number of terms
449            @param isquit_func: reference to thread function to call to
450                                check whether the computation needs to
451                                be stopped.
452           
453            @return: number of terms, alpha, message
454        """
455        from num_term import Num_terms
456        estimator = Num_terms(self.clone())
457        try:
458            return estimator.num_terms(isquit_func)
459        except:
460            # If we fail, estimate alpha and return the default
461            # number of terms
462            best_alpha, message, elapsed =self.estimate_alpha(self.nfunc)
463            return self.nfunc, best_alpha, "Could not estimate number of terms"
464                   
465    def estimate_alpha(self, nfunc):
466        """
467            Returns a reasonable guess for the
468            regularization constant alpha
469           
470            @param nfunc: number of terms to use in the expansion.
471            @return: alpha, message, elapsed
472           
473            where alpha is the estimate for alpha,
474            message is a message for the user,
475            elapsed is the computation time
476        """
477        import time
478        try:           
479            pr = self.clone()
480           
481            # T_0 for computation time
482            starttime = time.time()
483            elapsed = 0
484           
485            # If the current alpha is zero, try
486            # another value
487            if pr.alpha<=0:
488                pr.alpha = 0.0001
489                 
490            # Perform inversion to find the largest alpha
491            out, cov = pr.invert(nfunc)
492            elapsed = time.time()-starttime
493            initial_alpha = pr.alpha
494            initial_peaks = pr.get_peaks(out)
495   
496            # Try the inversion with the estimated alpha
497            pr.alpha = pr.suggested_alpha
498            out, cov = pr.invert(nfunc)
499   
500            npeaks = pr.get_peaks(out)
501            # if more than one peak to start with
502            # just return the estimate
503            if npeaks>1:
504                #message = "Your P(r) is not smooth, please check your inversion parameters"
505                message = None
506                return pr.suggested_alpha, message, elapsed
507            else:
508               
509                # Look at smaller values
510                # We assume that for the suggested alpha, we have 1 peak
511                # if not, send a message to change parameters
512                alpha = pr.suggested_alpha
513                best_alpha = pr.suggested_alpha
514                found = False
515                for i in range(10):
516                    pr.alpha = (0.33)**(i+1)*alpha
517                    out, cov = pr.invert(nfunc)
518                   
519                    peaks = pr.get_peaks(out)
520                    if peaks>1:
521                        found = True
522                        break
523                    best_alpha = pr.alpha
524                   
525                # If we didn't find a turning point for alpha and
526                # the initial alpha already had only one peak,
527                # just return that
528                if not found and initial_peaks==1 and initial_alpha<best_alpha:
529                    best_alpha = initial_alpha
530                   
531                # Check whether the size makes sense
532                message=''
533               
534                if not found:
535                    message = None
536                elif best_alpha>=0.5*pr.suggested_alpha:
537                    # best alpha is too big, return a
538                    # reasonable value
539                    message  = "The estimated alpha for your system is too large. "
540                    message += "Try increasing your maximum distance."
541               
542                return best_alpha, message, elapsed
543   
544        except:
545            message = "Invertor.estimate_alpha: %s" % sys.exc_value
546            return 0, message, elapsed
547   
548       
549    def to_file(self, path, npts=100):
550        """
551            Save the state to a file that will be readable
552            by SliceView.
553            @param path: path of the file to write
554            @param npts: number of P(r) points to be written
555        """
556        file = open(path, 'w')
557        file.write("#d_max=%g\n" % self.d_max)
558        file.write("#nfunc=%g\n" % self.nfunc)
559        file.write("#alpha=%g\n" % self.alpha)
560        file.write("#chi2=%g\n" % self.chi2)
561        file.write("#elapsed=%g\n" % self.elapsed)
562        file.write("#qmin=%s\n" % str(self.q_min))
563        file.write("#qmax=%s\n" % str(self.q_max))
564        file.write("#slit_height=%g\n" % self.slit_height)
565        file.write("#slit_width=%g\n" % self.slit_width)
566        file.write("#background=%g\n" % self.background)
567        if self.has_bck==True:
568            file.write("#has_bck=1\n")
569        else:
570            file.write("#has_bck=0\n")
571        file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
572        if not self.out==None:
573            if len(self.out)==len(self.cov):
574                for i in range(len(self.out)):
575                    file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]), str(self.cov[i][i])))
576        file.write("<r>  <Pr>  <dPr>\n")
577        r = numpy.arange(0.0, self.d_max, self.d_max/npts)
578       
579        for r_i in r:
580            (value, err) = self.pr_err(self.out, self.cov, r_i)
581            file.write("%g  %g  %g\n" % (r_i, value, err))
582   
583        file.close()
584     
585       
586    def from_file(self, path):
587        """
588            Load the state of the Invertor from a file,
589            to be able to generate P(r) from a set of
590            parameters.
591            @param path: path of the file to load
592        """
593        import os
594        import re
595        if os.path.isfile(path):
596            try:
597                fd = open(path, 'r')
598               
599                buff    = fd.read()
600                lines   = buff.split('\n')
601                for line in lines:
602                    if line.startswith('#d_max='):
603                        toks = line.split('=')
604                        self.d_max = float(toks[1])
605                    elif line.startswith('#nfunc='):
606                        toks = line.split('=')
607                        self.nfunc = int(toks[1])
608                        self.out = numpy.zeros(self.nfunc)
609                        self.cov = numpy.zeros([self.nfunc, self.nfunc])
610                    elif line.startswith('#alpha='):
611                        toks = line.split('=')
612                        self.alpha = float(toks[1])
613                    elif line.startswith('#chi2='):
614                        toks = line.split('=')
615                        self.chi2 = float(toks[1])
616                    elif line.startswith('#elapsed='):
617                        toks = line.split('=')
618                        self.elapsed = float(toks[1])
619                    elif line.startswith('#alpha_estimate='):
620                        toks = line.split('=')
621                        self.suggested_alpha = float(toks[1])
622                    elif line.startswith('#qmin='):
623                        toks = line.split('=')
624                        try:
625                            self.q_min = float(toks[1])
626                        except:
627                            self.q_min = None
628                    elif line.startswith('#qmax='):
629                        toks = line.split('=')
630                        try:
631                            self.q_max = float(toks[1])
632                        except:
633                            self.q_max = None
634                    elif line.startswith('#slit_height='):
635                        toks = line.split('=')
636                        self.slit_height = float(toks[1])
637                    elif line.startswith('#slit_width='):
638                        toks = line.split('=')
639                        self.slit_width = float(toks[1])
640                    elif line.startswith('#background='):
641                        toks = line.split('=')
642                        self.background = float(toks[1])
643                    elif line.startswith('#has_bck='):
644                        toks = line.split('=')
645                        if int(toks[1])==1:
646                            self.has_bck=True
647                        else:
648                            self.has_bck=False
649           
650                    # Now read in the parameters
651                    elif line.startswith('#C_'):
652                        toks = line.split('=')
653                        p = re.compile('#C_([0-9]+)')
654                        m = p.search(toks[0])
655                        toks2 = toks[1].split('+-')
656                        i = int(m.group(1))
657                        self.out[i] = float(toks2[0])
658                       
659                        self.cov[i][i] = float(toks2[1])                       
660           
661            except:
662                raise RuntimeError, "Invertor.from_file: corrupted file\n%s" % sys.exc_value
663        else:
664            raise RuntimeError, "Invertor.from_file: '%s' is not a file" % str(path) 
665       
666       
667   
668   
669if __name__ == "__main__":
670    o = Invertor()
671
672   
673   
674   
675   
Note: See TracBrowser for help on using the repository browser.