source: sasview/pr_inversion/invertor.py @ 792db7d5

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalccostrafo411magnetic_scattrelease-4.1.1release-4.1.2release-4.2.2release_4.0.1ticket-1009ticket-1094-headlessticket-1242-2d-resolutionticket-1243ticket-1249ticket885unittest-saveload
Last change on this file since 792db7d5 was 7578961, checked in by Mathieu Doucet <doucetm@…>, 16 years ago

Small improvements

  • Property mode set to 100644
File size: 25.7 KB
Line 
1"""
2    Module to perform P(r) inversion.
3    The module contains the Invertor class.
4"""
5from sans.pr.core.pr_inversion import Cinvertor
6import numpy
7import sys
8import math, time
9from scipy.linalg.basic import lstsq
10
11def help():
12    """
13        Provide general online help text
14        Future work: extend this function to allow topic selection
15    """
16    info_txt  = "The inversion approach is based on Moore, J. Appl. Cryst. (1980) 13, 168-175.\n\n"
17    info_txt += "P(r) is set to be equal to an expansion of base functions of the type "
18    info_txt += "phi_n(r) = 2*r*sin(pi*n*r/D_max). The coefficient of each base functions "
19    info_txt += "in the expansion is found by performing a least square fit with the "
20    info_txt += "following fit function:\n\n"
21    info_txt += "chi**2 = sum_i[ I_meas(q_i) - I_th(q_i) ]**2/error**2 + Reg_term\n\n"
22    info_txt += "where I_meas(q) is the measured scattering intensity and I_th(q) is "
23    info_txt += "the prediction from the Fourier transform of the P(r) expansion. "
24    info_txt += "The Reg_term term is a regularization term set to the second derivative "
25    info_txt += "d**2P(r)/dr**2 integrated over r. It is used to produce a smooth P(r) output.\n\n"
26    info_txt += "The following are user inputs:\n\n"
27    info_txt += "   - Number of terms: the number of base functions in the P(r) expansion.\n\n"
28    info_txt += "   - Regularization constant: a multiplicative constant to set the size of "
29    info_txt += "the regularization term.\n\n"
30    info_txt += "   - Maximum distance: the maximum distance between any two points in the system.\n"
31     
32    return info_txt
33   
34
35class Invertor(Cinvertor):
36    """
37        Invertor class to perform P(r) inversion
38       
39        The problem is solved by posing the problem as  Ax = b,
40        where x is the set of coefficients we are looking for.
41       
42        Npts is the number of points.
43       
44        In the following i refers to the ith base function coefficient.
45        The matrix has its entries j in its first Npts rows set to
46            A[j][i] = (Fourier transformed base function for point j)
47           
48        We them choose a number of r-points, n_r, to evaluate the second
49        derivative of P(r) at. This is used as our regularization term.
50        For a vector r of length n_r, the following n_r rows are set to
51            A[j+Npts][i] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
52           
53        The vector b has its first Npts entries set to
54            b[j] = (I(q) observed for point j)
55           
56        The following n_r entries are set to zero.
57       
58        The result is found by using scipy.linalg.basic.lstsq to invert
59        the matrix and find the coefficients x.
60       
61        Methods inherited from Cinvertor:
62        - get_peaks(pars): returns the number of P(r) peaks
63        - oscillations(pars): returns the oscillation parameters for the output P(r)
64        - get_positive(pars): returns the fraction of P(r) that is above zero
65        - get_pos_err(pars): returns the fraction of P(r) that is 1-sigma above zero
66    """
67    #TODO: Allow for slit smearing. Smear each base function once before filling
68    # the A matrix.
69   
70    ## Chisqr of the last computation
71    chi2  = 0
72    ## Time elapsed for last computation
73    elapsed = 0
74    ## Alpha to get the reg term the same size as the signal
75    suggested_alpha = 0
76    ## Last number of base functions used
77    nfunc = 10
78    ## Last output values
79    out = None
80    ## Last errors on output values
81    cov = None
82    ## Flag to allow I(q) data with constant background
83    #has_bck = False
84    ## Background value
85    background = 0
86   
87   
88    def __init__(self):
89        Cinvertor.__init__(self)
90       
91    def __setattr__(self, name, value):
92        """
93            Set the value of an attribute.
94            Access the parent class methods for
95            x, y, err, d_max, q_min, q_max and alpha
96        """
97        if   name=='x':
98            if 0.0 in value:
99                raise ValueError, "Invertor: one of your q-values is zero. Delete that entry before proceeding"
100            return self.set_x(value)
101        elif name=='y':
102            return self.set_y(value)
103        elif name=='err':
104            value2 = abs(value)
105            return self.set_err(value2)
106        elif name=='d_max':
107            return self.set_dmax(value)
108        elif name=='q_min':
109            if value==None:
110                return self.set_qmin(-1.0)
111            return self.set_qmin(value)
112        elif name=='q_max':
113            if value==None:
114                return self.set_qmax(-1.0)
115            return self.set_qmax(value)
116        elif name=='alpha':
117            return self.set_alpha(value)
118        elif name=='slit_height':
119            return self.set_slit_height(value)
120        elif name=='slit_width':
121            return self.set_slit_width(value)
122        elif name=='has_bck':
123            if value==True:
124                return self.set_has_bck(1)
125            elif value==False:
126                return self.set_has_bck(0)
127            else:
128                raise ValueError, "Invertor: has_bck can only be True or False"
129           
130        return Cinvertor.__setattr__(self, name, value)
131   
132    def __getattr__(self, name):
133        """
134           Return the value of an attribute
135           For the moment x, y, err and d_max are write-only
136           TODO: change that!
137        """
138        import numpy
139        if   name=='x':
140            out = numpy.ones(self.get_nx())
141            self.get_x(out)
142            return out
143        elif name=='y':
144            out = numpy.ones(self.get_ny())
145            self.get_y(out)
146            return out
147        elif name=='err':
148            out = numpy.ones(self.get_nerr())
149            self.get_err(out)
150            return out
151        elif name=='d_max':
152            return self.get_dmax()
153        elif name=='q_min':
154            qmin = self.get_qmin()
155            if qmin<0:
156                return None
157            return qmin
158        elif name=='q_max':
159            qmax = self.get_qmax()
160            if qmax<0:
161                return None
162            return qmax
163        elif name=='alpha':
164            return self.get_alpha()
165        elif name=='slit_height':
166            return self.get_slit_height()
167        elif name=='slit_width':
168            return self.get_slit_width()
169        elif name=='has_bck':
170            value = self.get_has_bck()
171            if value==1:
172                return True
173            else:
174                return False
175        elif name in self.__dict__:
176            return self.__dict__[name]
177        return None
178   
179    def clone(self):
180        """
181            Return a clone of this instance
182        """
183        invertor = Invertor()
184        invertor.chi2    = self.chi2
185        invertor.elapsed = self.elapsed
186        invertor.alpha   = self.alpha
187        invertor.d_max   = self.d_max
188        invertor.q_min   = self.q_min
189        invertor.q_max   = self.q_max
190       
191        invertor.x = self.x
192        invertor.y = self.y
193        invertor.err = self.err
194        invertor.has_bck = self.has_bck
195        invertor.slit_height = self.slit_height
196        invertor.slit_width  = self.slit_width
197       
198        return invertor
199   
200    def invert(self, nfunc=10, nr=20):
201        """
202            Perform inversion to P(r)
203           
204            The problem is solved by posing the problem as  Ax = b,
205            where x is the set of coefficients we are looking for.
206           
207            Npts is the number of points.
208           
209            In the following i refers to the ith base function coefficient.
210            The matrix has its entries j in its first Npts rows set to
211                A[i][j] = (Fourier transformed base function for point j)
212               
213            We them choose a number of r-points, n_r, to evaluate the second
214            derivative of P(r) at. This is used as our regularization term.
215            For a vector r of length n_r, the following n_r rows are set to
216                A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
217               
218            The vector b has its first Npts entries set to
219                b[j] = (I(q) observed for point j)
220               
221            The following n_r entries are set to zero.
222           
223            The result is found by using scipy.linalg.basic.lstsq to invert
224            the matrix and find the coefficients x.
225           
226            @param nfunc: number of base functions to use.
227            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
228            @return: c_out, c_cov - the coefficients with covariance matrix
229        """
230        # Reset the background value before proceeding
231        self.background = 0.0
232        return self.lstsq(nfunc, nr=nr)
233   
234    def iq(self, out, q):
235        """
236            Function to call to evaluate the scattering intensity
237            @param args: c-parameters, and q
238            @return: I(q)
239        """
240        return Cinvertor.iq(self, out, q)+self.background
241   
242    def invert_optimize(self, nfunc=10, nr=20):
243        """
244            Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
245           
246            This probably produce more reliable results, but is much slower.
247            The minimization function is set to sum_i[ (I_obs(q_i) - I_theo(q_i))/err**2 ] + alpha * reg_term,
248            where the reg_term is given by Svergun: it is the integral of the square of the first derivative
249            of P(r), d(P(r))/dr, integrated over the full range of r.
250           
251            @param nfunc: number of base functions to use.
252            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
253            @return: c_out, c_cov - the coefficients with covariance matrix
254        """
255       
256        from scipy import optimize
257        import time
258       
259        self.nfunc = nfunc
260        # First, check that the current data is valid
261        if self.is_valid()<=0:
262            raise RuntimeError, "Invertor.invert: Data array are of different length"
263       
264        p = numpy.ones(nfunc)
265        t_0 = time.time()
266        out, cov_x, info, mesg, success = optimize.leastsq(self.residuals, p, full_output=1, warning=True)
267       
268        # Compute chi^2
269        res = self.residuals(out)
270        chisqr = 0
271        for i in range(len(res)):
272            chisqr += res[i]
273       
274        self.chi2 = chisqr
275
276        # Store computation time
277        self.elapsed = time.time() - t_0
278       
279        return out, cov_x
280   
281    def pr_fit(self, nfunc=5):
282        """
283            This is a direct fit to a given P(r). It assumes that the y data
284            is set to some P(r) distribution that we are trying to reproduce
285            with a set of base functions.
286           
287            This method is provided as a test.
288        """
289        from scipy import optimize
290       
291        # First, check that the current data is valid
292        if self.is_valid()<=0:
293            raise RuntimeError, "Invertor.invert: Data arrays are of different length"
294       
295        p = numpy.ones(nfunc)
296        t_0 = time.time()
297        out, cov_x, info, mesg, success = optimize.leastsq(self.pr_residuals, p, full_output=1, warning=True)
298       
299        # Compute chi^2
300        res = self.pr_residuals(out)
301        chisqr = 0
302        for i in range(len(res)):
303            chisqr += res[i]
304       
305        self.chisqr = chisqr
306       
307        # Store computation time
308        self.elapsed = time.time() - t_0
309
310        return out, cov_x
311   
312    def pr_err(self, c, c_cov, r):
313        """   
314            Returns the value of P(r) for a given r, and base function
315            coefficients, with error.
316           
317            @param c: base function coefficients
318            @param c_cov: covariance matrice of the base function coefficients
319            @param r: r-value to evaluate P(r) at
320            @return: P(r)
321        """
322        return self.get_pr_err(c, c_cov, r)
323       
324    def _accept_q(self, q):
325        """
326            Check q-value against user-defined range
327        """
328        if not self.q_min==None and q<self.q_min:
329            return False
330        if not self.q_max==None and q>self.q_max:
331            return False
332        return True
333       
334    def lstsq(self, nfunc=5, nr=20):
335        """
336            The problem is solved by posing the problem as  Ax = b,
337            where x is the set of coefficients we are looking for.
338           
339            Npts is the number of points.
340           
341            In the following i refers to the ith base function coefficient.
342            The matrix has its entries j in its first Npts rows set to
343                A[i][j] = (Fourier transformed base function for point j)
344               
345            We them choose a number of r-points, n_r, to evaluate the second
346            derivative of P(r) at. This is used as our regularization term.
347            For a vector r of length n_r, the following n_r rows are set to
348                A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
349               
350            The vector b has its first Npts entries set to
351                b[j] = (I(q) observed for point j)
352               
353            The following n_r entries are set to zero.
354           
355            The result is found by using scipy.linalg.basic.lstsq to invert
356            the matrix and find the coefficients x.
357           
358            @param nfunc: number of base functions to use.
359            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
360
361            If the result does not allow us to compute the covariance matrix,
362            a matrix filled with zeros will be returned.
363
364        """
365        # Note: To make sure an array is contiguous:
366        # blah = numpy.ascontiguousarray(blah_original)
367        # ... before passing it to C
368       
369        if self.is_valid()<0:
370            raise RuntimeError, "Invertor: invalid data; incompatible data lengths."
371       
372        self.nfunc = nfunc
373        # a -- An M x N matrix.
374        # b -- An M x nrhs matrix or M vector.
375        npts = len(self.x)
376        nq   = nr
377        sqrt_alpha = math.sqrt(math.fabs(self.alpha))
378        if sqrt_alpha<0.0:
379            nq = 0
380
381        # If we need to fit the background, add a term
382        if self.has_bck==True:
383            nfunc_0 = nfunc
384            nfunc += 1
385
386        a = numpy.zeros([npts+nq, nfunc])
387        b = numpy.zeros(npts+nq)
388        err = numpy.zeros([nfunc, nfunc])
389       
390        # Construct the a matrix and b vector that represent the problem
391        t_0 = time.time()
392        self._get_matrix(nfunc, nq, a, b)
393             
394        # Perform the inversion (least square fit)
395        c, chi2, rank, n = lstsq(a, b)
396        # Sanity check
397        try:
398            float(chi2)
399        except:
400            chi2 = -1.0
401        self.chi2 = chi2
402               
403        inv_cov = numpy.zeros([nfunc,nfunc])
404        # Get the covariance matrix, defined as inv_cov = a_transposed * a
405        self._get_invcov_matrix(nfunc, nr, a, inv_cov)
406                   
407        # Compute the reg term size for the output
408        sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
409                   
410        if math.fabs(self.alpha)>0:
411            new_alpha = sum_sig/(sum_reg/self.alpha)
412        else:
413            new_alpha = 0.0
414        self.suggested_alpha = new_alpha
415       
416        try:
417            cov = numpy.linalg.pinv(inv_cov)
418            err = math.fabs(chi2/float(npts-nfunc)) * cov
419        except:
420            # We were not able to estimate the errors
421            # Return an empty error matrix
422            pass
423           
424        # Keep a copy of the last output
425        if self.has_bck==False:
426            self.background = 0
427            self.out = c
428            self.cov = err
429        else:
430            self.background = c[0]
431           
432            err_0 = numpy.zeros([nfunc, nfunc])
433            c_0 = numpy.zeros(nfunc)
434           
435            for i in range(nfunc_0):
436                c_0[i] = c[i+1]
437                for j in range(nfunc_0):
438                    err_0[i][j] = err[i+1][j+1]
439                   
440            self.out = c_0
441            self.cov = err_0
442           
443        return self.out, self.cov
444       
445    def estimate_numterms(self, isquit_func=None):
446        """
447            Returns a reasonable guess for the
448            number of terms
449            @param isquit_func: reference to thread function to call to
450                                check whether the computation needs to
451                                be stopped.
452           
453            @return: number of terms, alpha, message
454        """
455        from num_term import Num_terms
456        estimator = Num_terms(self.clone())
457        try:
458            return estimator.num_terms(isquit_func)
459        except:
460            # If we fail, estimate alpha and return the default
461            # number of terms
462            best_alpha, message, elapsed =self.estimate_alpha(self.nfunc)
463            return self.nfunc, best_alpha, "Could not estimate number of terms"
464                   
465    def estimate_alpha(self, nfunc):
466        """
467            Returns a reasonable guess for the
468            regularization constant alpha
469           
470            @param nfunc: number of terms to use in the expansion.
471            @return: alpha, message, elapsed
472           
473            where alpha is the estimate for alpha,
474            message is a message for the user,
475            elapsed is the computation time
476        """
477        import time
478        try:           
479            pr = self.clone()
480           
481            # T_0 for computation time
482            starttime = time.time()
483            elapsed = 0
484           
485            # If the current alpha is zero, try
486            # another value
487            if pr.alpha<=0:
488                pr.alpha = 0.0001
489                 
490            # Perform inversion to find the largest alpha
491            out, cov = pr.invert(nfunc)
492            elapsed = time.time()-starttime
493            initial_alpha = pr.alpha
494            initial_peaks = pr.get_peaks(out)
495   
496            # Try the inversion with the estimated alpha
497            pr.alpha = pr.suggested_alpha
498            out, cov = pr.invert(nfunc)
499   
500            npeaks = pr.get_peaks(out)
501            # if more than one peak to start with
502            # just return the estimate
503            if npeaks>1:
504                #message = "Your P(r) is not smooth, please check your inversion parameters"
505                message = None
506                return pr.suggested_alpha, message, elapsed
507            else:
508               
509                # Look at smaller values
510                # We assume that for the suggested alpha, we have 1 peak
511                # if not, send a message to change parameters
512                alpha = pr.suggested_alpha
513                best_alpha = pr.suggested_alpha
514                found = False
515                for i in range(10):
516                    pr.alpha = (0.33)**(i+1)*alpha
517                    out, cov = pr.invert(nfunc)
518                   
519                    peaks = pr.get_peaks(out)
520                    if peaks>1:
521                        found = True
522                        break
523                    best_alpha = pr.alpha
524                   
525                # If we didn't find a turning point for alpha and
526                # the initial alpha already had only one peak,
527                # just return that
528                if not found and initial_peaks==1 and initial_alpha<best_alpha:
529                    best_alpha = initial_alpha
530                   
531                # Check whether the size makes sense
532                message=''
533               
534                if not found:
535                    message = "None"
536                elif best_alpha>=0.5*pr.suggested_alpha:
537                    # best alpha is too big, return a
538                    # reasonable value
539                    message  = "The estimated alpha for your system is too large. "
540                    message += "Try increasing your maximum distance."
541               
542                return best_alpha, message, elapsed
543   
544        except:
545            message = "Invertor.estimate_alpha: %s" % sys.exc_value
546            return 0, message, elapsed
547   
548       
549    def to_file(self, path, npts=100):
550        """
551            Save the state to a file that will be readable
552            by SliceView.
553            @param path: path of the file to write
554            @param npts: number of P(r) points to be written
555        """
556        import pylab
557       
558        file = open(path, 'w')
559        file.write("#d_max=%g\n" % self.d_max)
560        file.write("#nfunc=%g\n" % self.nfunc)
561        file.write("#alpha=%g\n" % self.alpha)
562        file.write("#chi2=%g\n" % self.chi2)
563        file.write("#elapsed=%g\n" % self.elapsed)
564        file.write("#qmin=%s\n" % str(self.q_min))
565        file.write("#qmax=%s\n" % str(self.q_max))
566        file.write("#slit_height=%g\n" % self.slit_height)
567        file.write("#slit_width=%g\n" % self.slit_width)
568        file.write("#background=%g\n" % self.background)
569        if self.has_bck==True:
570            file.write("#has_bck=1\n")
571        else:
572            file.write("#has_bck=0\n")
573        file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
574        if not self.out==None:
575            if len(self.out)==len(self.cov):
576                for i in range(len(self.out)):
577                    file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]), str(self.cov[i][i])))
578        file.write("<r>  <Pr>  <dPr>\n")
579        r = pylab.arange(0.0, self.d_max, self.d_max/npts)
580       
581        for r_i in r:
582            (value, err) = self.pr_err(self.out, self.cov, r_i)
583            file.write("%g  %g  %g\n" % (r_i, value, err))
584   
585        file.close()
586     
587       
588    def from_file(self, path):
589        """
590            Load the state of the Invertor from a file,
591            to be able to generate P(r) from a set of
592            parameters.
593            @param path: path of the file to load
594        """
595        import os
596        import re
597        if os.path.isfile(path):
598            try:
599                fd = open(path, 'r')
600               
601                buff    = fd.read()
602                lines   = buff.split('\n')
603                for line in lines:
604                    if line.startswith('#d_max='):
605                        toks = line.split('=')
606                        self.d_max = float(toks[1])
607                    elif line.startswith('#nfunc='):
608                        toks = line.split('=')
609                        self.nfunc = int(toks[1])
610                        self.out = numpy.zeros(self.nfunc)
611                        self.cov = numpy.zeros([self.nfunc, self.nfunc])
612                    elif line.startswith('#alpha='):
613                        toks = line.split('=')
614                        self.alpha = float(toks[1])
615                    elif line.startswith('#chi2='):
616                        toks = line.split('=')
617                        self.chi2 = float(toks[1])
618                    elif line.startswith('#elapsed='):
619                        toks = line.split('=')
620                        self.elapsed = float(toks[1])
621                    elif line.startswith('#alpha_estimate='):
622                        toks = line.split('=')
623                        self.suggested_alpha = float(toks[1])
624                    elif line.startswith('#qmin='):
625                        toks = line.split('=')
626                        try:
627                            self.q_min = float(toks[1])
628                        except:
629                            self.q_min = None
630                    elif line.startswith('#qmax='):
631                        toks = line.split('=')
632                        try:
633                            self.q_max = float(toks[1])
634                        except:
635                            self.q_max = None
636                    elif line.startswith('#slit_height='):
637                        toks = line.split('=')
638                        self.slit_height = float(toks[1])
639                    elif line.startswith('#slit_width='):
640                        toks = line.split('=')
641                        self.slit_width = float(toks[1])
642                    elif line.startswith('#background='):
643                        toks = line.split('=')
644                        self.background = float(toks[1])
645                    elif line.startswith('#has_bck='):
646                        toks = line.split('=')
647                        if int(toks[1])==1:
648                            self.has_bck=True
649                        else:
650                            self.has_bck=False
651           
652                    # Now read in the parameters
653                    elif line.startswith('#C_'):
654                        toks = line.split('=')
655                        p = re.compile('#C_([0-9]+)')
656                        m = p.search(toks[0])
657                        toks2 = toks[1].split('+-')
658                        i = int(m.group(1))
659                        self.out[i] = float(toks2[0])
660                       
661                        self.cov[i][i] = float(toks2[1])                       
662           
663            except:
664                raise RuntimeError, "Invertor.from_file: corrupted file\n%s" % sys.exc_value
665        else:
666            raise RuntimeError, "Invertor.from_file: '%s' is not a file" % str(path) 
667       
668       
669   
670   
671if __name__ == "__main__":
672    o = Invertor()
673
674   
675   
676   
677   
Note: See TracBrowser for help on using the repository browser.