source: sasview/pr_inversion/invertor.py @ a4bd2ac

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalccostrafo411magnetic_scattrelease-4.1.1release-4.1.2release-4.2.2release_4.0.1ticket-1009ticket-1094-headlessticket-1242-2d-resolutionticket-1243ticket-1249ticket885unittest-saveload
Last change on this file since a4bd2ac was 9a23253e, checked in by Mathieu Doucet <doucetm@…>, 16 years ago

Started to add slit smearing. Added Rg and I(q=0) as outputs.

  • Property mode set to 100644
File size: 25.6 KB
Line 
1"""
2    Module to perform P(r) inversion.
3    The module contains the Invertor class.
4"""
5from sans.pr.core.pr_inversion import Cinvertor
6import numpy
7import sys
8
9def help():
10    """
11        Provide general online help text
12        Future work: extend this function to allow topic selection
13    """
14    info_txt  = "The inversion approach is based on Moore, J. Appl. Cryst. (1980) 13, 168-175.\n\n"
15    info_txt += "P(r) is set to be equal to an expansion of base functions of the type "
16    info_txt += "phi_n(r) = 2*r*sin(pi*n*r/D_max). The coefficient of each base functions "
17    info_txt += "in the expansion is found by performing a least square fit with the "
18    info_txt += "following fit function:\n\n"
19    info_txt += "chi**2 = sum_i[ I_meas(q_i) - I_th(q_i) ]**2/error**2 + Reg_term\n\n"
20    info_txt += "where I_meas(q) is the measured scattering intensity and I_th(q) is "
21    info_txt += "the prediction from the Fourier transform of the P(r) expansion. "
22    info_txt += "The Reg_term term is a regularization term set to the second derivative "
23    info_txt += "d**2P(r)/dr**2 integrated over r. It is used to produce a smooth P(r) output.\n\n"
24    info_txt += "The following are user inputs:\n\n"
25    info_txt += "   - Number of terms: the number of base functions in the P(r) expansion.\n\n"
26    info_txt += "   - Regularization constant: a multiplicative constant to set the size of "
27    info_txt += "the regularization term.\n\n"
28    info_txt += "   - Maximum distance: the maximum distance between any two points in the system.\n"
29     
30    return info_txt
31   
32
33class Invertor(Cinvertor):
34    """
35        Invertor class to perform P(r) inversion
36       
37        The problem is solved by posing the problem as  Ax = b,
38        where x is the set of coefficients we are looking for.
39       
40        Npts is the number of points.
41       
42        In the following i refers to the ith base function coefficient.
43        The matrix has its entries j in its first Npts rows set to
44            A[j][i] = (Fourier transformed base function for point j)
45           
46        We them choose a number of r-points, n_r, to evaluate the second
47        derivative of P(r) at. This is used as our regularization term.
48        For a vector r of length n_r, the following n_r rows are set to
49            A[j+Npts][i] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
50           
51        The vector b has its first Npts entries set to
52            b[j] = (I(q) observed for point j)
53           
54        The following n_r entries are set to zero.
55       
56        The result is found by using scipy.linalg.basic.lstsq to invert
57        the matrix and find the coefficients x.
58       
59        Methods inherited from Cinvertor:
60        - get_peaks(pars): returns the number of P(r) peaks
61        - oscillations(pars): returns the oscillation parameters for the output P(r)
62        - get_positive(pars): returns the fraction of P(r) that is above zero
63        - get_pos_err(pars): returns the fraction of P(r) that is 1-sigma above zero
64    """
65    #TODO: Allow for slit smearing. Smear each base function once before filling
66    # the A matrix.
67   
68    ## Chisqr of the last computation
69    chi2  = 0
70    ## Time elapsed for last computation
71    elapsed = 0
72    ## Alpha to get the reg term the same size as the signal
73    suggested_alpha = 0
74    ## Last number of base functions used
75    nfunc = 0
76    ## Last output values
77    out = None
78    ## Last errors on output values
79    cov = None
80    ## Flag to allow I(q) data with constant background
81    #has_bck = False
82    ## Background value
83    background = 0
84   
85   
86    def __init__(self):
87        Cinvertor.__init__(self)
88       
89    def __setattr__(self, name, value):
90        """
91            Set the value of an attribute.
92            Access the parent class methods for
93            x, y, err, d_max, q_min, q_max and alpha
94        """
95        if   name=='x':
96            if 0.0 in value:
97                raise ValueError, "Invertor: one of your q-values is zero. Delete that entry before proceeding"
98            return self.set_x(value)
99        elif name=='y':
100            return self.set_y(value)
101        elif name=='err':
102            value2 = abs(value)
103            return self.set_err(value2)
104        elif name=='d_max':
105            return self.set_dmax(value)
106        elif name=='q_min':
107            if value==None:
108                return self.set_qmin(-1.0)
109            return self.set_qmin(value)
110        elif name=='q_max':
111            if value==None:
112                return self.set_qmax(-1.0)
113            return self.set_qmax(value)
114        elif name=='alpha':
115            return self.set_alpha(value)
116        elif name=='slit_height':
117            return self.set_slit_height(value)
118        elif name=='slit_width':
119            return self.set_slit_width(value)
120        elif name=='has_bck':
121            if value==True:
122                return self.set_has_bck(1)
123            elif value==False:
124                return self.set_has_bck(0)
125            else:
126                raise ValueError, "Invertor: has_bck can only be True or False"
127           
128        return Cinvertor.__setattr__(self, name, value)
129   
130    def __getattr__(self, name):
131        """
132           Return the value of an attribute
133           For the moment x, y, err and d_max are write-only
134           TODO: change that!
135        """
136        import numpy
137        if   name=='x':
138            out = numpy.ones(self.get_nx())
139            self.get_x(out)
140            return out
141        elif name=='y':
142            out = numpy.ones(self.get_ny())
143            self.get_y(out)
144            return out
145        elif name=='err':
146            out = numpy.ones(self.get_nerr())
147            self.get_err(out)
148            return out
149        elif name=='d_max':
150            return self.get_dmax()
151        elif name=='q_min':
152            qmin = self.get_qmin()
153            if qmin<0:
154                return None
155            return qmin
156        elif name=='q_max':
157            qmax = self.get_qmax()
158            if qmax<0:
159                return None
160            return qmax
161        elif name=='alpha':
162            return self.get_alpha()
163        elif name=='slit_height':
164            return self.get_slit_height()
165        elif name=='slit_width':
166            return self.get_slit_width()
167        elif name=='has_bck':
168            value = self.get_has_bck()
169            if value==1:
170                return True
171            else:
172                return False
173        elif name in self.__dict__:
174            return self.__dict__[name]
175        return None
176   
177    def clone(self):
178        """
179            Return a clone of this instance
180        """
181        invertor = Invertor()
182        invertor.chi2    = self.chi2
183        invertor.elapsed = self.elapsed
184        invertor.alpha   = self.alpha
185        invertor.d_max   = self.d_max
186        invertor.q_min   = self.q_min
187        invertor.q_max   = self.q_max
188       
189        invertor.x = self.x
190        invertor.y = self.y
191        invertor.err = self.err
192        invertor.has_bck = self.has_bck
193       
194        return invertor
195   
196    def invert(self, nfunc=10, nr=20):
197        """
198            Perform inversion to P(r)
199           
200            The problem is solved by posing the problem as  Ax = b,
201            where x is the set of coefficients we are looking for.
202           
203            Npts is the number of points.
204           
205            In the following i refers to the ith base function coefficient.
206            The matrix has its entries j in its first Npts rows set to
207                A[i][j] = (Fourier transformed base function for point j)
208               
209            We them choose a number of r-points, n_r, to evaluate the second
210            derivative of P(r) at. This is used as our regularization term.
211            For a vector r of length n_r, the following n_r rows are set to
212                A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
213               
214            The vector b has its first Npts entries set to
215                b[j] = (I(q) observed for point j)
216               
217            The following n_r entries are set to zero.
218           
219            The result is found by using scipy.linalg.basic.lstsq to invert
220            the matrix and find the coefficients x.
221           
222            @param nfunc: number of base functions to use.
223            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
224            @return: c_out, c_cov - the coefficients with covariance matrix
225        """
226        # Reset the background value before proceeding
227        self.background = 0.0
228        return self.lstsq(nfunc, nr=nr)
229   
230    def iq(self, out, q):
231        """
232            Function to call to evaluate the scattering intensity
233            @param args: c-parameters, and q
234            @return: I(q)
235        """
236        return Cinvertor.iq(self, out, q)+self.background
237   
238    def invert_optimize(self, nfunc=10, nr=20):
239        """
240            Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
241           
242            This probably produce more reliable results, but is much slower.
243            The minimization function is set to sum_i[ (I_obs(q_i) - I_theo(q_i))/err**2 ] + alpha * reg_term,
244            where the reg_term is given by Svergun: it is the integral of the square of the first derivative
245            of P(r), d(P(r))/dr, integrated over the full range of r.
246           
247            @param nfunc: number of base functions to use.
248            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
249            @return: c_out, c_cov - the coefficients with covariance matrix
250        """
251       
252        from scipy import optimize
253        import time
254       
255        self.nfunc = nfunc
256        # First, check that the current data is valid
257        if self.is_valid()<=0:
258            raise RuntimeError, "Invertor.invert: Data array are of different length"
259       
260        p = numpy.ones(nfunc)
261        t_0 = time.time()
262        out, cov_x, info, mesg, success = optimize.leastsq(self.residuals, p, full_output=1, warning=True)
263       
264        # Compute chi^2
265        res = self.residuals(out)
266        chisqr = 0
267        for i in range(len(res)):
268            chisqr += res[i]
269       
270        self.chi2 = chisqr
271
272        # Store computation time
273        self.elapsed = time.time() - t_0
274       
275        return out, cov_x
276   
277    def pr_fit(self, nfunc=5):
278        """
279            This is a direct fit to a given P(r). It assumes that the y data
280            is set to some P(r) distribution that we are trying to reproduce
281            with a set of base functions.
282           
283            This method is provided as a test.
284        """
285        from scipy import optimize
286       
287        # First, check that the current data is valid
288        if self.is_valid()<=0:
289            raise RuntimeError, "Invertor.invert: Data arrays are of different length"
290       
291        p = numpy.ones(nfunc)
292        t_0 = time.time()
293        out, cov_x, info, mesg, success = optimize.leastsq(self.pr_residuals, p, full_output=1, warning=True)
294       
295        # Compute chi^2
296        res = self.pr_residuals(out)
297        chisqr = 0
298        for i in range(len(res)):
299            chisqr += res[i]
300       
301        self.chisqr = chisqr
302       
303        # Store computation time
304        self.elapsed = time.time() - t_0
305
306        return out, cov_x
307   
308    def pr_err(self, c, c_cov, r):
309        """   
310            Returns the value of P(r) for a given r, and base function
311            coefficients, with error.
312           
313            @param c: base function coefficients
314            @param c_cov: covariance matrice of the base function coefficients
315            @param r: r-value to evaluate P(r) at
316            @return: P(r)
317        """
318        return self.get_pr_err(c, c_cov, r)
319       
320    def _accept_q(self, q):
321        """
322            Check q-value against user-defined range
323        """
324        if not self.q_min==None and q<self.q_min:
325            return False
326        if not self.q_max==None and q>self.q_max:
327            return False
328        return True
329       
330    def lstsq(self, nfunc=5, nr=20):
331        #TODO: do this on the C side
332        #
333        # To make sure an array is contiguous:
334        # blah = numpy.ascontiguousarray(blah_original)
335        # ... before passing it to C
336        """
337            The problem is solved by posing the problem as  Ax = b,
338            where x is the set of coefficients we are looking for.
339           
340            Npts is the number of points.
341           
342            In the following i refers to the ith base function coefficient.
343            The matrix has its entries j in its first Npts rows set to
344                A[i][j] = (Fourier transformed base function for point j)
345               
346            We them choose a number of r-points, n_r, to evaluate the second
347            derivative of P(r) at. This is used as our regularization term.
348            For a vector r of length n_r, the following n_r rows are set to
349                A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
350               
351            The vector b has its first Npts entries set to
352                b[j] = (I(q) observed for point j)
353               
354            The following n_r entries are set to zero.
355           
356            The result is found by using scipy.linalg.basic.lstsq to invert
357            the matrix and find the coefficients x.
358           
359            @param nfunc: number of base functions to use.
360            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
361
362            If the result does not allow us to compute the covariance matrix,
363            a matrix filled with zeros will be returned.
364
365        """
366        #TODO: Allow for background by starting at n=0 (since the base function
367        # is zero for n=0).
368        import math, time
369        from scipy.linalg.basic import lstsq
370       
371        if self.is_valid()<0:
372            raise RuntimeError, "Invertor: invalid data; incompatible data lengths."
373       
374        self.nfunc = nfunc
375        # a -- An M x N matrix.
376        # b -- An M x nrhs matrix or M vector.
377        npts = len(self.x)
378        nq   = nr
379        sqrt_alpha = math.sqrt(math.fabs(self.alpha))
380        if sqrt_alpha<0.0:
381            nq = 0
382
383        # If we need to fit the background, add a term
384        if self.has_bck==True:
385            nfunc_0 = nfunc
386            nfunc += 1
387
388        a = numpy.zeros([npts+nq, nfunc])
389        b = numpy.zeros(npts+nq)
390        err = numpy.zeros([nfunc, nfunc])
391       
392        # Construct the a matrix and b vector that represent the problem
393        self._get_matrix(nfunc, nq, a, b)
394             
395        # Perform the inversion (least square fit)
396        c, chi2, rank, n = lstsq(a, b)
397        # Sanity check
398        try:
399            float(chi2)
400        except:
401            chi2 = -1.0
402        self.chi2 = chi2
403               
404        inv_cov = numpy.zeros([nfunc,nfunc])
405        # Get the covariance matrix, defined as inv_cov = a_transposed * a
406        self._get_invcov_matrix(nfunc, nr, a, inv_cov)
407                   
408        # Compute the reg term size for the output
409        sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
410                   
411        if math.fabs(self.alpha)>0:
412            new_alpha = sum_sig/(sum_reg/self.alpha)
413        else:
414            new_alpha = 0.0
415        self.suggested_alpha = new_alpha
416       
417        try:
418            cov = numpy.linalg.pinv(inv_cov)
419            err = math.fabs(chi2/float(npts-nfunc)) * cov
420        except:
421            # We were not able to estimate the errors,
422            # returns an empty covariance matrix
423            print "lstsq:", sys.exc_value
424            print chi2
425            pass
426           
427        # Keep a copy of the last output
428        if self.has_bck==False:
429            self.background = 0
430            self.out = c
431            self.cov = err
432        else:
433            self.background = c[0]
434           
435            err_0 = numpy.zeros([nfunc, nfunc])
436            c_0 = numpy.zeros(nfunc)
437           
438            for i in range(nfunc_0):
439                c_0[i] = c[i+1]
440                for j in range(nfunc_0):
441                    err_0[i][j] = err[i+1][j+1]
442                   
443            self.out = c_0
444            self.cov = err_0
445           
446        return self.out, self.cov
447       
448    def lstsq_bck(self, nfunc=5, nr=20):
449        #TODO: Allow for background by starting at n=0 (since the base function
450        # is zero for n=0).
451        import math
452        from scipy.linalg.basic import lstsq
453       
454        if self.is_valid()<0:
455            raise RuntimeError, "Invertor: invalid data; incompatible data lengths."
456       
457        self.nfunc = nfunc
458        # a -- An M x N matrix.
459        # b -- An M x nrhs matrix or M vector.
460        npts = len(self.x)
461        nq   = nr
462        sqrt_alpha = math.sqrt(math.fabs(self.alpha))
463        if sqrt_alpha<0.0:
464            nq = 0
465       
466        err_0 = numpy.zeros([nfunc, nfunc])
467        c_0 = numpy.zeros(nfunc)
468        nfunc_0 = nfunc
469        nfunc += 1
470       
471        a = numpy.zeros([npts+nq, nfunc])
472        b = numpy.zeros(npts+nq)
473        err = numpy.zeros([nfunc, nfunc])
474       
475        # Construct the a matrix and b vector that represent the problem
476        self._get_matrix(nfunc, nq, a, b)
477           
478        c, chi2, rank, n = lstsq(a, b)
479        # Sanity check
480        try:
481            float(chi2)
482        except:
483            chi2 = -1.0
484        self.chi2 = chi2
485
486        inv_cov = numpy.zeros([nfunc,nfunc])
487        # Get the covariance matrix, defined as inv_cov = a_transposed * a
488        self._get_invcov_matrix(nfunc, nr, a, inv_cov)
489                   
490        # Compute the reg term size for the output
491        sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
492       
493        if math.fabs(self.alpha)>0:
494            new_alpha = sum_sig/(sum_reg/self.alpha)
495        else:
496            new_alpha = 0.0
497        self.suggested_alpha = new_alpha
498       
499        try:
500            cov = numpy.linalg.pinv(inv_cov)
501            err = math.fabs(chi2/float(npts-nfunc)) * cov
502        except:
503            # We were not able to estimate the errors,
504            # returns an empty covariance matrix
505            print "lstsq:", sys.exc_value
506            print chi2
507            pass
508           
509        # Keep a copy of the last output
510       
511        print "BACKGROUND =", c[0]
512        self.background = c[0]
513       
514        for i in range(nfunc_0):
515            c_0[i] = c[i+1]
516            for j in range(nfunc_0):
517                err_0[i][j] = err[i+1][j+1]
518               
519        self.out = c_0
520        self.cov = err_0
521       
522        return c_0, err_0
523       
524    def estimate_alpha(self, nfunc):
525        """
526            Returns a reasonable guess for the
527            regularization constant alpha
528           
529            @return: alpha, message, elapsed
530           
531            where alpha is the estimate for alpha,
532            message is a message for the user,
533            elapsed is the computation time
534        """
535        import time
536        try:           
537            pr = self.clone()
538           
539            # T_0 for computation time
540            starttime = time.time()
541            elapsed = 0
542           
543            # If the current alpha is zero, try
544            # another value
545            if pr.alpha<=0:
546                pr.alpha = 0.0001
547                 
548            # Perform inversion to find the largest alpha
549            out, cov = pr.invert(nfunc)
550            elapsed = time.time()-starttime
551            initial_alpha = pr.alpha
552            initial_peaks = pr.get_peaks(out)
553   
554            # Try the inversion with the estimated alpha
555            pr.alpha = pr.suggested_alpha
556            out, cov = pr.invert(nfunc)
557   
558            npeaks = pr.get_peaks(out)
559            # if more than one peak to start with
560            # just return the estimate
561            if npeaks>1:
562                message = "Your P(r) is not smooth, please check your inversion parameters"
563                return pr.suggested_alpha, message, elapsed
564            else:
565               
566                # Look at smaller values
567                # We assume that for the suggested alpha, we have 1 peak
568                # if not, send a message to change parameters
569                alpha = pr.suggested_alpha
570                best_alpha = pr.suggested_alpha
571                found = False
572                for i in range(10):
573                    pr.alpha = (0.33)**(i+1)*alpha
574                    out, cov = pr.invert(nfunc)
575                   
576                    peaks = pr.get_peaks(out)
577                    if peaks>1:
578                        found = True
579                        break
580                    best_alpha = pr.alpha
581                   
582                # If we didn't find a turning point for alpha and
583                # the initial alpha already had only one peak,
584                # just return that
585                if not found and initial_peaks==1 and initial_alpha<best_alpha:
586                    best_alpha = initial_alpha
587                   
588                # Check whether the size makes sense
589                message=''
590               
591                if not found:
592                    message = "None"
593                elif best_alpha>=0.5*pr.suggested_alpha:
594                    # best alpha is too big, return a
595                    # reasonable value
596                    message  = "The estimated alpha for your system is too large. "
597                    message += "Try increasing your maximum distance."
598               
599                return best_alpha, message, elapsed
600   
601        except:
602            message = "Invertor.estimate_alpha: %s" % sys.exc_value
603            return 0, message, elapsed
604   
605       
606    def to_file(self, path, npts=100):
607        """
608            Save the state to a file that will be readable
609            by SliceView.
610            @param path: path of the file to write
611            @param npts: number of P(r) points to be written
612        """
613        import pylab
614       
615        file = open(path, 'w')
616        file.write("#d_max=%g\n" % self.d_max)
617        file.write("#nfunc=%g\n" % self.nfunc)
618        file.write("#alpha=%g\n" % self.alpha)
619        file.write("#chi2=%g\n" % self.chi2)
620        file.write("#elapsed=%g\n" % self.elapsed)
621        file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
622        if not self.out==None:
623            if len(self.out)==len(self.cov):
624                for i in range(len(self.out)):
625                    file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]), str(self.cov[i][i])))
626        file.write("<r>  <Pr>  <dPr>\n")
627        r = pylab.arange(0.0, self.d_max, self.d_max/npts)
628       
629        for r_i in r:
630            (value, err) = self.pr_err(self.out, self.cov, r_i)
631            file.write("%g  %g  %g\n" % (r_i, value, err))
632   
633        file.close()
634     
635       
636    def from_file(self, path):
637        """
638            Load the state of the Invertor from a file,
639            to be able to generate P(r) from a set of
640            parameters.
641            @param path: path of the file to load
642        """
643        import os
644        import re
645        if os.path.isfile(path):
646            try:
647                fd = open(path, 'r')
648               
649                buff    = fd.read()
650                lines   = buff.split('\n')
651                for line in lines:
652                    if line.startswith('#d_max='):
653                        toks = line.split('=')
654                        self.d_max = float(toks[1])
655                    elif line.startswith('#nfunc='):
656                        toks = line.split('=')
657                        self.nfunc = int(toks[1])
658                        self.out = numpy.zeros(self.nfunc)
659                        self.cov = numpy.zeros([self.nfunc, self.nfunc])
660                    elif line.startswith('#alpha='):
661                        toks = line.split('=')
662                        self.alpha = float(toks[1])
663                    elif line.startswith('#chi2='):
664                        toks = line.split('=')
665                        self.chi2 = float(toks[1])
666                    elif line.startswith('#elapsed='):
667                        toks = line.split('=')
668                        self.elapsed = float(toks[1])
669                    elif line.startswith('#alpha_estimate='):
670                        toks = line.split('=')
671                        self.suggested_alpha = float(toks[1])
672           
673                    # Now read in the parameters
674                    elif line.startswith('#C_'):
675                        toks = line.split('=')
676                        p = re.compile('#C_([0-9]+)')
677                        m = p.search(toks[0])
678                        toks2 = toks[1].split('+-')
679                        i = int(m.group(1))
680                        self.out[i] = float(toks2[0])
681                       
682                        self.cov[i][i] = float(toks2[1])                       
683           
684            except:
685                raise RuntimeError, "Invertor.from_file: corrupted file\n%s" % sys.exc_value
686        else:
687            raise RuntimeError, "Invertor.from_file: '%s' is not a file" % str(path) 
688       
689       
690   
691   
692if __name__ == "__main__":
693    o = Invertor()
694
695   
696   
697   
698   
Note: See TracBrowser for help on using the repository browser.