source: sasview/pr_inversion/invertor.py @ 9e85792

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalccostrafo411magnetic_scattrelease-4.1.1release-4.1.2release-4.2.2release_4.0.1ticket-1009ticket-1094-headlessticket-1242-2d-resolutionticket-1243ticket-1249ticket885unittest-saveload
Last change on this file since 9e85792 was f168d02, checked in by Mathieu Doucet <doucetm@…>, 16 years ago

Added slit smearing (still slow)

  • Property mode set to 100644
File size: 26.4 KB
Line 
1"""
2    Module to perform P(r) inversion.
3    The module contains the Invertor class.
4"""
5from sans.pr.core.pr_inversion import Cinvertor
6import numpy
7import sys
8
9def help():
10    """
11        Provide general online help text
12        Future work: extend this function to allow topic selection
13    """
14    info_txt  = "The inversion approach is based on Moore, J. Appl. Cryst. (1980) 13, 168-175.\n\n"
15    info_txt += "P(r) is set to be equal to an expansion of base functions of the type "
16    info_txt += "phi_n(r) = 2*r*sin(pi*n*r/D_max). The coefficient of each base functions "
17    info_txt += "in the expansion is found by performing a least square fit with the "
18    info_txt += "following fit function:\n\n"
19    info_txt += "chi**2 = sum_i[ I_meas(q_i) - I_th(q_i) ]**2/error**2 + Reg_term\n\n"
20    info_txt += "where I_meas(q) is the measured scattering intensity and I_th(q) is "
21    info_txt += "the prediction from the Fourier transform of the P(r) expansion. "
22    info_txt += "The Reg_term term is a regularization term set to the second derivative "
23    info_txt += "d**2P(r)/dr**2 integrated over r. It is used to produce a smooth P(r) output.\n\n"
24    info_txt += "The following are user inputs:\n\n"
25    info_txt += "   - Number of terms: the number of base functions in the P(r) expansion.\n\n"
26    info_txt += "   - Regularization constant: a multiplicative constant to set the size of "
27    info_txt += "the regularization term.\n\n"
28    info_txt += "   - Maximum distance: the maximum distance between any two points in the system.\n"
29     
30    return info_txt
31   
32
33class Invertor(Cinvertor):
34    """
35        Invertor class to perform P(r) inversion
36       
37        The problem is solved by posing the problem as  Ax = b,
38        where x is the set of coefficients we are looking for.
39       
40        Npts is the number of points.
41       
42        In the following i refers to the ith base function coefficient.
43        The matrix has its entries j in its first Npts rows set to
44            A[j][i] = (Fourier transformed base function for point j)
45           
46        We them choose a number of r-points, n_r, to evaluate the second
47        derivative of P(r) at. This is used as our regularization term.
48        For a vector r of length n_r, the following n_r rows are set to
49            A[j+Npts][i] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
50           
51        The vector b has its first Npts entries set to
52            b[j] = (I(q) observed for point j)
53           
54        The following n_r entries are set to zero.
55       
56        The result is found by using scipy.linalg.basic.lstsq to invert
57        the matrix and find the coefficients x.
58       
59        Methods inherited from Cinvertor:
60        - get_peaks(pars): returns the number of P(r) peaks
61        - oscillations(pars): returns the oscillation parameters for the output P(r)
62        - get_positive(pars): returns the fraction of P(r) that is above zero
63        - get_pos_err(pars): returns the fraction of P(r) that is 1-sigma above zero
64    """
65    #TODO: Allow for slit smearing. Smear each base function once before filling
66    # the A matrix.
67   
68    ## Chisqr of the last computation
69    chi2  = 0
70    ## Time elapsed for last computation
71    elapsed = 0
72    ## Alpha to get the reg term the same size as the signal
73    suggested_alpha = 0
74    ## Last number of base functions used
75    nfunc = 10
76    ## Last output values
77    out = None
78    ## Last errors on output values
79    cov = None
80    ## Flag to allow I(q) data with constant background
81    #has_bck = False
82    ## Background value
83    background = 0
84   
85   
86    def __init__(self):
87        Cinvertor.__init__(self)
88       
89    def __setattr__(self, name, value):
90        """
91            Set the value of an attribute.
92            Access the parent class methods for
93            x, y, err, d_max, q_min, q_max and alpha
94        """
95        if   name=='x':
96            if 0.0 in value:
97                raise ValueError, "Invertor: one of your q-values is zero. Delete that entry before proceeding"
98            return self.set_x(value)
99        elif name=='y':
100            return self.set_y(value)
101        elif name=='err':
102            value2 = abs(value)
103            return self.set_err(value2)
104        elif name=='d_max':
105            return self.set_dmax(value)
106        elif name=='q_min':
107            if value==None:
108                return self.set_qmin(-1.0)
109            return self.set_qmin(value)
110        elif name=='q_max':
111            if value==None:
112                return self.set_qmax(-1.0)
113            return self.set_qmax(value)
114        elif name=='alpha':
115            return self.set_alpha(value)
116        elif name=='slit_height':
117            return self.set_slit_height(value)
118        elif name=='slit_width':
119            return self.set_slit_width(value)
120        elif name=='has_bck':
121            if value==True:
122                return self.set_has_bck(1)
123            elif value==False:
124                return self.set_has_bck(0)
125            else:
126                raise ValueError, "Invertor: has_bck can only be True or False"
127           
128        return Cinvertor.__setattr__(self, name, value)
129   
130    def __getattr__(self, name):
131        """
132           Return the value of an attribute
133           For the moment x, y, err and d_max are write-only
134           TODO: change that!
135        """
136        import numpy
137        if   name=='x':
138            out = numpy.ones(self.get_nx())
139            self.get_x(out)
140            return out
141        elif name=='y':
142            out = numpy.ones(self.get_ny())
143            self.get_y(out)
144            return out
145        elif name=='err':
146            out = numpy.ones(self.get_nerr())
147            self.get_err(out)
148            return out
149        elif name=='d_max':
150            return self.get_dmax()
151        elif name=='q_min':
152            qmin = self.get_qmin()
153            if qmin<0:
154                return None
155            return qmin
156        elif name=='q_max':
157            qmax = self.get_qmax()
158            if qmax<0:
159                return None
160            return qmax
161        elif name=='alpha':
162            return self.get_alpha()
163        elif name=='slit_height':
164            return self.get_slit_height()
165        elif name=='slit_width':
166            return self.get_slit_width()
167        elif name=='has_bck':
168            value = self.get_has_bck()
169            if value==1:
170                return True
171            else:
172                return False
173        elif name in self.__dict__:
174            return self.__dict__[name]
175        return None
176   
177    def clone(self):
178        """
179            Return a clone of this instance
180        """
181        invertor = Invertor()
182        invertor.chi2    = self.chi2
183        invertor.elapsed = self.elapsed
184        invertor.alpha   = self.alpha
185        invertor.d_max   = self.d_max
186        invertor.q_min   = self.q_min
187        invertor.q_max   = self.q_max
188       
189        invertor.x = self.x
190        invertor.y = self.y
191        invertor.err = self.err
192        invertor.has_bck = self.has_bck
193        invertor.slit_height = self.slit_height
194        invertor.slit_width  = self.slit_width
195       
196        return invertor
197   
198    def invert(self, nfunc=10, nr=20):
199        """
200            Perform inversion to P(r)
201           
202            The problem is solved by posing the problem as  Ax = b,
203            where x is the set of coefficients we are looking for.
204           
205            Npts is the number of points.
206           
207            In the following i refers to the ith base function coefficient.
208            The matrix has its entries j in its first Npts rows set to
209                A[i][j] = (Fourier transformed base function for point j)
210               
211            We them choose a number of r-points, n_r, to evaluate the second
212            derivative of P(r) at. This is used as our regularization term.
213            For a vector r of length n_r, the following n_r rows are set to
214                A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
215               
216            The vector b has its first Npts entries set to
217                b[j] = (I(q) observed for point j)
218               
219            The following n_r entries are set to zero.
220           
221            The result is found by using scipy.linalg.basic.lstsq to invert
222            the matrix and find the coefficients x.
223           
224            @param nfunc: number of base functions to use.
225            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
226            @return: c_out, c_cov - the coefficients with covariance matrix
227        """
228        # Reset the background value before proceeding
229        self.background = 0.0
230        return self.lstsq(nfunc, nr=nr)
231   
232    def iq(self, out, q):
233        """
234            Function to call to evaluate the scattering intensity
235            @param args: c-parameters, and q
236            @return: I(q)
237        """
238        return Cinvertor.iq(self, out, q)+self.background
239   
240    def invert_optimize(self, nfunc=10, nr=20):
241        """
242            Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
243           
244            This probably produce more reliable results, but is much slower.
245            The minimization function is set to sum_i[ (I_obs(q_i) - I_theo(q_i))/err**2 ] + alpha * reg_term,
246            where the reg_term is given by Svergun: it is the integral of the square of the first derivative
247            of P(r), d(P(r))/dr, integrated over the full range of r.
248           
249            @param nfunc: number of base functions to use.
250            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
251            @return: c_out, c_cov - the coefficients with covariance matrix
252        """
253       
254        from scipy import optimize
255        import time
256       
257        self.nfunc = nfunc
258        # First, check that the current data is valid
259        if self.is_valid()<=0:
260            raise RuntimeError, "Invertor.invert: Data array are of different length"
261       
262        p = numpy.ones(nfunc)
263        t_0 = time.time()
264        out, cov_x, info, mesg, success = optimize.leastsq(self.residuals, p, full_output=1, warning=True)
265       
266        # Compute chi^2
267        res = self.residuals(out)
268        chisqr = 0
269        for i in range(len(res)):
270            chisqr += res[i]
271       
272        self.chi2 = chisqr
273
274        # Store computation time
275        self.elapsed = time.time() - t_0
276       
277        return out, cov_x
278   
279    def pr_fit(self, nfunc=5):
280        """
281            This is a direct fit to a given P(r). It assumes that the y data
282            is set to some P(r) distribution that we are trying to reproduce
283            with a set of base functions.
284           
285            This method is provided as a test.
286        """
287        from scipy import optimize
288       
289        # First, check that the current data is valid
290        if self.is_valid()<=0:
291            raise RuntimeError, "Invertor.invert: Data arrays are of different length"
292       
293        p = numpy.ones(nfunc)
294        t_0 = time.time()
295        out, cov_x, info, mesg, success = optimize.leastsq(self.pr_residuals, p, full_output=1, warning=True)
296       
297        # Compute chi^2
298        res = self.pr_residuals(out)
299        chisqr = 0
300        for i in range(len(res)):
301            chisqr += res[i]
302       
303        self.chisqr = chisqr
304       
305        # Store computation time
306        self.elapsed = time.time() - t_0
307
308        return out, cov_x
309   
310    def pr_err(self, c, c_cov, r):
311        """   
312            Returns the value of P(r) for a given r, and base function
313            coefficients, with error.
314           
315            @param c: base function coefficients
316            @param c_cov: covariance matrice of the base function coefficients
317            @param r: r-value to evaluate P(r) at
318            @return: P(r)
319        """
320        return self.get_pr_err(c, c_cov, r)
321       
322    def _accept_q(self, q):
323        """
324            Check q-value against user-defined range
325        """
326        if not self.q_min==None and q<self.q_min:
327            return False
328        if not self.q_max==None and q>self.q_max:
329            return False
330        return True
331       
332    def lstsq(self, nfunc=5, nr=20):
333        #TODO: do this on the C side
334        #
335        # To make sure an array is contiguous:
336        # blah = numpy.ascontiguousarray(blah_original)
337        # ... before passing it to C
338        """
339            The problem is solved by posing the problem as  Ax = b,
340            where x is the set of coefficients we are looking for.
341           
342            Npts is the number of points.
343           
344            In the following i refers to the ith base function coefficient.
345            The matrix has its entries j in its first Npts rows set to
346                A[i][j] = (Fourier transformed base function for point j)
347               
348            We them choose a number of r-points, n_r, to evaluate the second
349            derivative of P(r) at. This is used as our regularization term.
350            For a vector r of length n_r, the following n_r rows are set to
351                A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
352               
353            The vector b has its first Npts entries set to
354                b[j] = (I(q) observed for point j)
355               
356            The following n_r entries are set to zero.
357           
358            The result is found by using scipy.linalg.basic.lstsq to invert
359            the matrix and find the coefficients x.
360           
361            @param nfunc: number of base functions to use.
362            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
363
364            If the result does not allow us to compute the covariance matrix,
365            a matrix filled with zeros will be returned.
366
367        """
368        #TODO: Allow for background by starting at n=0 (since the base function
369        # is zero for n=0).
370        import math, time
371        from scipy.linalg.basic import lstsq
372       
373        if self.is_valid()<0:
374            raise RuntimeError, "Invertor: invalid data; incompatible data lengths."
375       
376        self.nfunc = nfunc
377        # a -- An M x N matrix.
378        # b -- An M x nrhs matrix or M vector.
379        npts = len(self.x)
380        nq   = nr
381        sqrt_alpha = math.sqrt(math.fabs(self.alpha))
382        if sqrt_alpha<0.0:
383            nq = 0
384
385        # If we need to fit the background, add a term
386        if self.has_bck==True:
387            nfunc_0 = nfunc
388            nfunc += 1
389
390        a = numpy.zeros([npts+nq, nfunc])
391        b = numpy.zeros(npts+nq)
392        err = numpy.zeros([nfunc, nfunc])
393       
394        # Construct the a matrix and b vector that represent the problem
395        t_0 = time.time()
396        self._get_matrix(nfunc, nq, a, b)
397        #print "elasped: ", time.time()-t_0
398             
399        # Perform the inversion (least square fit)
400        c, chi2, rank, n = lstsq(a, b)
401        # Sanity check
402        try:
403            float(chi2)
404        except:
405            chi2 = -1.0
406        self.chi2 = chi2
407               
408        inv_cov = numpy.zeros([nfunc,nfunc])
409        # Get the covariance matrix, defined as inv_cov = a_transposed * a
410        self._get_invcov_matrix(nfunc, nr, a, inv_cov)
411                   
412        # Compute the reg term size for the output
413        sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
414                   
415        if math.fabs(self.alpha)>0:
416            new_alpha = sum_sig/(sum_reg/self.alpha)
417        else:
418            new_alpha = 0.0
419        self.suggested_alpha = new_alpha
420       
421        try:
422            cov = numpy.linalg.pinv(inv_cov)
423            err = math.fabs(chi2/float(npts-nfunc)) * cov
424        except:
425            # We were not able to estimate the errors,
426            # returns an empty covariance matrix
427            print "lstsq:", sys.exc_value
428            print chi2
429            pass
430           
431        # Keep a copy of the last output
432        if self.has_bck==False:
433            self.background = 0
434            self.out = c
435            self.cov = err
436        else:
437            self.background = c[0]
438           
439            err_0 = numpy.zeros([nfunc, nfunc])
440            c_0 = numpy.zeros(nfunc)
441           
442            for i in range(nfunc_0):
443                c_0[i] = c[i+1]
444                for j in range(nfunc_0):
445                    err_0[i][j] = err[i+1][j+1]
446                   
447            self.out = c_0
448            self.cov = err_0
449           
450        return self.out, self.cov
451       
452    def lstsq_bck(self, nfunc=5, nr=20):
453        #TODO: Allow for background by starting at n=0 (since the base function
454        # is zero for n=0).
455        import math
456        from scipy.linalg.basic import lstsq
457       
458        if self.is_valid()<0:
459            raise RuntimeError, "Invertor: invalid data; incompatible data lengths."
460       
461        self.nfunc = nfunc
462        # a -- An M x N matrix.
463        # b -- An M x nrhs matrix or M vector.
464        npts = len(self.x)
465        nq   = nr
466        sqrt_alpha = math.sqrt(math.fabs(self.alpha))
467        if sqrt_alpha<0.0:
468            nq = 0
469       
470        err_0 = numpy.zeros([nfunc, nfunc])
471        c_0 = numpy.zeros(nfunc)
472        nfunc_0 = nfunc
473        nfunc += 1
474       
475        a = numpy.zeros([npts+nq, nfunc])
476        b = numpy.zeros(npts+nq)
477        err = numpy.zeros([nfunc, nfunc])
478       
479        # Construct the a matrix and b vector that represent the problem
480        self._get_matrix(nfunc, nq, a, b)
481           
482        c, chi2, rank, n = lstsq(a, b)
483        # Sanity check
484        try:
485            float(chi2)
486        except:
487            chi2 = -1.0
488        self.chi2 = chi2
489
490        inv_cov = numpy.zeros([nfunc,nfunc])
491        # Get the covariance matrix, defined as inv_cov = a_transposed * a
492        self._get_invcov_matrix(nfunc, nr, a, inv_cov)
493                   
494        # Compute the reg term size for the output
495        sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
496       
497        if math.fabs(self.alpha)>0:
498            new_alpha = sum_sig/(sum_reg/self.alpha)
499        else:
500            new_alpha = 0.0
501        self.suggested_alpha = new_alpha
502       
503        try:
504            cov = numpy.linalg.pinv(inv_cov)
505            err = math.fabs(chi2/float(npts-nfunc)) * cov
506        except:
507            # We were not able to estimate the errors,
508            # returns an empty covariance matrix
509            print "lstsq:", sys.exc_value
510            print chi2
511            pass
512           
513        # Keep a copy of the last output
514       
515        print "BACKGROUND =", c[0]
516        self.background = c[0]
517       
518        for i in range(nfunc_0):
519            c_0[i] = c[i+1]
520            for j in range(nfunc_0):
521                err_0[i][j] = err[i+1][j+1]
522               
523        self.out = c_0
524        self.cov = err_0
525       
526        return c_0, err_0
527
528    def estimate_numterms(self, isquit_func=None):
529        """
530            Returns a reasonable guess for the
531            number of terms
532           
533            @return: number of terms, alpha, message
534        """
535        from num_term import Num_terms
536        estimator = Num_terms(self.clone())
537        try:
538            return estimator.num_terms(isquit_func)
539        except:
540            # If we fail, estimate alpha and return the default
541            # number of terms
542            best_alpha, message, elapsed =self.estimate_alpha(self.nfunc)
543            return self.nfunc, best_alpha, "Could not estimate number of terms"
544                   
545    def estimate_alpha(self, nfunc):
546        """
547            Returns a reasonable guess for the
548            regularization constant alpha
549           
550            @return: alpha, message, elapsed
551           
552            where alpha is the estimate for alpha,
553            message is a message for the user,
554            elapsed is the computation time
555        """
556        import time
557        try:           
558            pr = self.clone()
559           
560            # T_0 for computation time
561            starttime = time.time()
562            elapsed = 0
563           
564            # If the current alpha is zero, try
565            # another value
566            if pr.alpha<=0:
567                pr.alpha = 0.0001
568                 
569            # Perform inversion to find the largest alpha
570            out, cov = pr.invert(nfunc)
571            elapsed = time.time()-starttime
572            initial_alpha = pr.alpha
573            initial_peaks = pr.get_peaks(out)
574   
575            # Try the inversion with the estimated alpha
576            pr.alpha = pr.suggested_alpha
577            out, cov = pr.invert(nfunc)
578   
579            npeaks = pr.get_peaks(out)
580            # if more than one peak to start with
581            # just return the estimate
582            if npeaks>1:
583                #message = "Your P(r) is not smooth, please check your inversion parameters"
584                message = None
585                return pr.suggested_alpha, message, elapsed
586            else:
587               
588                # Look at smaller values
589                # We assume that for the suggested alpha, we have 1 peak
590                # if not, send a message to change parameters
591                alpha = pr.suggested_alpha
592                best_alpha = pr.suggested_alpha
593                found = False
594                for i in range(10):
595                    pr.alpha = (0.33)**(i+1)*alpha
596                    out, cov = pr.invert(nfunc)
597                   
598                    peaks = pr.get_peaks(out)
599                    if peaks>1:
600                        found = True
601                        break
602                    best_alpha = pr.alpha
603                   
604                # If we didn't find a turning point for alpha and
605                # the initial alpha already had only one peak,
606                # just return that
607                if not found and initial_peaks==1 and initial_alpha<best_alpha:
608                    best_alpha = initial_alpha
609                   
610                # Check whether the size makes sense
611                message=''
612               
613                if not found:
614                    message = "None"
615                elif best_alpha>=0.5*pr.suggested_alpha:
616                    # best alpha is too big, return a
617                    # reasonable value
618                    message  = "The estimated alpha for your system is too large. "
619                    message += "Try increasing your maximum distance."
620               
621                return best_alpha, message, elapsed
622   
623        except:
624            message = "Invertor.estimate_alpha: %s" % sys.exc_value
625            return 0, message, elapsed
626   
627       
628    def to_file(self, path, npts=100):
629        """
630            Save the state to a file that will be readable
631            by SliceView.
632            @param path: path of the file to write
633            @param npts: number of P(r) points to be written
634        """
635        import pylab
636       
637        file = open(path, 'w')
638        file.write("#d_max=%g\n" % self.d_max)
639        file.write("#nfunc=%g\n" % self.nfunc)
640        file.write("#alpha=%g\n" % self.alpha)
641        file.write("#chi2=%g\n" % self.chi2)
642        file.write("#elapsed=%g\n" % self.elapsed)
643        file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
644        if not self.out==None:
645            if len(self.out)==len(self.cov):
646                for i in range(len(self.out)):
647                    file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]), str(self.cov[i][i])))
648        file.write("<r>  <Pr>  <dPr>\n")
649        r = pylab.arange(0.0, self.d_max, self.d_max/npts)
650       
651        for r_i in r:
652            (value, err) = self.pr_err(self.out, self.cov, r_i)
653            file.write("%g  %g  %g\n" % (r_i, value, err))
654   
655        file.close()
656     
657       
658    def from_file(self, path):
659        """
660            Load the state of the Invertor from a file,
661            to be able to generate P(r) from a set of
662            parameters.
663            @param path: path of the file to load
664        """
665        import os
666        import re
667        if os.path.isfile(path):
668            try:
669                fd = open(path, 'r')
670               
671                buff    = fd.read()
672                lines   = buff.split('\n')
673                for line in lines:
674                    if line.startswith('#d_max='):
675                        toks = line.split('=')
676                        self.d_max = float(toks[1])
677                    elif line.startswith('#nfunc='):
678                        toks = line.split('=')
679                        self.nfunc = int(toks[1])
680                        self.out = numpy.zeros(self.nfunc)
681                        self.cov = numpy.zeros([self.nfunc, self.nfunc])
682                    elif line.startswith('#alpha='):
683                        toks = line.split('=')
684                        self.alpha = float(toks[1])
685                    elif line.startswith('#chi2='):
686                        toks = line.split('=')
687                        self.chi2 = float(toks[1])
688                    elif line.startswith('#elapsed='):
689                        toks = line.split('=')
690                        self.elapsed = float(toks[1])
691                    elif line.startswith('#alpha_estimate='):
692                        toks = line.split('=')
693                        self.suggested_alpha = float(toks[1])
694           
695                    # Now read in the parameters
696                    elif line.startswith('#C_'):
697                        toks = line.split('=')
698                        p = re.compile('#C_([0-9]+)')
699                        m = p.search(toks[0])
700                        toks2 = toks[1].split('+-')
701                        i = int(m.group(1))
702                        self.out[i] = float(toks2[0])
703                       
704                        self.cov[i][i] = float(toks2[1])                       
705           
706            except:
707                raise RuntimeError, "Invertor.from_file: corrupted file\n%s" % sys.exc_value
708        else:
709            raise RuntimeError, "Invertor.from_file: '%s' is not a file" % str(path) 
710       
711       
712   
713   
714if __name__ == "__main__":
715    o = Invertor()
716
717   
718   
719   
720   
Note: See TracBrowser for help on using the repository browser.