source: sasview/pr_inversion/invertor.py @ a17ffdf

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalccostrafo411magnetic_scattrelease-4.1.1release-4.1.2release-4.2.2release_4.0.1ticket-1009ticket-1094-headlessticket-1242-2d-resolutionticket-1243ticket-1249ticket885unittest-saveload
Last change on this file since a17ffdf was e96a852, checked in by Mathieu Doucet <doucetm@…>, 16 years ago

From Raiza: n_term estimator

  • Property mode set to 100644
File size: 25.9 KB
Line 
1"""
2    Module to perform P(r) inversion.
3    The module contains the Invertor class.
4"""
5from sans.pr.core.pr_inversion import Cinvertor
6import numpy
7import sys
8
9def help():
10    """
11        Provide general online help text
12        Future work: extend this function to allow topic selection
13    """
14    info_txt  = "The inversion approach is based on Moore, J. Appl. Cryst. (1980) 13, 168-175.\n\n"
15    info_txt += "P(r) is set to be equal to an expansion of base functions of the type "
16    info_txt += "phi_n(r) = 2*r*sin(pi*n*r/D_max). The coefficient of each base functions "
17    info_txt += "in the expansion is found by performing a least square fit with the "
18    info_txt += "following fit function:\n\n"
19    info_txt += "chi**2 = sum_i[ I_meas(q_i) - I_th(q_i) ]**2/error**2 + Reg_term\n\n"
20    info_txt += "where I_meas(q) is the measured scattering intensity and I_th(q) is "
21    info_txt += "the prediction from the Fourier transform of the P(r) expansion. "
22    info_txt += "The Reg_term term is a regularization term set to the second derivative "
23    info_txt += "d**2P(r)/dr**2 integrated over r. It is used to produce a smooth P(r) output.\n\n"
24    info_txt += "The following are user inputs:\n\n"
25    info_txt += "   - Number of terms: the number of base functions in the P(r) expansion.\n\n"
26    info_txt += "   - Regularization constant: a multiplicative constant to set the size of "
27    info_txt += "the regularization term.\n\n"
28    info_txt += "   - Maximum distance: the maximum distance between any two points in the system.\n"
29     
30    return info_txt
31   
32
33class Invertor(Cinvertor):
34    """
35        Invertor class to perform P(r) inversion
36       
37        The problem is solved by posing the problem as  Ax = b,
38        where x is the set of coefficients we are looking for.
39       
40        Npts is the number of points.
41       
42        In the following i refers to the ith base function coefficient.
43        The matrix has its entries j in its first Npts rows set to
44            A[j][i] = (Fourier transformed base function for point j)
45           
46        We them choose a number of r-points, n_r, to evaluate the second
47        derivative of P(r) at. This is used as our regularization term.
48        For a vector r of length n_r, the following n_r rows are set to
49            A[j+Npts][i] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
50           
51        The vector b has its first Npts entries set to
52            b[j] = (I(q) observed for point j)
53           
54        The following n_r entries are set to zero.
55       
56        The result is found by using scipy.linalg.basic.lstsq to invert
57        the matrix and find the coefficients x.
58       
59        Methods inherited from Cinvertor:
60        - get_peaks(pars): returns the number of P(r) peaks
61        - oscillations(pars): returns the oscillation parameters for the output P(r)
62        - get_positive(pars): returns the fraction of P(r) that is above zero
63        - get_pos_err(pars): returns the fraction of P(r) that is 1-sigma above zero
64    """
65    #TODO: Allow for slit smearing. Smear each base function once before filling
66    # the A matrix.
67   
68    ## Chisqr of the last computation
69    chi2  = 0
70    ## Time elapsed for last computation
71    elapsed = 0
72    ## Alpha to get the reg term the same size as the signal
73    suggested_alpha = 0
74    ## Last number of base functions used
75    nfunc = 0
76    ## Last output values
77    out = None
78    ## Last errors on output values
79    cov = None
80    ## Flag to allow I(q) data with constant background
81    #has_bck = False
82    ## Background value
83    background = 0
84   
85   
86    def __init__(self):
87        Cinvertor.__init__(self)
88       
89    def __setattr__(self, name, value):
90        """
91            Set the value of an attribute.
92            Access the parent class methods for
93            x, y, err, d_max, q_min, q_max and alpha
94        """
95        if   name=='x':
96            if 0.0 in value:
97                raise ValueError, "Invertor: one of your q-values is zero. Delete that entry before proceeding"
98            return self.set_x(value)
99        elif name=='y':
100            return self.set_y(value)
101        elif name=='err':
102            value2 = abs(value)
103            return self.set_err(value2)
104        elif name=='d_max':
105            return self.set_dmax(value)
106        elif name=='q_min':
107            if value==None:
108                return self.set_qmin(-1.0)
109            return self.set_qmin(value)
110        elif name=='q_max':
111            if value==None:
112                return self.set_qmax(-1.0)
113            return self.set_qmax(value)
114        elif name=='alpha':
115            return self.set_alpha(value)
116        elif name=='slit_height':
117            return self.set_slit_height(value)
118        elif name=='slit_width':
119            return self.set_slit_width(value)
120        elif name=='has_bck':
121            if value==True:
122                return self.set_has_bck(1)
123            elif value==False:
124                return self.set_has_bck(0)
125            else:
126                raise ValueError, "Invertor: has_bck can only be True or False"
127           
128        return Cinvertor.__setattr__(self, name, value)
129   
130    def __getattr__(self, name):
131        """
132           Return the value of an attribute
133           For the moment x, y, err and d_max are write-only
134           TODO: change that!
135        """
136        import numpy
137        if   name=='x':
138            out = numpy.ones(self.get_nx())
139            self.get_x(out)
140            return out
141        elif name=='y':
142            out = numpy.ones(self.get_ny())
143            self.get_y(out)
144            return out
145        elif name=='err':
146            out = numpy.ones(self.get_nerr())
147            self.get_err(out)
148            return out
149        elif name=='d_max':
150            return self.get_dmax()
151        elif name=='q_min':
152            qmin = self.get_qmin()
153            if qmin<0:
154                return None
155            return qmin
156        elif name=='q_max':
157            qmax = self.get_qmax()
158            if qmax<0:
159                return None
160            return qmax
161        elif name=='alpha':
162            return self.get_alpha()
163        elif name=='slit_height':
164            return self.get_slit_height()
165        elif name=='slit_width':
166            return self.get_slit_width()
167        elif name=='has_bck':
168            value = self.get_has_bck()
169            if value==1:
170                return True
171            else:
172                return False
173        elif name in self.__dict__:
174            return self.__dict__[name]
175        return None
176   
177    def clone(self):
178        """
179            Return a clone of this instance
180        """
181        invertor = Invertor()
182        invertor.chi2    = self.chi2
183        invertor.elapsed = self.elapsed
184        invertor.alpha   = self.alpha
185        invertor.d_max   = self.d_max
186        invertor.q_min   = self.q_min
187        invertor.q_max   = self.q_max
188       
189        invertor.x = self.x
190        invertor.y = self.y
191        invertor.err = self.err
192        invertor.has_bck = self.has_bck
193       
194        return invertor
195   
196    def invert(self, nfunc=10, nr=20):
197        """
198            Perform inversion to P(r)
199           
200            The problem is solved by posing the problem as  Ax = b,
201            where x is the set of coefficients we are looking for.
202           
203            Npts is the number of points.
204           
205            In the following i refers to the ith base function coefficient.
206            The matrix has its entries j in its first Npts rows set to
207                A[i][j] = (Fourier transformed base function for point j)
208               
209            We them choose a number of r-points, n_r, to evaluate the second
210            derivative of P(r) at. This is used as our regularization term.
211            For a vector r of length n_r, the following n_r rows are set to
212                A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
213               
214            The vector b has its first Npts entries set to
215                b[j] = (I(q) observed for point j)
216               
217            The following n_r entries are set to zero.
218           
219            The result is found by using scipy.linalg.basic.lstsq to invert
220            the matrix and find the coefficients x.
221           
222            @param nfunc: number of base functions to use.
223            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
224            @return: c_out, c_cov - the coefficients with covariance matrix
225        """
226        # Reset the background value before proceeding
227        self.background = 0.0
228        return self.lstsq(nfunc, nr=nr)
229   
230    def iq(self, out, q):
231        """
232            Function to call to evaluate the scattering intensity
233            @param args: c-parameters, and q
234            @return: I(q)
235        """
236        return Cinvertor.iq(self, out, q)+self.background
237   
238    def invert_optimize(self, nfunc=10, nr=20):
239        """
240            Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
241           
242            This probably produce more reliable results, but is much slower.
243            The minimization function is set to sum_i[ (I_obs(q_i) - I_theo(q_i))/err**2 ] + alpha * reg_term,
244            where the reg_term is given by Svergun: it is the integral of the square of the first derivative
245            of P(r), d(P(r))/dr, integrated over the full range of r.
246           
247            @param nfunc: number of base functions to use.
248            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
249            @return: c_out, c_cov - the coefficients with covariance matrix
250        """
251       
252        from scipy import optimize
253        import time
254       
255        self.nfunc = nfunc
256        # First, check that the current data is valid
257        if self.is_valid()<=0:
258            raise RuntimeError, "Invertor.invert: Data array are of different length"
259       
260        p = numpy.ones(nfunc)
261        t_0 = time.time()
262        out, cov_x, info, mesg, success = optimize.leastsq(self.residuals, p, full_output=1, warning=True)
263       
264        # Compute chi^2
265        res = self.residuals(out)
266        chisqr = 0
267        for i in range(len(res)):
268            chisqr += res[i]
269       
270        self.chi2 = chisqr
271
272        # Store computation time
273        self.elapsed = time.time() - t_0
274       
275        return out, cov_x
276   
277    def pr_fit(self, nfunc=5):
278        """
279            This is a direct fit to a given P(r). It assumes that the y data
280            is set to some P(r) distribution that we are trying to reproduce
281            with a set of base functions.
282           
283            This method is provided as a test.
284        """
285        from scipy import optimize
286       
287        # First, check that the current data is valid
288        if self.is_valid()<=0:
289            raise RuntimeError, "Invertor.invert: Data arrays are of different length"
290       
291        p = numpy.ones(nfunc)
292        t_0 = time.time()
293        out, cov_x, info, mesg, success = optimize.leastsq(self.pr_residuals, p, full_output=1, warning=True)
294       
295        # Compute chi^2
296        res = self.pr_residuals(out)
297        chisqr = 0
298        for i in range(len(res)):
299            chisqr += res[i]
300       
301        self.chisqr = chisqr
302       
303        # Store computation time
304        self.elapsed = time.time() - t_0
305
306        return out, cov_x
307   
308    def pr_err(self, c, c_cov, r):
309        """   
310            Returns the value of P(r) for a given r, and base function
311            coefficients, with error.
312           
313            @param c: base function coefficients
314            @param c_cov: covariance matrice of the base function coefficients
315            @param r: r-value to evaluate P(r) at
316            @return: P(r)
317        """
318        return self.get_pr_err(c, c_cov, r)
319       
320    def _accept_q(self, q):
321        """
322            Check q-value against user-defined range
323        """
324        if not self.q_min==None and q<self.q_min:
325            return False
326        if not self.q_max==None and q>self.q_max:
327            return False
328        return True
329       
330    def lstsq(self, nfunc=5, nr=20):
331        #TODO: do this on the C side
332        #
333        # To make sure an array is contiguous:
334        # blah = numpy.ascontiguousarray(blah_original)
335        # ... before passing it to C
336        """
337            The problem is solved by posing the problem as  Ax = b,
338            where x is the set of coefficients we are looking for.
339           
340            Npts is the number of points.
341           
342            In the following i refers to the ith base function coefficient.
343            The matrix has its entries j in its first Npts rows set to
344                A[i][j] = (Fourier transformed base function for point j)
345               
346            We them choose a number of r-points, n_r, to evaluate the second
347            derivative of P(r) at. This is used as our regularization term.
348            For a vector r of length n_r, the following n_r rows are set to
349                A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
350               
351            The vector b has its first Npts entries set to
352                b[j] = (I(q) observed for point j)
353               
354            The following n_r entries are set to zero.
355           
356            The result is found by using scipy.linalg.basic.lstsq to invert
357            the matrix and find the coefficients x.
358           
359            @param nfunc: number of base functions to use.
360            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
361
362            If the result does not allow us to compute the covariance matrix,
363            a matrix filled with zeros will be returned.
364
365        """
366        #TODO: Allow for background by starting at n=0 (since the base function
367        # is zero for n=0).
368        import math, time
369        from scipy.linalg.basic import lstsq
370       
371        if self.is_valid()<0:
372            raise RuntimeError, "Invertor: invalid data; incompatible data lengths."
373       
374        self.nfunc = nfunc
375        # a -- An M x N matrix.
376        # b -- An M x nrhs matrix or M vector.
377        npts = len(self.x)
378        nq   = nr
379        sqrt_alpha = math.sqrt(math.fabs(self.alpha))
380        if sqrt_alpha<0.0:
381            nq = 0
382
383        # If we need to fit the background, add a term
384        if self.has_bck==True:
385            nfunc_0 = nfunc
386            nfunc += 1
387
388        a = numpy.zeros([npts+nq, nfunc])
389        b = numpy.zeros(npts+nq)
390        err = numpy.zeros([nfunc, nfunc])
391       
392        # Construct the a matrix and b vector that represent the problem
393        self._get_matrix(nfunc, nq, a, b)
394             
395        # Perform the inversion (least square fit)
396        c, chi2, rank, n = lstsq(a, b)
397        # Sanity check
398        try:
399            float(chi2)
400        except:
401            chi2 = -1.0
402        self.chi2 = chi2
403               
404        inv_cov = numpy.zeros([nfunc,nfunc])
405        # Get the covariance matrix, defined as inv_cov = a_transposed * a
406        self._get_invcov_matrix(nfunc, nr, a, inv_cov)
407                   
408        # Compute the reg term size for the output
409        sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
410                   
411        if math.fabs(self.alpha)>0:
412            new_alpha = sum_sig/(sum_reg/self.alpha)
413        else:
414            new_alpha = 0.0
415        self.suggested_alpha = new_alpha
416       
417        try:
418            cov = numpy.linalg.pinv(inv_cov)
419            err = math.fabs(chi2/float(npts-nfunc)) * cov
420        except:
421            # We were not able to estimate the errors,
422            # returns an empty covariance matrix
423            print "lstsq:", sys.exc_value
424            print chi2
425            pass
426           
427        # Keep a copy of the last output
428        if self.has_bck==False:
429            self.background = 0
430            self.out = c
431            self.cov = err
432        else:
433            self.background = c[0]
434           
435            err_0 = numpy.zeros([nfunc, nfunc])
436            c_0 = numpy.zeros(nfunc)
437           
438            for i in range(nfunc_0):
439                c_0[i] = c[i+1]
440                for j in range(nfunc_0):
441                    err_0[i][j] = err[i+1][j+1]
442                   
443            self.out = c_0
444            self.cov = err_0
445           
446        return self.out, self.cov
447       
448    def lstsq_bck(self, nfunc=5, nr=20):
449        #TODO: Allow for background by starting at n=0 (since the base function
450        # is zero for n=0).
451        import math
452        from scipy.linalg.basic import lstsq
453       
454        if self.is_valid()<0:
455            raise RuntimeError, "Invertor: invalid data; incompatible data lengths."
456       
457        self.nfunc = nfunc
458        # a -- An M x N matrix.
459        # b -- An M x nrhs matrix or M vector.
460        npts = len(self.x)
461        nq   = nr
462        sqrt_alpha = math.sqrt(math.fabs(self.alpha))
463        if sqrt_alpha<0.0:
464            nq = 0
465       
466        err_0 = numpy.zeros([nfunc, nfunc])
467        c_0 = numpy.zeros(nfunc)
468        nfunc_0 = nfunc
469        nfunc += 1
470       
471        a = numpy.zeros([npts+nq, nfunc])
472        b = numpy.zeros(npts+nq)
473        err = numpy.zeros([nfunc, nfunc])
474       
475        # Construct the a matrix and b vector that represent the problem
476        self._get_matrix(nfunc, nq, a, b)
477           
478        c, chi2, rank, n = lstsq(a, b)
479        # Sanity check
480        try:
481            float(chi2)
482        except:
483            chi2 = -1.0
484        self.chi2 = chi2
485
486        inv_cov = numpy.zeros([nfunc,nfunc])
487        # Get the covariance matrix, defined as inv_cov = a_transposed * a
488        self._get_invcov_matrix(nfunc, nr, a, inv_cov)
489                   
490        # Compute the reg term size for the output
491        sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
492       
493        if math.fabs(self.alpha)>0:
494            new_alpha = sum_sig/(sum_reg/self.alpha)
495        else:
496            new_alpha = 0.0
497        self.suggested_alpha = new_alpha
498       
499        try:
500            cov = numpy.linalg.pinv(inv_cov)
501            err = math.fabs(chi2/float(npts-nfunc)) * cov
502        except:
503            # We were not able to estimate the errors,
504            # returns an empty covariance matrix
505            print "lstsq:", sys.exc_value
506            print chi2
507            pass
508           
509        # Keep a copy of the last output
510       
511        print "BACKGROUND =", c[0]
512        self.background = c[0]
513       
514        for i in range(nfunc_0):
515            c_0[i] = c[i+1]
516            for j in range(nfunc_0):
517                err_0[i][j] = err[i+1][j+1]
518               
519        self.out = c_0
520        self.cov = err_0
521       
522        return c_0, err_0
523
524    def estimate_numterms(self, isquit_func=None):
525        """
526            Returns a reasonable guess for the
527            number of terms
528           
529            @return: number of terms, alpha, message
530        """
531        from num_term import Num_terms
532        estimator = Num_terms(self.clone())
533       
534        return estimator.num_terms(isquit_func)
535                   
536    def estimate_alpha(self, nfunc):
537        """
538            Returns a reasonable guess for the
539            regularization constant alpha
540           
541            @return: alpha, message, elapsed
542           
543            where alpha is the estimate for alpha,
544            message is a message for the user,
545            elapsed is the computation time
546        """
547        import time
548        try:           
549            pr = self.clone()
550           
551            # T_0 for computation time
552            starttime = time.time()
553            elapsed = 0
554           
555            # If the current alpha is zero, try
556            # another value
557            if pr.alpha<=0:
558                pr.alpha = 0.0001
559                 
560            # Perform inversion to find the largest alpha
561            out, cov = pr.invert(nfunc)
562            elapsed = time.time()-starttime
563            initial_alpha = pr.alpha
564            initial_peaks = pr.get_peaks(out)
565   
566            # Try the inversion with the estimated alpha
567            pr.alpha = pr.suggested_alpha
568            out, cov = pr.invert(nfunc)
569   
570            npeaks = pr.get_peaks(out)
571            # if more than one peak to start with
572            # just return the estimate
573            if npeaks>1:
574                message = "Your P(r) is not smooth, please check your inversion parameters"
575                return pr.suggested_alpha, message, elapsed
576            else:
577               
578                # Look at smaller values
579                # We assume that for the suggested alpha, we have 1 peak
580                # if not, send a message to change parameters
581                alpha = pr.suggested_alpha
582                best_alpha = pr.suggested_alpha
583                found = False
584                for i in range(10):
585                    pr.alpha = (0.33)**(i+1)*alpha
586                    out, cov = pr.invert(nfunc)
587                   
588                    peaks = pr.get_peaks(out)
589                    if peaks>1:
590                        found = True
591                        break
592                    best_alpha = pr.alpha
593                   
594                # If we didn't find a turning point for alpha and
595                # the initial alpha already had only one peak,
596                # just return that
597                if not found and initial_peaks==1 and initial_alpha<best_alpha:
598                    best_alpha = initial_alpha
599                   
600                # Check whether the size makes sense
601                message=''
602               
603                if not found:
604                    message = "None"
605                elif best_alpha>=0.5*pr.suggested_alpha:
606                    # best alpha is too big, return a
607                    # reasonable value
608                    message  = "The estimated alpha for your system is too large. "
609                    message += "Try increasing your maximum distance."
610               
611                return best_alpha, message, elapsed
612   
613        except:
614            message = "Invertor.estimate_alpha: %s" % sys.exc_value
615            return 0, message, elapsed
616   
617       
618    def to_file(self, path, npts=100):
619        """
620            Save the state to a file that will be readable
621            by SliceView.
622            @param path: path of the file to write
623            @param npts: number of P(r) points to be written
624        """
625        import pylab
626       
627        file = open(path, 'w')
628        file.write("#d_max=%g\n" % self.d_max)
629        file.write("#nfunc=%g\n" % self.nfunc)
630        file.write("#alpha=%g\n" % self.alpha)
631        file.write("#chi2=%g\n" % self.chi2)
632        file.write("#elapsed=%g\n" % self.elapsed)
633        file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
634        if not self.out==None:
635            if len(self.out)==len(self.cov):
636                for i in range(len(self.out)):
637                    file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]), str(self.cov[i][i])))
638        file.write("<r>  <Pr>  <dPr>\n")
639        r = pylab.arange(0.0, self.d_max, self.d_max/npts)
640       
641        for r_i in r:
642            (value, err) = self.pr_err(self.out, self.cov, r_i)
643            file.write("%g  %g  %g\n" % (r_i, value, err))
644   
645        file.close()
646     
647       
648    def from_file(self, path):
649        """
650            Load the state of the Invertor from a file,
651            to be able to generate P(r) from a set of
652            parameters.
653            @param path: path of the file to load
654        """
655        import os
656        import re
657        if os.path.isfile(path):
658            try:
659                fd = open(path, 'r')
660               
661                buff    = fd.read()
662                lines   = buff.split('\n')
663                for line in lines:
664                    if line.startswith('#d_max='):
665                        toks = line.split('=')
666                        self.d_max = float(toks[1])
667                    elif line.startswith('#nfunc='):
668                        toks = line.split('=')
669                        self.nfunc = int(toks[1])
670                        self.out = numpy.zeros(self.nfunc)
671                        self.cov = numpy.zeros([self.nfunc, self.nfunc])
672                    elif line.startswith('#alpha='):
673                        toks = line.split('=')
674                        self.alpha = float(toks[1])
675                    elif line.startswith('#chi2='):
676                        toks = line.split('=')
677                        self.chi2 = float(toks[1])
678                    elif line.startswith('#elapsed='):
679                        toks = line.split('=')
680                        self.elapsed = float(toks[1])
681                    elif line.startswith('#alpha_estimate='):
682                        toks = line.split('=')
683                        self.suggested_alpha = float(toks[1])
684           
685                    # Now read in the parameters
686                    elif line.startswith('#C_'):
687                        toks = line.split('=')
688                        p = re.compile('#C_([0-9]+)')
689                        m = p.search(toks[0])
690                        toks2 = toks[1].split('+-')
691                        i = int(m.group(1))
692                        self.out[i] = float(toks2[0])
693                       
694                        self.cov[i][i] = float(toks2[1])                       
695           
696            except:
697                raise RuntimeError, "Invertor.from_file: corrupted file\n%s" % sys.exc_value
698        else:
699            raise RuntimeError, "Invertor.from_file: '%s' is not a file" % str(path) 
700       
701       
702   
703   
704if __name__ == "__main__":
705    o = Invertor()
706
707   
708   
709   
710   
Note: See TracBrowser for help on using the repository browser.