source: sasview/pr_inversion/invertor.py @ 0b16ee3

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalccostrafo411magnetic_scattrelease-4.1.1release-4.1.2release-4.2.2release_4.0.1ticket-1009ticket-1094-headlessticket-1242-2d-resolutionticket-1243ticket-1249ticket885unittest-saveload
Last change on this file since 0b16ee3 was 6e0f53a, checked in by Mathieu Doucet <doucetm@…>, 16 years ago

Minor change

  • Property mode set to 100644
File size: 25.5 KB
Line 
1"""
2    Module to perform P(r) inversion.
3    The module contains the Invertor class.
4"""
5from sans.pr.core.pr_inversion import Cinvertor
6import numpy
7import sys
8import math, time
9from scipy.linalg.basic import lstsq
10
11def help():
12    """
13        Provide general online help text
14        Future work: extend this function to allow topic selection
15    """
16    info_txt  = "The inversion approach is based on Moore, J. Appl. Cryst. (1980) 13, 168-175.\n\n"
17    info_txt += "P(r) is set to be equal to an expansion of base functions of the type "
18    info_txt += "phi_n(r) = 2*r*sin(pi*n*r/D_max). The coefficient of each base functions "
19    info_txt += "in the expansion is found by performing a least square fit with the "
20    info_txt += "following fit function:\n\n"
21    info_txt += "chi**2 = sum_i[ I_meas(q_i) - I_th(q_i) ]**2/error**2 + Reg_term\n\n"
22    info_txt += "where I_meas(q) is the measured scattering intensity and I_th(q) is "
23    info_txt += "the prediction from the Fourier transform of the P(r) expansion. "
24    info_txt += "The Reg_term term is a regularization term set to the second derivative "
25    info_txt += "d**2P(r)/dr**2 integrated over r. It is used to produce a smooth P(r) output.\n\n"
26    info_txt += "The following are user inputs:\n\n"
27    info_txt += "   - Number of terms: the number of base functions in the P(r) expansion.\n\n"
28    info_txt += "   - Regularization constant: a multiplicative constant to set the size of "
29    info_txt += "the regularization term.\n\n"
30    info_txt += "   - Maximum distance: the maximum distance between any two points in the system.\n"
31     
32    return info_txt
33   
34
35class Invertor(Cinvertor):
36    """
37        Invertor class to perform P(r) inversion
38       
39        The problem is solved by posing the problem as  Ax = b,
40        where x is the set of coefficients we are looking for.
41       
42        Npts is the number of points.
43       
44        In the following i refers to the ith base function coefficient.
45        The matrix has its entries j in its first Npts rows set to
46            A[j][i] = (Fourier transformed base function for point j)
47           
48        We them choose a number of r-points, n_r, to evaluate the second
49        derivative of P(r) at. This is used as our regularization term.
50        For a vector r of length n_r, the following n_r rows are set to
51            A[j+Npts][i] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
52           
53        The vector b has its first Npts entries set to
54            b[j] = (I(q) observed for point j)
55           
56        The following n_r entries are set to zero.
57       
58        The result is found by using scipy.linalg.basic.lstsq to invert
59        the matrix and find the coefficients x.
60       
61        Methods inherited from Cinvertor:
62        - get_peaks(pars): returns the number of P(r) peaks
63        - oscillations(pars): returns the oscillation parameters for the output P(r)
64        - get_positive(pars): returns the fraction of P(r) that is above zero
65        - get_pos_err(pars): returns the fraction of P(r) that is 1-sigma above zero
66    """
67    ## Chisqr of the last computation
68    chi2  = 0
69    ## Time elapsed for last computation
70    elapsed = 0
71    ## Alpha to get the reg term the same size as the signal
72    suggested_alpha = 0
73    ## Last number of base functions used
74    nfunc = 10
75    ## Last output values
76    out = None
77    ## Last errors on output values
78    cov = None
79    ## Background value
80    background = 0
81   
82   
83    def __init__(self):
84        Cinvertor.__init__(self)
85       
86    def __setattr__(self, name, value):
87        """
88            Set the value of an attribute.
89            Access the parent class methods for
90            x, y, err, d_max, q_min, q_max and alpha
91        """
92        if   name=='x':
93            if 0.0 in value:
94                raise ValueError, "Invertor: one of your q-values is zero. Delete that entry before proceeding"
95            return self.set_x(value)
96        elif name=='y':
97            return self.set_y(value)
98        elif name=='err':
99            value2 = abs(value)
100            return self.set_err(value2)
101        elif name=='d_max':
102            return self.set_dmax(value)
103        elif name=='q_min':
104            if value==None:
105                return self.set_qmin(-1.0)
106            return self.set_qmin(value)
107        elif name=='q_max':
108            if value==None:
109                return self.set_qmax(-1.0)
110            return self.set_qmax(value)
111        elif name=='alpha':
112            return self.set_alpha(value)
113        elif name=='slit_height':
114            return self.set_slit_height(value)
115        elif name=='slit_width':
116            return self.set_slit_width(value)
117        elif name=='has_bck':
118            if value==True:
119                return self.set_has_bck(1)
120            elif value==False:
121                return self.set_has_bck(0)
122            else:
123                raise ValueError, "Invertor: has_bck can only be True or False"
124           
125        return Cinvertor.__setattr__(self, name, value)
126   
127    def __getattr__(self, name):
128        """
129           Return the value of an attribute
130        """
131        import numpy
132        if   name=='x':
133            out = numpy.ones(self.get_nx())
134            self.get_x(out)
135            return out
136        elif name=='y':
137            out = numpy.ones(self.get_ny())
138            self.get_y(out)
139            return out
140        elif name=='err':
141            out = numpy.ones(self.get_nerr())
142            self.get_err(out)
143            return out
144        elif name=='d_max':
145            return self.get_dmax()
146        elif name=='q_min':
147            qmin = self.get_qmin()
148            if qmin<0:
149                return None
150            return qmin
151        elif name=='q_max':
152            qmax = self.get_qmax()
153            if qmax<0:
154                return None
155            return qmax
156        elif name=='alpha':
157            return self.get_alpha()
158        elif name=='slit_height':
159            return self.get_slit_height()
160        elif name=='slit_width':
161            return self.get_slit_width()
162        elif name=='has_bck':
163            value = self.get_has_bck()
164            if value==1:
165                return True
166            else:
167                return False
168        elif name in self.__dict__:
169            return self.__dict__[name]
170        return None
171   
172    def clone(self):
173        """
174            Return a clone of this instance
175        """
176        invertor = Invertor()
177        invertor.chi2    = self.chi2
178        invertor.elapsed = self.elapsed
179        invertor.nfunc   = self.nfunc
180        invertor.alpha   = self.alpha
181        invertor.d_max   = self.d_max
182        invertor.q_min   = self.q_min
183        invertor.q_max   = self.q_max
184       
185        invertor.x = self.x
186        invertor.y = self.y
187        invertor.err = self.err
188        invertor.has_bck = self.has_bck
189        invertor.slit_height = self.slit_height
190        invertor.slit_width  = self.slit_width
191       
192        return invertor
193   
194    def invert(self, nfunc=10, nr=20):
195        """
196            Perform inversion to P(r)
197           
198            The problem is solved by posing the problem as  Ax = b,
199            where x is the set of coefficients we are looking for.
200           
201            Npts is the number of points.
202           
203            In the following i refers to the ith base function coefficient.
204            The matrix has its entries j in its first Npts rows set to
205                A[i][j] = (Fourier transformed base function for point j)
206               
207            We them choose a number of r-points, n_r, to evaluate the second
208            derivative of P(r) at. This is used as our regularization term.
209            For a vector r of length n_r, the following n_r rows are set to
210                A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
211               
212            The vector b has its first Npts entries set to
213                b[j] = (I(q) observed for point j)
214               
215            The following n_r entries are set to zero.
216           
217            The result is found by using scipy.linalg.basic.lstsq to invert
218            the matrix and find the coefficients x.
219           
220            @param nfunc: number of base functions to use.
221            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
222            @return: c_out, c_cov - the coefficients with covariance matrix
223        """
224        # Reset the background value before proceeding
225        self.background = 0.0
226        return self.lstsq(nfunc, nr=nr)
227   
228    def iq(self, out, q):
229        """
230            Function to call to evaluate the scattering intensity
231            @param args: c-parameters, and q
232            @return: I(q)
233        """
234        return Cinvertor.iq(self, out, q)+self.background
235   
236    def invert_optimize(self, nfunc=10, nr=20):
237        """
238            Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
239           
240            This probably produce more reliable results, but is much slower.
241            The minimization function is set to sum_i[ (I_obs(q_i) - I_theo(q_i))/err**2 ] + alpha * reg_term,
242            where the reg_term is given by Svergun: it is the integral of the square of the first derivative
243            of P(r), d(P(r))/dr, integrated over the full range of r.
244           
245            @param nfunc: number of base functions to use.
246            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
247            @return: c_out, c_cov - the coefficients with covariance matrix
248        """
249       
250        from scipy import optimize
251        import time
252       
253        self.nfunc = nfunc
254        # First, check that the current data is valid
255        if self.is_valid()<=0:
256            raise RuntimeError, "Invertor.invert: Data array are of different length"
257       
258        p = numpy.ones(nfunc)
259        t_0 = time.time()
260        out, cov_x, info, mesg, success = optimize.leastsq(self.residuals, p, full_output=1, warning=True)
261       
262        # Compute chi^2
263        res = self.residuals(out)
264        chisqr = 0
265        for i in range(len(res)):
266            chisqr += res[i]
267       
268        self.chi2 = chisqr
269
270        # Store computation time
271        self.elapsed = time.time() - t_0
272       
273        return out, cov_x
274   
275    def pr_fit(self, nfunc=5):
276        """
277            This is a direct fit to a given P(r). It assumes that the y data
278            is set to some P(r) distribution that we are trying to reproduce
279            with a set of base functions.
280           
281            This method is provided as a test.
282        """
283        from scipy import optimize
284       
285        # First, check that the current data is valid
286        if self.is_valid()<=0:
287            raise RuntimeError, "Invertor.invert: Data arrays are of different length"
288       
289        p = numpy.ones(nfunc)
290        t_0 = time.time()
291        out, cov_x, info, mesg, success = optimize.leastsq(self.pr_residuals, p, full_output=1, warning=True)
292       
293        # Compute chi^2
294        res = self.pr_residuals(out)
295        chisqr = 0
296        for i in range(len(res)):
297            chisqr += res[i]
298       
299        self.chisqr = chisqr
300       
301        # Store computation time
302        self.elapsed = time.time() - t_0
303
304        return out, cov_x
305   
306    def pr_err(self, c, c_cov, r):
307        """   
308            Returns the value of P(r) for a given r, and base function
309            coefficients, with error.
310           
311            @param c: base function coefficients
312            @param c_cov: covariance matrice of the base function coefficients
313            @param r: r-value to evaluate P(r) at
314            @return: P(r)
315        """
316        return self.get_pr_err(c, c_cov, r)
317       
318    def _accept_q(self, q):
319        """
320            Check q-value against user-defined range
321        """
322        if not self.q_min==None and q<self.q_min:
323            return False
324        if not self.q_max==None and q>self.q_max:
325            return False
326        return True
327       
328    def lstsq(self, nfunc=5, nr=20):
329        """
330            The problem is solved by posing the problem as  Ax = b,
331            where x is the set of coefficients we are looking for.
332           
333            Npts is the number of points.
334           
335            In the following i refers to the ith base function coefficient.
336            The matrix has its entries j in its first Npts rows set to
337                A[i][j] = (Fourier transformed base function for point j)
338               
339            We them choose a number of r-points, n_r, to evaluate the second
340            derivative of P(r) at. This is used as our regularization term.
341            For a vector r of length n_r, the following n_r rows are set to
342                A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
343               
344            The vector b has its first Npts entries set to
345                b[j] = (I(q) observed for point j)
346               
347            The following n_r entries are set to zero.
348           
349            The result is found by using scipy.linalg.basic.lstsq to invert
350            the matrix and find the coefficients x.
351           
352            @param nfunc: number of base functions to use.
353            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
354
355            If the result does not allow us to compute the covariance matrix,
356            a matrix filled with zeros will be returned.
357
358        """
359        # Note: To make sure an array is contiguous:
360        # blah = numpy.ascontiguousarray(blah_original)
361        # ... before passing it to C
362       
363        if self.is_valid()<0:
364            raise RuntimeError, "Invertor: invalid data; incompatible data lengths."
365       
366        self.nfunc = nfunc
367        # a -- An M x N matrix.
368        # b -- An M x nrhs matrix or M vector.
369        npts = len(self.x)
370        nq   = nr
371        sqrt_alpha = math.sqrt(math.fabs(self.alpha))
372        if sqrt_alpha<0.0:
373            nq = 0
374
375        # If we need to fit the background, add a term
376        if self.has_bck==True:
377            nfunc_0 = nfunc
378            nfunc += 1
379
380        a = numpy.zeros([npts+nq, nfunc])
381        b = numpy.zeros(npts+nq)
382        err = numpy.zeros([nfunc, nfunc])
383       
384        # Construct the a matrix and b vector that represent the problem
385        t_0 = time.time()
386        self._get_matrix(nfunc, nq, a, b)
387             
388        # Perform the inversion (least square fit)
389        c, chi2, rank, n = lstsq(a, b)
390        # Sanity check
391        try:
392            float(chi2)
393        except:
394            chi2 = -1.0
395        self.chi2 = chi2
396               
397        inv_cov = numpy.zeros([nfunc,nfunc])
398        # Get the covariance matrix, defined as inv_cov = a_transposed * a
399        self._get_invcov_matrix(nfunc, nr, a, inv_cov)
400                   
401        # Compute the reg term size for the output
402        sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
403                   
404        if math.fabs(self.alpha)>0:
405            new_alpha = sum_sig/(sum_reg/self.alpha)
406        else:
407            new_alpha = 0.0
408        self.suggested_alpha = new_alpha
409       
410        try:
411            cov = numpy.linalg.pinv(inv_cov)
412            err = math.fabs(chi2/float(npts-nfunc)) * cov
413        except:
414            # We were not able to estimate the errors
415            # Return an empty error matrix
416            pass
417           
418        # Keep a copy of the last output
419        if self.has_bck==False:
420            self.background = 0
421            self.out = c
422            self.cov = err
423        else:
424            self.background = c[0]
425           
426            err_0 = numpy.zeros([nfunc, nfunc])
427            c_0 = numpy.zeros(nfunc)
428           
429            for i in range(nfunc_0):
430                c_0[i] = c[i+1]
431                for j in range(nfunc_0):
432                    err_0[i][j] = err[i+1][j+1]
433                   
434            self.out = c_0
435            self.cov = err_0
436           
437        return self.out, self.cov
438       
439    def estimate_numterms(self, isquit_func=None):
440        """
441            Returns a reasonable guess for the
442            number of terms
443            @param isquit_func: reference to thread function to call to
444                                check whether the computation needs to
445                                be stopped.
446           
447            @return: number of terms, alpha, message
448        """
449        from num_term import Num_terms
450        estimator = Num_terms(self.clone())
451        try:
452            return estimator.num_terms(isquit_func)
453        except:
454            # If we fail, estimate alpha and return the default
455            # number of terms
456            best_alpha, message, elapsed =self.estimate_alpha(self.nfunc)
457            return self.nfunc, best_alpha, "Could not estimate number of terms"
458                   
459    def estimate_alpha(self, nfunc):
460        """
461            Returns a reasonable guess for the
462            regularization constant alpha
463           
464            @param nfunc: number of terms to use in the expansion.
465            @return: alpha, message, elapsed
466           
467            where alpha is the estimate for alpha,
468            message is a message for the user,
469            elapsed is the computation time
470        """
471        import time
472        try:           
473            pr = self.clone()
474           
475            # T_0 for computation time
476            starttime = time.time()
477            elapsed = 0
478           
479            # If the current alpha is zero, try
480            # another value
481            if pr.alpha<=0:
482                pr.alpha = 0.0001
483                 
484            # Perform inversion to find the largest alpha
485            out, cov = pr.invert(nfunc)
486            elapsed = time.time()-starttime
487            initial_alpha = pr.alpha
488            initial_peaks = pr.get_peaks(out)
489   
490            # Try the inversion with the estimated alpha
491            pr.alpha = pr.suggested_alpha
492            out, cov = pr.invert(nfunc)
493   
494            npeaks = pr.get_peaks(out)
495            # if more than one peak to start with
496            # just return the estimate
497            if npeaks>1:
498                #message = "Your P(r) is not smooth, please check your inversion parameters"
499                message = None
500                return pr.suggested_alpha, message, elapsed
501            else:
502               
503                # Look at smaller values
504                # We assume that for the suggested alpha, we have 1 peak
505                # if not, send a message to change parameters
506                alpha = pr.suggested_alpha
507                best_alpha = pr.suggested_alpha
508                found = False
509                for i in range(10):
510                    pr.alpha = (0.33)**(i+1)*alpha
511                    out, cov = pr.invert(nfunc)
512                   
513                    peaks = pr.get_peaks(out)
514                    if peaks>1:
515                        found = True
516                        break
517                    best_alpha = pr.alpha
518                   
519                # If we didn't find a turning point for alpha and
520                # the initial alpha already had only one peak,
521                # just return that
522                if not found and initial_peaks==1 and initial_alpha<best_alpha:
523                    best_alpha = initial_alpha
524                   
525                # Check whether the size makes sense
526                message=''
527               
528                if not found:
529                    message = "None"
530                elif best_alpha>=0.5*pr.suggested_alpha:
531                    # best alpha is too big, return a
532                    # reasonable value
533                    message  = "The estimated alpha for your system is too large. "
534                    message += "Try increasing your maximum distance."
535               
536                return best_alpha, message, elapsed
537   
538        except:
539            message = "Invertor.estimate_alpha: %s" % sys.exc_value
540            return 0, message, elapsed
541   
542       
543    def to_file(self, path, npts=100):
544        """
545            Save the state to a file that will be readable
546            by SliceView.
547            @param path: path of the file to write
548            @param npts: number of P(r) points to be written
549        """
550        import pylab
551       
552        file = open(path, 'w')
553        file.write("#d_max=%g\n" % self.d_max)
554        file.write("#nfunc=%g\n" % self.nfunc)
555        file.write("#alpha=%g\n" % self.alpha)
556        file.write("#chi2=%g\n" % self.chi2)
557        file.write("#elapsed=%g\n" % self.elapsed)
558        file.write("#qmin=%s\n" % str(self.q_min))
559        file.write("#qmax=%s\n" % str(self.q_max))
560        file.write("#slit_height=%g\n" % self.slit_height)
561        file.write("#slit_width=%g\n" % self.slit_width)
562        file.write("#background=%g\n" % self.background)
563        if self.has_bck==True:
564            file.write("#has_bck=1\n")
565        else:
566            file.write("#has_bck=0\n")
567        file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
568        if not self.out==None:
569            if len(self.out)==len(self.cov):
570                for i in range(len(self.out)):
571                    file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]), str(self.cov[i][i])))
572        file.write("<r>  <Pr>  <dPr>\n")
573        r = pylab.arange(0.0, self.d_max, self.d_max/npts)
574       
575        for r_i in r:
576            (value, err) = self.pr_err(self.out, self.cov, r_i)
577            file.write("%g  %g  %g\n" % (r_i, value, err))
578   
579        file.close()
580     
581       
582    def from_file(self, path):
583        """
584            Load the state of the Invertor from a file,
585            to be able to generate P(r) from a set of
586            parameters.
587            @param path: path of the file to load
588        """
589        import os
590        import re
591        if os.path.isfile(path):
592            try:
593                fd = open(path, 'r')
594               
595                buff    = fd.read()
596                lines   = buff.split('\n')
597                for line in lines:
598                    if line.startswith('#d_max='):
599                        toks = line.split('=')
600                        self.d_max = float(toks[1])
601                    elif line.startswith('#nfunc='):
602                        toks = line.split('=')
603                        self.nfunc = int(toks[1])
604                        self.out = numpy.zeros(self.nfunc)
605                        self.cov = numpy.zeros([self.nfunc, self.nfunc])
606                    elif line.startswith('#alpha='):
607                        toks = line.split('=')
608                        self.alpha = float(toks[1])
609                    elif line.startswith('#chi2='):
610                        toks = line.split('=')
611                        self.chi2 = float(toks[1])
612                    elif line.startswith('#elapsed='):
613                        toks = line.split('=')
614                        self.elapsed = float(toks[1])
615                    elif line.startswith('#alpha_estimate='):
616                        toks = line.split('=')
617                        self.suggested_alpha = float(toks[1])
618                    elif line.startswith('#qmin='):
619                        toks = line.split('=')
620                        try:
621                            self.q_min = float(toks[1])
622                        except:
623                            self.q_min = None
624                    elif line.startswith('#qmax='):
625                        toks = line.split('=')
626                        try:
627                            self.q_max = float(toks[1])
628                        except:
629                            self.q_max = None
630                    elif line.startswith('#slit_height='):
631                        toks = line.split('=')
632                        self.slit_height = float(toks[1])
633                    elif line.startswith('#slit_width='):
634                        toks = line.split('=')
635                        self.slit_width = float(toks[1])
636                    elif line.startswith('#background='):
637                        toks = line.split('=')
638                        self.background = float(toks[1])
639                    elif line.startswith('#has_bck='):
640                        toks = line.split('=')
641                        if int(toks[1])==1:
642                            self.has_bck=True
643                        else:
644                            self.has_bck=False
645           
646                    # Now read in the parameters
647                    elif line.startswith('#C_'):
648                        toks = line.split('=')
649                        p = re.compile('#C_([0-9]+)')
650                        m = p.search(toks[0])
651                        toks2 = toks[1].split('+-')
652                        i = int(m.group(1))
653                        self.out[i] = float(toks2[0])
654                       
655                        self.cov[i][i] = float(toks2[1])                       
656           
657            except:
658                raise RuntimeError, "Invertor.from_file: corrupted file\n%s" % sys.exc_value
659        else:
660            raise RuntimeError, "Invertor.from_file: '%s' is not a file" % str(path) 
661       
662       
663   
664   
665if __name__ == "__main__":
666    o = Invertor()
667
668   
669   
670   
671   
Note: See TracBrowser for help on using the repository browser.