source: sasview/pr_inversion/invertor.py @ c61228f

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalccostrafo411magnetic_scattrelease-4.1.1release-4.1.2release-4.2.2release_4.0.1ticket-1009ticket-1094-headlessticket-1242-2d-resolutionticket-1243ticket-1249ticket885unittest-saveload
Last change on this file since c61228f was e39640f, checked in by Mathieu Doucet <doucetm@…>, 16 years ago

pr_inversion 0.1

  • Property mode set to 100644
File size: 21.6 KB
Line 
1"""
2    Module to perform P(r) inversion.
3    The module contains the Invertor class.
4"""
5from sans.pr.core.pr_inversion import Cinvertor
6import numpy
7import sys
8
9def help():
10    """
11        Provide general online help text
12        Future work: extend this function to allow topic selection
13    """
14    info_txt  = "The inversion approach is based on Moore, J. Appl. Cryst. (1980) 13, 168-175.\n\n"
15    info_txt += "P(r) is set to be equal to an expansion of base functions of the type "
16    info_txt += "phi_n(r) = 2*r*sin(pi*n*r/D_max). The coefficient of each base functions "
17    info_txt += "in the expansion is found by performing a least square fit with the "
18    info_txt += "following fit function:\n\n"
19    info_txt += "chi**2 = sum_i[ I_meas(q_i) - I_th(q_i) ]**2/error**2 + Reg_term\n\n"
20    info_txt += "where I_meas(q) is the measured scattering intensity and I_th(q) is "
21    info_txt += "the prediction from the Fourier transform of the P(r) expansion. "
22    info_txt += "The Reg_term term is a regularization term set to the second derivative "
23    info_txt += "d**2P(r)/dr**2 integrated over r. It is used to produce a smooth P(r) output.\n\n"
24    info_txt += "The following are user inputs:\n\n"
25    info_txt += "   - Number of terms: the number of base functions in the P(r) expansion.\n\n"
26    info_txt += "   - Regularization constant: a multiplicative constant to set the size of "
27    info_txt += "the regularization term.\n\n"
28    info_txt += "   - Maximum distance: the maximum distance between any two points in the system.\n"
29     
30    return info_txt
31   
32
33class Invertor(Cinvertor):
34    """
35        Invertor class to perform P(r) inversion
36       
37        The problem is solved by posing the problem as  Ax = b,
38        where x is the set of coefficients we are looking for.
39       
40        Npts is the number of points.
41       
42        In the following i refers to the ith base function coefficient.
43        The matrix has its entries j in its first Npts rows set to
44            A[j][i] = (Fourier transformed base function for point j)
45           
46        We them choose a number of r-points, n_r, to evaluate the second
47        derivative of P(r) at. This is used as our regularization term.
48        For a vector r of length n_r, the following n_r rows are set to
49            A[j+Npts][i] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
50           
51        The vector b has its first Npts entries set to
52            b[j] = (I(q) observed for point j)
53           
54        The following n_r entries are set to zero.
55       
56        The result is found by using scipy.linalg.basic.lstsq to invert
57        the matrix and find the coefficients x.
58       
59        Methods inherited from Cinvertor:
60        - get_peaks(pars): returns the number of P(r) peaks
61        - oscillations(pars): returns the oscillation parameters for the output P(r)
62        - get_positive(pars): returns the fraction of P(r) that is above zero
63        - get_pos_err(pars): returns the fraction of P(r) that is 1-sigma above zero
64    """
65    ## Chisqr of the last computation
66    chi2  = 0
67    ## Time elapsed for last computation
68    elapsed = 0
69    ## Alpha to get the reg term the same size as the signal
70    suggested_alpha = 0
71    ## Last number of base functions used
72    nfunc = 0
73    ## Last output values
74    out = None
75    ## Last errors on output values
76    cov = None
77   
78    def __init__(self):
79        Cinvertor.__init__(self)
80       
81    def __setattr__(self, name, value):
82        """
83            Set the value of an attribute.
84            Access the parent class methods for
85            x, y, err, d_max, q_min, q_max and alpha
86        """
87        if   name=='x':
88            if 0.0 in value:
89                raise ValueError, "Invertor: one of your q-values is zero. Delete that entry before proceeding"
90            return self.set_x(value)
91        elif name=='y':
92            return self.set_y(value)
93        elif name=='err':
94            return self.set_err(value)
95        elif name=='d_max':
96            return self.set_dmax(value)
97        elif name=='q_min':
98            if value==None:
99                return self.set_qmin(-1.0)
100            return self.set_qmin(value)
101        elif name=='q_max':
102            if value==None:
103                return self.set_qmax(-1.0)
104            return self.set_qmax(value)
105        elif name=='alpha':
106            return self.set_alpha(value)
107           
108        return Cinvertor.__setattr__(self, name, value)
109   
110    def __getattr__(self, name):
111        """
112           Return the value of an attribute
113           For the moment x, y, err and d_max are write-only
114           TODO: change that!
115        """
116        import numpy
117        if   name=='x':
118            out = numpy.ones(self.get_nx())
119            self.get_x(out)
120            return out
121        elif name=='y':
122            out = numpy.ones(self.get_ny())
123            self.get_y(out)
124            return out
125        elif name=='err':
126            out = numpy.ones(self.get_nerr())
127            self.get_err(out)
128            return out
129        elif name=='d_max':
130            return self.get_dmax()
131        elif name=='q_min':
132            qmin = self.get_qmin()
133            if qmin<0:
134                return None
135            return qmin
136        elif name=='q_max':
137            qmax = self.get_qmax()
138            if qmax<0:
139                return None
140            return qmax
141        elif name=='alpha':
142            return self.get_alpha()
143        elif name in self.__dict__:
144            return self.__dict__[name]
145        return None
146   
147    def clone(self):
148        """
149            Return a clone of this instance
150        """
151        invertor = Invertor()
152        invertor.chi2    = self.chi2
153        invertor.elapsed = self.elapsed
154        invertor.alpha   = self.alpha
155        invertor.d_max   = self.d_max
156        invertor.q_min   = self.q_min
157        invertor.q_max   = self.q_max
158       
159        invertor.x = self.x
160        invertor.y = self.y
161        invertor.err = self.err
162       
163        return invertor
164   
165    def invert(self, nfunc=10, nr=20):
166        """
167            Perform inversion to P(r)
168           
169            The problem is solved by posing the problem as  Ax = b,
170            where x is the set of coefficients we are looking for.
171           
172            Npts is the number of points.
173           
174            In the following i refers to the ith base function coefficient.
175            The matrix has its entries j in its first Npts rows set to
176                A[i][j] = (Fourier transformed base function for point j)
177               
178            We them choose a number of r-points, n_r, to evaluate the second
179            derivative of P(r) at. This is used as our regularization term.
180            For a vector r of length n_r, the following n_r rows are set to
181                A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
182               
183            The vector b has its first Npts entries set to
184                b[j] = (I(q) observed for point j)
185               
186            The following n_r entries are set to zero.
187           
188            The result is found by using scipy.linalg.basic.lstsq to invert
189            the matrix and find the coefficients x.
190           
191            @param nfunc: number of base functions to use.
192            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
193            @return: c_out, c_cov - the coefficients with covariance matrix
194        """
195        #TODO: call the pyhton implementation for now. In the future, translate this to C.
196        return self.lstsq(nfunc, nr=nr)
197   
198    def invert_optimize(self, nfunc=10, nr=20):
199        """
200            Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
201           
202            This probably produce more reliable results, but is much slower.
203            The minimization function is set to sum_i[ (I_obs(q_i) - I_theo(q_i))/err**2 ] + alpha * reg_term,
204            where the reg_term is given by Svergun: it is the integral of the square of the first derivative
205            of P(r), d(P(r))/dr, integrated over the full range of r.
206           
207            @param nfunc: number of base functions to use.
208            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
209            @return: c_out, c_cov - the coefficients with covariance matrix
210        """
211       
212        from scipy import optimize
213        import time
214       
215        self.nfunc = nfunc
216        # First, check that the current data is valid
217        if self.is_valid()<=0:
218            raise RuntimeError, "Invertor.invert: Data array are of different length"
219       
220        p = numpy.ones(nfunc)
221        t_0 = time.time()
222        out, cov_x, info, mesg, success = optimize.leastsq(self.residuals, p, full_output=1, warning=True)
223       
224        # Compute chi^2
225        res = self.residuals(out)
226        chisqr = 0
227        for i in range(len(res)):
228            chisqr += res[i]
229       
230        self.chi2 = chisqr
231
232        # Store computation time
233        self.elapsed = time.time() - t_0
234       
235        return out, cov_x
236   
237    def pr_fit(self, nfunc=5):
238        """
239            This is a direct fit to a given P(r). It assumes that the y data
240            is set to some P(r) distribution that we are trying to reproduce
241            with a set of base functions.
242           
243            This method is provided as a test.
244        """
245        from scipy import optimize
246       
247        # First, check that the current data is valid
248        if self.is_valid()<=0:
249            raise RuntimeError, "Invertor.invert: Data arrays are of different length"
250       
251        p = numpy.ones(nfunc)
252        t_0 = time.time()
253        out, cov_x, info, mesg, success = optimize.leastsq(self.pr_residuals, p, full_output=1, warning=True)
254       
255        # Compute chi^2
256        res = self.pr_residuals(out)
257        chisqr = 0
258        for i in range(len(res)):
259            chisqr += res[i]
260       
261        self.chisqr = chisqr
262       
263        # Store computation time
264        self.elapsed = time.time() - t_0
265
266        return out, cov_x
267   
268    def pr_err(self, c, c_cov, r):
269        """   
270            Returns the value of P(r) for a given r, and base function
271            coefficients, with error.
272           
273            @param c: base function coefficients
274            @param c_cov: covariance matrice of the base function coefficients
275            @param r: r-value to evaluate P(r) at
276            @return: P(r)
277        """
278        return self.get_pr_err(c, c_cov, r)
279       
280    def _accept_q(self, q):
281        """
282            Check q-value against user-defined range
283        """
284        if not self.q_min==None and q<self.q_min:
285            return False
286        if not self.q_max==None and q>self.q_max:
287            return False
288        return True
289       
290    def lstsq(self, nfunc=5, nr=20):
291        #TODO: do this on the C side
292        #
293        # To make sure an array is contiguous:
294        # blah = numpy.ascontiguousarray(blah_original)
295        # ... before passing it to C
296        """
297            The problem is solved by posing the problem as  Ax = b,
298            where x is the set of coefficients we are looking for.
299           
300            Npts is the number of points.
301           
302            In the following i refers to the ith base function coefficient.
303            The matrix has its entries j in its first Npts rows set to
304                A[i][j] = (Fourier transformed base function for point j)
305               
306            We them choose a number of r-points, n_r, to evaluate the second
307            derivative of P(r) at. This is used as our regularization term.
308            For a vector r of length n_r, the following n_r rows are set to
309                A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
310               
311            The vector b has its first Npts entries set to
312                b[j] = (I(q) observed for point j)
313               
314            The following n_r entries are set to zero.
315           
316            The result is found by using scipy.linalg.basic.lstsq to invert
317            the matrix and find the coefficients x.
318           
319            @param nfunc: number of base functions to use.
320            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
321        """
322        import math
323        from scipy.linalg.basic import lstsq
324       
325        self.nfunc = nfunc
326        # a -- An M x N matrix.
327        # b -- An M x nrhs matrix or M vector.
328        npts = len(self.x)
329        nq   = nr
330        sqrt_alpha = math.sqrt(self.alpha)
331       
332        a = numpy.zeros([npts+nq, nfunc])
333        b = numpy.zeros(npts+nq)
334        err = numpy.zeros(nfunc)
335       
336        for j in range(nfunc):
337            for i in range(npts):
338                if self._accept_q(self.x[i]):
339                    a[i][j] = self.basefunc_ft(self.d_max, j+1, self.x[i])/self.err[i]
340                   
341            #TODO: refactor this: i_q should really be i_r
342            for i_q in range(nq):
343                r = self.d_max/nq*i_q
344                #a[i_q+npts][j] = sqrt_alpha * 1.0/nq*self.d_max*2.0*math.fabs(math.sin(math.pi*(j+1)*r/self.d_max) + math.pi*(j+1)*r/self.d_max * math.cos(math.pi*(j+1)*r/self.d_max))     
345                a[i_q+npts][j] = sqrt_alpha * 1.0/nq*self.d_max*2.0*(2.0*math.pi*(j+1)/self.d_max*math.cos(math.pi*(j+1)*r/self.d_max) + math.pi**2*(j+1)**2*r/self.d_max**2 * math.sin(math.pi*(j+1)*r/self.d_max))     
346       
347        for i in range(npts):
348            if self._accept_q(self.x[i]):
349                b[i] = self.y[i]/self.err[i]
350           
351        c, chi2, rank, n = lstsq(a, b)
352        self.chi2 = chi2
353               
354        at = numpy.transpose(a)
355        inv_cov = numpy.zeros([nfunc,nfunc])
356        for i in range(nfunc):
357            for j in range(nfunc):
358                inv_cov[i][j] = 0.0
359                for k in range(npts+nr):
360                    if self._accept_q(self.x[i]):
361                        inv_cov[i][j] = at[i][k]*a[k][j]
362                   
363        # Compute the reg term size for the output
364        sum_sig = 0.0
365        sum_reg = 0.0
366        for j in range(nfunc):
367            for i in range(npts):
368                if self._accept_q(self.x[i]):
369                    sum_sig += (a[i][j])**2
370            for i in range(nq):
371                sum_reg += (a[i+npts][j])**2
372                   
373        if math.fabs(self.alpha)>0:
374            new_alpha = sum_sig/(sum_reg/self.alpha)
375        else:
376            new_alpha = 0.0
377        self.suggested_alpha = new_alpha
378       
379        try:
380            err = math.fabs(chi2/(npts-nfunc))* inv_cov
381        except:
382            print "Error estimating uncertainties"
383           
384        # Keep a copy of the last output
385        self.out = c
386        self.cov = err
387       
388        return c, err
389       
390    def estimate_alpha(self, nfunc):
391        """
392            Returns a reasonable guess for the
393            regularization constant alpha
394           
395            @return: alpha, message, elapsed
396           
397            where alpha is the estimate for alpha,
398            message is a message for the user,
399            elapsed is the computation time
400        """
401        import time
402        try:           
403            pr = self.clone()
404           
405            # T_0 for computation time
406            starttime = time.time()
407            elapsed = 0
408           
409            # If the current alpha is zero, try
410            # another value
411            if pr.alpha<=0:
412                pr.alpha = 0.0001
413                 
414            # Perform inversion to find the largest alpha
415            out, cov = pr.lstsq(nfunc)
416            elapsed = time.time()-starttime
417            initial_alpha = pr.alpha
418            initial_peaks = pr.get_peaks(out)
419   
420            # Try the inversion with the estimated alpha
421            pr.alpha = pr.suggested_alpha
422            out, cov = pr.lstsq(nfunc)
423   
424            npeaks = pr.get_peaks(out)
425            # if more than one peak to start with
426            # just return the estimate
427            if npeaks>1:
428                message = "Your P(r) is not smooth, please check your inversion parameters"
429                return pr.suggested_alpha, message, elapsed
430            else:
431               
432                # Look at smaller values
433                # We assume that for the suggested alpha, we have 1 peak
434                # if not, send a message to change parameters
435                alpha = pr.suggested_alpha
436                best_alpha = pr.suggested_alpha
437                found = False
438                for i in range(10):
439                    pr.alpha = (0.33)**(i+1)*alpha
440                    out, cov = pr.lstsq(nfunc)
441                   
442                    peaks = pr.get_peaks(out)
443                    if peaks>1:
444                        found = True
445                        break
446                    best_alpha = pr.alpha
447                   
448                # If we didn't find a turning point for alpha and
449                # the initial alpha already had only one peak,
450                # just return that
451                if not found and initial_peaks==1 and initial_alpha<best_alpha:
452                    best_alpha = initial_alpha
453                   
454                # Check whether the size makes sense
455                message=''
456               
457                if not found:
458                    message = "None"
459                elif best_alpha>=0.5*pr.suggested_alpha:
460                    # best alpha is too big, return a
461                    # reasonable value
462                    message  = "The estimated alpha for your system is too large. "
463                    message += "Try increasing your maximum distance."
464               
465                return best_alpha, message, elapsed
466   
467        except:
468            message = "Invertor.estimate_alpha: %s" % sys.exc_value
469            return 0, message, elapsed
470   
471       
472    def to_file(self, path, npts=100):
473        """
474            Save the state to a file that will be readable
475            by SliceView.
476            @param path: path of the file to write
477            @param npts: number of P(r) points to be written
478        """
479        import pylab
480       
481        file = open(path, 'w')
482        file.write("#d_max=%g\n" % self.d_max)
483        file.write("#nfunc=%g\n" % self.nfunc)
484        file.write("#alpha=%g\n" % self.alpha)
485        file.write("#chi2=%g\n" % self.chi2)
486        file.write("#elapsed=%g\n" % self.elapsed)
487        file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
488        if not self.out==None:
489            if len(self.out)==len(self.cov):
490                for i in range(len(self.out)):
491                    file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]), str(self.cov[i][i])))
492        file.write("<r>  <Pr>  <dPr>\n")
493        r = pylab.arange(0.0, self.d_max, self.d_max/npts)
494       
495        for r_i in r:
496            (value, err) = self.pr_err(self.out, self.cov, r_i)
497            file.write("%g  %g  %g\n" % (r_i, value, err))
498   
499        file.close()
500     
501       
502    def from_file(self, path):
503        """
504            Load the state of the Invertor from a file,
505            to be able to generate P(r) from a set of
506            parameters.
507            @param path: path of the file to load
508        """
509        import os
510        import re
511        if os.path.isfile(path):
512            try:
513                fd = open(path, 'r')
514               
515                buff    = fd.read()
516                lines   = buff.split('\n')
517                for line in lines:
518                    if line.startswith('#d_max='):
519                        toks = line.split('=')
520                        self.d_max = float(toks[1])
521                    elif line.startswith('#nfunc='):
522                        toks = line.split('=')
523                        self.nfunc = int(toks[1])
524                        self.out = numpy.zeros(self.nfunc)
525                        self.cov = numpy.zeros([self.nfunc, self.nfunc])
526                    elif line.startswith('#alpha='):
527                        toks = line.split('=')
528                        self.alpha = float(toks[1])
529                    elif line.startswith('#chi2='):
530                        toks = line.split('=')
531                        self.chi2 = float(toks[1])
532                    elif line.startswith('#elapsed='):
533                        toks = line.split('=')
534                        self.elapsed = float(toks[1])
535                    elif line.startswith('#alpha_estimate='):
536                        toks = line.split('=')
537                        self.suggested_alpha = float(toks[1])
538           
539                    # Now read in the parameters
540                    elif line.startswith('#C_'):
541                        toks = line.split('=')
542                        p = re.compile('#C_([0-9]+)')
543                        m = p.search(toks[0])
544                        toks2 = toks[1].split('+-')
545                        i = int(m.group(1))
546                        self.out[i] = float(toks2[0])
547                       
548                        self.cov[i][i] = float(toks2[1])                       
549           
550            except:
551                raise RuntimeError, "Invertor.from_file: corrupted file\n%s" % sys.exc_value
552        else:
553            raise RuntimeError, "Invertor.from_file: '%s' is not a file" % str(path) 
554       
555       
556   
557   
558if __name__ == "__main__":
559    o = Invertor()
560
561   
562   
563   
564   
Note: See TracBrowser for help on using the repository browser.