source: sasview/pr_inversion/invertor.py @ 1a5c946

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalccostrafo411magnetic_scattrelease-4.1.1release-4.1.2release-4.2.2release_4.0.1ticket-1009ticket-1094-headlessticket-1242-2d-resolutionticket-1243ticket-1249ticket885unittest-saveload
Last change on this file since 1a5c946 was 1a5c946, checked in by Mathieu Doucet <doucetm@…>, 16 years ago

Fixed bug in error estimation

  • Property mode set to 100644
File size: 21.6 KB
Line 
1"""
2    Module to perform P(r) inversion.
3    The module contains the Invertor class.
4"""
5from sans.pr.core.pr_inversion import Cinvertor
6import numpy
7import sys
8
9def help():
10    """
11        Provide general online help text
12        Future work: extend this function to allow topic selection
13    """
14    info_txt  = "The inversion approach is based on Moore, J. Appl. Cryst. (1980) 13, 168-175.\n\n"
15    info_txt += "P(r) is set to be equal to an expansion of base functions of the type "
16    info_txt += "phi_n(r) = 2*r*sin(pi*n*r/D_max). The coefficient of each base functions "
17    info_txt += "in the expansion is found by performing a least square fit with the "
18    info_txt += "following fit function:\n\n"
19    info_txt += "chi**2 = sum_i[ I_meas(q_i) - I_th(q_i) ]**2/error**2 + Reg_term\n\n"
20    info_txt += "where I_meas(q) is the measured scattering intensity and I_th(q) is "
21    info_txt += "the prediction from the Fourier transform of the P(r) expansion. "
22    info_txt += "The Reg_term term is a regularization term set to the second derivative "
23    info_txt += "d**2P(r)/dr**2 integrated over r. It is used to produce a smooth P(r) output.\n\n"
24    info_txt += "The following are user inputs:\n\n"
25    info_txt += "   - Number of terms: the number of base functions in the P(r) expansion.\n\n"
26    info_txt += "   - Regularization constant: a multiplicative constant to set the size of "
27    info_txt += "the regularization term.\n\n"
28    info_txt += "   - Maximum distance: the maximum distance between any two points in the system.\n"
29     
30    return info_txt
31   
32
33class Invertor(Cinvertor):
34    """
35        Invertor class to perform P(r) inversion
36       
37        The problem is solved by posing the problem as  Ax = b,
38        where x is the set of coefficients we are looking for.
39       
40        Npts is the number of points.
41       
42        In the following i refers to the ith base function coefficient.
43        The matrix has its entries j in its first Npts rows set to
44            A[j][i] = (Fourier transformed base function for point j)
45           
46        We them choose a number of r-points, n_r, to evaluate the second
47        derivative of P(r) at. This is used as our regularization term.
48        For a vector r of length n_r, the following n_r rows are set to
49            A[j+Npts][i] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
50           
51        The vector b has its first Npts entries set to
52            b[j] = (I(q) observed for point j)
53           
54        The following n_r entries are set to zero.
55       
56        The result is found by using scipy.linalg.basic.lstsq to invert
57        the matrix and find the coefficients x.
58       
59        Methods inherited from Cinvertor:
60        - get_peaks(pars): returns the number of P(r) peaks
61        - oscillations(pars): returns the oscillation parameters for the output P(r)
62        - get_positive(pars): returns the fraction of P(r) that is above zero
63        - get_pos_err(pars): returns the fraction of P(r) that is 1-sigma above zero
64    """
65    ## Chisqr of the last computation
66    chi2  = 0
67    ## Time elapsed for last computation
68    elapsed = 0
69    ## Alpha to get the reg term the same size as the signal
70    suggested_alpha = 0
71    ## Last number of base functions used
72    nfunc = 0
73    ## Last output values
74    out = None
75    ## Last errors on output values
76    cov = None
77   
78    def __init__(self):
79        Cinvertor.__init__(self)
80       
81    def __setattr__(self, name, value):
82        """
83            Set the value of an attribute.
84            Access the parent class methods for
85            x, y, err, d_max, q_min, q_max and alpha
86        """
87        if   name=='x':
88            if 0.0 in value:
89                raise ValueError, "Invertor: one of your q-values is zero. Delete that entry before proceeding"
90            return self.set_x(value)
91        elif name=='y':
92            return self.set_y(value)
93        elif name=='err':
94            return self.set_err(value)
95        elif name=='d_max':
96            return self.set_dmax(value)
97        elif name=='q_min':
98            if value==None:
99                return self.set_qmin(-1.0)
100            return self.set_qmin(value)
101        elif name=='q_max':
102            if value==None:
103                return self.set_qmax(-1.0)
104            return self.set_qmax(value)
105        elif name=='alpha':
106            return self.set_alpha(value)
107           
108        return Cinvertor.__setattr__(self, name, value)
109   
110    def __getattr__(self, name):
111        """
112           Return the value of an attribute
113           For the moment x, y, err and d_max are write-only
114           TODO: change that!
115        """
116        import numpy
117        if   name=='x':
118            out = numpy.ones(self.get_nx())
119            self.get_x(out)
120            return out
121        elif name=='y':
122            out = numpy.ones(self.get_ny())
123            self.get_y(out)
124            return out
125        elif name=='err':
126            out = numpy.ones(self.get_nerr())
127            self.get_err(out)
128            return out
129        elif name=='d_max':
130            return self.get_dmax()
131        elif name=='q_min':
132            qmin = self.get_qmin()
133            if qmin<0:
134                return None
135            return qmin
136        elif name=='q_max':
137            qmax = self.get_qmax()
138            if qmax<0:
139                return None
140            return qmax
141        elif name=='alpha':
142            return self.get_alpha()
143        elif name in self.__dict__:
144            return self.__dict__[name]
145        return None
146   
147    def clone(self):
148        """
149            Return a clone of this instance
150        """
151        invertor = Invertor()
152        invertor.chi2    = self.chi2
153        invertor.elapsed = self.elapsed
154        invertor.alpha   = self.alpha
155        invertor.d_max   = self.d_max
156        invertor.q_min   = self.q_min
157        invertor.q_max   = self.q_max
158       
159        invertor.x = self.x
160        invertor.y = self.y
161        invertor.err = self.err
162       
163        return invertor
164   
165    def invert(self, nfunc=10, nr=20):
166        """
167            Perform inversion to P(r)
168           
169            The problem is solved by posing the problem as  Ax = b,
170            where x is the set of coefficients we are looking for.
171           
172            Npts is the number of points.
173           
174            In the following i refers to the ith base function coefficient.
175            The matrix has its entries j in its first Npts rows set to
176                A[i][j] = (Fourier transformed base function for point j)
177               
178            We them choose a number of r-points, n_r, to evaluate the second
179            derivative of P(r) at. This is used as our regularization term.
180            For a vector r of length n_r, the following n_r rows are set to
181                A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
182               
183            The vector b has its first Npts entries set to
184                b[j] = (I(q) observed for point j)
185               
186            The following n_r entries are set to zero.
187           
188            The result is found by using scipy.linalg.basic.lstsq to invert
189            the matrix and find the coefficients x.
190           
191            @param nfunc: number of base functions to use.
192            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
193            @return: c_out, c_cov - the coefficients with covariance matrix
194        """
195        #TODO: call the pyhton implementation for now. In the future, translate this to C.
196        return self.lstsq(nfunc, nr=nr)
197   
198    def invert_optimize(self, nfunc=10, nr=20):
199        """
200            Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
201           
202            This probably produce more reliable results, but is much slower.
203            The minimization function is set to sum_i[ (I_obs(q_i) - I_theo(q_i))/err**2 ] + alpha * reg_term,
204            where the reg_term is given by Svergun: it is the integral of the square of the first derivative
205            of P(r), d(P(r))/dr, integrated over the full range of r.
206           
207            @param nfunc: number of base functions to use.
208            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
209            @return: c_out, c_cov - the coefficients with covariance matrix
210        """
211       
212        from scipy import optimize
213        import time
214       
215        self.nfunc = nfunc
216        # First, check that the current data is valid
217        if self.is_valid()<=0:
218            raise RuntimeError, "Invertor.invert: Data array are of different length"
219       
220        p = numpy.ones(nfunc)
221        t_0 = time.time()
222        out, cov_x, info, mesg, success = optimize.leastsq(self.residuals, p, full_output=1, warning=True)
223       
224        # Compute chi^2
225        res = self.residuals(out)
226        chisqr = 0
227        for i in range(len(res)):
228            chisqr += res[i]
229       
230        self.chi2 = chisqr
231
232        # Store computation time
233        self.elapsed = time.time() - t_0
234       
235        return out, cov_x
236   
237    def pr_fit(self, nfunc=5):
238        """
239            This is a direct fit to a given P(r). It assumes that the y data
240            is set to some P(r) distribution that we are trying to reproduce
241            with a set of base functions.
242           
243            This method is provided as a test.
244        """
245        from scipy import optimize
246       
247        # First, check that the current data is valid
248        if self.is_valid()<=0:
249            raise RuntimeError, "Invertor.invert: Data arrays are of different length"
250       
251        p = numpy.ones(nfunc)
252        t_0 = time.time()
253        out, cov_x, info, mesg, success = optimize.leastsq(self.pr_residuals, p, full_output=1, warning=True)
254       
255        # Compute chi^2
256        res = self.pr_residuals(out)
257        chisqr = 0
258        for i in range(len(res)):
259            chisqr += res[i]
260       
261        self.chisqr = chisqr
262       
263        # Store computation time
264        self.elapsed = time.time() - t_0
265
266        return out, cov_x
267   
268    def pr_err(self, c, c_cov, r):
269        """   
270            Returns the value of P(r) for a given r, and base function
271            coefficients, with error.
272           
273            @param c: base function coefficients
274            @param c_cov: covariance matrice of the base function coefficients
275            @param r: r-value to evaluate P(r) at
276            @return: P(r)
277        """
278        return self.get_pr_err(c, c_cov, r)
279       
280    def _accept_q(self, q):
281        """
282            Check q-value against user-defined range
283        """
284        if not self.q_min==None and q<self.q_min:
285            return False
286        if not self.q_max==None and q>self.q_max:
287            return False
288        return True
289       
290    def lstsq(self, nfunc=5, nr=20):
291        #TODO: do this on the C side
292        #
293        # To make sure an array is contiguous:
294        # blah = numpy.ascontiguousarray(blah_original)
295        # ... before passing it to C
296        """
297            The problem is solved by posing the problem as  Ax = b,
298            where x is the set of coefficients we are looking for.
299           
300            Npts is the number of points.
301           
302            In the following i refers to the ith base function coefficient.
303            The matrix has its entries j in its first Npts rows set to
304                A[i][j] = (Fourier transformed base function for point j)
305               
306            We them choose a number of r-points, n_r, to evaluate the second
307            derivative of P(r) at. This is used as our regularization term.
308            For a vector r of length n_r, the following n_r rows are set to
309                A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
310               
311            The vector b has its first Npts entries set to
312                b[j] = (I(q) observed for point j)
313               
314            The following n_r entries are set to zero.
315           
316            The result is found by using scipy.linalg.basic.lstsq to invert
317            the matrix and find the coefficients x.
318           
319            @param nfunc: number of base functions to use.
320            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
321        """
322        import math
323        from scipy.linalg.basic import lstsq
324       
325        self.nfunc = nfunc
326        # a -- An M x N matrix.
327        # b -- An M x nrhs matrix or M vector.
328        npts = len(self.x)
329        nq   = nr
330        sqrt_alpha = math.sqrt(self.alpha)
331       
332        a = numpy.zeros([npts+nq, nfunc])
333        b = numpy.zeros(npts+nq)
334        err = numpy.zeros(nfunc)
335       
336        for j in range(nfunc):
337            for i in range(npts):
338                if self._accept_q(self.x[i]):
339                    a[i][j] = self.basefunc_ft(self.d_max, j+1, self.x[i])/self.err[i]
340                   
341            #TODO: refactor this: i_q should really be i_r
342            for i_q in range(nq):
343                r = self.d_max/nq*i_q
344                #a[i_q+npts][j] = sqrt_alpha * 1.0/nq*self.d_max*2.0*math.fabs(math.sin(math.pi*(j+1)*r/self.d_max) + math.pi*(j+1)*r/self.d_max * math.cos(math.pi*(j+1)*r/self.d_max))     
345                a[i_q+npts][j] = sqrt_alpha * 1.0/nq*self.d_max*2.0*(2.0*math.pi*(j+1)/self.d_max*math.cos(math.pi*(j+1)*r/self.d_max) + math.pi**2*(j+1)**2*r/self.d_max**2 * math.sin(math.pi*(j+1)*r/self.d_max))     
346       
347        for i in range(npts):
348            if self._accept_q(self.x[i]):
349                b[i] = self.y[i]/self.err[i]
350           
351        c, chi2, rank, n = lstsq(a, b)
352        self.chi2 = chi2
353               
354        at = numpy.transpose(a)
355        inv_cov = numpy.zeros([nfunc,nfunc])
356        for i in range(nfunc):
357            for j in range(nfunc):
358                inv_cov[i][j] = 0.0
359                for k in range(npts+nr):
360                    if self._accept_q(self.x[i]):
361                        inv_cov[i][j] = at[i][k]*a[k][j]
362                   
363        # Compute the reg term size for the output
364        sum_sig = 0.0
365        sum_reg = 0.0
366        for j in range(nfunc):
367            for i in range(npts):
368                if self._accept_q(self.x[i]):
369                    sum_sig += (a[i][j])**2
370            for i in range(nq):
371                sum_reg += (a[i+npts][j])**2
372                   
373        if math.fabs(self.alpha)>0:
374            new_alpha = sum_sig/(sum_reg/self.alpha)
375        else:
376            new_alpha = 0.0
377        self.suggested_alpha = new_alpha
378       
379        try:
380            err = math.fabs(chi2/(npts-nfunc))* inv_cov
381        except:
382            print "Error estimating uncertainties"
383           
384        # Keep a copy of the last output
385        self.out = c
386        self.cov = err
387       
388        return c, err
389       
390    def estimate_alpha(self, nfunc):
391        """
392            Returns a reasonable guess for the
393            regularization constant alpha
394           
395            @return: alpha, message, elapsed
396           
397            where alpha is the estimate for alpha,
398            message is a message for the user,
399            elapsed is the computation time
400        """
401        import time
402        try:           
403            pr = self.clone()
404           
405            # T_0 for computation time
406            starttime = time.time()
407           
408            # If the current alpha is zero, try
409            # another value
410            if pr.alpha<=0:
411                pr.alpha = 0.0001
412                 
413            # Perform inversion to find the largest alpha
414            out, cov = pr.lstsq(nfunc)
415            elapsed = time.time()-starttime
416            initial_alpha = pr.alpha
417            initial_peaks = pr.get_peaks(out)
418   
419            # Try the inversion with the estimated alpha
420            pr.alpha = pr.suggested_alpha
421            out, cov = pr.lstsq(nfunc)
422   
423            npeaks = pr.get_peaks(out)
424            # if more than one peak to start with
425            # just return the estimate
426            if npeaks>1:
427                message = "Your P(r) is not smooth, please check your inversion parameters"
428                return pr.suggested_alpha, message, elapsed
429            else:
430               
431                # Look at smaller values
432                # We assume that for the suggested alpha, we have 1 peak
433                # if not, send a message to change parameters
434                alpha = pr.suggested_alpha
435                best_alpha = pr.suggested_alpha
436                found = False
437                for i in range(10):
438                    pr.alpha = (0.33)**(i+1)*alpha
439                    out, cov = pr.lstsq(nfunc)
440                   
441                    peaks = pr.get_peaks(out)
442                    if peaks>1:
443                        found = True
444                        break
445                    best_alpha = pr.alpha
446                   
447                # If we didn't find a turning point for alpha and
448                # the initial alpha already had only one peak,
449                # just return that
450                if not found and initial_peaks==1 and initial_alpha<best_alpha:
451                    best_alpha = initial_alpha
452                   
453                # Check whether the size makes sense
454                message=''
455               
456                if not found:
457                    message = "None"
458                elif best_alpha>=0.5*pr.suggested_alpha:
459                    # best alpha is too big, return a
460                    # reasonable value
461                    message  = "The estimated alpha for your system is too large. "
462                    message += "Try increasing your maximum distance."
463               
464                return best_alpha, message, elapsed
465   
466        except:
467            message = "Invertor.estimate_alpha: %s" % sys.exc_value
468            return 0, message, elapsed
469   
470       
471    def to_file(self, path, npts=100):
472        """
473            Save the state to a file that will be readable
474            by SliceView.
475            @param path: path of the file to write
476            @param npts: number of P(r) points to be written
477        """
478        import pylab
479       
480        file = open(path, 'w')
481        file.write("#d_max=%g\n" % self.d_max)
482        file.write("#nfunc=%g\n" % self.nfunc)
483        file.write("#alpha=%g\n" % self.alpha)
484        file.write("#chi2=%g\n" % self.chi2)
485        file.write("#elapsed=%g\n" % self.elapsed)
486        file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
487        if not self.out==None:
488            if len(self.out)==len(self.cov):
489                for i in range(len(self.out)):
490                    file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]), str(self.cov[i][i])))
491        file.write("<r>  <Pr>  <dPr>\n")
492        r = pylab.arange(0.0, self.d_max, self.d_max/npts)
493       
494        for r_i in r:
495            (value, err) = self.pr_err(self.out, self.cov, r_i)
496            file.write("%g  %g  %g\n" % (r_i, value, err))
497   
498        file.close()
499     
500       
501    def from_file(self, path):
502        """
503            Load the state of the Invertor from a file,
504            to be able to generate P(r) from a set of
505            parameters.
506            @param path: path of the file to load
507        """
508        import os
509        import re
510        if os.path.isfile(path):
511            try:
512                fd = open(path, 'r')
513               
514                buff    = fd.read()
515                lines   = buff.split('\n')
516                for line in lines:
517                    if line.startswith('#d_max='):
518                        toks = line.split('=')
519                        self.d_max = float(toks[1])
520                    elif line.startswith('#nfunc='):
521                        toks = line.split('=')
522                        self.nfunc = int(toks[1])
523                        self.out = numpy.zeros(self.nfunc)
524                        self.cov = numpy.zeros([self.nfunc, self.nfunc])
525                    elif line.startswith('#alpha='):
526                        toks = line.split('=')
527                        self.alpha = float(toks[1])
528                    elif line.startswith('#chi2='):
529                        toks = line.split('=')
530                        self.chi2 = float(toks[1])
531                    elif line.startswith('#elapsed='):
532                        toks = line.split('=')
533                        self.elapsed = float(toks[1])
534                    elif line.startswith('#alpha_estimate='):
535                        toks = line.split('=')
536                        self.suggested_alpha = float(toks[1])
537           
538                    # Now read in the parameters
539                    elif line.startswith('#C_'):
540                        toks = line.split('=')
541                        p = re.compile('#C_([0-9]+)')
542                        m = p.search(toks[0])
543                        toks2 = toks[1].split('+-')
544                        i = int(m.group(1))
545                        self.out[i] = float(toks2[0])
546                       
547                        self.cov[i][i] = float(toks2[1])                       
548           
549            except:
550                raise RuntimeError, "Invertor.from_file: corrupted file\n%s" % sys.exc_value
551        else:
552            raise RuntimeError, "Invertor.from_file: '%s' is not a file" % str(path) 
553       
554       
555   
556   
557if __name__ == "__main__":
558    o = Invertor()
559
560   
561   
562   
563   
Note: See TracBrowser for help on using the repository browser.