source: sasview/pr_inversion/invertor.py @ 7d3b2ba

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalccostrafo411magnetic_scattrelease-4.1.1release-4.1.2release-4.2.2release_4.0.1ticket-1009ticket-1094-headlessticket-1242-2d-resolutionticket-1243ticket-1249ticket885unittest-saveload
Last change on this file since 7d3b2ba was b00b487, checked in by Mathieu Doucet <doucetm@…>, 16 years ago

Dealt with error conditions, fixed the uncertainty on the output.

  • Property mode set to 100644
File size: 22.3 KB
Line 
1"""
2    Module to perform P(r) inversion.
3    The module contains the Invertor class.
4"""
5from sans.pr.core.pr_inversion import Cinvertor
6import numpy
7import sys
8
9def help():
10    """
11        Provide general online help text
12        Future work: extend this function to allow topic selection
13    """
14    info_txt  = "The inversion approach is based on Moore, J. Appl. Cryst. (1980) 13, 168-175.\n\n"
15    info_txt += "P(r) is set to be equal to an expansion of base functions of the type "
16    info_txt += "phi_n(r) = 2*r*sin(pi*n*r/D_max). The coefficient of each base functions "
17    info_txt += "in the expansion is found by performing a least square fit with the "
18    info_txt += "following fit function:\n\n"
19    info_txt += "chi**2 = sum_i[ I_meas(q_i) - I_th(q_i) ]**2/error**2 + Reg_term\n\n"
20    info_txt += "where I_meas(q) is the measured scattering intensity and I_th(q) is "
21    info_txt += "the prediction from the Fourier transform of the P(r) expansion. "
22    info_txt += "The Reg_term term is a regularization term set to the second derivative "
23    info_txt += "d**2P(r)/dr**2 integrated over r. It is used to produce a smooth P(r) output.\n\n"
24    info_txt += "The following are user inputs:\n\n"
25    info_txt += "   - Number of terms: the number of base functions in the P(r) expansion.\n\n"
26    info_txt += "   - Regularization constant: a multiplicative constant to set the size of "
27    info_txt += "the regularization term.\n\n"
28    info_txt += "   - Maximum distance: the maximum distance between any two points in the system.\n"
29     
30    return info_txt
31   
32
33class Invertor(Cinvertor):
34    """
35        Invertor class to perform P(r) inversion
36       
37        The problem is solved by posing the problem as  Ax = b,
38        where x is the set of coefficients we are looking for.
39       
40        Npts is the number of points.
41       
42        In the following i refers to the ith base function coefficient.
43        The matrix has its entries j in its first Npts rows set to
44            A[j][i] = (Fourier transformed base function for point j)
45           
46        We them choose a number of r-points, n_r, to evaluate the second
47        derivative of P(r) at. This is used as our regularization term.
48        For a vector r of length n_r, the following n_r rows are set to
49            A[j+Npts][i] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
50           
51        The vector b has its first Npts entries set to
52            b[j] = (I(q) observed for point j)
53           
54        The following n_r entries are set to zero.
55       
56        The result is found by using scipy.linalg.basic.lstsq to invert
57        the matrix and find the coefficients x.
58       
59        Methods inherited from Cinvertor:
60        - get_peaks(pars): returns the number of P(r) peaks
61        - oscillations(pars): returns the oscillation parameters for the output P(r)
62        - get_positive(pars): returns the fraction of P(r) that is above zero
63        - get_pos_err(pars): returns the fraction of P(r) that is 1-sigma above zero
64    """
65    ## Chisqr of the last computation
66    chi2  = 0
67    ## Time elapsed for last computation
68    elapsed = 0
69    ## Alpha to get the reg term the same size as the signal
70    suggested_alpha = 0
71    ## Last number of base functions used
72    nfunc = 0
73    ## Last output values
74    out = None
75    ## Last errors on output values
76    cov = None
77   
78    def __init__(self):
79        Cinvertor.__init__(self)
80       
81    def __setattr__(self, name, value):
82        """
83            Set the value of an attribute.
84            Access the parent class methods for
85            x, y, err, d_max, q_min, q_max and alpha
86        """
87        if   name=='x':
88            if 0.0 in value:
89                raise ValueError, "Invertor: one of your q-values is zero. Delete that entry before proceeding"
90            return self.set_x(value)
91        elif name=='y':
92            return self.set_y(value)
93        elif name=='err':
94            value2 = abs(value)
95            return self.set_err(value2)
96        elif name=='d_max':
97            return self.set_dmax(value)
98        elif name=='q_min':
99            if value==None:
100                return self.set_qmin(-1.0)
101            return self.set_qmin(value)
102        elif name=='q_max':
103            if value==None:
104                return self.set_qmax(-1.0)
105            return self.set_qmax(value)
106        elif name=='alpha':
107            return self.set_alpha(value)
108           
109        return Cinvertor.__setattr__(self, name, value)
110   
111    def __getattr__(self, name):
112        """
113           Return the value of an attribute
114           For the moment x, y, err and d_max are write-only
115           TODO: change that!
116        """
117        import numpy
118        if   name=='x':
119            out = numpy.ones(self.get_nx())
120            self.get_x(out)
121            return out
122        elif name=='y':
123            out = numpy.ones(self.get_ny())
124            self.get_y(out)
125            return out
126        elif name=='err':
127            out = numpy.ones(self.get_nerr())
128            self.get_err(out)
129            return out
130        elif name=='d_max':
131            return self.get_dmax()
132        elif name=='q_min':
133            qmin = self.get_qmin()
134            if qmin<0:
135                return None
136            return qmin
137        elif name=='q_max':
138            qmax = self.get_qmax()
139            if qmax<0:
140                return None
141            return qmax
142        elif name=='alpha':
143            return self.get_alpha()
144        elif name in self.__dict__:
145            return self.__dict__[name]
146        return None
147   
148    def clone(self):
149        """
150            Return a clone of this instance
151        """
152        invertor = Invertor()
153        invertor.chi2    = self.chi2
154        invertor.elapsed = self.elapsed
155        invertor.alpha   = self.alpha
156        invertor.d_max   = self.d_max
157        invertor.q_min   = self.q_min
158        invertor.q_max   = self.q_max
159       
160        invertor.x = self.x
161        invertor.y = self.y
162        invertor.err = self.err
163       
164        return invertor
165   
166    def invert(self, nfunc=10, nr=20):
167        """
168            Perform inversion to P(r)
169           
170            The problem is solved by posing the problem as  Ax = b,
171            where x is the set of coefficients we are looking for.
172           
173            Npts is the number of points.
174           
175            In the following i refers to the ith base function coefficient.
176            The matrix has its entries j in its first Npts rows set to
177                A[i][j] = (Fourier transformed base function for point j)
178               
179            We them choose a number of r-points, n_r, to evaluate the second
180            derivative of P(r) at. This is used as our regularization term.
181            For a vector r of length n_r, the following n_r rows are set to
182                A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
183               
184            The vector b has its first Npts entries set to
185                b[j] = (I(q) observed for point j)
186               
187            The following n_r entries are set to zero.
188           
189            The result is found by using scipy.linalg.basic.lstsq to invert
190            the matrix and find the coefficients x.
191           
192            @param nfunc: number of base functions to use.
193            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
194            @return: c_out, c_cov - the coefficients with covariance matrix
195        """
196        #TODO: call the pyhton implementation for now. In the future, translate this to C.
197        return self.lstsq(nfunc, nr=nr)
198   
199    def invert_optimize(self, nfunc=10, nr=20):
200        """
201            Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
202           
203            This probably produce more reliable results, but is much slower.
204            The minimization function is set to sum_i[ (I_obs(q_i) - I_theo(q_i))/err**2 ] + alpha * reg_term,
205            where the reg_term is given by Svergun: it is the integral of the square of the first derivative
206            of P(r), d(P(r))/dr, integrated over the full range of r.
207           
208            @param nfunc: number of base functions to use.
209            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
210            @return: c_out, c_cov - the coefficients with covariance matrix
211        """
212       
213        from scipy import optimize
214        import time
215       
216        self.nfunc = nfunc
217        # First, check that the current data is valid
218        if self.is_valid()<=0:
219            raise RuntimeError, "Invertor.invert: Data array are of different length"
220       
221        p = numpy.ones(nfunc)
222        t_0 = time.time()
223        out, cov_x, info, mesg, success = optimize.leastsq(self.residuals, p, full_output=1, warning=True)
224       
225        # Compute chi^2
226        res = self.residuals(out)
227        chisqr = 0
228        for i in range(len(res)):
229            chisqr += res[i]
230       
231        self.chi2 = chisqr
232
233        # Store computation time
234        self.elapsed = time.time() - t_0
235       
236        return out, cov_x
237   
238    def pr_fit(self, nfunc=5):
239        """
240            This is a direct fit to a given P(r). It assumes that the y data
241            is set to some P(r) distribution that we are trying to reproduce
242            with a set of base functions.
243           
244            This method is provided as a test.
245        """
246        from scipy import optimize
247       
248        # First, check that the current data is valid
249        if self.is_valid()<=0:
250            raise RuntimeError, "Invertor.invert: Data arrays are of different length"
251       
252        p = numpy.ones(nfunc)
253        t_0 = time.time()
254        out, cov_x, info, mesg, success = optimize.leastsq(self.pr_residuals, p, full_output=1, warning=True)
255       
256        # Compute chi^2
257        res = self.pr_residuals(out)
258        chisqr = 0
259        for i in range(len(res)):
260            chisqr += res[i]
261       
262        self.chisqr = chisqr
263       
264        # Store computation time
265        self.elapsed = time.time() - t_0
266
267        return out, cov_x
268   
269    def pr_err(self, c, c_cov, r):
270        """   
271            Returns the value of P(r) for a given r, and base function
272            coefficients, with error.
273           
274            @param c: base function coefficients
275            @param c_cov: covariance matrice of the base function coefficients
276            @param r: r-value to evaluate P(r) at
277            @return: P(r)
278        """
279        return self.get_pr_err(c, c_cov, r)
280       
281    def _accept_q(self, q):
282        """
283            Check q-value against user-defined range
284        """
285        if not self.q_min==None and q<self.q_min:
286            return False
287        if not self.q_max==None and q>self.q_max:
288            return False
289        return True
290       
291    def lstsq(self, nfunc=5, nr=20):
292        #TODO: do this on the C side
293        #
294        # To make sure an array is contiguous:
295        # blah = numpy.ascontiguousarray(blah_original)
296        # ... before passing it to C
297        """
298            The problem is solved by posing the problem as  Ax = b,
299            where x is the set of coefficients we are looking for.
300           
301            Npts is the number of points.
302           
303            In the following i refers to the ith base function coefficient.
304            The matrix has its entries j in its first Npts rows set to
305                A[i][j] = (Fourier transformed base function for point j)
306               
307            We them choose a number of r-points, n_r, to evaluate the second
308            derivative of P(r) at. This is used as our regularization term.
309            For a vector r of length n_r, the following n_r rows are set to
310                A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
311               
312            The vector b has its first Npts entries set to
313                b[j] = (I(q) observed for point j)
314               
315            The following n_r entries are set to zero.
316           
317            The result is found by using scipy.linalg.basic.lstsq to invert
318            the matrix and find the coefficients x.
319           
320            @param nfunc: number of base functions to use.
321            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
322
323            If the result does not allow us to compute the covariance matrix,
324            a matrix filled with zeros will be returned.
325
326        """
327        import math
328        from scipy.linalg.basic import lstsq
329       
330        if self.is_valid()<0:
331            raise RuntimeError, "Invertor: invalid data; incompatible data lengths."
332       
333        self.nfunc = nfunc
334        # a -- An M x N matrix.
335        # b -- An M x nrhs matrix or M vector.
336        npts = len(self.x)
337        nq   = nr
338        sqrt_alpha = math.sqrt(math.fabs(self.alpha))
339        if sqrt_alpha<0.0:
340            nq = 0
341       
342        a = numpy.zeros([npts+nq, nfunc])
343        b = numpy.zeros(npts+nq)
344        err = numpy.zeros([nfunc, nfunc])
345       
346        for j in range(nfunc):
347            for i in range(npts):
348                if self._accept_q(self.x[i]):
349                    a[i][j] = self.basefunc_ft(self.d_max, j+1, self.x[i])/self.err[i]
350                   
351            #TODO: refactor this: i_q should really be i_r
352            for i_q in range(nq):
353                r = self.d_max/nq*i_q
354                #a[i_q+npts][j] = sqrt_alpha * 1.0/nq*self.d_max*2.0*math.fabs(math.sin(math.pi*(j+1)*r/self.d_max) + math.pi*(j+1)*r/self.d_max * math.cos(math.pi*(j+1)*r/self.d_max))     
355                a[i_q+npts][j] = sqrt_alpha * 1.0/nq*self.d_max*2.0*(2.0*math.pi*(j+1)/self.d_max*math.cos(math.pi*(j+1)*r/self.d_max) + math.pi**2*(j+1)**2*r/self.d_max**2 * math.sin(math.pi*(j+1)*r/self.d_max))     
356       
357        for i in range(npts):
358            if self._accept_q(self.x[i]):
359                b[i] = self.y[i]/self.err[i]
360           
361        c, chi2, rank, n = lstsq(a, b)
362        # Sanity check
363        try:
364            float(chi2)
365        except:
366            chi2 = -1.0
367        self.chi2 = chi2
368               
369        at = numpy.transpose(a)
370        inv_cov = numpy.zeros([nfunc,nfunc])
371        for i in range(nfunc):
372            for j in range(nfunc):
373                inv_cov[i][j] = 0.0
374                for k in range(npts+nr):
375                    #if self._accept_q(self.x[i]):
376                    inv_cov[i][j] += at[i][k]*a[k][j]
377                   
378        # Compute the reg term size for the output
379        sum_sig = 0.0
380        sum_reg = 0.0
381        for j in range(nfunc):
382            for i in range(npts):
383                if self._accept_q(self.x[i]):
384                    sum_sig += (a[i][j])**2
385            for i in range(nq):
386                sum_reg += (a[i+npts][j])**2
387                   
388        if math.fabs(self.alpha)>0:
389            new_alpha = sum_sig/(sum_reg/self.alpha)
390        else:
391            new_alpha = 0.0
392        self.suggested_alpha = new_alpha
393       
394        try:
395            cov = numpy.linalg.pinv(inv_cov)
396            err = math.fabs(chi2/float(npts-nfunc)) * cov
397        except:
398            # We were not able to estimate the errors,
399            # returns an empty covariance matrix
400            print "lstsq:", sys.exc_value
401            print chi2
402            pass
403           
404        # Keep a copy of the last output
405        self.out = c
406        self.cov = err
407       
408        return c, err
409       
410    def estimate_alpha(self, nfunc):
411        """
412            Returns a reasonable guess for the
413            regularization constant alpha
414           
415            @return: alpha, message, elapsed
416           
417            where alpha is the estimate for alpha,
418            message is a message for the user,
419            elapsed is the computation time
420        """
421        import time
422        try:           
423            pr = self.clone()
424           
425            # T_0 for computation time
426            starttime = time.time()
427            elapsed = 0
428           
429            # If the current alpha is zero, try
430            # another value
431            if pr.alpha<=0:
432                pr.alpha = 0.0001
433                 
434            # Perform inversion to find the largest alpha
435            out, cov = pr.lstsq(nfunc)
436            elapsed = time.time()-starttime
437            initial_alpha = pr.alpha
438            initial_peaks = pr.get_peaks(out)
439   
440            # Try the inversion with the estimated alpha
441            pr.alpha = pr.suggested_alpha
442            out, cov = pr.lstsq(nfunc)
443   
444            npeaks = pr.get_peaks(out)
445            # if more than one peak to start with
446            # just return the estimate
447            if npeaks>1:
448                message = "Your P(r) is not smooth, please check your inversion parameters"
449                return pr.suggested_alpha, message, elapsed
450            else:
451               
452                # Look at smaller values
453                # We assume that for the suggested alpha, we have 1 peak
454                # if not, send a message to change parameters
455                alpha = pr.suggested_alpha
456                best_alpha = pr.suggested_alpha
457                found = False
458                for i in range(10):
459                    pr.alpha = (0.33)**(i+1)*alpha
460                    out, cov = pr.lstsq(nfunc)
461                   
462                    peaks = pr.get_peaks(out)
463                    if peaks>1:
464                        found = True
465                        break
466                    best_alpha = pr.alpha
467                   
468                # If we didn't find a turning point for alpha and
469                # the initial alpha already had only one peak,
470                # just return that
471                if not found and initial_peaks==1 and initial_alpha<best_alpha:
472                    best_alpha = initial_alpha
473                   
474                # Check whether the size makes sense
475                message=''
476               
477                if not found:
478                    message = "None"
479                elif best_alpha>=0.5*pr.suggested_alpha:
480                    # best alpha is too big, return a
481                    # reasonable value
482                    message  = "The estimated alpha for your system is too large. "
483                    message += "Try increasing your maximum distance."
484               
485                return best_alpha, message, elapsed
486   
487        except:
488            message = "Invertor.estimate_alpha: %s" % sys.exc_value
489            return 0, message, elapsed
490   
491       
492    def to_file(self, path, npts=100):
493        """
494            Save the state to a file that will be readable
495            by SliceView.
496            @param path: path of the file to write
497            @param npts: number of P(r) points to be written
498        """
499        import pylab
500       
501        file = open(path, 'w')
502        file.write("#d_max=%g\n" % self.d_max)
503        file.write("#nfunc=%g\n" % self.nfunc)
504        file.write("#alpha=%g\n" % self.alpha)
505        file.write("#chi2=%g\n" % self.chi2)
506        file.write("#elapsed=%g\n" % self.elapsed)
507        file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
508        if not self.out==None:
509            if len(self.out)==len(self.cov):
510                for i in range(len(self.out)):
511                    file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]), str(self.cov[i][i])))
512        file.write("<r>  <Pr>  <dPr>\n")
513        r = pylab.arange(0.0, self.d_max, self.d_max/npts)
514       
515        for r_i in r:
516            (value, err) = self.pr_err(self.out, self.cov, r_i)
517            file.write("%g  %g  %g\n" % (r_i, value, err))
518   
519        file.close()
520     
521       
522    def from_file(self, path):
523        """
524            Load the state of the Invertor from a file,
525            to be able to generate P(r) from a set of
526            parameters.
527            @param path: path of the file to load
528        """
529        import os
530        import re
531        if os.path.isfile(path):
532            try:
533                fd = open(path, 'r')
534               
535                buff    = fd.read()
536                lines   = buff.split('\n')
537                for line in lines:
538                    if line.startswith('#d_max='):
539                        toks = line.split('=')
540                        self.d_max = float(toks[1])
541                    elif line.startswith('#nfunc='):
542                        toks = line.split('=')
543                        self.nfunc = int(toks[1])
544                        self.out = numpy.zeros(self.nfunc)
545                        self.cov = numpy.zeros([self.nfunc, self.nfunc])
546                    elif line.startswith('#alpha='):
547                        toks = line.split('=')
548                        self.alpha = float(toks[1])
549                    elif line.startswith('#chi2='):
550                        toks = line.split('=')
551                        self.chi2 = float(toks[1])
552                    elif line.startswith('#elapsed='):
553                        toks = line.split('=')
554                        self.elapsed = float(toks[1])
555                    elif line.startswith('#alpha_estimate='):
556                        toks = line.split('=')
557                        self.suggested_alpha = float(toks[1])
558           
559                    # Now read in the parameters
560                    elif line.startswith('#C_'):
561                        toks = line.split('=')
562                        p = re.compile('#C_([0-9]+)')
563                        m = p.search(toks[0])
564                        toks2 = toks[1].split('+-')
565                        i = int(m.group(1))
566                        self.out[i] = float(toks2[0])
567                       
568                        self.cov[i][i] = float(toks2[1])                       
569           
570            except:
571                raise RuntimeError, "Invertor.from_file: corrupted file\n%s" % sys.exc_value
572        else:
573            raise RuntimeError, "Invertor.from_file: '%s' is not a file" % str(path) 
574       
575       
576   
577   
578if __name__ == "__main__":
579    o = Invertor()
580
581   
582   
583   
584   
Note: See TracBrowser for help on using the repository browser.