source: sasview/pr_inversion/invertor.py @ eb8474f

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalccostrafo411magnetic_scattrelease-4.1.1release-4.1.2release-4.2.2release_4.0.1ticket-1009ticket-1094-headlessticket-1242-2d-resolutionticket-1243ticket-1249ticket885unittest-saveload
Last change on this file since eb8474f was 1db4a53, checked in by Gervaise Alina <gervyh@…>, 14 years ago

working on pylint

  • Property mode set to 100644
File size: 26.2 KB
Line 
1"""
2Module to perform P(r) inversion.
3The module contains the Invertor class.
4"""
5
6import numpy
7import sys
8import math
9import time
10import copy
11import os
12import re
13from numpy.linalg import lstsq
14from scipy import optimize
15from sans.pr.core.pr_inversion import Cinvertor
16
17def help():
18    """
19    Provide general online help text
20    Future work: extend this function to allow topic selection
21    """
22    info_txt  = "The inversion approach is based on Moore, J. Appl. Cryst. " 
23    info_txt += "(1980) 13, 168-175.\n\n"
24    info_txt += "P(r) is set to be equal to an expansion of base functions "
25    info_txt += "of the type "
26    info_txt += "phi_n(r) = 2*r*sin(pi*n*r/D_max). The coefficient of each "
27    info_txt += "base functions "
28    info_txt += "in the expansion is found by performing a least square fit "
29    info_txt += "with the "
30    info_txt += "following fit function:\n\n"
31    info_txt += "chi**2 = sum_i[ I_meas(q_i) - I_th(q_i) ]**2/error**2 +"
32    info_txt += "Reg_term\n\n"
33    info_txt += "where I_meas(q) is the measured scattering intensity and "
34    info_txt += "I_th(q) is "
35    info_txt += "the prediction from the Fourier transform of the P(r) "
36    info_txt += "expansion. "
37    info_txt += "The Reg_term term is a regularization term set to the second"
38    info_txt += " derivative "
39    info_txt += "d**2P(r)/dr**2 integrated over r. It is used to produce "
40    info_txt += "a smooth P(r) output.\n\n"
41    info_txt += "The following are user inputs:\n\n"
42    info_txt += "   - Number of terms: the number of base functions in the P(r)"
43    info_txt += " expansion.\n\n"
44    info_txt += "   - Regularization constant: a multiplicative constant "
45    info_txt += "to set the size of "
46    info_txt += "the regularization term.\n\n"
47    info_txt += "   - Maximum distance: the maximum distance between any "
48    info_txt += "two points in the system.\n"
49     
50    return info_txt
51   
52
53class Invertor(Cinvertor):
54    """
55    Invertor class to perform P(r) inversion
56   
57    The problem is solved by posing the problem as  Ax = b,
58    where x is the set of coefficients we are looking for.
59   
60    Npts is the number of points.
61   
62    In the following i refers to the ith base function coefficient.
63    The matrix has its entries j in its first Npts rows set to
64        A[j][i] = (Fourier transformed base function for point j)
65       
66    We them choose a number of r-points, n_r, to evaluate the second
67    derivative of P(r) at. This is used as our regularization term.
68    For a vector r of length n_r, the following n_r rows are set to
69        A[j+Npts][i] = (2nd derivative of P(r), d**2(P(r))/d(r)**2,
70        evaluated at r[j])
71       
72    The vector b has its first Npts entries set to
73        b[j] = (I(q) observed for point j)
74       
75    The following n_r entries are set to zero.
76   
77    The result is found by using scipy.linalg.basic.lstsq to invert
78    the matrix and find the coefficients x.
79   
80    Methods inherited from Cinvertor:
81    - get_peaks(pars): returns the number of P(r) peaks
82    - oscillations(pars): returns the oscillation parameters for the output P(r)
83    - get_positive(pars): returns the fraction of P(r) that is above zero
84    - get_pos_err(pars): returns the fraction of P(r) that is 1-sigma above zero
85    """
86    ## Chisqr of the last computation
87    chi2  = 0
88    ## Time elapsed for last computation
89    elapsed = 0
90    ## Alpha to get the reg term the same size as the signal
91    suggested_alpha = 0
92    ## Last number of base functions used
93    nfunc = 10
94    ## Last output values
95    out = None
96    ## Last errors on output values
97    cov = None
98    ## Background value
99    background = 0
100    ## Information dictionary for application use
101    info = {}
102   
103   
104    def __init__(self):
105        Cinvertor.__init__(self)
106       
107    def __setattr__(self, name, value):
108        """
109        Set the value of an attribute.
110        Access the parent class methods for
111        x, y, err, d_max, q_min, q_max and alpha
112        """
113        if   name == 'x':
114            if 0.0 in value:
115                msg = "Invertor: one of your q-values is zero. "
116                msg += "Delete that entry before proceeding"
117                raise ValueError, msg
118            return self.set_x(value)
119        elif name == 'y':
120            return self.set_y(value)
121        elif name == 'err':
122            value2 = abs(value)
123            return self.set_err(value2)
124        elif name == 'd_max':
125            return self.set_dmax(value)
126        elif name == 'q_min':
127            if value == None:
128                return self.set_qmin(-1.0)
129            return self.set_qmin(value)
130        elif name == 'q_max':
131            if value == None:
132                return self.set_qmax(-1.0)
133            return self.set_qmax(value)
134        elif name == 'alpha':
135            return self.set_alpha(value)
136        elif name == 'slit_height':
137            return self.set_slit_height(value)
138        elif name == 'slit_width':
139            return self.set_slit_width(value)
140        elif name == 'has_bck':
141            if value == True:
142                return self.set_has_bck(1)
143            elif value == False:
144                return self.set_has_bck(0)
145            else:
146                raise ValueError, "Invertor: has_bck can only be True or False"
147           
148        return Cinvertor.__setattr__(self, name, value)
149   
150    def __getattr__(self, name):
151        """
152        Return the value of an attribute
153        """
154        #import numpy
155        if name == 'x':
156            out = numpy.ones(self.get_nx())
157            self.get_x(out)
158            return out
159        elif name == 'y':
160            out = numpy.ones(self.get_ny())
161            self.get_y(out)
162            return out
163        elif name == 'err':
164            out = numpy.ones(self.get_nerr())
165            self.get_err(out)
166            return out
167        elif name == 'd_max':
168            return self.get_dmax()
169        elif name == 'q_min':
170            qmin = self.get_qmin()
171            if qmin < 0:
172                return None
173            return qmin
174        elif name == 'q_max':
175            qmax = self.get_qmax()
176            if qmax < 0:
177                return None
178            return qmax
179        elif name == 'alpha':
180            return self.get_alpha()
181        elif name == 'slit_height':
182            return self.get_slit_height()
183        elif name == 'slit_width':
184            return self.get_slit_width()
185        elif name == 'has_bck':
186            value = self.get_has_bck()
187            if value == 1:
188                return True
189            else:
190                return False
191        elif name in self.__dict__:
192            return self.__dict__[name]
193        return None
194   
195    def clone(self):
196        """
197        Return a clone of this instance
198        """
199        #import copy
200       
201        invertor = Invertor()
202        invertor.chi2    = self.chi2
203        invertor.elapsed = self.elapsed
204        invertor.nfunc   = self.nfunc
205        invertor.alpha   = self.alpha
206        invertor.d_max   = self.d_max
207        invertor.q_min   = self.q_min
208        invertor.q_max   = self.q_max
209       
210        invertor.x = self.x
211        invertor.y = self.y
212        invertor.err = self.err
213        invertor.has_bck = self.has_bck
214        invertor.slit_height = self.slit_height
215        invertor.slit_width  = self.slit_width
216       
217        invertor.info = copy.deepcopy(self.info)
218       
219        return invertor
220   
221    def invert(self, nfunc=10, nr=20):
222        """
223        Perform inversion to P(r)
224       
225        The problem is solved by posing the problem as  Ax = b,
226        where x is the set of coefficients we are looking for.
227       
228        Npts is the number of points.
229       
230        In the following i refers to the ith base function coefficient.
231        The matrix has its entries j in its first Npts rows set to
232            A[i][j] = (Fourier transformed base function for point j)
233           
234        We them choose a number of r-points, n_r, to evaluate the second
235        derivative of P(r) at. This is used as our regularization term.
236        For a vector r of length n_r, the following n_r rows are set to
237            A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
238           
239        The vector b has its first Npts entries set to
240            b[j] = (I(q) observed for point j)
241           
242        The following n_r entries are set to zero.
243       
244        The result is found by using scipy.linalg.basic.lstsq to invert
245        the matrix and find the coefficients x.
246       
247        :param nfunc: number of base functions to use.
248        :param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
249        :return: c_out, c_cov - the coefficients with covariance matrix
250       
251        """
252        # Reset the background value before proceeding
253        self.background = 0.0
254        return self.lstsq(nfunc, nr=nr)
255   
256    def iq(self, out, q):
257        """
258        Function to call to evaluate the scattering intensity
259       
260        :param args: c-parameters, and q
261        :return: I(q)
262       
263        """
264        return Cinvertor.iq(self, out, q) + self.background
265   
266    def invert_optimize(self, nfunc=10, nr=20):
267        """
268        Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
269       
270        This probably produce more reliable results, but is much slower.
271        The minimization function is set to
272        sum_i[ (I_obs(q_i) - I_theo(q_i))/err**2 ] + alpha * reg_term,
273        where the reg_term is given by Svergun: it is the integral of
274        the square of the first derivative
275        of P(r), d(P(r))/dr, integrated over the full range of r.
276       
277        :param nfunc: number of base functions to use.
278        :param nr: number of r points to evaluate the 2nd derivative at
279            for the reg. term.
280       
281        :return: c_out, c_cov - the coefficients with covariance matrix
282       
283        """
284       
285        #from scipy import optimize
286        #import time
287       
288        self.nfunc = nfunc
289        # First, check that the current data is valid
290        if self.is_valid() <= 0:
291            msg = "Invertor.invert: Data array are of different length"
292            raise RuntimeError, msg
293       
294        p = numpy.ones(nfunc)
295        t_0 = time.time()
296        out, cov_x, _, _, _ = optimize.leastsq(self.residuals,
297                                                            p, full_output=1,
298                                                             warning=True)
299       
300        # Compute chi^2
301        res = self.residuals(out)
302        chisqr = 0
303        for i in range(len(res)):
304            chisqr += res[i]
305       
306        self.chi2 = chisqr
307
308        # Store computation time
309        self.elapsed = time.time() - t_0
310       
311        return out, cov_x
312   
313    def pr_fit(self, nfunc=5):
314        """
315        This is a direct fit to a given P(r). It assumes that the y data
316        is set to some P(r) distribution that we are trying to reproduce
317        with a set of base functions.
318       
319        This method is provided as a test.
320        """
321        #from scipy import optimize
322       
323        # First, check that the current data is valid
324        if self.is_valid() <= 0:
325            msg = "Invertor.invert: Data arrays are of different length"
326            raise RuntimeError, msg
327       
328        p = numpy.ones(nfunc)
329        t_0 = time.time()
330        out, cov_x, info, mesg, success = optimize.leastsq(self.pr_residuals, p,
331                                                            full_output=1,
332                                                            warning=True)
333       
334        # Compute chi^2
335        res = self.pr_residuals(out)
336        chisqr = 0
337        for i in range(len(res)):
338            chisqr += res[i]
339       
340        self.chisqr = chisqr
341       
342        # Store computation time
343        self.elapsed = time.time() - t_0
344
345        return out, cov_x
346   
347    def pr_err(self, c, c_cov, r):
348        """   
349        Returns the value of P(r) for a given r, and base function
350        coefficients, with error.
351       
352        :param c: base function coefficients
353        :param c_cov: covariance matrice of the base function coefficients
354        :param r: r-value to evaluate P(r) at
355       
356        :return: P(r)
357       
358        """
359        return self.get_pr_err(c, c_cov, r)
360       
361    def _accept_q(self, q):
362        """
363        Check q-value against user-defined range
364        """
365        if not self.q_min == None and q < self.q_min:
366            return False
367        if not self.q_max == None and q > self.q_max:
368            return False
369        return True
370       
371    def lstsq(self, nfunc=5, nr=20):
372        """
373        The problem is solved by posing the problem as  Ax = b,
374        where x is the set of coefficients we are looking for.
375       
376        Npts is the number of points.
377       
378        In the following i refers to the ith base function coefficient.
379        The matrix has its entries j in its first Npts rows set to
380            A[i][j] = (Fourier transformed base function for point j)
381           
382        We them choose a number of r-points, n_r, to evaluate the second
383        derivative of P(r) at. This is used as our regularization term.
384        For a vector r of length n_r, the following n_r rows are set to
385            A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2,
386            evaluated at r[j])
387           
388        The vector b has its first Npts entries set to
389            b[j] = (I(q) observed for point j)
390           
391        The following n_r entries are set to zero.
392       
393        The result is found by using scipy.linalg.basic.lstsq to invert
394        the matrix and find the coefficients x.
395       
396        :param nfunc: number of base functions to use.
397        :param nr: number of r points to evaluate the 2nd derivative at
398            for the reg. term.
399
400        If the result does not allow us to compute the covariance matrix,
401        a matrix filled with zeros will be returned.
402
403        """
404        # Note: To make sure an array is contiguous:
405        # blah = numpy.ascontiguousarray(blah_original)
406        # ... before passing it to C
407       
408        if self.is_valid() < 0:
409            msg = "Invertor: invalid data; incompatible data lengths."
410            raise RuntimeError, msg
411       
412        self.nfunc = nfunc
413        # a -- An M x N matrix.
414        # b -- An M x nrhs matrix or M vector.
415        npts = len(self.x)
416        nq   = nr
417        sqrt_alpha = math.sqrt(math.fabs(self.alpha))
418        if sqrt_alpha < 0.0:
419            nq = 0
420
421        # If we need to fit the background, add a term
422        if self.has_bck == True:
423            nfunc_0 = nfunc
424            nfunc += 1
425
426        a = numpy.zeros([npts+nq, nfunc])
427        b = numpy.zeros(npts+nq)
428        err = numpy.zeros([nfunc, nfunc])
429       
430        # Construct the a matrix and b vector that represent the problem
431        t_0 = time.time()
432        self._get_matrix(nfunc, nq, a, b)
433             
434        # Perform the inversion (least square fit)
435        c, chi2, rank, n = lstsq(a, b)
436        # Sanity check
437        try:
438            float(chi2)
439        except:
440            chi2 = -1.0
441        self.chi2 = chi2
442               
443        inv_cov = numpy.zeros([nfunc,nfunc])
444        # Get the covariance matrix, defined as inv_cov = a_transposed * a
445        self._get_invcov_matrix(nfunc, nr, a, inv_cov)
446                   
447        # Compute the reg term size for the output
448        sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
449                   
450        if math.fabs(self.alpha) > 0:
451            new_alpha = sum_sig/(sum_reg/self.alpha)
452        else:
453            new_alpha = 0.0
454        self.suggested_alpha = new_alpha
455       
456        try:
457            cov = numpy.linalg.pinv(inv_cov)
458            err = math.fabs(chi2/float(npts-nfunc)) * cov
459        except:
460            # We were not able to estimate the errors
461            # Return an empty error matrix
462            pass
463           
464        # Keep a copy of the last output
465        if self.has_bck == False:
466            self.background = 0
467            self.out = c
468            self.cov = err
469        else:
470            self.background = c[0]
471           
472            err_0 = numpy.zeros([nfunc, nfunc])
473            c_0 = numpy.zeros(nfunc)
474           
475            for i in range(nfunc_0):
476                c_0[i] = c[i+1]
477                for j in range(nfunc_0):
478                    err_0[i][j] = err[i+1][j+1]
479                   
480            self.out = c_0
481            self.cov = err_0
482           
483        return self.out, self.cov
484       
485    def estimate_numterms(self, isquit_func=None):
486        """
487        Returns a reasonable guess for the
488        number of terms
489       
490        :param isquit_func: reference to thread function to call to
491                            check whether the computation needs to
492                            be stopped.
493       
494        :return: number of terms, alpha, message
495       
496        """
497        from num_term import Num_terms
498        estimator = Num_terms(self.clone())
499        try:
500            return estimator.num_terms(isquit_func)
501        except:
502            # If we fail, estimate alpha and return the default
503            # number of terms
504            best_alpha, message, elapsed =self.estimate_alpha(self.nfunc)
505            return self.nfunc, best_alpha, "Could not estimate number of terms"
506                   
507    def estimate_alpha(self, nfunc):
508        """
509        Returns a reasonable guess for the
510        regularization constant alpha
511       
512        :param nfunc: number of terms to use in the expansion.
513       
514        :return: alpha, message, elapsed
515       
516        where alpha is the estimate for alpha,
517        message is a message for the user,
518        elapsed is the computation time
519        """
520        #import time
521        try:           
522            pr = self.clone()
523           
524            # T_0 for computation time
525            starttime = time.time()
526            elapsed = 0
527           
528            # If the current alpha is zero, try
529            # another value
530            if pr.alpha <= 0:
531                pr.alpha = 0.0001
532                 
533            # Perform inversion to find the largest alpha
534            out, cov = pr.invert(nfunc)
535            elapsed = time.time() - starttime
536            initial_alpha = pr.alpha
537            initial_peaks = pr.get_peaks(out)
538   
539            # Try the inversion with the estimated alpha
540            pr.alpha = pr.suggested_alpha
541            out, cov = pr.invert(nfunc)
542   
543            npeaks = pr.get_peaks(out)
544            # if more than one peak to start with
545            # just return the estimate
546            if npeaks > 1:
547                #message = "Your P(r) is not smooth,
548                #please check your inversion parameters"
549                message = None
550                return pr.suggested_alpha, message, elapsed
551            else:
552               
553                # Look at smaller values
554                # We assume that for the suggested alpha, we have 1 peak
555                # if not, send a message to change parameters
556                alpha = pr.suggested_alpha
557                best_alpha = pr.suggested_alpha
558                found = False
559                for i in range(10):
560                    pr.alpha = (0.33)**(i+1) * alpha
561                    out, cov = pr.invert(nfunc)
562                   
563                    peaks = pr.get_peaks(out)
564                    if peaks > 1:
565                        found = True
566                        break
567                    best_alpha = pr.alpha
568                   
569                # If we didn't find a turning point for alpha and
570                # the initial alpha already had only one peak,
571                # just return that
572                if not found and initial_peaks == 1 and \
573                    initial_alpha<best_alpha:
574                    best_alpha = initial_alpha
575                   
576                # Check whether the size makes sense
577                message = ''
578               
579                if not found:
580                    message = None
581                elif best_alpha >= 0.5 * pr.suggested_alpha:
582                    # best alpha is too big, return a
583                    # reasonable value
584                    message  = "The estimated alpha for your system is too "
585                    messsage += "large. "
586                    message += "Try increasing your maximum distance."
587               
588                return best_alpha, message, elapsed
589   
590        except:
591            message = "Invertor.estimate_alpha: %s" % sys.exc_value
592            return 0, message, elapsed
593   
594       
595    def to_file(self, path, npts=100):
596        """
597        Save the state to a file that will be readable
598        by SliceView.
599       
600        :param path: path of the file to write
601        :param npts: number of P(r) points to be written
602       
603        """
604        file = open(path, 'w')
605        file.write("#d_max=%g\n" % self.d_max)
606        file.write("#nfunc=%g\n" % self.nfunc)
607        file.write("#alpha=%g\n" % self.alpha)
608        file.write("#chi2=%g\n" % self.chi2)
609        file.write("#elapsed=%g\n" % self.elapsed)
610        file.write("#qmin=%s\n" % str(self.q_min))
611        file.write("#qmax=%s\n" % str(self.q_max))
612        file.write("#slit_height=%g\n" % self.slit_height)
613        file.write("#slit_width=%g\n" % self.slit_width)
614        file.write("#background=%g\n" % self.background)
615        if self.has_bck == True:
616            file.write("#has_bck=1\n")
617        else:
618            file.write("#has_bck=0\n")
619        file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
620        if not self.out == None:
621            if len(self.out) == len(self.cov):
622                for i in range(len(self.out)):
623                    file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]),
624                                                    str(self.cov[i][i])))
625        file.write("<r>  <Pr>  <dPr>\n")
626        r = numpy.arange(0.0, self.d_max, self.d_max/npts)
627       
628        for r_i in r:
629            (value, err) = self.pr_err(self.out, self.cov, r_i)
630            file.write("%g  %g  %g\n" % (r_i, value, err))
631   
632        file.close()
633     
634       
635    def from_file(self, path):
636        """
637        Load the state of the Invertor from a file,
638        to be able to generate P(r) from a set of
639        parameters.
640       
641        :param path: path of the file to load
642       
643        """
644        #import os
645        #import re
646        if os.path.isfile(path):
647            try:
648                fd = open(path, 'r')
649               
650                buff    = fd.read()
651                lines   = buff.split('\n')
652                for line in lines:
653                    if line.startswith('#d_max='):
654                        toks = line.split('=')
655                        self.d_max = float(toks[1])
656                    elif line.startswith('#nfunc='):
657                        toks = line.split('=')
658                        self.nfunc = int(toks[1])
659                        self.out = numpy.zeros(self.nfunc)
660                        self.cov = numpy.zeros([self.nfunc, self.nfunc])
661                    elif line.startswith('#alpha='):
662                        toks = line.split('=')
663                        self.alpha = float(toks[1])
664                    elif line.startswith('#chi2='):
665                        toks = line.split('=')
666                        self.chi2 = float(toks[1])
667                    elif line.startswith('#elapsed='):
668                        toks = line.split('=')
669                        self.elapsed = float(toks[1])
670                    elif line.startswith('#alpha_estimate='):
671                        toks = line.split('=')
672                        self.suggested_alpha = float(toks[1])
673                    elif line.startswith('#qmin='):
674                        toks = line.split('=')
675                        try:
676                            self.q_min = float(toks[1])
677                        except:
678                            self.q_min = None
679                    elif line.startswith('#qmax='):
680                        toks = line.split('=')
681                        try:
682                            self.q_max = float(toks[1])
683                        except:
684                            self.q_max = None
685                    elif line.startswith('#slit_height='):
686                        toks = line.split('=')
687                        self.slit_height = float(toks[1])
688                    elif line.startswith('#slit_width='):
689                        toks = line.split('=')
690                        self.slit_width = float(toks[1])
691                    elif line.startswith('#background='):
692                        toks = line.split('=')
693                        self.background = float(toks[1])
694                    elif line.startswith('#has_bck='):
695                        toks = line.split('=')
696                        if int(toks[1]) == 1:
697                            self.has_bck = True
698                        else:
699                            self.has_bck = False
700           
701                    # Now read in the parameters
702                    elif line.startswith('#C_'):
703                        toks = line.split('=')
704                        p = re.compile('#C_([0-9]+)')
705                        m = p.search(toks[0])
706                        toks2 = toks[1].split('+-')
707                        i = int(m.group(1))
708                        self.out[i] = float(toks2[0])
709                       
710                        self.cov[i][i] = float(toks2[1])                       
711           
712            except:
713                msg = "Invertor.from_file: corrupted file\n%s" % sys.exc_value
714                raise RuntimeError, msg
715        else:
716            msg = "Invertor.from_file: '%s' is not a file" % str(path) 
717            raise RuntimeError, msg
718       
719         
720if __name__ == "__main__":
721    o = Invertor()
722
723   
724   
725   
726   
Note: See TracBrowser for help on using the repository browser.