""" Module to perform P(r) inversion. The module contains the Invertor class. """ from sans.pr.core.pr_inversion import Cinvertor import numpy import sys import math, time from scipy.linalg.basic import lstsq def help(): """ Provide general online help text Future work: extend this function to allow topic selection """ info_txt = "The inversion approach is based on Moore, J. Appl. Cryst. (1980) 13, 168-175.\n\n" info_txt += "P(r) is set to be equal to an expansion of base functions of the type " info_txt += "phi_n(r) = 2*r*sin(pi*n*r/D_max). The coefficient of each base functions " info_txt += "in the expansion is found by performing a least square fit with the " info_txt += "following fit function:\n\n" info_txt += "chi**2 = sum_i[ I_meas(q_i) - I_th(q_i) ]**2/error**2 + Reg_term\n\n" info_txt += "where I_meas(q) is the measured scattering intensity and I_th(q) is " info_txt += "the prediction from the Fourier transform of the P(r) expansion. " info_txt += "The Reg_term term is a regularization term set to the second derivative " info_txt += "d**2P(r)/dr**2 integrated over r. It is used to produce a smooth P(r) output.\n\n" info_txt += "The following are user inputs:\n\n" info_txt += " - Number of terms: the number of base functions in the P(r) expansion.\n\n" info_txt += " - Regularization constant: a multiplicative constant to set the size of " info_txt += "the regularization term.\n\n" info_txt += " - Maximum distance: the maximum distance between any two points in the system.\n" return info_txt class Invertor(Cinvertor): """ Invertor class to perform P(r) inversion The problem is solved by posing the problem as Ax = b, where x is the set of coefficients we are looking for. Npts is the number of points. In the following i refers to the ith base function coefficient. The matrix has its entries j in its first Npts rows set to A[j][i] = (Fourier transformed base function for point j) We them choose a number of r-points, n_r, to evaluate the second derivative of P(r) at. This is used as our regularization term. For a vector r of length n_r, the following n_r rows are set to A[j+Npts][i] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j]) The vector b has its first Npts entries set to b[j] = (I(q) observed for point j) The following n_r entries are set to zero. The result is found by using scipy.linalg.basic.lstsq to invert the matrix and find the coefficients x. Methods inherited from Cinvertor: - get_peaks(pars): returns the number of P(r) peaks - oscillations(pars): returns the oscillation parameters for the output P(r) - get_positive(pars): returns the fraction of P(r) that is above zero - get_pos_err(pars): returns the fraction of P(r) that is 1-sigma above zero """ ## Chisqr of the last computation chi2 = 0 ## Time elapsed for last computation elapsed = 0 ## Alpha to get the reg term the same size as the signal suggested_alpha = 0 ## Last number of base functions used nfunc = 10 ## Last output values out = None ## Last errors on output values cov = None ## Background value background = 0 def __init__(self): Cinvertor.__init__(self) def __setattr__(self, name, value): """ Set the value of an attribute. Access the parent class methods for x, y, err, d_max, q_min, q_max and alpha """ if name=='x': if 0.0 in value: raise ValueError, "Invertor: one of your q-values is zero. Delete that entry before proceeding" return self.set_x(value) elif name=='y': return self.set_y(value) elif name=='err': value2 = abs(value) return self.set_err(value2) elif name=='d_max': return self.set_dmax(value) elif name=='q_min': if value==None: return self.set_qmin(-1.0) return self.set_qmin(value) elif name=='q_max': if value==None: return self.set_qmax(-1.0) return self.set_qmax(value) elif name=='alpha': return self.set_alpha(value) elif name=='slit_height': return self.set_slit_height(value) elif name=='slit_width': return self.set_slit_width(value) elif name=='has_bck': if value==True: return self.set_has_bck(1) elif value==False: return self.set_has_bck(0) else: raise ValueError, "Invertor: has_bck can only be True or False" return Cinvertor.__setattr__(self, name, value) def __getattr__(self, name): """ Return the value of an attribute """ import numpy if name=='x': out = numpy.ones(self.get_nx()) self.get_x(out) return out elif name=='y': out = numpy.ones(self.get_ny()) self.get_y(out) return out elif name=='err': out = numpy.ones(self.get_nerr()) self.get_err(out) return out elif name=='d_max': return self.get_dmax() elif name=='q_min': qmin = self.get_qmin() if qmin<0: return None return qmin elif name=='q_max': qmax = self.get_qmax() if qmax<0: return None return qmax elif name=='alpha': return self.get_alpha() elif name=='slit_height': return self.get_slit_height() elif name=='slit_width': return self.get_slit_width() elif name=='has_bck': value = self.get_has_bck() if value==1: return True else: return False elif name in self.__dict__: return self.__dict__[name] return None def clone(self): """ Return a clone of this instance """ invertor = Invertor() invertor.chi2 = self.chi2 invertor.elapsed = self.elapsed invertor.nfunc = self.nfunc invertor.alpha = self.alpha invertor.d_max = self.d_max invertor.q_min = self.q_min invertor.q_max = self.q_max invertor.x = self.x invertor.y = self.y invertor.err = self.err invertor.has_bck = self.has_bck invertor.slit_height = self.slit_height invertor.slit_width = self.slit_width return invertor def invert(self, nfunc=10, nr=20): """ Perform inversion to P(r) The problem is solved by posing the problem as Ax = b, where x is the set of coefficients we are looking for. Npts is the number of points. In the following i refers to the ith base function coefficient. The matrix has its entries j in its first Npts rows set to A[i][j] = (Fourier transformed base function for point j) We them choose a number of r-points, n_r, to evaluate the second derivative of P(r) at. This is used as our regularization term. For a vector r of length n_r, the following n_r rows are set to A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j]) The vector b has its first Npts entries set to b[j] = (I(q) observed for point j) The following n_r entries are set to zero. The result is found by using scipy.linalg.basic.lstsq to invert the matrix and find the coefficients x. @param nfunc: number of base functions to use. @param nr: number of r points to evaluate the 2nd derivative at for the reg. term. @return: c_out, c_cov - the coefficients with covariance matrix """ # Reset the background value before proceeding self.background = 0.0 return self.lstsq(nfunc, nr=nr) def iq(self, out, q): """ Function to call to evaluate the scattering intensity @param args: c-parameters, and q @return: I(q) """ return Cinvertor.iq(self, out, q)+self.background def invert_optimize(self, nfunc=10, nr=20): """ Slower version of the P(r) inversion that uses scipy.optimize.leastsq. This probably produce more reliable results, but is much slower. The minimization function is set to sum_i[ (I_obs(q_i) - I_theo(q_i))/err**2 ] + alpha * reg_term, where the reg_term is given by Svergun: it is the integral of the square of the first derivative of P(r), d(P(r))/dr, integrated over the full range of r. @param nfunc: number of base functions to use. @param nr: number of r points to evaluate the 2nd derivative at for the reg. term. @return: c_out, c_cov - the coefficients with covariance matrix """ from scipy import optimize import time self.nfunc = nfunc # First, check that the current data is valid if self.is_valid()<=0: raise RuntimeError, "Invertor.invert: Data array are of different length" p = numpy.ones(nfunc) t_0 = time.time() out, cov_x, info, mesg, success = optimize.leastsq(self.residuals, p, full_output=1, warning=True) # Compute chi^2 res = self.residuals(out) chisqr = 0 for i in range(len(res)): chisqr += res[i] self.chi2 = chisqr # Store computation time self.elapsed = time.time() - t_0 return out, cov_x def pr_fit(self, nfunc=5): """ This is a direct fit to a given P(r). It assumes that the y data is set to some P(r) distribution that we are trying to reproduce with a set of base functions. This method is provided as a test. """ from scipy import optimize # First, check that the current data is valid if self.is_valid()<=0: raise RuntimeError, "Invertor.invert: Data arrays are of different length" p = numpy.ones(nfunc) t_0 = time.time() out, cov_x, info, mesg, success = optimize.leastsq(self.pr_residuals, p, full_output=1, warning=True) # Compute chi^2 res = self.pr_residuals(out) chisqr = 0 for i in range(len(res)): chisqr += res[i] self.chisqr = chisqr # Store computation time self.elapsed = time.time() - t_0 return out, cov_x def pr_err(self, c, c_cov, r): """ Returns the value of P(r) for a given r, and base function coefficients, with error. @param c: base function coefficients @param c_cov: covariance matrice of the base function coefficients @param r: r-value to evaluate P(r) at @return: P(r) """ return self.get_pr_err(c, c_cov, r) def _accept_q(self, q): """ Check q-value against user-defined range """ if not self.q_min==None and qself.q_max: return False return True def lstsq(self, nfunc=5, nr=20): """ The problem is solved by posing the problem as Ax = b, where x is the set of coefficients we are looking for. Npts is the number of points. In the following i refers to the ith base function coefficient. The matrix has its entries j in its first Npts rows set to A[i][j] = (Fourier transformed base function for point j) We them choose a number of r-points, n_r, to evaluate the second derivative of P(r) at. This is used as our regularization term. For a vector r of length n_r, the following n_r rows are set to A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j]) The vector b has its first Npts entries set to b[j] = (I(q) observed for point j) The following n_r entries are set to zero. The result is found by using scipy.linalg.basic.lstsq to invert the matrix and find the coefficients x. @param nfunc: number of base functions to use. @param nr: number of r points to evaluate the 2nd derivative at for the reg. term. If the result does not allow us to compute the covariance matrix, a matrix filled with zeros will be returned. """ # Note: To make sure an array is contiguous: # blah = numpy.ascontiguousarray(blah_original) # ... before passing it to C if self.is_valid()<0: raise RuntimeError, "Invertor: invalid data; incompatible data lengths." self.nfunc = nfunc # a -- An M x N matrix. # b -- An M x nrhs matrix or M vector. npts = len(self.x) nq = nr sqrt_alpha = math.sqrt(math.fabs(self.alpha)) if sqrt_alpha<0.0: nq = 0 # If we need to fit the background, add a term if self.has_bck==True: nfunc_0 = nfunc nfunc += 1 a = numpy.zeros([npts+nq, nfunc]) b = numpy.zeros(npts+nq) err = numpy.zeros([nfunc, nfunc]) # Construct the a matrix and b vector that represent the problem t_0 = time.time() self._get_matrix(nfunc, nq, a, b) # Perform the inversion (least square fit) c, chi2, rank, n = lstsq(a, b) # Sanity check try: float(chi2) except: chi2 = -1.0 self.chi2 = chi2 inv_cov = numpy.zeros([nfunc,nfunc]) # Get the covariance matrix, defined as inv_cov = a_transposed * a self._get_invcov_matrix(nfunc, nr, a, inv_cov) # Compute the reg term size for the output sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a) if math.fabs(self.alpha)>0: new_alpha = sum_sig/(sum_reg/self.alpha) else: new_alpha = 0.0 self.suggested_alpha = new_alpha try: cov = numpy.linalg.pinv(inv_cov) err = math.fabs(chi2/float(npts-nfunc)) * cov except: # We were not able to estimate the errors # Return an empty error matrix pass # Keep a copy of the last output if self.has_bck==False: self.background = 0 self.out = c self.cov = err else: self.background = c[0] err_0 = numpy.zeros([nfunc, nfunc]) c_0 = numpy.zeros(nfunc) for i in range(nfunc_0): c_0[i] = c[i+1] for j in range(nfunc_0): err_0[i][j] = err[i+1][j+1] self.out = c_0 self.cov = err_0 return self.out, self.cov def estimate_numterms(self, isquit_func=None): """ Returns a reasonable guess for the number of terms @param isquit_func: reference to thread function to call to check whether the computation needs to be stopped. @return: number of terms, alpha, message """ from num_term import Num_terms estimator = Num_terms(self.clone()) try: return estimator.num_terms(isquit_func) except: # If we fail, estimate alpha and return the default # number of terms best_alpha, message, elapsed =self.estimate_alpha(self.nfunc) return self.nfunc, best_alpha, "Could not estimate number of terms" def estimate_alpha(self, nfunc): """ Returns a reasonable guess for the regularization constant alpha @param nfunc: number of terms to use in the expansion. @return: alpha, message, elapsed where alpha is the estimate for alpha, message is a message for the user, elapsed is the computation time """ import time try: pr = self.clone() # T_0 for computation time starttime = time.time() elapsed = 0 # If the current alpha is zero, try # another value if pr.alpha<=0: pr.alpha = 0.0001 # Perform inversion to find the largest alpha out, cov = pr.invert(nfunc) elapsed = time.time()-starttime initial_alpha = pr.alpha initial_peaks = pr.get_peaks(out) # Try the inversion with the estimated alpha pr.alpha = pr.suggested_alpha out, cov = pr.invert(nfunc) npeaks = pr.get_peaks(out) # if more than one peak to start with # just return the estimate if npeaks>1: #message = "Your P(r) is not smooth, please check your inversion parameters" message = None return pr.suggested_alpha, message, elapsed else: # Look at smaller values # We assume that for the suggested alpha, we have 1 peak # if not, send a message to change parameters alpha = pr.suggested_alpha best_alpha = pr.suggested_alpha found = False for i in range(10): pr.alpha = (0.33)**(i+1)*alpha out, cov = pr.invert(nfunc) peaks = pr.get_peaks(out) if peaks>1: found = True break best_alpha = pr.alpha # If we didn't find a turning point for alpha and # the initial alpha already had only one peak, # just return that if not found and initial_peaks==1 and initial_alpha=0.5*pr.suggested_alpha: # best alpha is too big, return a # reasonable value message = "The estimated alpha for your system is too large. " message += "Try increasing your maximum distance." return best_alpha, message, elapsed except: message = "Invertor.estimate_alpha: %s" % sys.exc_value return 0, message, elapsed def to_file(self, path, npts=100): """ Save the state to a file that will be readable by SliceView. @param path: path of the file to write @param npts: number of P(r) points to be written """ import pylab file = open(path, 'w') file.write("#d_max=%g\n" % self.d_max) file.write("#nfunc=%g\n" % self.nfunc) file.write("#alpha=%g\n" % self.alpha) file.write("#chi2=%g\n" % self.chi2) file.write("#elapsed=%g\n" % self.elapsed) file.write("#qmin=%s\n" % str(self.q_min)) file.write("#qmax=%s\n" % str(self.q_max)) file.write("#slit_height=%g\n" % self.slit_height) file.write("#slit_width=%g\n" % self.slit_width) file.write("#background=%g\n" % self.background) if self.has_bck==True: file.write("#has_bck=1\n") else: file.write("#has_bck=0\n") file.write("#alpha_estimate=%g\n" % self.suggested_alpha) if not self.out==None: if len(self.out)==len(self.cov): for i in range(len(self.out)): file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]), str(self.cov[i][i]))) file.write(" \n") r = pylab.arange(0.0, self.d_max, self.d_max/npts) for r_i in r: (value, err) = self.pr_err(self.out, self.cov, r_i) file.write("%g %g %g\n" % (r_i, value, err)) file.close() def from_file(self, path): """ Load the state of the Invertor from a file, to be able to generate P(r) from a set of parameters. @param path: path of the file to load """ import os import re if os.path.isfile(path): try: fd = open(path, 'r') buff = fd.read() lines = buff.split('\n') for line in lines: if line.startswith('#d_max='): toks = line.split('=') self.d_max = float(toks[1]) elif line.startswith('#nfunc='): toks = line.split('=') self.nfunc = int(toks[1]) self.out = numpy.zeros(self.nfunc) self.cov = numpy.zeros([self.nfunc, self.nfunc]) elif line.startswith('#alpha='): toks = line.split('=') self.alpha = float(toks[1]) elif line.startswith('#chi2='): toks = line.split('=') self.chi2 = float(toks[1]) elif line.startswith('#elapsed='): toks = line.split('=') self.elapsed = float(toks[1]) elif line.startswith('#alpha_estimate='): toks = line.split('=') self.suggested_alpha = float(toks[1]) elif line.startswith('#qmin='): toks = line.split('=') try: self.q_min = float(toks[1]) except: self.q_min = None elif line.startswith('#qmax='): toks = line.split('=') try: self.q_max = float(toks[1]) except: self.q_max = None elif line.startswith('#slit_height='): toks = line.split('=') self.slit_height = float(toks[1]) elif line.startswith('#slit_width='): toks = line.split('=') self.slit_width = float(toks[1]) elif line.startswith('#background='): toks = line.split('=') self.background = float(toks[1]) elif line.startswith('#has_bck='): toks = line.split('=') if int(toks[1])==1: self.has_bck=True else: self.has_bck=False # Now read in the parameters elif line.startswith('#C_'): toks = line.split('=') p = re.compile('#C_([0-9]+)') m = p.search(toks[0]) toks2 = toks[1].split('+-') i = int(m.group(1)) self.out[i] = float(toks2[0]) self.cov[i][i] = float(toks2[1]) except: raise RuntimeError, "Invertor.from_file: corrupted file\n%s" % sys.exc_value else: raise RuntimeError, "Invertor.from_file: '%s' is not a file" % str(path) if __name__ == "__main__": o = Invertor()