"""
    Module to perform P(r) inversion.
    The module contains the Invertor class.
"""
import math
import os
import re
import sys
import time

import numpy

from sans.pr.core.pr_inversion import Cinvertor


def help():
    """
        Provide general online help text

        Future work: extend this function to allow topic selection

        @return: the help text, as a single string
    """
    return (
        "The inversion approach is based on Moore, J. Appl. Cryst. (1980) 13, 168-175.\n\n"
        "P(r) is set to be equal to an expansion of base functions of the type "
        "phi_n(r) = 2*r*sin(pi*n*r/D_max). The coefficient of each base functions "
        "in the expansion is found by performing a least square fit with the "
        "following fit function:\n\n"
        "chi**2 = sum_i[ I_meas(q_i) - I_th(q_i) ]**2/error**2 + Reg_term\n\n"
        "where I_meas(q) is the measured scattering intensity and I_th(q) is "
        "the prediction from the Fourier transform of the P(r) expansion. "
        "The Reg_term term is a regularization term set to the second derivative "
        "d**2P(r)/dr**2 integrated over r. It is used to produce a smooth P(r) output.\n\n"
        "The following are user inputs:\n\n"
        " - Number of terms: the number of base functions in the P(r) expansion.\n\n"
        " - Regularization constant: a multiplicative constant to set the size of "
        "the regularization term.\n\n"
        " - Maximum distance: the maximum distance between any two points in the system.\n"
    )


class Invertor(Cinvertor):
    """
        Invertor class to perform P(r) inversion

        The problem is solved by posing the problem as Ax = b,
        where x is the set of coefficients we are looking for.

        Npts is the number of points.

        In the following, i refers to the ith data point and j to the
        jth base function coefficient.

        The matrix has its first Npts rows set to
            A[i][j] = (Fourier transformed base function j for point i) / err[i]

        We then choose a number of r-points, n_r, to evaluate the second
        derivative of P(r) at. This is used as our regularization term.
        For a vector r of length n_r, the following n_r rows are set to
            A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[i])

        The vector b has its first Npts entries set to
            b[i] = (I(q) observed for point i) / err[i]

        The following n_r entries are set to zero.

        The result is found by using scipy.linalg.lstsq to solve the
        system and find the coefficients x.

        Methods inherited from Cinvertor:
        - get_peaks(pars): returns the number of P(r) peaks
        - oscillations(pars): returns the oscillation parameters for the output P(r)
        - get_positive(pars): returns the fraction of P(r) that is above zero
        - get_pos_err(pars): returns the fraction of P(r) that is 1-sigma above zero
    """
    ## Chisqr of the last computation
    chi2 = 0
    ## Time elapsed for last computation
    elapsed = 0
    ## Alpha to get the reg term the same size as the signal
    suggested_alpha = 0
    ## Last number of base functions used
    nfunc = 0
    ## Last output values
    out = None
    ## Last errors on output values
    cov = None

    def __init__(self):
        Cinvertor.__init__(self)

    def __setattr__(self, name, value):
        """
            Set the value of an attribute.
            Access the parent class methods for
            x, y, err, d_max, q_min, q_max and alpha

            A q_min or q_max of None is passed to the parent class as -1.0.

            @raise ValueError: if one of the q-values assigned to x is zero
        """
        if name == 'x':
            if 0.0 in value:
                raise ValueError("Invertor: one of your q-values is zero. "
                                 "Delete that entry before proceeding")
            return self.set_x(value)
        elif name == 'y':
            return self.set_y(value)
        elif name == 'err':
            return self.set_err(value)
        elif name == 'd_max':
            return self.set_dmax(value)
        elif name == 'q_min':
            if value is None:
                return self.set_qmin(-1.0)
            return self.set_qmin(value)
        elif name == 'q_max':
            if value is None:
                return self.set_qmax(-1.0)
            return self.set_qmax(value)
        elif name == 'alpha':
            return self.set_alpha(value)

        return Cinvertor.__setattr__(self, name, value)

    def __getattr__(self, name):
        """
            Return the value of an attribute that normal lookup did not find.

            x, y and err are returned as newly allocated numpy arrays filled
            in by the parent class; d_max and alpha are read from the parent
            class; q_min and q_max are None when the stored value is negative.

            Any other unknown attribute evaluates to None rather than
            raising AttributeError.
        """
        if name == 'x':
            out = numpy.ones(self.get_nx())
            self.get_x(out)
            return out
        elif name == 'y':
            out = numpy.ones(self.get_ny())
            self.get_y(out)
            return out
        elif name == 'err':
            out = numpy.ones(self.get_nerr())
            self.get_err(out)
            return out
        elif name == 'd_max':
            return self.get_dmax()
        elif name == 'q_min':
            qmin = self.get_qmin()
            if qmin < 0:
                return None
            return qmin
        elif name == 'q_max':
            qmax = self.get_qmax()
            if qmax < 0:
                return None
            return qmax
        elif name == 'alpha':
            return self.get_alpha()
        elif name in self.__dict__:
            return self.__dict__[name]
        return None

    def clone(self):
        """
            Return a clone of this instance

            The data (x, y, err), the inversion parameters (alpha, d_max,
            q_min, q_max) and chi2/elapsed are copied; the last output
            coefficients are not.
        """
        invertor = Invertor()
        invertor.chi2 = self.chi2
        invertor.elapsed = self.elapsed
        invertor.alpha = self.alpha
        invertor.d_max = self.d_max
        invertor.q_min = self.q_min
        invertor.q_max = self.q_max
        invertor.x = self.x
        invertor.y = self.y
        invertor.err = self.err

        return invertor

    def invert(self, nfunc=10, nr=20):
        """
            Perform inversion to P(r)

            This currently delegates to the Python implementation in lstsq();
            see that method for a description of the linear system solved.

            @param nfunc: number of base functions to use.
            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
            @return: c_out, c_cov - the coefficients with covariance matrix
        """
        #TODO: call the python implementation for now. In the future, translate this to C.
        return self.lstsq(nfunc, nr=nr)

    def _run_leastsq(self, residuals, nfunc):
        """
            Minimize a residual function with scipy.optimize.leastsq,
            starting from all nfunc coefficients set to one.

            @param residuals: callable taking the coefficients and returning the residuals
            @param nfunc: number of coefficients to fit
            @return: out, cov_x, chisqr, elapsed
        """
        from scipy import optimize

        p = numpy.ones(nfunc)
        t_0 = time.time()
        # The 'warning' keyword formerly passed here no longer exists in
        # SciPy; it defaulted to True, so omitting it keeps the old behavior
        # where it was supported.
        out, cov_x, info, mesg, success = optimize.leastsq(residuals, p,
                                                           full_output=1)

        # Compute chi^2 as the plain sum of the values returned by the
        # residual function.
        chisqr = sum(residuals(out))

        return out, cov_x, chisqr, time.time() - t_0

    def invert_optimize(self, nfunc=10, nr=20):
        """
            Slower version of the P(r) inversion that uses scipy.optimize.leastsq.

            This probably produce more reliable results, but is much slower.
            The minimization function is set to sum_i[ (I_obs(q_i) - I_theo(q_i))/err**2 ] + alpha * reg_term,
            where the reg_term is given by Svergun: it is the integral of the square of the first derivative
            of P(r), d(P(r))/dr, integrated over the full range of r.

            @param nfunc: number of base functions to use.
            @param nr: unused by this method; kept for signature compatibility with invert().
            @return: c_out, c_cov - the coefficients with covariance matrix
            @raise RuntimeError: if is_valid() reports invalid data
        """
        self.nfunc = nfunc
        # First, check that the current data is valid
        if self.is_valid() <= 0:
            raise RuntimeError("Invertor.invert: Data array are of different length")

        out, cov_x, chisqr, elapsed = self._run_leastsq(self.residuals, nfunc)
        self.chi2 = chisqr
        # Store computation time
        self.elapsed = elapsed

        return out, cov_x

    def pr_fit(self, nfunc=5):
        """
            This is a direct fit to a given P(r). It assumes that the y data
            is set to some P(r) distribution that we are trying to reproduce
            with a set of base functions.

            This method is provided as a test.

            @param nfunc: number of base functions to use.
            @return: c_out, c_cov - the coefficients with covariance matrix
            @raise RuntimeError: if is_valid() reports invalid data
        """
        # First, check that the current data is valid
        if self.is_valid() <= 0:
            raise RuntimeError("Invertor.invert: Data arrays are of different length")

        out, cov_x, chisqr, elapsed = self._run_leastsq(self.pr_residuals, nfunc)
        # chi2 is the attribute used by the rest of the class; chisqr is
        # still set for any caller that read the old attribute name.
        self.chi2 = chisqr
        self.chisqr = chisqr
        # Store computation time
        self.elapsed = elapsed

        return out, cov_x

    def pr_err(self, c, c_cov, r):
        """
            Returns the value of P(r) for a given r, and base function
            coefficients, with error.

            @param c: base function coefficients
            @param c_cov: covariance matrice of the base function coefficients
            @param r: r-value to evaluate P(r) at
            @return: P(r)
        """
        return self.get_pr_err(c, c_cov, r)

    def _accept_q(self, q):
        """
            Check q-value against user-defined range

            @param q: q-value to test
            @return: False if q is below q_min or above q_max (when set), True otherwise
        """
        # NOTE(review): the two comparisons were reconstructed from a source
        # line in which the text between '<' and '>' had been lost.
        q_min = self.q_min
        if q_min is not None and q < q_min:
            return False
        q_max = self.q_max
        if q_max is not None and q > q_max:
            return False
        return True

    def lstsq(self, nfunc=5, nr=20):
        """
            The problem is solved by posing the problem as Ax = b,
            where x is the set of coefficients we are looking for.

            Npts is the number of points.

            In the following, i refers to the ith data point and j to the
            jth base function coefficient.

            The matrix has its first Npts rows set to
                A[i][j] = (Fourier transformed base function j for point i) / err[i]

            We then choose a number of r-points, n_r, to evaluate the second
            derivative of P(r) at. This is used as our regularization term.
            For a vector r of length n_r, the following n_r rows are set to
                A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[i])

            The vector b has its first Npts entries set to
                b[i] = (I(q) observed for point i) / err[i]

            The following n_r entries are set to zero. Rows of points
            rejected by the q-range are left at zero in both A and b.

            The result is found by using scipy.linalg.lstsq to solve the
            system and find the coefficients x.

            Side effects: nfunc, chi2, suggested_alpha, out and cov are
            updated on this object.

            @param nfunc: number of base functions to use.
            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
            @return: c, cov - the coefficients and their covariance matrix
                (a zero matrix if the uncertainties could not be estimated)
        """
        #TODO: do this on the C side
        #
        # To make sure an array is contiguous:
        # blah = numpy.ascontiguousarray(blah_original)
        # ... before passing it to C
        from scipy.linalg import lstsq

        self.nfunc = nfunc

        # Read the data once: each access to self.x, self.y or self.err goes
        # through __getattr__, which allocates a new array.
        q_values = self.x
        intensities = self.y
        sigmas = self.err
        d_max = self.d_max
        alpha = self.alpha

        # a -- An M x N matrix.
        # b -- An M x nrhs matrix or M vector.
        npts = len(q_values)
        nq = nr
        sqrt_alpha = math.sqrt(alpha)
        accepted = [self._accept_q(q) for q in q_values]

        a = numpy.zeros([npts+nq, nfunc])
        b = numpy.zeros(npts+nq)

        for j in range(nfunc):
            n = j + 1
            for i in range(npts):
                if accepted[i]:
                    a[i][j] = self.basefunc_ft(d_max, n, q_values[i])/sigmas[i]
            for i_r in range(nq):
                r = d_max/nq*i_r
                phase = math.pi*n*r/d_max
                # NOTE(review): analytically, the second derivative of
                # 2*r*sin(k*r) is 2*(2*k*cos(k*r) - k**2*r*sin(k*r)); the
                # sine term is added here. Left unchanged -- confirm the
                # intended sign.
                a[i_r+npts][j] = sqrt_alpha * 1.0/nq*d_max*2.0*(
                    2.0*math.pi*n/d_max*math.cos(phase)
                    + math.pi**2*n**2*r/d_max**2 * math.sin(phase))

        for i in range(npts):
            if accepted[i]:
                b[i] = intensities[i]/sigmas[i]

        c, chi2, rank, sing_values = lstsq(a, b)
        self.chi2 = chi2

        # Compute the reg term size for the output.
        # Rows of rejected points are all zero, so summing over every data
        # row is the same as summing over the accepted ones only.
        sum_sig = numpy.sum(a[:npts]**2)
        sum_reg = numpy.sum(a[npts:]**2)

        if math.fabs(alpha) > 0:
            new_alpha = sum_sig/(sum_reg/alpha)
        else:
            new_alpha = 0.0
        self.suggested_alpha = new_alpha

        # Covariance of the coefficients: sigma**2 * (A^T A)^-1, with
        # sigma**2 estimated as chi2 / (npts - nfunc). The pseudo-inverse is
        # used so that a singular A^T A does not abort the estimate.
        cov = numpy.zeros([nfunc, nfunc])
        try:
            inv_cov = numpy.dot(numpy.transpose(a), a)
            cov = math.fabs(chi2/float(npts-nfunc)) * numpy.linalg.pinv(inv_cov)
        except Exception:
            # Best effort: keep the coefficients and report zero uncertainties
            print("Error estimating uncertainties")

        # Keep a copy of the last output
        self.out = c
        self.cov = cov
        return c, cov

    def estimate_alpha(self, nfunc):
        """
            Returns a reasonable guess for the
            regularization constant alpha

            @param nfunc: number of base functions to use.
            @return: alpha, message, elapsed

            where alpha is the estimate for alpha,
            message is a message for the user,
            elapsed is the computation time

            Errors are not raised: on failure, alpha is 0 and the message
            describes the error.
        """
        # Defined before the try block so that the except clause can always
        # return it, whatever statement failed.
        elapsed = 0
        try:
            pr = self.clone()

            # T_0 for computation time
            starttime = time.time()

            # If the current alpha is zero, try
            # another value
            if pr.alpha <= 0:
                pr.alpha = 0.0001

            # Perform inversion to find the largest alpha
            out, cov = pr.lstsq(nfunc)
            elapsed = time.time()-starttime
            initial_alpha = pr.alpha
            initial_peaks = pr.get_peaks(out)

            # Try the inversion with the estimated alpha
            pr.alpha = pr.suggested_alpha
            out, cov = pr.lstsq(nfunc)

            npeaks = pr.get_peaks(out)
            # if more than one peak to start with
            # just return the estimate
            if npeaks > 1:
                message = "Your P(r) is not smooth, please check your inversion parameters"
                return pr.suggested_alpha, message, elapsed

            # Look at smaller values
            # We assume that for the suggested alpha, we have 1 peak
            # if not, send a message to change parameters
            alpha = pr.suggested_alpha
            best_alpha = pr.suggested_alpha
            found = False
            for i in range(10):
                pr.alpha = (0.33)**(i+1)*alpha
                out, cov = pr.lstsq(nfunc)

                peaks = pr.get_peaks(out)
                if peaks > 1:
                    found = True
                    break
                best_alpha = pr.alpha

            # NOTE(review): the source text between the '<' of the first
            # comparison below and the '>=' of the second one was lost. The
            # operands and the empty default message were reconstructed from
            # the surrounding comments; any statement that sat in between
            # (e.g. a specific message when no turning point is found) is
            # missing -- confirm against the revision history.

            # If we didn't find a turning point for alpha and
            # the initial alpha already had only one peak,
            # just return that
            if not found and initial_peaks == 1 and initial_alpha < best_alpha:
                best_alpha = initial_alpha

            message = ''
            if best_alpha >= 0.5*pr.suggested_alpha:
                # best alpha is too big, return a
                # reasonable value
                message = "The estimated alpha for your system is too large. "
                message += "Try increasing your maximum distance."

            return best_alpha, message, elapsed

        except Exception as exc:
            message = "Invertor.estimate_alpha: %s" % exc
            return 0, message, elapsed

    def to_file(self, path, npts=100):
        """
            Save the state to a file that will be readable by SliceView.

            The header lines start with '#'. They are followed by npts lines
            of "r P(r) error", for r going from 0 to d_max (excluded).

            @param path: path of the file to write
            @param npts: number of P(r) points to be written
        """
        with open(path, 'w') as fd:
            fd.write("#d_max=%g\n" % self.d_max)
            fd.write("#nfunc=%g\n" % self.nfunc)
            fd.write("#alpha=%g\n" % self.alpha)
            fd.write("#chi2=%g\n" % self.chi2)
            fd.write("#elapsed=%g\n" % self.elapsed)
            fd.write("#alpha_estimate=%g\n" % self.suggested_alpha)
            # 'is not None' rather than '== None': the latter is an
            # elementwise comparison when out is a numpy array.
            if self.out is not None and self.cov is not None:
                if len(self.out) == len(self.cov):
                    for i in range(len(self.out)):
                        fd.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]), str(self.cov[i][i])))
            fd.write(" \n")
            r = numpy.arange(0.0, self.d_max, self.d_max/npts)

            for r_i in r:
                (value, err) = self.pr_err(self.out, self.cov, r_i)
                fd.write("%g %g %g\n" % (r_i, value, err))

    def from_file(self, path):
        """
            Load the state of the Invertor from a file,
            to be able to generate P(r) from a set of
            parameters.

            Only the '#' header lines written by to_file() are used. The
            '#nfunc=' line has to come before the '#C_' lines, since it
            allocates the arrays the coefficients are stored in.

            @param path: path of the file to load
            @raise RuntimeError: if path is not a file, or if the file can't be parsed
        """
        if not os.path.isfile(path):
            raise RuntimeError("Invertor.from_file: '%s' is not a file" % str(path))

        # Header prefix -> attribute receiving the value, as a float
        float_headers = (('#d_max=', 'd_max'),
                         ('#alpha=', 'alpha'),
                         ('#chi2=', 'chi2'),
                         ('#elapsed=', 'elapsed'),
                         ('#alpha_estimate=', 'suggested_alpha'))
        coeff_pattern = re.compile('#C_([0-9]+)')

        try:
            with open(path, 'r') as fd:
                lines = fd.read().split('\n')

            for line in lines:
                if line.startswith('#nfunc='):
                    self.nfunc = int(line.split('=')[1])
                    self.out = numpy.zeros(self.nfunc)
                    self.cov = numpy.zeros([self.nfunc, self.nfunc])
                elif line.startswith('#C_'):
                    # Now read in the parameters: '#C_<i>=<value>+-<error>'
                    toks = line.split('=')
                    m = coeff_pattern.search(toks[0])
                    toks2 = toks[1].split('+-')
                    i = int(m.group(1))
                    self.out[i] = float(toks2[0])
                    self.cov[i][i] = float(toks2[1])
                else:
                    for prefix, attr_name in float_headers:
                        if line.startswith(prefix):
                            setattr(self, attr_name, float(line.split('=')[1]))
                            break
        except Exception as exc:
            raise RuntimeError("Invertor.from_file: corrupted file\n%s" % exc)


if __name__ == "__main__":
    o = Invertor()