invertor.py @ ab6098f

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalccostrafo411magnetic_scattrelease-4.1.1release-4.1.2release-4.2.2release_4.0.1ticket-1009ticket-1094-headlessticket-1242-2d-resolutionticket-1243ticket-1249ticket885unittest-saveload

Last change on this file since ab6098f was e39640f, checked in by Mathieu Doucet <doucetm@…>, 17 years ago
pr_inversion 0.1
Property mode set to `100644`
File size: 21.6 KB

Rev	Line
[896abb3]	1	"""
	2	Module to perform P(r) inversion.
	3	The module contains the Invertor class.
	4	"""
[9e8dc22]	5	from sans.pr.core.pr_inversion import Cinvertor
	6	import numpy
[f71287f4]	7	import sys
[9e8dc22]	8
[9a11937]	9	def help():
	10	"""
	11	Provide general online help text
[43c0a8e]	12	Future work: extend this function to allow topic selection
[9a11937]	13	"""
	14	info_txt = "The inversion approach is based on Moore, J. Appl. Cryst. (1980) 13, 168-175.\n\n"
	15	info_txt += "P(r) is set to be equal to an expansion of base functions of the type "
	16	info_txt += "phi_n(r) = 2rsin(pinr/D_max). The coefficient of each base functions "
	17	info_txt += "in the expansion is found by performing a least square fit with the "
	18	info_txt += "following fit function:\n\n"
	19	info_txt += "chi2 = sum_i[ I_meas(q_i) - I_th(q_i) ]2/error**2 + Reg_term\n\n"
	20	info_txt += "where I_meas(q) is the measured scattering intensity and I_th(q) is "
	21	info_txt += "the prediction from the Fourier transform of the P(r) expansion. "
	22	info_txt += "The Reg_term term is a regularization term set to the second derivative "
	23	info_txt += "d2P(r)/dr2 integrated over r. It is used to produce a smooth P(r) output.\n\n"
	24	info_txt += "The following are user inputs:\n\n"
	25	info_txt += " - Number of terms: the number of base functions in the P(r) expansion.\n\n"
	26	info_txt += " - Regularization constant: a multiplicative constant to set the size of "
	27	info_txt += "the regularization term.\n\n"
	28	info_txt += " - Maximum distance: the maximum distance between any two points in the system.\n"
	29
	30	return info_txt
[9e8dc22]	31
[9a11937]	32
	33	class Invertor(Cinvertor):
	34	"""
	35	Invertor class to perform P(r) inversion
	36
[ffca8f2]	37	The problem is solved by posing the problem as Ax = b,
	38	where x is the set of coefficients we are looking for.
[43c0a8e]	39
[ffca8f2]	40	Npts is the number of points.
	41
	42	In the following i refers to the ith base function coefficient.
	43	The matrix has its entries j in its first Npts rows set to
[a8b6364]	44	A[j][i] = (Fourier transformed base function for point j)
[ffca8f2]	45
	46	We them choose a number of r-points, n_r, to evaluate the second
	47	derivative of P(r) at. This is used as our regularization term.
	48	For a vector r of length n_r, the following n_r rows are set to
[a8b6364]	49	A[j+Npts][i] = (2nd derivative of P(r), d2(P(r))/d(r)2, evaluated at r[j])
[ffca8f2]	50
	51	The vector b has its first Npts entries set to
	52	b[j] = (I(q) observed for point j)
	53
	54	The following n_r entries are set to zero.
	55
	56	The result is found by using scipy.linalg.basic.lstsq to invert
	57	the matrix and find the coefficients x.
[43c0a8e]	58
	59	Methods inherited from Cinvertor:
[896abb3]	60	- get_peaks(pars): returns the number of P(r) peaks
	61	- oscillations(pars): returns the oscillation parameters for the output P(r)
	62	- get_positive(pars): returns the fraction of P(r) that is above zero
	63	- get_pos_err(pars): returns the fraction of P(r) that is 1-sigma above zero
[9a11937]	64	"""
[eca05c8]	65	## Chisqr of the last computation
[2d06beb]	66	chi2 = 0
	67	## Time elapsed for last computation
	68	elapsed = 0
[abad620]	69	## Alpha to get the reg term the same size as the signal
	70	suggested_alpha = 0
[f71287f4]	71	## Last number of base functions used
	72	nfunc = 0
	73	## Last output values
	74	out = None
	75	## Last errors on output values
	76	cov = None
[eca05c8]	77
[9e8dc22]	78	def __init__(self):
	79	Cinvertor.__init__(self)
	80
	81	def __setattr__(self, name, value):
	82	"""
	83	Set the value of an attribute.
	84	Access the parent class methods for
[896abb3]	85	x, y, err, d_max, q_min, q_max and alpha
[9e8dc22]	86	"""
	87	if name=='x':
[eca05c8]	88	if 0.0 in value:
	89	raise ValueError, "Invertor: one of your q-values is zero. Delete that entry before proceeding"
[9e8dc22]	90	return self.set_x(value)
	91	elif name=='y':
	92	return self.set_y(value)
	93	elif name=='err':
	94	return self.set_err(value)
	95	elif name=='d_max':
	96	return self.set_dmax(value)
[f71287f4]	97	elif name=='q_min':
	98	if value==None:
	99	return self.set_qmin(-1.0)
	100	return self.set_qmin(value)
	101	elif name=='q_max':
	102	if value==None:
	103	return self.set_qmax(-1.0)
	104	return self.set_qmax(value)
[eca05c8]	105	elif name=='alpha':
	106	return self.set_alpha(value)
[9e8dc22]	107
	108	return Cinvertor.__setattr__(self, name, value)
	109
	110	def __getattr__(self, name):
	111	"""
	112	Return the value of an attribute
	113	For the moment x, y, err and d_max are write-only
	114	TODO: change that!
	115	"""
	116	import numpy
	117	if name=='x':
	118	out = numpy.ones(self.get_nx())
	119	self.get_x(out)
	120	return out
	121	elif name=='y':
	122	out = numpy.ones(self.get_ny())
	123	self.get_y(out)
	124	return out
	125	elif name=='err':
	126	out = numpy.ones(self.get_nerr())
	127	self.get_err(out)
	128	return out
	129	elif name=='d_max':
	130	return self.get_dmax()
[f71287f4]	131	elif name=='q_min':
	132	qmin = self.get_qmin()
	133	if qmin<0:
	134	return None
	135	return qmin
	136	elif name=='q_max':
	137	qmax = self.get_qmax()
	138	if qmax<0:
	139	return None
	140	return qmax
[eca05c8]	141	elif name=='alpha':
	142	return self.get_alpha()
[9e8dc22]	143	elif name in self.__dict__:
	144	return self.__dict__[name]
	145	return None
	146
[2d06beb]	147	def clone(self):
	148	"""
	149	Return a clone of this instance
	150	"""
	151	invertor = Invertor()
	152	invertor.chi2 = self.chi2
	153	invertor.elapsed = self.elapsed
	154	invertor.alpha = self.alpha
	155	invertor.d_max = self.d_max
[f71287f4]	156	invertor.q_min = self.q_min
	157	invertor.q_max = self.q_max
[2d06beb]	158
	159	invertor.x = self.x
	160	invertor.y = self.y
	161	invertor.err = self.err
	162
	163	return invertor
	164
[ffca8f2]	165	def invert(self, nfunc=10, nr=20):
[9e8dc22]	166	"""
	167	Perform inversion to P(r)
[ffca8f2]	168
	169	The problem is solved by posing the problem as Ax = b,
	170	where x is the set of coefficients we are looking for.
	171
	172	Npts is the number of points.
	173
	174	In the following i refers to the ith base function coefficient.
	175	The matrix has its entries j in its first Npts rows set to
	176	A[i][j] = (Fourier transformed base function for point j)
	177
	178	We them choose a number of r-points, n_r, to evaluate the second
	179	derivative of P(r) at. This is used as our regularization term.
	180	For a vector r of length n_r, the following n_r rows are set to
	181	A[i+Npts][j] = (2nd derivative of P(r), d2(P(r))/d(r)2, evaluated at r[j])
	182
	183	The vector b has its first Npts entries set to
	184	b[j] = (I(q) observed for point j)
	185
	186	The following n_r entries are set to zero.
	187
	188	The result is found by using scipy.linalg.basic.lstsq to invert
	189	the matrix and find the coefficients x.
	190
	191	@param nfunc: number of base functions to use.
	192	@param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
	193	@return: c_out, c_cov - the coefficients with covariance matrix
	194	"""
	195	#TODO: call the pyhton implementation for now. In the future, translate this to C.
	196	return self.lstsq(nfunc, nr=nr)
	197
	198	def invert_optimize(self, nfunc=10, nr=20):
	199	"""
	200	Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
	201
	202	This probably produce more reliable results, but is much slower.
	203	The minimization function is set to sum_i[ (I_obs(q_i) - I_theo(q_i))/err*2 ] + alpha reg_term,
	204	where the reg_term is given by Svergun: it is the integral of the square of the first derivative
	205	of P(r), d(P(r))/dr, integrated over the full range of r.
	206
	207	@param nfunc: number of base functions to use.
	208	@param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
	209	@return: c_out, c_cov - the coefficients with covariance matrix
[9e8dc22]	210	"""
[ffca8f2]	211
[9e8dc22]	212	from scipy import optimize
[2d06beb]	213	import time
[9e8dc22]	214
[f71287f4]	215	self.nfunc = nfunc
[9e8dc22]	216	# First, check that the current data is valid
	217	if self.is_valid()<=0:
	218	raise RuntimeError, "Invertor.invert: Data array are of different length"
	219
	220	p = numpy.ones(nfunc)
[2d06beb]	221	t_0 = time.time()
[9e8dc22]	222	out, cov_x, info, mesg, success = optimize.leastsq(self.residuals, p, full_output=1, warning=True)
	223
[eca05c8]	224	# Compute chi^2
	225	res = self.residuals(out)
	226	chisqr = 0
	227	for i in range(len(res)):
	228	chisqr += res[i]
	229
	230	self.chi2 = chisqr
[2d06beb]	231
	232	# Store computation time
	233	self.elapsed = time.time() - t_0
[eca05c8]	234
	235	return out, cov_x
	236
	237	def pr_fit(self, nfunc=5):
	238	"""
[ffca8f2]	239	This is a direct fit to a given P(r). It assumes that the y data
	240	is set to some P(r) distribution that we are trying to reproduce
	241	with a set of base functions.
	242
	243	This method is provided as a test.
[eca05c8]	244	"""
	245	from scipy import optimize
	246
	247	# First, check that the current data is valid
	248	if self.is_valid()<=0:
	249	raise RuntimeError, "Invertor.invert: Data arrays are of different length"
	250
	251	p = numpy.ones(nfunc)
[2d06beb]	252	t_0 = time.time()
[eca05c8]	253	out, cov_x, info, mesg, success = optimize.leastsq(self.pr_residuals, p, full_output=1, warning=True)
	254
	255	# Compute chi^2
	256	res = self.pr_residuals(out)
	257	chisqr = 0
	258	for i in range(len(res)):
	259	chisqr += res[i]
	260
	261	self.chisqr = chisqr
	262
[2d06beb]	263	# Store computation time
	264	self.elapsed = time.time() - t_0
	265
[9e8dc22]	266	return out, cov_x
	267
[eca05c8]	268	def pr_err(self, c, c_cov, r):
[896abb3]	269	"""
	270	Returns the value of P(r) for a given r, and base function
	271	coefficients, with error.
	272
	273	@param c: base function coefficients
	274	@param c_cov: covariance matrice of the base function coefficients
	275	@param r: r-value to evaluate P(r) at
	276	@return: P(r)
	277	"""
[43c0a8e]	278	return self.get_pr_err(c, c_cov, r)
[2d06beb]	279
[f71287f4]	280	def _accept_q(self, q):
	281	"""
	282	Check q-value against user-defined range
	283	"""
	284	if not self.q_min==None and q<self.q_min:
	285	return False
	286	if not self.q_max==None and q>self.q_max:
	287	return False
	288	return True
	289
[ffca8f2]	290	def lstsq(self, nfunc=5, nr=20):
[f71287f4]	291	#TODO: do this on the C side
	292	#
	293	# To make sure an array is contiguous:
	294	# blah = numpy.ascontiguousarray(blah_original)
	295	# ... before passing it to C
[9a11937]	296	"""
[ffca8f2]	297	The problem is solved by posing the problem as Ax = b,
	298	where x is the set of coefficients we are looking for.
	299
	300	Npts is the number of points.
	301
	302	In the following i refers to the ith base function coefficient.
	303	The matrix has its entries j in its first Npts rows set to
	304	A[i][j] = (Fourier transformed base function for point j)
	305
	306	We them choose a number of r-points, n_r, to evaluate the second
	307	derivative of P(r) at. This is used as our regularization term.
	308	For a vector r of length n_r, the following n_r rows are set to
	309	A[i+Npts][j] = (2nd derivative of P(r), d2(P(r))/d(r)2, evaluated at r[j])
	310
	311	The vector b has its first Npts entries set to
	312	b[j] = (I(q) observed for point j)
	313
	314	The following n_r entries are set to zero.
	315
	316	The result is found by using scipy.linalg.basic.lstsq to invert
	317	the matrix and find the coefficients x.
	318
	319	@param nfunc: number of base functions to use.
	320	@param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
[9a11937]	321	"""
[2d06beb]	322	import math
	323	from scipy.linalg.basic import lstsq
	324
[f71287f4]	325	self.nfunc = nfunc
[2d06beb]	326	# a -- An M x N matrix.
	327	# b -- An M x nrhs matrix or M vector.
	328	npts = len(self.x)
[ffca8f2]	329	nq = nr
[2d06beb]	330	sqrt_alpha = math.sqrt(self.alpha)
	331
	332	a = numpy.zeros([npts+nq, nfunc])
	333	b = numpy.zeros(npts+nq)
	334	err = numpy.zeros(nfunc)
	335
	336	for j in range(nfunc):
	337	for i in range(npts):
[f71287f4]	338	if self._accept_q(self.x[i]):
	339	a[i][j] = self.basefunc_ft(self.d_max, j+1, self.x[i])/self.err[i]
[ffca8f2]	340
	341	#TODO: refactor this: i_q should really be i_r
[2d06beb]	342	for i_q in range(nq):
	343	r = self.d_max/nq*i_q
	344	#a[i_q+npts][j] = sqrt_alpha * 1.0/nqself.d_max2.0math.fabs(math.sin(math.pi(j+1)r/self.d_max) + math.pi(j+1)r/self.d_max math.cos(math.pi(j+1)r/self.d_max))
	345	a[i_q+npts][j] = sqrt_alpha * 1.0/nqself.d_max2.0(2.0math.pi(j+1)/self.d_maxmath.cos(math.pi(j+1)r/self.d_max) + math.pi*2(j+1)*2r/self.d_max*2 math.sin(math.pi(j+1)r/self.d_max))
	346
	347	for i in range(npts):
[f71287f4]	348	if self._accept_q(self.x[i]):
	349	b[i] = self.y[i]/self.err[i]
[2d06beb]	350
	351	c, chi2, rank, n = lstsq(a, b)
	352	self.chi2 = chi2
	353
	354	at = numpy.transpose(a)
	355	inv_cov = numpy.zeros([nfunc,nfunc])
	356	for i in range(nfunc):
	357	for j in range(nfunc):
	358	inv_cov[i][j] = 0.0
[1a5c946]	359	for k in range(npts+nr):
[f71287f4]	360	if self._accept_q(self.x[i]):
	361	inv_cov[i][j] = at[i][k]*a[k][j]
[2d06beb]	362
	363	# Compute the reg term size for the output
	364	sum_sig = 0.0
	365	sum_reg = 0.0
	366	for j in range(nfunc):
	367	for i in range(npts):
[f71287f4]	368	if self._accept_q(self.x[i]):
	369	sum_sig += (a[i][j])**2
[2d06beb]	370	for i in range(nq):
[f71287f4]	371	sum_reg += (a[i+npts][j])**2
[2d06beb]	372
[4241e48]	373	if math.fabs(self.alpha)>0:
	374	new_alpha = sum_sig/(sum_reg/self.alpha)
	375	else:
	376	new_alpha = 0.0
[abad620]	377	self.suggested_alpha = new_alpha
[2d06beb]	378
	379	try:
	380	err = math.fabs(chi2/(npts-nfunc))* inv_cov
	381	except:
	382	print "Error estimating uncertainties"
	383
[f71287f4]	384	# Keep a copy of the last output
	385	self.out = c
	386	self.cov = err
[2d06beb]	387
	388	return c, err
	389
[f71287f4]	390	def estimate_alpha(self, nfunc):
	391	"""
	392	Returns a reasonable guess for the
	393	regularization constant alpha
	394
	395	@return: alpha, message, elapsed
	396
	397	where alpha is the estimate for alpha,
	398	message is a message for the user,
	399	elapsed is the computation time
	400	"""
	401	import time
	402	try:
	403	pr = self.clone()
	404
	405	# T_0 for computation time
	406	starttime = time.time()
[e39640f]	407	elapsed = 0
[f71287f4]	408
	409	# If the current alpha is zero, try
	410	# another value
	411	if pr.alpha<=0:
	412	pr.alpha = 0.0001
	413
	414	# Perform inversion to find the largest alpha
	415	out, cov = pr.lstsq(nfunc)
	416	elapsed = time.time()-starttime
	417	initial_alpha = pr.alpha
	418	initial_peaks = pr.get_peaks(out)
	419
	420	# Try the inversion with the estimated alpha
	421	pr.alpha = pr.suggested_alpha
	422	out, cov = pr.lstsq(nfunc)
	423
	424	npeaks = pr.get_peaks(out)
	425	# if more than one peak to start with
	426	# just return the estimate
	427	if npeaks>1:
	428	message = "Your P(r) is not smooth, please check your inversion parameters"
	429	return pr.suggested_alpha, message, elapsed
	430	else:
	431
	432	# Look at smaller values
	433	# We assume that for the suggested alpha, we have 1 peak
	434	# if not, send a message to change parameters
	435	alpha = pr.suggested_alpha
	436	best_alpha = pr.suggested_alpha
	437	found = False
	438	for i in range(10):
	439	pr.alpha = (0.33)*(i+1)alpha
	440	out, cov = pr.lstsq(nfunc)
	441
	442	peaks = pr.get_peaks(out)
	443	if peaks>1:
	444	found = True
	445	break
	446	best_alpha = pr.alpha
	447
	448	# If we didn't find a turning point for alpha and
	449	# the initial alpha already had only one peak,
	450	# just return that
	451	if not found and initial_peaks==1 and initial_alpha<best_alpha:
	452	best_alpha = initial_alpha
	453
	454	# Check whether the size makes sense
	455	message=''
	456
	457	if not found:
	458	message = "None"
	459	elif best_alpha>=0.5*pr.suggested_alpha:
	460	# best alpha is too big, return a
	461	# reasonable value
	462	message = "The estimated alpha for your system is too large. "
	463	message += "Try increasing your maximum distance."
	464
	465	return best_alpha, message, elapsed
	466
	467	except:
	468	message = "Invertor.estimate_alpha: %s" % sys.exc_value
	469	return 0, message, elapsed
	470
	471
	472	def to_file(self, path, npts=100):
	473	"""
	474	Save the state to a file that will be readable
	475	by SliceView.
	476	@param path: path of the file to write
	477	@param npts: number of P(r) points to be written
	478	"""
	479	import pylab
	480
	481	file = open(path, 'w')
	482	file.write("#d_max=%g\n" % self.d_max)
	483	file.write("#nfunc=%g\n" % self.nfunc)
	484	file.write("#alpha=%g\n" % self.alpha)
	485	file.write("#chi2=%g\n" % self.chi2)
	486	file.write("#elapsed=%g\n" % self.elapsed)
	487	file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
	488	if not self.out==None:
	489	if len(self.out)==len(self.cov):
	490	for i in range(len(self.out)):
	491	file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]), str(self.cov[i][i])))
	492	file.write("<r> <Pr> <dPr>\n")
	493	r = pylab.arange(0.0, self.d_max, self.d_max/npts)
	494
	495	for r_i in r:
	496	(value, err) = self.pr_err(self.out, self.cov, r_i)
	497	file.write("%g %g %g\n" % (r_i, value, err))
	498
	499	file.close()
[9a11937]	500
[2d06beb]	501
[f71287f4]	502	def from_file(self, path):
	503	"""
	504	Load the state of the Invertor from a file,
	505	to be able to generate P(r) from a set of
	506	parameters.
	507	@param path: path of the file to load
	508	"""
	509	import os
	510	import re
	511	if os.path.isfile(path):
	512	try:
	513	fd = open(path, 'r')
	514
	515	buff = fd.read()
	516	lines = buff.split('\n')
	517	for line in lines:
	518	if line.startswith('#d_max='):
	519	toks = line.split('=')
	520	self.d_max = float(toks[1])
	521	elif line.startswith('#nfunc='):
	522	toks = line.split('=')
	523	self.nfunc = int(toks[1])
	524	self.out = numpy.zeros(self.nfunc)
	525	self.cov = numpy.zeros([self.nfunc, self.nfunc])
	526	elif line.startswith('#alpha='):
	527	toks = line.split('=')
	528	self.alpha = float(toks[1])
	529	elif line.startswith('#chi2='):
	530	toks = line.split('=')
	531	self.chi2 = float(toks[1])
	532	elif line.startswith('#elapsed='):
	533	toks = line.split('=')
	534	self.elapsed = float(toks[1])
	535	elif line.startswith('#alpha_estimate='):
	536	toks = line.split('=')
	537	self.suggested_alpha = float(toks[1])
	538
	539	# Now read in the parameters
	540	elif line.startswith('#C_'):
	541	toks = line.split('=')
	542	p = re.compile('#C_([0-9]+)')
	543	m = p.search(toks[0])
	544	toks2 = toks[1].split('+-')
	545	i = int(m.group(1))
	546	self.out[i] = float(toks2[0])
	547
	548	self.cov[i][i] = float(toks2[1])
	549
	550	except:
	551	raise RuntimeError, "Invertor.from_file: corrupted file\n%s" % sys.exc_value
	552	else:
	553	raise RuntimeError, "Invertor.from_file: '%s' is not a file" % str(path)
[2d06beb]	554
[eca05c8]	555
	556
[f71287f4]	557
[9e8dc22]	558	if __name__ == "__main__":
	559	o = Invertor()
	560
	561
	562
	563
	564

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasview/pr_inversion/invertor.py @ ab6098f

Download in other formats: