invertor.py @ 7d3b2ba

Branches/tags: ESS_GUI, ESS_GUI_Docs, ESS_GUI_batch_fitting, ESS_GUI_bumps_abstraction, ESS_GUI_iss1116, ESS_GUI_iss879, ESS_GUI_iss959, ESS_GUI_opencl, ESS_GUI_ordering, ESS_GUI_sync_sascalc, costrafo411, magnetic_scatt, release-4.1.1, release-4.1.2, release-4.2.2, release_4.0.1, ticket-1009, ticket-1094-headless, ticket-1242-2d-resolution, ticket-1243, ticket-1249, ticket885, unittest-saveload

Last change on this file since 7d3b2ba was b00b487, checked in by Mathieu Doucet <doucetm@…>, 16 years ago
Dealt with error conditions, fixed the uncertainty on the output.
Property mode set to `100644`
File size: 22.3 KB

Line
1	"""
2	Module to perform P(r) inversion.
3	The module contains the Invertor class.
4	"""
5	from sans.pr.core.pr_inversion import Cinvertor
6	import numpy
7	import sys
8
def help():
    """
    Provide general online help text.

    Future work: extend this function to allow topic selection.

    @return: a multi-line string describing the inversion method, the
        fit function that is minimized and the user inputs.
    """
    # The text is assembled from fragments and joined once; fragments that
    # end a paragraph carry their own trailing newlines.
    fragments = (
        "The inversion approach is based on Moore, J. Appl. Cryst. (1980) 13, 168-175.\n\n",
        "P(r) is set to be equal to an expansion of base functions of the type ",
        "phi_n(r) = 2*r*sin(pi*n*r/D_max). The coefficient of each base functions ",
        "in the expansion is found by performing a least square fit with the ",
        "following fit function:\n\n",
        "chi**2 = sum_i[ I_meas(q_i) - I_th(q_i) ]**2/error**2 + Reg_term\n\n",
        "where I_meas(q) is the measured scattering intensity and I_th(q) is ",
        "the prediction from the Fourier transform of the P(r) expansion. ",
        "The Reg_term term is a regularization term set to the second derivative ",
        "d**2P(r)/dr**2 integrated over r. It is used to produce a smooth P(r) output.\n\n",
        "The following are user inputs:\n\n",
        " - Number of terms: the number of base functions in the P(r) expansion.\n\n",
        " - Regularization constant: a multiplicative constant to set the size of ",
        "the regularization term.\n\n",
        " - Maximum distance: the maximum distance between any two points in the system.\n",
    )
    return "".join(fragments)
31
32
class Invertor(Cinvertor):
    """
    Invertor class to perform P(r) inversion.

    The problem is solved by posing the problem as Ax = b,
    where x is the set of coefficients we are looking for.

    Npts is the number of points.

    In the following j refers to the jth base function coefficient.
    The matrix has its first Npts rows set to
        A[i][j] = (Fourier transformed base function j for point i) / err[i]

    We then choose a number of r-points, n_r, to evaluate the second
    derivative of P(r) at. This is used as our regularization term.
    For a vector r of length n_r, the following n_r rows are set to
        A[i+Npts][j] = (2nd derivative of P(r), d^2(P(r))/d(r)^2, evaluated at r[i])

    The vector b has its first Npts entries set to
        b[i] = (I(q) observed for point i) / err[i]

    The following n_r entries are set to zero.

    The result is found by using scipy.linalg.lstsq to invert
    the matrix and find the coefficients x.

    Attributes x, y, err, d_max, q_min, q_max and alpha are not stored on
    the Python instance: they are forwarded to the set_*/get_* methods of
    the parent class by __setattr__ and __getattr__.

    Methods inherited from Cinvertor:
    - get_peaks(pars): returns the number of P(r) peaks
    - oscillations(pars): returns the oscillation parameters for the output P(r)
    - get_positive(pars): returns the fraction of P(r) that is above zero
    - get_pos_err(pars): returns the fraction of P(r) that is 1-sigma above zero
    """
    ## Chisqr of the last computation
    chi2 = 0
    ## Time elapsed for last computation
    elapsed = 0
    ## Alpha to get the reg term the same size as the signal
    suggested_alpha = 0
    ## Last number of base functions used
    nfunc = 0
    ## Last output values
    out = None
    ## Last errors on output values
    cov = None

    def __init__(self):
        """
        Initialize the underlying Cinvertor object.
        """
        Cinvertor.__init__(self)

    def __setattr__(self, name, value):
        """
        Set the value of an attribute.
        Access the parent class methods for
        x, y, err, d_max, q_min, q_max and alpha

        @param name: attribute name
        @param value: new value; for q_min and q_max, None is stored
            as -1.0 in the parent class
        @raise ValueError: if name is 'x' and one of the values is zero
        """
        if name == 'x':
            if 0.0 in value:
                raise ValueError("Invertor: one of your q-values is zero. Delete that entry before proceeding")
            return self.set_x(value)
        elif name == 'y':
            return self.set_y(value)
        elif name == 'err':
            # Only the magnitude of the uncertainty is kept
            return self.set_err(abs(value))
        elif name == 'd_max':
            return self.set_dmax(value)
        elif name == 'q_min':
            if value is None:
                return self.set_qmin(-1.0)
            return self.set_qmin(value)
        elif name == 'q_max':
            if value is None:
                return self.set_qmax(-1.0)
            return self.set_qmax(value)
        elif name == 'alpha':
            return self.set_alpha(value)

        return Cinvertor.__setattr__(self, name, value)

    def __getattr__(self, name):
        """
        Return the value of an attribute.

        x, y and err are copied out of the parent class into a new
        numpy array on every access. q_min and q_max are returned as
        None when the stored value is negative.

        Note: any other name that is not found returns None instead of
        raising AttributeError.

        @param name: attribute name
        @return: the attribute value, or None if it is unknown
        """
        if name == 'x':
            out = numpy.ones(self.get_nx())
            self.get_x(out)
            return out
        elif name == 'y':
            out = numpy.ones(self.get_ny())
            self.get_y(out)
            return out
        elif name == 'err':
            out = numpy.ones(self.get_nerr())
            self.get_err(out)
            return out
        elif name == 'd_max':
            return self.get_dmax()
        elif name == 'q_min':
            qmin = self.get_qmin()
            if qmin < 0:
                return None
            return qmin
        elif name == 'q_max':
            qmax = self.get_qmax()
            if qmax < 0:
                return None
            return qmax
        elif name == 'alpha':
            return self.get_alpha()
        elif name in self.__dict__:
            return self.__dict__[name]
        return None

    def clone(self):
        """
        Return a clone of this instance.

        chi2, elapsed, alpha, d_max, q_min, q_max, x, y and err are
        copied; the other attributes keep their default values.

        @return: a new Invertor
        """
        invertor = Invertor()
        invertor.chi2 = self.chi2
        invertor.elapsed = self.elapsed
        invertor.alpha = self.alpha
        invertor.d_max = self.d_max
        invertor.q_min = self.q_min
        invertor.q_max = self.q_max

        invertor.x = self.x
        invertor.y = self.y
        invertor.err = self.err

        return invertor

    def invert(self, nfunc=10, nr=20):
        """
        Perform inversion to P(r).

        This delegates to lstsq(), which poses the problem as Ax = b
        (data rows followed by nr regularization rows) and solves it
        by linear least squares.

        @param nfunc: number of base functions to use.
        @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
        @return: c_out, c_cov - the coefficients with covariance matrix
        """
        #TODO: call the python implementation for now. In the future, translate this to C.
        return self.lstsq(nfunc, nr=nr)

    def invert_optimize(self, nfunc=10, nr=20):
        """
        Slower version of the P(r) inversion that uses scipy.optimize.leastsq.

        This probably produce more reliable results, but is much slower.
        The minimization function is set to sum_i[ (I_obs(q_i) - I_theo(q_i))/err**2 ] + alpha * reg_term,
        where the reg_term is given by Svergun: it is the integral of the square of the first derivative
        of P(r), d(P(r))/dr, integrated over the full range of r.

        @param nfunc: number of base functions to use.
        @param nr: number of r points to evaluate the 2nd derivative at for the reg. term
            (not used by this method).
        @return: c_out, c_cov - the coefficients with covariance matrix
        @raise RuntimeError: if is_valid() reports invalid data
        """
        from scipy import optimize
        import time

        self.nfunc = nfunc
        # First, check that the current data is valid
        if self.is_valid() <= 0:
            raise RuntimeError("Invertor.invert: Data array are of different length")

        p = numpy.ones(nfunc)
        t_0 = time.time()
        # The 'warning' keyword is not passed: it is not accepted by
        # current SciPy releases.
        out, cov_x, info, mesg, success = optimize.leastsq(self.residuals, p, full_output=1)

        # Compute chi^2 as the sum of the residuals at the solution
        chisqr = 0
        for value in self.residuals(out):
            chisqr += value
        self.chi2 = chisqr

        # Store computation time
        self.elapsed = time.time() - t_0

        return out, cov_x

    def pr_fit(self, nfunc=5):
        """
        This is a direct fit to a given P(r). It assumes that the y data
        is set to some P(r) distribution that we are trying to reproduce
        with a set of base functions.

        This method is provided as a test.

        @param nfunc: number of base functions to use.
        @return: c_out, c_cov - the coefficients with covariance matrix
        @raise RuntimeError: if is_valid() reports invalid data
        """
        from scipy import optimize
        import time

        # First, check that the current data is valid
        if self.is_valid() <= 0:
            raise RuntimeError("Invertor.invert: Data arrays are of different length")

        p = numpy.ones(nfunc)
        t_0 = time.time()
        # The 'warning' keyword is not passed: it is not accepted by
        # current SciPy releases.
        out, cov_x, info, mesg, success = optimize.leastsq(self.pr_residuals, p, full_output=1)

        # Compute chi^2 as the sum of the residuals at the solution
        chisqr = 0
        for value in self.pr_residuals(out):
            chisqr += value

        # chi2 is the attribute read by the rest of the class; chisqr is
        # kept as well for callers that relied on the former name.
        self.chi2 = chisqr
        self.chisqr = chisqr

        # Store computation time
        self.elapsed = time.time() - t_0

        return out, cov_x

    def pr_err(self, c, c_cov, r):
        """
        Returns the value of P(r) for a given r, and base function
        coefficients, with error.

        @param c: base function coefficients
        @param c_cov: covariance matrix of the base function coefficients
        @param r: r-value to evaluate P(r) at
        @return: the result of get_pr_err(c, c_cov, r)
        """
        return self.get_pr_err(c, c_cov, r)

    def _accept_q(self, q):
        """
        Check q-value against user-defined range.

        @param q: q-value to test
        @return: False if q is below q_min or above q_max (when those
            are set), True otherwise
        """
        q_min = self.q_min
        if q_min is not None and q < q_min:
            return False
        q_max = self.q_max
        if q_max is not None and q > q_max:
            return False
        return True

    def lstsq(self, nfunc=5, nr=20):
        #TODO: do this on the C side
        #
        # To make sure an array is contiguous:
        # blah = numpy.ascontiguousarray(blah_original)
        # ... before passing it to C
        """
        The problem is solved by posing the problem as Ax = b,
        where x is the set of coefficients we are looking for.

        Npts is the number of points.

        In the following j refers to the jth base function coefficient.
        The matrix has its first Npts rows set to
            A[i][j] = (Fourier transformed base function j for point i) / err[i]
        Rows for q-values rejected by _accept_q are left at zero.

        We then choose a number of r-points, n_r, to evaluate the second
        derivative of P(r) at. This is used as our regularization term.
        For a vector r of length n_r, the following n_r rows are set to
            A[i+Npts][j] = (2nd derivative of P(r), d^2(P(r))/d(r)^2, evaluated at r[i])

        The vector b has its first Npts entries set to
            b[i] = (I(q) observed for point i) / err[i]

        The following n_r entries are set to zero.

        The result is found by using scipy.linalg.lstsq to invert
        the matrix and find the coefficients x.

        Side effects: nfunc, chi2, suggested_alpha, out and cov are
        updated on the instance.

        @param nfunc: number of base functions to use.
        @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
        @return: c, err - the coefficients and their covariance matrix

        If the result does not allow us to compute the covariance matrix,
        a matrix filled with zeros will be returned.

        @raise RuntimeError: if is_valid() reports invalid data
        """
        import math
        from scipy.linalg import lstsq

        if self.is_valid() < 0:
            raise RuntimeError("Invertor: invalid data; incompatible data lengths.")

        self.nfunc = nfunc

        # Read the data once: each access to x, y or err goes through
        # __getattr__ and copies the whole array out of the parent class.
        q_values = self.x
        intensities = self.y
        sigmas = self.err
        d_max = self.d_max
        alpha = self.alpha

        npts = len(q_values)
        sqrt_alpha = math.sqrt(math.fabs(alpha))
        accepted = [self._accept_q(q) for q in q_values]

        # a -- An M x N matrix.
        # b -- An M x nrhs matrix or M vector.
        a = numpy.zeros([npts + nr, nfunc])
        b = numpy.zeros(npts + nr)
        err = numpy.zeros([nfunc, nfunc])

        for j in range(nfunc):
            # Data rows
            for i in range(npts):
                if accepted[i]:
                    a[i][j] = self.basefunc_ft(d_max, j + 1, q_values[i]) / sigmas[i]

            # Regularization rows, scaled by sqrt(alpha)
            freq = math.pi * (j + 1) / d_max
            for i_r in range(nr):
                r = d_max / nr * i_r
                a[i_r + npts][j] = sqrt_alpha * 1.0 / nr * d_max * 2.0 * \
                    (2.0 * freq * math.cos(freq * r) + freq ** 2 * r * math.sin(freq * r))

        for i in range(npts):
            if accepted[i]:
                b[i] = intensities[i] / sigmas[i]

        c, chi2, rank, n = lstsq(a, b)
        # Sanity check: the residual may come back in a form that cannot
        # be converted to a float; flag that case with -1.
        try:
            chi2 = float(chi2)
        except (TypeError, ValueError):
            chi2 = -1.0
        self.chi2 = chi2

        # Inverse covariance matrix: A^T A
        inv_cov = numpy.dot(numpy.transpose(a), a)

        # Compute the reg term size for the output.
        # Rejected data rows are all zeros, so they do not contribute.
        sum_sig = numpy.sum(a[:npts] ** 2)
        sum_reg = numpy.sum(a[npts:] ** 2)

        if math.fabs(alpha) > 0:
            new_alpha = sum_sig / (sum_reg / alpha)
        else:
            new_alpha = 0.0
        self.suggested_alpha = new_alpha

        try:
            cov = numpy.linalg.pinv(inv_cov)
            err = math.fabs(chi2 / float(npts - nfunc)) * cov
        except Exception:
            # We were not able to estimate the errors,
            # returns an empty covariance matrix
            print("lstsq: %s" % sys.exc_info()[1])
            print(chi2)

        # Keep a copy of the last output
        self.out = c
        self.cov = err

        return c, err

    def estimate_alpha(self, nfunc):
        """
        Returns a reasonable guess for the
        regularization constant alpha

        @param nfunc: number of base functions to use.
        @return: alpha, message, elapsed

        where alpha is the estimate for alpha,
        message is a message for the user,
        elapsed is the computation time

        If anything goes wrong, alpha is returned as 0 and the message
        describes the error.
        """
        import time

        # Defined before the try block so that the error path below can
        # always return it.
        elapsed = 0
        try:
            pr = self.clone()

            # T_0 for computation time
            starttime = time.time()

            # If the current alpha is zero, try
            # another value
            if pr.alpha <= 0:
                pr.alpha = 0.0001

            # Perform inversion to find the largest alpha
            out, cov = pr.lstsq(nfunc)
            elapsed = time.time() - starttime
            initial_alpha = pr.alpha
            initial_peaks = pr.get_peaks(out)

            # Try the inversion with the estimated alpha
            pr.alpha = pr.suggested_alpha
            out, cov = pr.lstsq(nfunc)

            # if more than one peak to start with
            # just return the estimate
            if pr.get_peaks(out) > 1:
                message = "Your P(r) is not smooth, please check your inversion parameters"
                return pr.suggested_alpha, message, elapsed

            # Look at smaller values
            # We assume that for the suggested alpha, we have 1 peak
            # if not, send a message to change parameters
            alpha = pr.suggested_alpha
            best_alpha = pr.suggested_alpha
            found = False
            for i in range(10):
                pr.alpha = (0.33) ** (i + 1) * alpha
                out, cov = pr.lstsq(nfunc)

                if pr.get_peaks(out) > 1:
                    found = True
                    break
                best_alpha = pr.alpha

            # If we didn't find a turning point for alpha and
            # the initial alpha already had only one peak,
            # just return that
            if not found and initial_peaks == 1 and initial_alpha < best_alpha:
                best_alpha = initial_alpha

            # Check whether the size makes sense
            message = ''

            if not found:
                message = "None"
            elif best_alpha >= 0.5 * pr.suggested_alpha:
                # best alpha is too big, return a
                # reasonable value
                message = "The estimated alpha for your system is too large. "
                message += "Try increasing your maximum distance."

            return best_alpha, message, elapsed

        except Exception:
            message = "Invertor.estimate_alpha: %s" % sys.exc_info()[1]
            return 0, message, elapsed

    def to_file(self, path, npts=100):
        """
        Save the state to a file that will be readable
        by SliceView.

        The header lines start with '#'; they are followed by npts
        lines of "r P(r) dP(r)" for r in [0, d_max).

        @param path: path of the file to write
        @param npts: number of P(r) points to be written
        """
        with open(path, 'w') as fd:
            fd.write("#d_max=%g\n" % self.d_max)
            fd.write("#nfunc=%g\n" % self.nfunc)
            fd.write("#alpha=%g\n" % self.alpha)
            fd.write("#chi2=%g\n" % self.chi2)
            fd.write("#elapsed=%g\n" % self.elapsed)
            fd.write("#alpha_estimate=%g\n" % self.suggested_alpha)
            # Only the diagonal of the covariance matrix is written
            if self.out is not None:
                if len(self.out) == len(self.cov):
                    for i in range(len(self.out)):
                        fd.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]), str(self.cov[i][i])))
            fd.write("<r> <Pr> <dPr>\n")
            r = numpy.arange(0.0, self.d_max, self.d_max / npts)

            for r_i in r:
                (value, err) = self.pr_err(self.out, self.cov, r_i)
                fd.write("%g %g %g\n" % (r_i, value, err))

    def from_file(self, path):
        """
        Load the state of the Invertor from a file,
        to be able to generate P(r) from a set of
        parameters.

        Only the '#' header lines are read. The coefficient lines
        ('#C_i=value+-error') fill out[i] and the diagonal term
        cov[i][i]; they must come after the '#nfunc=' line, which
        allocates both arrays.

        @param path: path of the file to load
        @raise RuntimeError: if path is not a file or if the file
            cannot be parsed
        """
        import os
        import re

        if not os.path.isfile(path):
            raise RuntimeError("Invertor.from_file: '%s' is not a file" % str(path))

        coeff_pattern = re.compile('#C_([0-9]+)')
        try:
            with open(path, 'r') as fd:
                lines = fd.read().split('\n')

            for line in lines:
                if line.startswith('#d_max='):
                    toks = line.split('=')
                    self.d_max = float(toks[1])
                elif line.startswith('#nfunc='):
                    toks = line.split('=')
                    self.nfunc = int(toks[1])
                    self.out = numpy.zeros(self.nfunc)
                    self.cov = numpy.zeros([self.nfunc, self.nfunc])
                elif line.startswith('#alpha='):
                    toks = line.split('=')
                    self.alpha = float(toks[1])
                elif line.startswith('#chi2='):
                    toks = line.split('=')
                    self.chi2 = float(toks[1])
                elif line.startswith('#elapsed='):
                    toks = line.split('=')
                    self.elapsed = float(toks[1])
                elif line.startswith('#alpha_estimate='):
                    toks = line.split('=')
                    self.suggested_alpha = float(toks[1])

                # Now read in the parameters
                elif line.startswith('#C_'):
                    toks = line.split('=')
                    m = coeff_pattern.search(toks[0])
                    toks2 = toks[1].split('+-')
                    i = int(m.group(1))
                    self.out[i] = float(toks2[0])
                    self.cov[i][i] = float(toks2[1])

        except Exception:
            raise RuntimeError("Invertor.from_file: corrupted file\n%s" % sys.exc_info()[1])
574
575
576
577
if __name__ == "__main__":
    # When run as a script, simply check that an Invertor can be created.
    invertor = Invertor()
580
581
582
583
584

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasview/pr_inversion/invertor.py @ 7d3b2ba

Download in other formats: