invertor.py @ 1a5c946

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalccostrafo411magnetic_scattrelease-4.1.1release-4.1.2release-4.2.2release_4.0.1ticket-1009ticket-1094-headlessticket-1242-2d-resolutionticket-1243ticket-1249ticket885unittest-saveload

Last change on this file since 1a5c946 was 1a5c946, checked in by Mathieu Doucet <doucetm@…>, 16 years ago
Fixed bug in error estimation
Property mode set to `100644`
File size: 21.6 KB

Line
1	"""
2	Module to perform P(r) inversion.
3	The module contains the Invertor class.
4	"""
5	from sans.pr.core.pr_inversion import Cinvertor
6	import numpy
7	import sys
8
9	def help():
10	"""
11	Provide general online help text
12	Future work: extend this function to allow topic selection
13	"""
14	info_txt = "The inversion approach is based on Moore, J. Appl. Cryst. (1980) 13, 168-175.\n\n"
15	info_txt += "P(r) is set to be equal to an expansion of base functions of the type "
16	info_txt += "phi_n(r) = 2rsin(pinr/D_max). The coefficient of each base functions "
17	info_txt += "in the expansion is found by performing a least square fit with the "
18	info_txt += "following fit function:\n\n"
19	info_txt += "chi2 = sum_i[ I_meas(q_i) - I_th(q_i) ]2/error**2 + Reg_term\n\n"
20	info_txt += "where I_meas(q) is the measured scattering intensity and I_th(q) is "
21	info_txt += "the prediction from the Fourier transform of the P(r) expansion. "
22	info_txt += "The Reg_term term is a regularization term set to the second derivative "
23	info_txt += "d2P(r)/dr2 integrated over r. It is used to produce a smooth P(r) output.\n\n"
24	info_txt += "The following are user inputs:\n\n"
25	info_txt += " - Number of terms: the number of base functions in the P(r) expansion.\n\n"
26	info_txt += " - Regularization constant: a multiplicative constant to set the size of "
27	info_txt += "the regularization term.\n\n"
28	info_txt += " - Maximum distance: the maximum distance between any two points in the system.\n"
29
30	return info_txt
31
32
33	class Invertor(Cinvertor):
34	"""
35	Invertor class to perform P(r) inversion
36
37	The problem is solved by posing the problem as Ax = b,
38	where x is the set of coefficients we are looking for.
39
40	Npts is the number of points.
41
42	In the following i refers to the ith base function coefficient.
43	The matrix has its entries j in its first Npts rows set to
44	A[j][i] = (Fourier transformed base function for point j)
45
46	We them choose a number of r-points, n_r, to evaluate the second
47	derivative of P(r) at. This is used as our regularization term.
48	For a vector r of length n_r, the following n_r rows are set to
49	A[j+Npts][i] = (2nd derivative of P(r), d2(P(r))/d(r)2, evaluated at r[j])
50
51	The vector b has its first Npts entries set to
52	b[j] = (I(q) observed for point j)
53
54	The following n_r entries are set to zero.
55
56	The result is found by using scipy.linalg.basic.lstsq to invert
57	the matrix and find the coefficients x.
58
59	Methods inherited from Cinvertor:
60	- get_peaks(pars): returns the number of P(r) peaks
61	- oscillations(pars): returns the oscillation parameters for the output P(r)
62	- get_positive(pars): returns the fraction of P(r) that is above zero
63	- get_pos_err(pars): returns the fraction of P(r) that is 1-sigma above zero
64	"""
65	## Chisqr of the last computation
66	chi2 = 0
67	## Time elapsed for last computation
68	elapsed = 0
69	## Alpha to get the reg term the same size as the signal
70	suggested_alpha = 0
71	## Last number of base functions used
72	nfunc = 0
73	## Last output values
74	out = None
75	## Last errors on output values
76	cov = None
77
78	def __init__(self):
79	Cinvertor.__init__(self)
80
81	def __setattr__(self, name, value):
82	"""
83	Set the value of an attribute.
84	Access the parent class methods for
85	x, y, err, d_max, q_min, q_max and alpha
86	"""
87	if name=='x':
88	if 0.0 in value:
89	raise ValueError, "Invertor: one of your q-values is zero. Delete that entry before proceeding"
90	return self.set_x(value)
91	elif name=='y':
92	return self.set_y(value)
93	elif name=='err':
94	return self.set_err(value)
95	elif name=='d_max':
96	return self.set_dmax(value)
97	elif name=='q_min':
98	if value==None:
99	return self.set_qmin(-1.0)
100	return self.set_qmin(value)
101	elif name=='q_max':
102	if value==None:
103	return self.set_qmax(-1.0)
104	return self.set_qmax(value)
105	elif name=='alpha':
106	return self.set_alpha(value)
107
108	return Cinvertor.__setattr__(self, name, value)
109
110	def __getattr__(self, name):
111	"""
112	Return the value of an attribute
113	For the moment x, y, err and d_max are write-only
114	TODO: change that!
115	"""
116	import numpy
117	if name=='x':
118	out = numpy.ones(self.get_nx())
119	self.get_x(out)
120	return out
121	elif name=='y':
122	out = numpy.ones(self.get_ny())
123	self.get_y(out)
124	return out
125	elif name=='err':
126	out = numpy.ones(self.get_nerr())
127	self.get_err(out)
128	return out
129	elif name=='d_max':
130	return self.get_dmax()
131	elif name=='q_min':
132	qmin = self.get_qmin()
133	if qmin<0:
134	return None
135	return qmin
136	elif name=='q_max':
137	qmax = self.get_qmax()
138	if qmax<0:
139	return None
140	return qmax
141	elif name=='alpha':
142	return self.get_alpha()
143	elif name in self.__dict__:
144	return self.__dict__[name]
145	return None
146
147	def clone(self):
148	"""
149	Return a clone of this instance
150	"""
151	invertor = Invertor()
152	invertor.chi2 = self.chi2
153	invertor.elapsed = self.elapsed
154	invertor.alpha = self.alpha
155	invertor.d_max = self.d_max
156	invertor.q_min = self.q_min
157	invertor.q_max = self.q_max
158
159	invertor.x = self.x
160	invertor.y = self.y
161	invertor.err = self.err
162
163	return invertor
164
165	def invert(self, nfunc=10, nr=20):
166	"""
167	Perform inversion to P(r)
168
169	The problem is solved by posing the problem as Ax = b,
170	where x is the set of coefficients we are looking for.
171
172	Npts is the number of points.
173
174	In the following i refers to the ith base function coefficient.
175	The matrix has its entries j in its first Npts rows set to
176	A[i][j] = (Fourier transformed base function for point j)
177
178	We them choose a number of r-points, n_r, to evaluate the second
179	derivative of P(r) at. This is used as our regularization term.
180	For a vector r of length n_r, the following n_r rows are set to
181	A[i+Npts][j] = (2nd derivative of P(r), d2(P(r))/d(r)2, evaluated at r[j])
182
183	The vector b has its first Npts entries set to
184	b[j] = (I(q) observed for point j)
185
186	The following n_r entries are set to zero.
187
188	The result is found by using scipy.linalg.basic.lstsq to invert
189	the matrix and find the coefficients x.
190
191	@param nfunc: number of base functions to use.
192	@param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
193	@return: c_out, c_cov - the coefficients with covariance matrix
194	"""
195	#TODO: call the pyhton implementation for now. In the future, translate this to C.
196	return self.lstsq(nfunc, nr=nr)
197
198	def invert_optimize(self, nfunc=10, nr=20):
199	"""
200	Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
201
202	This probably produce more reliable results, but is much slower.
203	The minimization function is set to sum_i[ (I_obs(q_i) - I_theo(q_i))/err*2 ] + alpha reg_term,
204	where the reg_term is given by Svergun: it is the integral of the square of the first derivative
205	of P(r), d(P(r))/dr, integrated over the full range of r.
206
207	@param nfunc: number of base functions to use.
208	@param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
209	@return: c_out, c_cov - the coefficients with covariance matrix
210	"""
211
212	from scipy import optimize
213	import time
214
215	self.nfunc = nfunc
216	# First, check that the current data is valid
217	if self.is_valid()<=0:
218	raise RuntimeError, "Invertor.invert: Data array are of different length"
219
220	p = numpy.ones(nfunc)
221	t_0 = time.time()
222	out, cov_x, info, mesg, success = optimize.leastsq(self.residuals, p, full_output=1, warning=True)
223
224	# Compute chi^2
225	res = self.residuals(out)
226	chisqr = 0
227	for i in range(len(res)):
228	chisqr += res[i]
229
230	self.chi2 = chisqr
231
232	# Store computation time
233	self.elapsed = time.time() - t_0
234
235	return out, cov_x
236
237	def pr_fit(self, nfunc=5):
238	"""
239	This is a direct fit to a given P(r). It assumes that the y data
240	is set to some P(r) distribution that we are trying to reproduce
241	with a set of base functions.
242
243	This method is provided as a test.
244	"""
245	from scipy import optimize
246
247	# First, check that the current data is valid
248	if self.is_valid()<=0:
249	raise RuntimeError, "Invertor.invert: Data arrays are of different length"
250
251	p = numpy.ones(nfunc)
252	t_0 = time.time()
253	out, cov_x, info, mesg, success = optimize.leastsq(self.pr_residuals, p, full_output=1, warning=True)
254
255	# Compute chi^2
256	res = self.pr_residuals(out)
257	chisqr = 0
258	for i in range(len(res)):
259	chisqr += res[i]
260
261	self.chisqr = chisqr
262
263	# Store computation time
264	self.elapsed = time.time() - t_0
265
266	return out, cov_x
267
268	def pr_err(self, c, c_cov, r):
269	"""
270	Returns the value of P(r) for a given r, and base function
271	coefficients, with error.
272
273	@param c: base function coefficients
274	@param c_cov: covariance matrice of the base function coefficients
275	@param r: r-value to evaluate P(r) at
276	@return: P(r)
277	"""
278	return self.get_pr_err(c, c_cov, r)
279
280	def _accept_q(self, q):
281	"""
282	Check q-value against user-defined range
283	"""
284	if not self.q_min==None and q<self.q_min:
285	return False
286	if not self.q_max==None and q>self.q_max:
287	return False
288	return True
289
290	def lstsq(self, nfunc=5, nr=20):
291	#TODO: do this on the C side
292	#
293	# To make sure an array is contiguous:
294	# blah = numpy.ascontiguousarray(blah_original)
295	# ... before passing it to C
296	"""
297	The problem is solved by posing the problem as Ax = b,
298	where x is the set of coefficients we are looking for.
299
300	Npts is the number of points.
301
302	In the following i refers to the ith base function coefficient.
303	The matrix has its entries j in its first Npts rows set to
304	A[i][j] = (Fourier transformed base function for point j)
305
306	We them choose a number of r-points, n_r, to evaluate the second
307	derivative of P(r) at. This is used as our regularization term.
308	For a vector r of length n_r, the following n_r rows are set to
309	A[i+Npts][j] = (2nd derivative of P(r), d2(P(r))/d(r)2, evaluated at r[j])
310
311	The vector b has its first Npts entries set to
312	b[j] = (I(q) observed for point j)
313
314	The following n_r entries are set to zero.
315
316	The result is found by using scipy.linalg.basic.lstsq to invert
317	the matrix and find the coefficients x.
318
319	@param nfunc: number of base functions to use.
320	@param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
321	"""
322	import math
323	from scipy.linalg.basic import lstsq
324
325	self.nfunc = nfunc
326	# a -- An M x N matrix.
327	# b -- An M x nrhs matrix or M vector.
328	npts = len(self.x)
329	nq = nr
330	sqrt_alpha = math.sqrt(self.alpha)
331
332	a = numpy.zeros([npts+nq, nfunc])
333	b = numpy.zeros(npts+nq)
334	err = numpy.zeros(nfunc)
335
336	for j in range(nfunc):
337	for i in range(npts):
338	if self._accept_q(self.x[i]):
339	a[i][j] = self.basefunc_ft(self.d_max, j+1, self.x[i])/self.err[i]
340
341	#TODO: refactor this: i_q should really be i_r
342	for i_q in range(nq):
343	r = self.d_max/nq*i_q
344	#a[i_q+npts][j] = sqrt_alpha * 1.0/nqself.d_max2.0math.fabs(math.sin(math.pi(j+1)r/self.d_max) + math.pi(j+1)r/self.d_max math.cos(math.pi(j+1)r/self.d_max))
345	a[i_q+npts][j] = sqrt_alpha * 1.0/nqself.d_max2.0(2.0math.pi(j+1)/self.d_maxmath.cos(math.pi(j+1)r/self.d_max) + math.pi*2(j+1)*2r/self.d_max*2 math.sin(math.pi(j+1)r/self.d_max))
346
347	for i in range(npts):
348	if self._accept_q(self.x[i]):
349	b[i] = self.y[i]/self.err[i]
350
351	c, chi2, rank, n = lstsq(a, b)
352	self.chi2 = chi2
353
354	at = numpy.transpose(a)
355	inv_cov = numpy.zeros([nfunc,nfunc])
356	for i in range(nfunc):
357	for j in range(nfunc):
358	inv_cov[i][j] = 0.0
359	for k in range(npts+nr):
360	if self._accept_q(self.x[i]):
361	inv_cov[i][j] = at[i][k]*a[k][j]
362
363	# Compute the reg term size for the output
364	sum_sig = 0.0
365	sum_reg = 0.0
366	for j in range(nfunc):
367	for i in range(npts):
368	if self._accept_q(self.x[i]):
369	sum_sig += (a[i][j])**2
370	for i in range(nq):
371	sum_reg += (a[i+npts][j])**2
372
373	if math.fabs(self.alpha)>0:
374	new_alpha = sum_sig/(sum_reg/self.alpha)
375	else:
376	new_alpha = 0.0
377	self.suggested_alpha = new_alpha
378
379	try:
380	err = math.fabs(chi2/(npts-nfunc))* inv_cov
381	except:
382	print "Error estimating uncertainties"
383
384	# Keep a copy of the last output
385	self.out = c
386	self.cov = err
387
388	return c, err
389
390	def estimate_alpha(self, nfunc):
391	"""
392	Returns a reasonable guess for the
393	regularization constant alpha
394
395	@return: alpha, message, elapsed
396
397	where alpha is the estimate for alpha,
398	message is a message for the user,
399	elapsed is the computation time
400	"""
401	import time
402	try:
403	pr = self.clone()
404
405	# T_0 for computation time
406	starttime = time.time()
407
408	# If the current alpha is zero, try
409	# another value
410	if pr.alpha<=0:
411	pr.alpha = 0.0001
412
413	# Perform inversion to find the largest alpha
414	out, cov = pr.lstsq(nfunc)
415	elapsed = time.time()-starttime
416	initial_alpha = pr.alpha
417	initial_peaks = pr.get_peaks(out)
418
419	# Try the inversion with the estimated alpha
420	pr.alpha = pr.suggested_alpha
421	out, cov = pr.lstsq(nfunc)
422
423	npeaks = pr.get_peaks(out)
424	# if more than one peak to start with
425	# just return the estimate
426	if npeaks>1:
427	message = "Your P(r) is not smooth, please check your inversion parameters"
428	return pr.suggested_alpha, message, elapsed
429	else:
430
431	# Look at smaller values
432	# We assume that for the suggested alpha, we have 1 peak
433	# if not, send a message to change parameters
434	alpha = pr.suggested_alpha
435	best_alpha = pr.suggested_alpha
436	found = False
437	for i in range(10):
438	pr.alpha = (0.33)*(i+1)alpha
439	out, cov = pr.lstsq(nfunc)
440
441	peaks = pr.get_peaks(out)
442	if peaks>1:
443	found = True
444	break
445	best_alpha = pr.alpha
446
447	# If we didn't find a turning point for alpha and
448	# the initial alpha already had only one peak,
449	# just return that
450	if not found and initial_peaks==1 and initial_alpha<best_alpha:
451	best_alpha = initial_alpha
452
453	# Check whether the size makes sense
454	message=''
455
456	if not found:
457	message = "None"
458	elif best_alpha>=0.5*pr.suggested_alpha:
459	# best alpha is too big, return a
460	# reasonable value
461	message = "The estimated alpha for your system is too large. "
462	message += "Try increasing your maximum distance."
463
464	return best_alpha, message, elapsed
465
466	except:
467	message = "Invertor.estimate_alpha: %s" % sys.exc_value
468	return 0, message, elapsed
469
470
471	def to_file(self, path, npts=100):
472	"""
473	Save the state to a file that will be readable
474	by SliceView.
475	@param path: path of the file to write
476	@param npts: number of P(r) points to be written
477	"""
478	import pylab
479
480	file = open(path, 'w')
481	file.write("#d_max=%g\n" % self.d_max)
482	file.write("#nfunc=%g\n" % self.nfunc)
483	file.write("#alpha=%g\n" % self.alpha)
484	file.write("#chi2=%g\n" % self.chi2)
485	file.write("#elapsed=%g\n" % self.elapsed)
486	file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
487	if not self.out==None:
488	if len(self.out)==len(self.cov):
489	for i in range(len(self.out)):
490	file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]), str(self.cov[i][i])))
491	file.write("<r> <Pr> <dPr>\n")
492	r = pylab.arange(0.0, self.d_max, self.d_max/npts)
493
494	for r_i in r:
495	(value, err) = self.pr_err(self.out, self.cov, r_i)
496	file.write("%g %g %g\n" % (r_i, value, err))
497
498	file.close()
499
500
501	def from_file(self, path):
502	"""
503	Load the state of the Invertor from a file,
504	to be able to generate P(r) from a set of
505	parameters.
506	@param path: path of the file to load
507	"""
508	import os
509	import re
510	if os.path.isfile(path):
511	try:
512	fd = open(path, 'r')
513
514	buff = fd.read()
515	lines = buff.split('\n')
516	for line in lines:
517	if line.startswith('#d_max='):
518	toks = line.split('=')
519	self.d_max = float(toks[1])
520	elif line.startswith('#nfunc='):
521	toks = line.split('=')
522	self.nfunc = int(toks[1])
523	self.out = numpy.zeros(self.nfunc)
524	self.cov = numpy.zeros([self.nfunc, self.nfunc])
525	elif line.startswith('#alpha='):
526	toks = line.split('=')
527	self.alpha = float(toks[1])
528	elif line.startswith('#chi2='):
529	toks = line.split('=')
530	self.chi2 = float(toks[1])
531	elif line.startswith('#elapsed='):
532	toks = line.split('=')
533	self.elapsed = float(toks[1])
534	elif line.startswith('#alpha_estimate='):
535	toks = line.split('=')
536	self.suggested_alpha = float(toks[1])
537
538	# Now read in the parameters
539	elif line.startswith('#C_'):
540	toks = line.split('=')
541	p = re.compile('#C_([0-9]+)')
542	m = p.search(toks[0])
543	toks2 = toks[1].split('+-')
544	i = int(m.group(1))
545	self.out[i] = float(toks2[0])
546
547	self.cov[i][i] = float(toks2[1])
548
549	except:
550	raise RuntimeError, "Invertor.from_file: corrupted file\n%s" % sys.exc_value
551	else:
552	raise RuntimeError, "Invertor.from_file: '%s' is not a file" % str(path)
553
554
555
556
557	if __name__ == "__main__":
558	o = Invertor()
559
560
561
562
563

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasview/pr_inversion/invertor.py @ 1a5c946

Download in other formats: