invertor.py @ e39640f

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalccostrafo411magnetic_scattrelease-4.1.1release-4.1.2release-4.2.2release_4.0.1ticket-1009ticket-1094-headlessticket-1242-2d-resolutionticket-1243ticket-1249ticket885unittest-saveload

Last change on this file since e39640f was e39640f, checked in by Mathieu Doucet <doucetm@…>, 16 years ago
pr_inversion 0.1
Property mode set to `100644`
File size: 21.6 KB

Line
1	"""
2	Module to perform P(r) inversion.
3	The module contains the Invertor class.
4	"""
5	from sans.pr.core.pr_inversion import Cinvertor
6	import numpy
7	import sys
8
9	def help():
10	"""
11	Provide general online help text
12	Future work: extend this function to allow topic selection
13	"""
14	info_txt = "The inversion approach is based on Moore, J. Appl. Cryst. (1980) 13, 168-175.\n\n"
15	info_txt += "P(r) is set to be equal to an expansion of base functions of the type "
16	info_txt += "phi_n(r) = 2rsin(pinr/D_max). The coefficient of each base functions "
17	info_txt += "in the expansion is found by performing a least square fit with the "
18	info_txt += "following fit function:\n\n"
19	info_txt += "chi2 = sum_i[ I_meas(q_i) - I_th(q_i) ]2/error**2 + Reg_term\n\n"
20	info_txt += "where I_meas(q) is the measured scattering intensity and I_th(q) is "
21	info_txt += "the prediction from the Fourier transform of the P(r) expansion. "
22	info_txt += "The Reg_term term is a regularization term set to the second derivative "
23	info_txt += "d2P(r)/dr2 integrated over r. It is used to produce a smooth P(r) output.\n\n"
24	info_txt += "The following are user inputs:\n\n"
25	info_txt += " - Number of terms: the number of base functions in the P(r) expansion.\n\n"
26	info_txt += " - Regularization constant: a multiplicative constant to set the size of "
27	info_txt += "the regularization term.\n\n"
28	info_txt += " - Maximum distance: the maximum distance between any two points in the system.\n"
29
30	return info_txt
31
32
33	class Invertor(Cinvertor):
34	"""
35	Invertor class to perform P(r) inversion
36
37	The problem is solved by posing the problem as Ax = b,
38	where x is the set of coefficients we are looking for.
39
40	Npts is the number of points.
41
42	In the following i refers to the ith base function coefficient.
43	The matrix has its entries j in its first Npts rows set to
44	A[j][i] = (Fourier transformed base function for point j)
45
46	We them choose a number of r-points, n_r, to evaluate the second
47	derivative of P(r) at. This is used as our regularization term.
48	For a vector r of length n_r, the following n_r rows are set to
49	A[j+Npts][i] = (2nd derivative of P(r), d2(P(r))/d(r)2, evaluated at r[j])
50
51	The vector b has its first Npts entries set to
52	b[j] = (I(q) observed for point j)
53
54	The following n_r entries are set to zero.
55
56	The result is found by using scipy.linalg.basic.lstsq to invert
57	the matrix and find the coefficients x.
58
59	Methods inherited from Cinvertor:
60	- get_peaks(pars): returns the number of P(r) peaks
61	- oscillations(pars): returns the oscillation parameters for the output P(r)
62	- get_positive(pars): returns the fraction of P(r) that is above zero
63	- get_pos_err(pars): returns the fraction of P(r) that is 1-sigma above zero
64	"""
65	## Chisqr of the last computation
66	chi2 = 0
67	## Time elapsed for last computation
68	elapsed = 0
69	## Alpha to get the reg term the same size as the signal
70	suggested_alpha = 0
71	## Last number of base functions used
72	nfunc = 0
73	## Last output values
74	out = None
75	## Last errors on output values
76	cov = None
77
78	def __init__(self):
79	Cinvertor.__init__(self)
80
81	def __setattr__(self, name, value):
82	"""
83	Set the value of an attribute.
84	Access the parent class methods for
85	x, y, err, d_max, q_min, q_max and alpha
86	"""
87	if name=='x':
88	if 0.0 in value:
89	raise ValueError, "Invertor: one of your q-values is zero. Delete that entry before proceeding"
90	return self.set_x(value)
91	elif name=='y':
92	return self.set_y(value)
93	elif name=='err':
94	return self.set_err(value)
95	elif name=='d_max':
96	return self.set_dmax(value)
97	elif name=='q_min':
98	if value==None:
99	return self.set_qmin(-1.0)
100	return self.set_qmin(value)
101	elif name=='q_max':
102	if value==None:
103	return self.set_qmax(-1.0)
104	return self.set_qmax(value)
105	elif name=='alpha':
106	return self.set_alpha(value)
107
108	return Cinvertor.__setattr__(self, name, value)
109
110	def __getattr__(self, name):
111	"""
112	Return the value of an attribute
113	For the moment x, y, err and d_max are write-only
114	TODO: change that!
115	"""
116	import numpy
117	if name=='x':
118	out = numpy.ones(self.get_nx())
119	self.get_x(out)
120	return out
121	elif name=='y':
122	out = numpy.ones(self.get_ny())
123	self.get_y(out)
124	return out
125	elif name=='err':
126	out = numpy.ones(self.get_nerr())
127	self.get_err(out)
128	return out
129	elif name=='d_max':
130	return self.get_dmax()
131	elif name=='q_min':
132	qmin = self.get_qmin()
133	if qmin<0:
134	return None
135	return qmin
136	elif name=='q_max':
137	qmax = self.get_qmax()
138	if qmax<0:
139	return None
140	return qmax
141	elif name=='alpha':
142	return self.get_alpha()
143	elif name in self.__dict__:
144	return self.__dict__[name]
145	return None
146
147	def clone(self):
148	"""
149	Return a clone of this instance
150	"""
151	invertor = Invertor()
152	invertor.chi2 = self.chi2
153	invertor.elapsed = self.elapsed
154	invertor.alpha = self.alpha
155	invertor.d_max = self.d_max
156	invertor.q_min = self.q_min
157	invertor.q_max = self.q_max
158
159	invertor.x = self.x
160	invertor.y = self.y
161	invertor.err = self.err
162
163	return invertor
164
165	def invert(self, nfunc=10, nr=20):
166	"""
167	Perform inversion to P(r)
168
169	The problem is solved by posing the problem as Ax = b,
170	where x is the set of coefficients we are looking for.
171
172	Npts is the number of points.
173
174	In the following i refers to the ith base function coefficient.
175	The matrix has its entries j in its first Npts rows set to
176	A[i][j] = (Fourier transformed base function for point j)
177
178	We them choose a number of r-points, n_r, to evaluate the second
179	derivative of P(r) at. This is used as our regularization term.
180	For a vector r of length n_r, the following n_r rows are set to
181	A[i+Npts][j] = (2nd derivative of P(r), d2(P(r))/d(r)2, evaluated at r[j])
182
183	The vector b has its first Npts entries set to
184	b[j] = (I(q) observed for point j)
185
186	The following n_r entries are set to zero.
187
188	The result is found by using scipy.linalg.basic.lstsq to invert
189	the matrix and find the coefficients x.
190
191	@param nfunc: number of base functions to use.
192	@param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
193	@return: c_out, c_cov - the coefficients with covariance matrix
194	"""
195	#TODO: call the pyhton implementation for now. In the future, translate this to C.
196	return self.lstsq(nfunc, nr=nr)
197
198	def invert_optimize(self, nfunc=10, nr=20):
199	"""
200	Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
201
202	This probably produce more reliable results, but is much slower.
203	The minimization function is set to sum_i[ (I_obs(q_i) - I_theo(q_i))/err*2 ] + alpha reg_term,
204	where the reg_term is given by Svergun: it is the integral of the square of the first derivative
205	of P(r), d(P(r))/dr, integrated over the full range of r.
206
207	@param nfunc: number of base functions to use.
208	@param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
209	@return: c_out, c_cov - the coefficients with covariance matrix
210	"""
211
212	from scipy import optimize
213	import time
214
215	self.nfunc = nfunc
216	# First, check that the current data is valid
217	if self.is_valid()<=0:
218	raise RuntimeError, "Invertor.invert: Data array are of different length"
219
220	p = numpy.ones(nfunc)
221	t_0 = time.time()
222	out, cov_x, info, mesg, success = optimize.leastsq(self.residuals, p, full_output=1, warning=True)
223
224	# Compute chi^2
225	res = self.residuals(out)
226	chisqr = 0
227	for i in range(len(res)):
228	chisqr += res[i]
229
230	self.chi2 = chisqr
231
232	# Store computation time
233	self.elapsed = time.time() - t_0
234
235	return out, cov_x
236
237	def pr_fit(self, nfunc=5):
238	"""
239	This is a direct fit to a given P(r). It assumes that the y data
240	is set to some P(r) distribution that we are trying to reproduce
241	with a set of base functions.
242
243	This method is provided as a test.
244	"""
245	from scipy import optimize
246
247	# First, check that the current data is valid
248	if self.is_valid()<=0:
249	raise RuntimeError, "Invertor.invert: Data arrays are of different length"
250
251	p = numpy.ones(nfunc)
252	t_0 = time.time()
253	out, cov_x, info, mesg, success = optimize.leastsq(self.pr_residuals, p, full_output=1, warning=True)
254
255	# Compute chi^2
256	res = self.pr_residuals(out)
257	chisqr = 0
258	for i in range(len(res)):
259	chisqr += res[i]
260
261	self.chisqr = chisqr
262
263	# Store computation time
264	self.elapsed = time.time() - t_0
265
266	return out, cov_x
267
268	def pr_err(self, c, c_cov, r):
269	"""
270	Returns the value of P(r) for a given r, and base function
271	coefficients, with error.
272
273	@param c: base function coefficients
274	@param c_cov: covariance matrice of the base function coefficients
275	@param r: r-value to evaluate P(r) at
276	@return: P(r)
277	"""
278	return self.get_pr_err(c, c_cov, r)
279
280	def _accept_q(self, q):
281	"""
282	Check q-value against user-defined range
283	"""
284	if not self.q_min==None and q<self.q_min:
285	return False
286	if not self.q_max==None and q>self.q_max:
287	return False
288	return True
289
290	def lstsq(self, nfunc=5, nr=20):
291	#TODO: do this on the C side
292	#
293	# To make sure an array is contiguous:
294	# blah = numpy.ascontiguousarray(blah_original)
295	# ... before passing it to C
296	"""
297	The problem is solved by posing the problem as Ax = b,
298	where x is the set of coefficients we are looking for.
299
300	Npts is the number of points.
301
302	In the following i refers to the ith base function coefficient.
303	The matrix has its entries j in its first Npts rows set to
304	A[i][j] = (Fourier transformed base function for point j)
305
306	We them choose a number of r-points, n_r, to evaluate the second
307	derivative of P(r) at. This is used as our regularization term.
308	For a vector r of length n_r, the following n_r rows are set to
309	A[i+Npts][j] = (2nd derivative of P(r), d2(P(r))/d(r)2, evaluated at r[j])
310
311	The vector b has its first Npts entries set to
312	b[j] = (I(q) observed for point j)
313
314	The following n_r entries are set to zero.
315
316	The result is found by using scipy.linalg.basic.lstsq to invert
317	the matrix and find the coefficients x.
318
319	@param nfunc: number of base functions to use.
320	@param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
321	"""
322	import math
323	from scipy.linalg.basic import lstsq
324
325	self.nfunc = nfunc
326	# a -- An M x N matrix.
327	# b -- An M x nrhs matrix or M vector.
328	npts = len(self.x)
329	nq = nr
330	sqrt_alpha = math.sqrt(self.alpha)
331
332	a = numpy.zeros([npts+nq, nfunc])
333	b = numpy.zeros(npts+nq)
334	err = numpy.zeros(nfunc)
335
336	for j in range(nfunc):
337	for i in range(npts):
338	if self._accept_q(self.x[i]):
339	a[i][j] = self.basefunc_ft(self.d_max, j+1, self.x[i])/self.err[i]
340
341	#TODO: refactor this: i_q should really be i_r
342	for i_q in range(nq):
343	r = self.d_max/nq*i_q
344	#a[i_q+npts][j] = sqrt_alpha * 1.0/nqself.d_max2.0math.fabs(math.sin(math.pi(j+1)r/self.d_max) + math.pi(j+1)r/self.d_max math.cos(math.pi(j+1)r/self.d_max))
345	a[i_q+npts][j] = sqrt_alpha * 1.0/nqself.d_max2.0(2.0math.pi(j+1)/self.d_maxmath.cos(math.pi(j+1)r/self.d_max) + math.pi*2(j+1)*2r/self.d_max*2 math.sin(math.pi(j+1)r/self.d_max))
346
347	for i in range(npts):
348	if self._accept_q(self.x[i]):
349	b[i] = self.y[i]/self.err[i]
350
351	c, chi2, rank, n = lstsq(a, b)
352	self.chi2 = chi2
353
354	at = numpy.transpose(a)
355	inv_cov = numpy.zeros([nfunc,nfunc])
356	for i in range(nfunc):
357	for j in range(nfunc):
358	inv_cov[i][j] = 0.0
359	for k in range(npts+nr):
360	if self._accept_q(self.x[i]):
361	inv_cov[i][j] = at[i][k]*a[k][j]
362
363	# Compute the reg term size for the output
364	sum_sig = 0.0
365	sum_reg = 0.0
366	for j in range(nfunc):
367	for i in range(npts):
368	if self._accept_q(self.x[i]):
369	sum_sig += (a[i][j])**2
370	for i in range(nq):
371	sum_reg += (a[i+npts][j])**2
372
373	if math.fabs(self.alpha)>0:
374	new_alpha = sum_sig/(sum_reg/self.alpha)
375	else:
376	new_alpha = 0.0
377	self.suggested_alpha = new_alpha
378
379	try:
380	err = math.fabs(chi2/(npts-nfunc))* inv_cov
381	except:
382	print "Error estimating uncertainties"
383
384	# Keep a copy of the last output
385	self.out = c
386	self.cov = err
387
388	return c, err
389
390	def estimate_alpha(self, nfunc):
391	"""
392	Returns a reasonable guess for the
393	regularization constant alpha
394
395	@return: alpha, message, elapsed
396
397	where alpha is the estimate for alpha,
398	message is a message for the user,
399	elapsed is the computation time
400	"""
401	import time
402	try:
403	pr = self.clone()
404
405	# T_0 for computation time
406	starttime = time.time()
407	elapsed = 0
408
409	# If the current alpha is zero, try
410	# another value
411	if pr.alpha<=0:
412	pr.alpha = 0.0001
413
414	# Perform inversion to find the largest alpha
415	out, cov = pr.lstsq(nfunc)
416	elapsed = time.time()-starttime
417	initial_alpha = pr.alpha
418	initial_peaks = pr.get_peaks(out)
419
420	# Try the inversion with the estimated alpha
421	pr.alpha = pr.suggested_alpha
422	out, cov = pr.lstsq(nfunc)
423
424	npeaks = pr.get_peaks(out)
425	# if more than one peak to start with
426	# just return the estimate
427	if npeaks>1:
428	message = "Your P(r) is not smooth, please check your inversion parameters"
429	return pr.suggested_alpha, message, elapsed
430	else:
431
432	# Look at smaller values
433	# We assume that for the suggested alpha, we have 1 peak
434	# if not, send a message to change parameters
435	alpha = pr.suggested_alpha
436	best_alpha = pr.suggested_alpha
437	found = False
438	for i in range(10):
439	pr.alpha = (0.33)*(i+1)alpha
440	out, cov = pr.lstsq(nfunc)
441
442	peaks = pr.get_peaks(out)
443	if peaks>1:
444	found = True
445	break
446	best_alpha = pr.alpha
447
448	# If we didn't find a turning point for alpha and
449	# the initial alpha already had only one peak,
450	# just return that
451	if not found and initial_peaks==1 and initial_alpha<best_alpha:
452	best_alpha = initial_alpha
453
454	# Check whether the size makes sense
455	message=''
456
457	if not found:
458	message = "None"
459	elif best_alpha>=0.5*pr.suggested_alpha:
460	# best alpha is too big, return a
461	# reasonable value
462	message = "The estimated alpha for your system is too large. "
463	message += "Try increasing your maximum distance."
464
465	return best_alpha, message, elapsed
466
467	except:
468	message = "Invertor.estimate_alpha: %s" % sys.exc_value
469	return 0, message, elapsed
470
471
472	def to_file(self, path, npts=100):
473	"""
474	Save the state to a file that will be readable
475	by SliceView.
476	@param path: path of the file to write
477	@param npts: number of P(r) points to be written
478	"""
479	import pylab
480
481	file = open(path, 'w')
482	file.write("#d_max=%g\n" % self.d_max)
483	file.write("#nfunc=%g\n" % self.nfunc)
484	file.write("#alpha=%g\n" % self.alpha)
485	file.write("#chi2=%g\n" % self.chi2)
486	file.write("#elapsed=%g\n" % self.elapsed)
487	file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
488	if not self.out==None:
489	if len(self.out)==len(self.cov):
490	for i in range(len(self.out)):
491	file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]), str(self.cov[i][i])))
492	file.write("<r> <Pr> <dPr>\n")
493	r = pylab.arange(0.0, self.d_max, self.d_max/npts)
494
495	for r_i in r:
496	(value, err) = self.pr_err(self.out, self.cov, r_i)
497	file.write("%g %g %g\n" % (r_i, value, err))
498
499	file.close()
500
501
502	def from_file(self, path):
503	"""
504	Load the state of the Invertor from a file,
505	to be able to generate P(r) from a set of
506	parameters.
507	@param path: path of the file to load
508	"""
509	import os
510	import re
511	if os.path.isfile(path):
512	try:
513	fd = open(path, 'r')
514
515	buff = fd.read()
516	lines = buff.split('\n')
517	for line in lines:
518	if line.startswith('#d_max='):
519	toks = line.split('=')
520	self.d_max = float(toks[1])
521	elif line.startswith('#nfunc='):
522	toks = line.split('=')
523	self.nfunc = int(toks[1])
524	self.out = numpy.zeros(self.nfunc)
525	self.cov = numpy.zeros([self.nfunc, self.nfunc])
526	elif line.startswith('#alpha='):
527	toks = line.split('=')
528	self.alpha = float(toks[1])
529	elif line.startswith('#chi2='):
530	toks = line.split('=')
531	self.chi2 = float(toks[1])
532	elif line.startswith('#elapsed='):
533	toks = line.split('=')
534	self.elapsed = float(toks[1])
535	elif line.startswith('#alpha_estimate='):
536	toks = line.split('=')
537	self.suggested_alpha = float(toks[1])
538
539	# Now read in the parameters
540	elif line.startswith('#C_'):
541	toks = line.split('=')
542	p = re.compile('#C_([0-9]+)')
543	m = p.search(toks[0])
544	toks2 = toks[1].split('+-')
545	i = int(m.group(1))
546	self.out[i] = float(toks2[0])
547
548	self.cov[i][i] = float(toks2[1])
549
550	except:
551	raise RuntimeError, "Invertor.from_file: corrupted file\n%s" % sys.exc_value
552	else:
553	raise RuntimeError, "Invertor.from_file: '%s' is not a file" % str(path)
554
555
556
557
558	if __name__ == "__main__":
559	o = Invertor()
560
561
562
563
564

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasview/pr_inversion/invertor.py @ e39640f

Download in other formats: