invertor.py @ 3b69ca6

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalccostrafo411magnetic_scattrelease-4.1.1release-4.1.2release-4.2.2release_4.0.1ticket-1009ticket-1094-headlessticket-1242-2d-resolutionticket-1243ticket-1249ticket885unittest-saveload

Last change on this file since 3b69ca6 was 6e0f53a, checked in by Mathieu Doucet <doucetm@…>, 16 years ago
Minor change
Property mode set to `100644`
File size: 25.5 KB

Line
1	"""
2	Module to perform P(r) inversion.
3	The module contains the Invertor class.
4	"""
5	from sans.pr.core.pr_inversion import Cinvertor
6	import numpy
7	import sys
8	import math, time
9	from scipy.linalg.basic import lstsq
10
11	def help():
12	"""
13	Provide general online help text
14	Future work: extend this function to allow topic selection
15	"""
16	info_txt = "The inversion approach is based on Moore, J. Appl. Cryst. (1980) 13, 168-175.\n\n"
17	info_txt += "P(r) is set to be equal to an expansion of base functions of the type "
18	info_txt += "phi_n(r) = 2rsin(pinr/D_max). The coefficient of each base functions "
19	info_txt += "in the expansion is found by performing a least square fit with the "
20	info_txt += "following fit function:\n\n"
21	info_txt += "chi2 = sum_i[ I_meas(q_i) - I_th(q_i) ]2/error**2 + Reg_term\n\n"
22	info_txt += "where I_meas(q) is the measured scattering intensity and I_th(q) is "
23	info_txt += "the prediction from the Fourier transform of the P(r) expansion. "
24	info_txt += "The Reg_term term is a regularization term set to the second derivative "
25	info_txt += "d2P(r)/dr2 integrated over r. It is used to produce a smooth P(r) output.\n\n"
26	info_txt += "The following are user inputs:\n\n"
27	info_txt += " - Number of terms: the number of base functions in the P(r) expansion.\n\n"
28	info_txt += " - Regularization constant: a multiplicative constant to set the size of "
29	info_txt += "the regularization term.\n\n"
30	info_txt += " - Maximum distance: the maximum distance between any two points in the system.\n"
31
32	return info_txt
33
34
35	class Invertor(Cinvertor):
36	"""
37	Invertor class to perform P(r) inversion
38
39	The problem is solved by posing the problem as Ax = b,
40	where x is the set of coefficients we are looking for.
41
42	Npts is the number of points.
43
44	In the following i refers to the ith base function coefficient.
45	The matrix has its entries j in its first Npts rows set to
46	A[j][i] = (Fourier transformed base function for point j)
47
48	We them choose a number of r-points, n_r, to evaluate the second
49	derivative of P(r) at. This is used as our regularization term.
50	For a vector r of length n_r, the following n_r rows are set to
51	A[j+Npts][i] = (2nd derivative of P(r), d2(P(r))/d(r)2, evaluated at r[j])
52
53	The vector b has its first Npts entries set to
54	b[j] = (I(q) observed for point j)
55
56	The following n_r entries are set to zero.
57
58	The result is found by using scipy.linalg.basic.lstsq to invert
59	the matrix and find the coefficients x.
60
61	Methods inherited from Cinvertor:
62	- get_peaks(pars): returns the number of P(r) peaks
63	- oscillations(pars): returns the oscillation parameters for the output P(r)
64	- get_positive(pars): returns the fraction of P(r) that is above zero
65	- get_pos_err(pars): returns the fraction of P(r) that is 1-sigma above zero
66	"""
67	## Chisqr of the last computation
68	chi2 = 0
69	## Time elapsed for last computation
70	elapsed = 0
71	## Alpha to get the reg term the same size as the signal
72	suggested_alpha = 0
73	## Last number of base functions used
74	nfunc = 10
75	## Last output values
76	out = None
77	## Last errors on output values
78	cov = None
79	## Background value
80	background = 0
81
82
83	def __init__(self):
84	Cinvertor.__init__(self)
85
86	def __setattr__(self, name, value):
87	"""
88	Set the value of an attribute.
89	Access the parent class methods for
90	x, y, err, d_max, q_min, q_max and alpha
91	"""
92	if name=='x':
93	if 0.0 in value:
94	raise ValueError, "Invertor: one of your q-values is zero. Delete that entry before proceeding"
95	return self.set_x(value)
96	elif name=='y':
97	return self.set_y(value)
98	elif name=='err':
99	value2 = abs(value)
100	return self.set_err(value2)
101	elif name=='d_max':
102	return self.set_dmax(value)
103	elif name=='q_min':
104	if value==None:
105	return self.set_qmin(-1.0)
106	return self.set_qmin(value)
107	elif name=='q_max':
108	if value==None:
109	return self.set_qmax(-1.0)
110	return self.set_qmax(value)
111	elif name=='alpha':
112	return self.set_alpha(value)
113	elif name=='slit_height':
114	return self.set_slit_height(value)
115	elif name=='slit_width':
116	return self.set_slit_width(value)
117	elif name=='has_bck':
118	if value==True:
119	return self.set_has_bck(1)
120	elif value==False:
121	return self.set_has_bck(0)
122	else:
123	raise ValueError, "Invertor: has_bck can only be True or False"
124
125	return Cinvertor.__setattr__(self, name, value)
126
127	def __getattr__(self, name):
128	"""
129	Return the value of an attribute
130	"""
131	import numpy
132	if name=='x':
133	out = numpy.ones(self.get_nx())
134	self.get_x(out)
135	return out
136	elif name=='y':
137	out = numpy.ones(self.get_ny())
138	self.get_y(out)
139	return out
140	elif name=='err':
141	out = numpy.ones(self.get_nerr())
142	self.get_err(out)
143	return out
144	elif name=='d_max':
145	return self.get_dmax()
146	elif name=='q_min':
147	qmin = self.get_qmin()
148	if qmin<0:
149	return None
150	return qmin
151	elif name=='q_max':
152	qmax = self.get_qmax()
153	if qmax<0:
154	return None
155	return qmax
156	elif name=='alpha':
157	return self.get_alpha()
158	elif name=='slit_height':
159	return self.get_slit_height()
160	elif name=='slit_width':
161	return self.get_slit_width()
162	elif name=='has_bck':
163	value = self.get_has_bck()
164	if value==1:
165	return True
166	else:
167	return False
168	elif name in self.__dict__:
169	return self.__dict__[name]
170	return None
171
172	def clone(self):
173	"""
174	Return a clone of this instance
175	"""
176	invertor = Invertor()
177	invertor.chi2 = self.chi2
178	invertor.elapsed = self.elapsed
179	invertor.nfunc = self.nfunc
180	invertor.alpha = self.alpha
181	invertor.d_max = self.d_max
182	invertor.q_min = self.q_min
183	invertor.q_max = self.q_max
184
185	invertor.x = self.x
186	invertor.y = self.y
187	invertor.err = self.err
188	invertor.has_bck = self.has_bck
189	invertor.slit_height = self.slit_height
190	invertor.slit_width = self.slit_width
191
192	return invertor
193
194	def invert(self, nfunc=10, nr=20):
195	"""
196	Perform inversion to P(r)
197
198	The problem is solved by posing the problem as Ax = b,
199	where x is the set of coefficients we are looking for.
200
201	Npts is the number of points.
202
203	In the following i refers to the ith base function coefficient.
204	The matrix has its entries j in its first Npts rows set to
205	A[i][j] = (Fourier transformed base function for point j)
206
207	We them choose a number of r-points, n_r, to evaluate the second
208	derivative of P(r) at. This is used as our regularization term.
209	For a vector r of length n_r, the following n_r rows are set to
210	A[i+Npts][j] = (2nd derivative of P(r), d2(P(r))/d(r)2, evaluated at r[j])
211
212	The vector b has its first Npts entries set to
213	b[j] = (I(q) observed for point j)
214
215	The following n_r entries are set to zero.
216
217	The result is found by using scipy.linalg.basic.lstsq to invert
218	the matrix and find the coefficients x.
219
220	@param nfunc: number of base functions to use.
221	@param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
222	@return: c_out, c_cov - the coefficients with covariance matrix
223	"""
224	# Reset the background value before proceeding
225	self.background = 0.0
226	return self.lstsq(nfunc, nr=nr)
227
228	def iq(self, out, q):
229	"""
230	Function to call to evaluate the scattering intensity
231	@param args: c-parameters, and q
232	@return: I(q)
233	"""
234	return Cinvertor.iq(self, out, q)+self.background
235
236	def invert_optimize(self, nfunc=10, nr=20):
237	"""
238	Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
239
240	This probably produce more reliable results, but is much slower.
241	The minimization function is set to sum_i[ (I_obs(q_i) - I_theo(q_i))/err*2 ] + alpha reg_term,
242	where the reg_term is given by Svergun: it is the integral of the square of the first derivative
243	of P(r), d(P(r))/dr, integrated over the full range of r.
244
245	@param nfunc: number of base functions to use.
246	@param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
247	@return: c_out, c_cov - the coefficients with covariance matrix
248	"""
249
250	from scipy import optimize
251	import time
252
253	self.nfunc = nfunc
254	# First, check that the current data is valid
255	if self.is_valid()<=0:
256	raise RuntimeError, "Invertor.invert: Data array are of different length"
257
258	p = numpy.ones(nfunc)
259	t_0 = time.time()
260	out, cov_x, info, mesg, success = optimize.leastsq(self.residuals, p, full_output=1, warning=True)
261
262	# Compute chi^2
263	res = self.residuals(out)
264	chisqr = 0
265	for i in range(len(res)):
266	chisqr += res[i]
267
268	self.chi2 = chisqr
269
270	# Store computation time
271	self.elapsed = time.time() - t_0
272
273	return out, cov_x
274
275	def pr_fit(self, nfunc=5):
276	"""
277	This is a direct fit to a given P(r). It assumes that the y data
278	is set to some P(r) distribution that we are trying to reproduce
279	with a set of base functions.
280
281	This method is provided as a test.
282	"""
283	from scipy import optimize
284
285	# First, check that the current data is valid
286	if self.is_valid()<=0:
287	raise RuntimeError, "Invertor.invert: Data arrays are of different length"
288
289	p = numpy.ones(nfunc)
290	t_0 = time.time()
291	out, cov_x, info, mesg, success = optimize.leastsq(self.pr_residuals, p, full_output=1, warning=True)
292
293	# Compute chi^2
294	res = self.pr_residuals(out)
295	chisqr = 0
296	for i in range(len(res)):
297	chisqr += res[i]
298
299	self.chisqr = chisqr
300
301	# Store computation time
302	self.elapsed = time.time() - t_0
303
304	return out, cov_x
305
306	def pr_err(self, c, c_cov, r):
307	"""
308	Returns the value of P(r) for a given r, and base function
309	coefficients, with error.
310
311	@param c: base function coefficients
312	@param c_cov: covariance matrice of the base function coefficients
313	@param r: r-value to evaluate P(r) at
314	@return: P(r)
315	"""
316	return self.get_pr_err(c, c_cov, r)
317
318	def _accept_q(self, q):
319	"""
320	Check q-value against user-defined range
321	"""
322	if not self.q_min==None and q<self.q_min:
323	return False
324	if not self.q_max==None and q>self.q_max:
325	return False
326	return True
327
328	def lstsq(self, nfunc=5, nr=20):
329	"""
330	The problem is solved by posing the problem as Ax = b,
331	where x is the set of coefficients we are looking for.
332
333	Npts is the number of points.
334
335	In the following i refers to the ith base function coefficient.
336	The matrix has its entries j in its first Npts rows set to
337	A[i][j] = (Fourier transformed base function for point j)
338
339	We them choose a number of r-points, n_r, to evaluate the second
340	derivative of P(r) at. This is used as our regularization term.
341	For a vector r of length n_r, the following n_r rows are set to
342	A[i+Npts][j] = (2nd derivative of P(r), d2(P(r))/d(r)2, evaluated at r[j])
343
344	The vector b has its first Npts entries set to
345	b[j] = (I(q) observed for point j)
346
347	The following n_r entries are set to zero.
348
349	The result is found by using scipy.linalg.basic.lstsq to invert
350	the matrix and find the coefficients x.
351
352	@param nfunc: number of base functions to use.
353	@param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
354
355	If the result does not allow us to compute the covariance matrix,
356	a matrix filled with zeros will be returned.
357
358	"""
359	# Note: To make sure an array is contiguous:
360	# blah = numpy.ascontiguousarray(blah_original)
361	# ... before passing it to C
362
363	if self.is_valid()<0:
364	raise RuntimeError, "Invertor: invalid data; incompatible data lengths."
365
366	self.nfunc = nfunc
367	# a -- An M x N matrix.
368	# b -- An M x nrhs matrix or M vector.
369	npts = len(self.x)
370	nq = nr
371	sqrt_alpha = math.sqrt(math.fabs(self.alpha))
372	if sqrt_alpha<0.0:
373	nq = 0
374
375	# If we need to fit the background, add a term
376	if self.has_bck==True:
377	nfunc_0 = nfunc
378	nfunc += 1
379
380	a = numpy.zeros([npts+nq, nfunc])
381	b = numpy.zeros(npts+nq)
382	err = numpy.zeros([nfunc, nfunc])
383
384	# Construct the a matrix and b vector that represent the problem
385	t_0 = time.time()
386	self._get_matrix(nfunc, nq, a, b)
387
388	# Perform the inversion (least square fit)
389	c, chi2, rank, n = lstsq(a, b)
390	# Sanity check
391	try:
392	float(chi2)
393	except:
394	chi2 = -1.0
395	self.chi2 = chi2
396
397	inv_cov = numpy.zeros([nfunc,nfunc])
398	# Get the covariance matrix, defined as inv_cov = a_transposed * a
399	self._get_invcov_matrix(nfunc, nr, a, inv_cov)
400
401	# Compute the reg term size for the output
402	sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
403
404	if math.fabs(self.alpha)>0:
405	new_alpha = sum_sig/(sum_reg/self.alpha)
406	else:
407	new_alpha = 0.0
408	self.suggested_alpha = new_alpha
409
410	try:
411	cov = numpy.linalg.pinv(inv_cov)
412	err = math.fabs(chi2/float(npts-nfunc)) * cov
413	except:
414	# We were not able to estimate the errors
415	# Return an empty error matrix
416	pass
417
418	# Keep a copy of the last output
419	if self.has_bck==False:
420	self.background = 0
421	self.out = c
422	self.cov = err
423	else:
424	self.background = c[0]
425
426	err_0 = numpy.zeros([nfunc, nfunc])
427	c_0 = numpy.zeros(nfunc)
428
429	for i in range(nfunc_0):
430	c_0[i] = c[i+1]
431	for j in range(nfunc_0):
432	err_0[i][j] = err[i+1][j+1]
433
434	self.out = c_0
435	self.cov = err_0
436
437	return self.out, self.cov
438
439	def estimate_numterms(self, isquit_func=None):
440	"""
441	Returns a reasonable guess for the
442	number of terms
443	@param isquit_func: reference to thread function to call to
444	check whether the computation needs to
445	be stopped.
446
447	@return: number of terms, alpha, message
448	"""
449	from num_term import Num_terms
450	estimator = Num_terms(self.clone())
451	try:
452	return estimator.num_terms(isquit_func)
453	except:
454	# If we fail, estimate alpha and return the default
455	# number of terms
456	best_alpha, message, elapsed =self.estimate_alpha(self.nfunc)
457	return self.nfunc, best_alpha, "Could not estimate number of terms"
458
459	def estimate_alpha(self, nfunc):
460	"""
461	Returns a reasonable guess for the
462	regularization constant alpha
463
464	@param nfunc: number of terms to use in the expansion.
465	@return: alpha, message, elapsed
466
467	where alpha is the estimate for alpha,
468	message is a message for the user,
469	elapsed is the computation time
470	"""
471	import time
472	try:
473	pr = self.clone()
474
475	# T_0 for computation time
476	starttime = time.time()
477	elapsed = 0
478
479	# If the current alpha is zero, try
480	# another value
481	if pr.alpha<=0:
482	pr.alpha = 0.0001
483
484	# Perform inversion to find the largest alpha
485	out, cov = pr.invert(nfunc)
486	elapsed = time.time()-starttime
487	initial_alpha = pr.alpha
488	initial_peaks = pr.get_peaks(out)
489
490	# Try the inversion with the estimated alpha
491	pr.alpha = pr.suggested_alpha
492	out, cov = pr.invert(nfunc)
493
494	npeaks = pr.get_peaks(out)
495	# if more than one peak to start with
496	# just return the estimate
497	if npeaks>1:
498	#message = "Your P(r) is not smooth, please check your inversion parameters"
499	message = None
500	return pr.suggested_alpha, message, elapsed
501	else:
502
503	# Look at smaller values
504	# We assume that for the suggested alpha, we have 1 peak
505	# if not, send a message to change parameters
506	alpha = pr.suggested_alpha
507	best_alpha = pr.suggested_alpha
508	found = False
509	for i in range(10):
510	pr.alpha = (0.33)*(i+1)alpha
511	out, cov = pr.invert(nfunc)
512
513	peaks = pr.get_peaks(out)
514	if peaks>1:
515	found = True
516	break
517	best_alpha = pr.alpha
518
519	# If we didn't find a turning point for alpha and
520	# the initial alpha already had only one peak,
521	# just return that
522	if not found and initial_peaks==1 and initial_alpha<best_alpha:
523	best_alpha = initial_alpha
524
525	# Check whether the size makes sense
526	message=''
527
528	if not found:
529	message = "None"
530	elif best_alpha>=0.5*pr.suggested_alpha:
531	# best alpha is too big, return a
532	# reasonable value
533	message = "The estimated alpha for your system is too large. "
534	message += "Try increasing your maximum distance."
535
536	return best_alpha, message, elapsed
537
538	except:
539	message = "Invertor.estimate_alpha: %s" % sys.exc_value
540	return 0, message, elapsed
541
542
543	def to_file(self, path, npts=100):
544	"""
545	Save the state to a file that will be readable
546	by SliceView.
547	@param path: path of the file to write
548	@param npts: number of P(r) points to be written
549	"""
550	import pylab
551
552	file = open(path, 'w')
553	file.write("#d_max=%g\n" % self.d_max)
554	file.write("#nfunc=%g\n" % self.nfunc)
555	file.write("#alpha=%g\n" % self.alpha)
556	file.write("#chi2=%g\n" % self.chi2)
557	file.write("#elapsed=%g\n" % self.elapsed)
558	file.write("#qmin=%s\n" % str(self.q_min))
559	file.write("#qmax=%s\n" % str(self.q_max))
560	file.write("#slit_height=%g\n" % self.slit_height)
561	file.write("#slit_width=%g\n" % self.slit_width)
562	file.write("#background=%g\n" % self.background)
563	if self.has_bck==True:
564	file.write("#has_bck=1\n")
565	else:
566	file.write("#has_bck=0\n")
567	file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
568	if not self.out==None:
569	if len(self.out)==len(self.cov):
570	for i in range(len(self.out)):
571	file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]), str(self.cov[i][i])))
572	file.write("<r> <Pr> <dPr>\n")
573	r = pylab.arange(0.0, self.d_max, self.d_max/npts)
574
575	for r_i in r:
576	(value, err) = self.pr_err(self.out, self.cov, r_i)
577	file.write("%g %g %g\n" % (r_i, value, err))
578
579	file.close()
580
581
582	def from_file(self, path):
583	"""
584	Load the state of the Invertor from a file,
585	to be able to generate P(r) from a set of
586	parameters.
587	@param path: path of the file to load
588	"""
589	import os
590	import re
591	if os.path.isfile(path):
592	try:
593	fd = open(path, 'r')
594
595	buff = fd.read()
596	lines = buff.split('\n')
597	for line in lines:
598	if line.startswith('#d_max='):
599	toks = line.split('=')
600	self.d_max = float(toks[1])
601	elif line.startswith('#nfunc='):
602	toks = line.split('=')
603	self.nfunc = int(toks[1])
604	self.out = numpy.zeros(self.nfunc)
605	self.cov = numpy.zeros([self.nfunc, self.nfunc])
606	elif line.startswith('#alpha='):
607	toks = line.split('=')
608	self.alpha = float(toks[1])
609	elif line.startswith('#chi2='):
610	toks = line.split('=')
611	self.chi2 = float(toks[1])
612	elif line.startswith('#elapsed='):
613	toks = line.split('=')
614	self.elapsed = float(toks[1])
615	elif line.startswith('#alpha_estimate='):
616	toks = line.split('=')
617	self.suggested_alpha = float(toks[1])
618	elif line.startswith('#qmin='):
619	toks = line.split('=')
620	try:
621	self.q_min = float(toks[1])
622	except:
623	self.q_min = None
624	elif line.startswith('#qmax='):
625	toks = line.split('=')
626	try:
627	self.q_max = float(toks[1])
628	except:
629	self.q_max = None
630	elif line.startswith('#slit_height='):
631	toks = line.split('=')
632	self.slit_height = float(toks[1])
633	elif line.startswith('#slit_width='):
634	toks = line.split('=')
635	self.slit_width = float(toks[1])
636	elif line.startswith('#background='):
637	toks = line.split('=')
638	self.background = float(toks[1])
639	elif line.startswith('#has_bck='):
640	toks = line.split('=')
641	if int(toks[1])==1:
642	self.has_bck=True
643	else:
644	self.has_bck=False
645
646	# Now read in the parameters
647	elif line.startswith('#C_'):
648	toks = line.split('=')
649	p = re.compile('#C_([0-9]+)')
650	m = p.search(toks[0])
651	toks2 = toks[1].split('+-')
652	i = int(m.group(1))
653	self.out[i] = float(toks2[0])
654
655	self.cov[i][i] = float(toks2[1])
656
657	except:
658	raise RuntimeError, "Invertor.from_file: corrupted file\n%s" % sys.exc_value
659	else:
660	raise RuntimeError, "Invertor.from_file: '%s' is not a file" % str(path)
661
662
663
664
665	if __name__ == "__main__":
666	o = Invertor()
667
668
669
670
671

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasview/pr_inversion/invertor.py @ 3b69ca6

Download in other formats: