invertor.py @ 792db7d5

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalccostrafo411magnetic_scattrelease-4.1.1release-4.1.2release-4.2.2release_4.0.1ticket-1009ticket-1094-headlessticket-1242-2d-resolutionticket-1243ticket-1249ticket885unittest-saveload

Last change on this file since 792db7d5 was 7578961, checked in by Mathieu Doucet <doucetm@…>, 16 years ago
Small improvements
Property mode set to `100644`
File size: 25.7 KB

Line
1	"""
2	Module to perform P(r) inversion.
3	The module contains the Invertor class.
4	"""
5	from sans.pr.core.pr_inversion import Cinvertor
6	import numpy
7	import sys
8	import math, time
9	from scipy.linalg.basic import lstsq
10
11	def help():
12	"""
13	Provide general online help text
14	Future work: extend this function to allow topic selection
15	"""
16	info_txt = "The inversion approach is based on Moore, J. Appl. Cryst. (1980) 13, 168-175.\n\n"
17	info_txt += "P(r) is set to be equal to an expansion of base functions of the type "
18	info_txt += "phi_n(r) = 2rsin(pinr/D_max). The coefficient of each base functions "
19	info_txt += "in the expansion is found by performing a least square fit with the "
20	info_txt += "following fit function:\n\n"
21	info_txt += "chi2 = sum_i[ I_meas(q_i) - I_th(q_i) ]2/error**2 + Reg_term\n\n"
22	info_txt += "where I_meas(q) is the measured scattering intensity and I_th(q) is "
23	info_txt += "the prediction from the Fourier transform of the P(r) expansion. "
24	info_txt += "The Reg_term term is a regularization term set to the second derivative "
25	info_txt += "d2P(r)/dr2 integrated over r. It is used to produce a smooth P(r) output.\n\n"
26	info_txt += "The following are user inputs:\n\n"
27	info_txt += " - Number of terms: the number of base functions in the P(r) expansion.\n\n"
28	info_txt += " - Regularization constant: a multiplicative constant to set the size of "
29	info_txt += "the regularization term.\n\n"
30	info_txt += " - Maximum distance: the maximum distance between any two points in the system.\n"
31
32	return info_txt
33
34
35	class Invertor(Cinvertor):
36	"""
37	Invertor class to perform P(r) inversion
38
39	The problem is solved by posing the problem as Ax = b,
40	where x is the set of coefficients we are looking for.
41
42	Npts is the number of points.
43
44	In the following i refers to the ith base function coefficient.
45	The matrix has its entries j in its first Npts rows set to
46	A[j][i] = (Fourier transformed base function for point j)
47
48	We them choose a number of r-points, n_r, to evaluate the second
49	derivative of P(r) at. This is used as our regularization term.
50	For a vector r of length n_r, the following n_r rows are set to
51	A[j+Npts][i] = (2nd derivative of P(r), d2(P(r))/d(r)2, evaluated at r[j])
52
53	The vector b has its first Npts entries set to
54	b[j] = (I(q) observed for point j)
55
56	The following n_r entries are set to zero.
57
58	The result is found by using scipy.linalg.basic.lstsq to invert
59	the matrix and find the coefficients x.
60
61	Methods inherited from Cinvertor:
62	- get_peaks(pars): returns the number of P(r) peaks
63	- oscillations(pars): returns the oscillation parameters for the output P(r)
64	- get_positive(pars): returns the fraction of P(r) that is above zero
65	- get_pos_err(pars): returns the fraction of P(r) that is 1-sigma above zero
66	"""
67	#TODO: Allow for slit smearing. Smear each base function once before filling
68	# the A matrix.
69
70	## Chisqr of the last computation
71	chi2 = 0
72	## Time elapsed for last computation
73	elapsed = 0
74	## Alpha to get the reg term the same size as the signal
75	suggested_alpha = 0
76	## Last number of base functions used
77	nfunc = 10
78	## Last output values
79	out = None
80	## Last errors on output values
81	cov = None
82	## Flag to allow I(q) data with constant background
83	#has_bck = False
84	## Background value
85	background = 0
86
87
88	def __init__(self):
89	Cinvertor.__init__(self)
90
91	def __setattr__(self, name, value):
92	"""
93	Set the value of an attribute.
94	Access the parent class methods for
95	x, y, err, d_max, q_min, q_max and alpha
96	"""
97	if name=='x':
98	if 0.0 in value:
99	raise ValueError, "Invertor: one of your q-values is zero. Delete that entry before proceeding"
100	return self.set_x(value)
101	elif name=='y':
102	return self.set_y(value)
103	elif name=='err':
104	value2 = abs(value)
105	return self.set_err(value2)
106	elif name=='d_max':
107	return self.set_dmax(value)
108	elif name=='q_min':
109	if value==None:
110	return self.set_qmin(-1.0)
111	return self.set_qmin(value)
112	elif name=='q_max':
113	if value==None:
114	return self.set_qmax(-1.0)
115	return self.set_qmax(value)
116	elif name=='alpha':
117	return self.set_alpha(value)
118	elif name=='slit_height':
119	return self.set_slit_height(value)
120	elif name=='slit_width':
121	return self.set_slit_width(value)
122	elif name=='has_bck':
123	if value==True:
124	return self.set_has_bck(1)
125	elif value==False:
126	return self.set_has_bck(0)
127	else:
128	raise ValueError, "Invertor: has_bck can only be True or False"
129
130	return Cinvertor.__setattr__(self, name, value)
131
132	def __getattr__(self, name):
133	"""
134	Return the value of an attribute
135	For the moment x, y, err and d_max are write-only
136	TODO: change that!
137	"""
138	import numpy
139	if name=='x':
140	out = numpy.ones(self.get_nx())
141	self.get_x(out)
142	return out
143	elif name=='y':
144	out = numpy.ones(self.get_ny())
145	self.get_y(out)
146	return out
147	elif name=='err':
148	out = numpy.ones(self.get_nerr())
149	self.get_err(out)
150	return out
151	elif name=='d_max':
152	return self.get_dmax()
153	elif name=='q_min':
154	qmin = self.get_qmin()
155	if qmin<0:
156	return None
157	return qmin
158	elif name=='q_max':
159	qmax = self.get_qmax()
160	if qmax<0:
161	return None
162	return qmax
163	elif name=='alpha':
164	return self.get_alpha()
165	elif name=='slit_height':
166	return self.get_slit_height()
167	elif name=='slit_width':
168	return self.get_slit_width()
169	elif name=='has_bck':
170	value = self.get_has_bck()
171	if value==1:
172	return True
173	else:
174	return False
175	elif name in self.__dict__:
176	return self.__dict__[name]
177	return None
178
179	def clone(self):
180	"""
181	Return a clone of this instance
182	"""
183	invertor = Invertor()
184	invertor.chi2 = self.chi2
185	invertor.elapsed = self.elapsed
186	invertor.alpha = self.alpha
187	invertor.d_max = self.d_max
188	invertor.q_min = self.q_min
189	invertor.q_max = self.q_max
190
191	invertor.x = self.x
192	invertor.y = self.y
193	invertor.err = self.err
194	invertor.has_bck = self.has_bck
195	invertor.slit_height = self.slit_height
196	invertor.slit_width = self.slit_width
197
198	return invertor
199
200	def invert(self, nfunc=10, nr=20):
201	"""
202	Perform inversion to P(r)
203
204	The problem is solved by posing the problem as Ax = b,
205	where x is the set of coefficients we are looking for.
206
207	Npts is the number of points.
208
209	In the following i refers to the ith base function coefficient.
210	The matrix has its entries j in its first Npts rows set to
211	A[i][j] = (Fourier transformed base function for point j)
212
213	We them choose a number of r-points, n_r, to evaluate the second
214	derivative of P(r) at. This is used as our regularization term.
215	For a vector r of length n_r, the following n_r rows are set to
216	A[i+Npts][j] = (2nd derivative of P(r), d2(P(r))/d(r)2, evaluated at r[j])
217
218	The vector b has its first Npts entries set to
219	b[j] = (I(q) observed for point j)
220
221	The following n_r entries are set to zero.
222
223	The result is found by using scipy.linalg.basic.lstsq to invert
224	the matrix and find the coefficients x.
225
226	@param nfunc: number of base functions to use.
227	@param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
228	@return: c_out, c_cov - the coefficients with covariance matrix
229	"""
230	# Reset the background value before proceeding
231	self.background = 0.0
232	return self.lstsq(nfunc, nr=nr)
233
234	def iq(self, out, q):
235	"""
236	Function to call to evaluate the scattering intensity
237	@param args: c-parameters, and q
238	@return: I(q)
239	"""
240	return Cinvertor.iq(self, out, q)+self.background
241
242	def invert_optimize(self, nfunc=10, nr=20):
243	"""
244	Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
245
246	This probably produce more reliable results, but is much slower.
247	The minimization function is set to sum_i[ (I_obs(q_i) - I_theo(q_i))/err*2 ] + alpha reg_term,
248	where the reg_term is given by Svergun: it is the integral of the square of the first derivative
249	of P(r), d(P(r))/dr, integrated over the full range of r.
250
251	@param nfunc: number of base functions to use.
252	@param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
253	@return: c_out, c_cov - the coefficients with covariance matrix
254	"""
255
256	from scipy import optimize
257	import time
258
259	self.nfunc = nfunc
260	# First, check that the current data is valid
261	if self.is_valid()<=0:
262	raise RuntimeError, "Invertor.invert: Data array are of different length"
263
264	p = numpy.ones(nfunc)
265	t_0 = time.time()
266	out, cov_x, info, mesg, success = optimize.leastsq(self.residuals, p, full_output=1, warning=True)
267
268	# Compute chi^2
269	res = self.residuals(out)
270	chisqr = 0
271	for i in range(len(res)):
272	chisqr += res[i]
273
274	self.chi2 = chisqr
275
276	# Store computation time
277	self.elapsed = time.time() - t_0
278
279	return out, cov_x
280
281	def pr_fit(self, nfunc=5):
282	"""
283	This is a direct fit to a given P(r). It assumes that the y data
284	is set to some P(r) distribution that we are trying to reproduce
285	with a set of base functions.
286
287	This method is provided as a test.
288	"""
289	from scipy import optimize
290
291	# First, check that the current data is valid
292	if self.is_valid()<=0:
293	raise RuntimeError, "Invertor.invert: Data arrays are of different length"
294
295	p = numpy.ones(nfunc)
296	t_0 = time.time()
297	out, cov_x, info, mesg, success = optimize.leastsq(self.pr_residuals, p, full_output=1, warning=True)
298
299	# Compute chi^2
300	res = self.pr_residuals(out)
301	chisqr = 0
302	for i in range(len(res)):
303	chisqr += res[i]
304
305	self.chisqr = chisqr
306
307	# Store computation time
308	self.elapsed = time.time() - t_0
309
310	return out, cov_x
311
312	def pr_err(self, c, c_cov, r):
313	"""
314	Returns the value of P(r) for a given r, and base function
315	coefficients, with error.
316
317	@param c: base function coefficients
318	@param c_cov: covariance matrice of the base function coefficients
319	@param r: r-value to evaluate P(r) at
320	@return: P(r)
321	"""
322	return self.get_pr_err(c, c_cov, r)
323
324	def _accept_q(self, q):
325	"""
326	Check q-value against user-defined range
327	"""
328	if not self.q_min==None and q<self.q_min:
329	return False
330	if not self.q_max==None and q>self.q_max:
331	return False
332	return True
333
334	def lstsq(self, nfunc=5, nr=20):
335	"""
336	The problem is solved by posing the problem as Ax = b,
337	where x is the set of coefficients we are looking for.
338
339	Npts is the number of points.
340
341	In the following i refers to the ith base function coefficient.
342	The matrix has its entries j in its first Npts rows set to
343	A[i][j] = (Fourier transformed base function for point j)
344
345	We them choose a number of r-points, n_r, to evaluate the second
346	derivative of P(r) at. This is used as our regularization term.
347	For a vector r of length n_r, the following n_r rows are set to
348	A[i+Npts][j] = (2nd derivative of P(r), d2(P(r))/d(r)2, evaluated at r[j])
349
350	The vector b has its first Npts entries set to
351	b[j] = (I(q) observed for point j)
352
353	The following n_r entries are set to zero.
354
355	The result is found by using scipy.linalg.basic.lstsq to invert
356	the matrix and find the coefficients x.
357
358	@param nfunc: number of base functions to use.
359	@param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
360
361	If the result does not allow us to compute the covariance matrix,
362	a matrix filled with zeros will be returned.
363
364	"""
365	# Note: To make sure an array is contiguous:
366	# blah = numpy.ascontiguousarray(blah_original)
367	# ... before passing it to C
368
369	if self.is_valid()<0:
370	raise RuntimeError, "Invertor: invalid data; incompatible data lengths."
371
372	self.nfunc = nfunc
373	# a -- An M x N matrix.
374	# b -- An M x nrhs matrix or M vector.
375	npts = len(self.x)
376	nq = nr
377	sqrt_alpha = math.sqrt(math.fabs(self.alpha))
378	if sqrt_alpha<0.0:
379	nq = 0
380
381	# If we need to fit the background, add a term
382	if self.has_bck==True:
383	nfunc_0 = nfunc
384	nfunc += 1
385
386	a = numpy.zeros([npts+nq, nfunc])
387	b = numpy.zeros(npts+nq)
388	err = numpy.zeros([nfunc, nfunc])
389
390	# Construct the a matrix and b vector that represent the problem
391	t_0 = time.time()
392	self._get_matrix(nfunc, nq, a, b)
393
394	# Perform the inversion (least square fit)
395	c, chi2, rank, n = lstsq(a, b)
396	# Sanity check
397	try:
398	float(chi2)
399	except:
400	chi2 = -1.0
401	self.chi2 = chi2
402
403	inv_cov = numpy.zeros([nfunc,nfunc])
404	# Get the covariance matrix, defined as inv_cov = a_transposed * a
405	self._get_invcov_matrix(nfunc, nr, a, inv_cov)
406
407	# Compute the reg term size for the output
408	sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
409
410	if math.fabs(self.alpha)>0:
411	new_alpha = sum_sig/(sum_reg/self.alpha)
412	else:
413	new_alpha = 0.0
414	self.suggested_alpha = new_alpha
415
416	try:
417	cov = numpy.linalg.pinv(inv_cov)
418	err = math.fabs(chi2/float(npts-nfunc)) * cov
419	except:
420	# We were not able to estimate the errors
421	# Return an empty error matrix
422	pass
423
424	# Keep a copy of the last output
425	if self.has_bck==False:
426	self.background = 0
427	self.out = c
428	self.cov = err
429	else:
430	self.background = c[0]
431
432	err_0 = numpy.zeros([nfunc, nfunc])
433	c_0 = numpy.zeros(nfunc)
434
435	for i in range(nfunc_0):
436	c_0[i] = c[i+1]
437	for j in range(nfunc_0):
438	err_0[i][j] = err[i+1][j+1]
439
440	self.out = c_0
441	self.cov = err_0
442
443	return self.out, self.cov
444
445	def estimate_numterms(self, isquit_func=None):
446	"""
447	Returns a reasonable guess for the
448	number of terms
449	@param isquit_func: reference to thread function to call to
450	check whether the computation needs to
451	be stopped.
452
453	@return: number of terms, alpha, message
454	"""
455	from num_term import Num_terms
456	estimator = Num_terms(self.clone())
457	try:
458	return estimator.num_terms(isquit_func)
459	except:
460	# If we fail, estimate alpha and return the default
461	# number of terms
462	best_alpha, message, elapsed =self.estimate_alpha(self.nfunc)
463	return self.nfunc, best_alpha, "Could not estimate number of terms"
464
465	def estimate_alpha(self, nfunc):
466	"""
467	Returns a reasonable guess for the
468	regularization constant alpha
469
470	@param nfunc: number of terms to use in the expansion.
471	@return: alpha, message, elapsed
472
473	where alpha is the estimate for alpha,
474	message is a message for the user,
475	elapsed is the computation time
476	"""
477	import time
478	try:
479	pr = self.clone()
480
481	# T_0 for computation time
482	starttime = time.time()
483	elapsed = 0
484
485	# If the current alpha is zero, try
486	# another value
487	if pr.alpha<=0:
488	pr.alpha = 0.0001
489
490	# Perform inversion to find the largest alpha
491	out, cov = pr.invert(nfunc)
492	elapsed = time.time()-starttime
493	initial_alpha = pr.alpha
494	initial_peaks = pr.get_peaks(out)
495
496	# Try the inversion with the estimated alpha
497	pr.alpha = pr.suggested_alpha
498	out, cov = pr.invert(nfunc)
499
500	npeaks = pr.get_peaks(out)
501	# if more than one peak to start with
502	# just return the estimate
503	if npeaks>1:
504	#message = "Your P(r) is not smooth, please check your inversion parameters"
505	message = None
506	return pr.suggested_alpha, message, elapsed
507	else:
508
509	# Look at smaller values
510	# We assume that for the suggested alpha, we have 1 peak
511	# if not, send a message to change parameters
512	alpha = pr.suggested_alpha
513	best_alpha = pr.suggested_alpha
514	found = False
515	for i in range(10):
516	pr.alpha = (0.33)*(i+1)alpha
517	out, cov = pr.invert(nfunc)
518
519	peaks = pr.get_peaks(out)
520	if peaks>1:
521	found = True
522	break
523	best_alpha = pr.alpha
524
525	# If we didn't find a turning point for alpha and
526	# the initial alpha already had only one peak,
527	# just return that
528	if not found and initial_peaks==1 and initial_alpha<best_alpha:
529	best_alpha = initial_alpha
530
531	# Check whether the size makes sense
532	message=''
533
534	if not found:
535	message = "None"
536	elif best_alpha>=0.5*pr.suggested_alpha:
537	# best alpha is too big, return a
538	# reasonable value
539	message = "The estimated alpha for your system is too large. "
540	message += "Try increasing your maximum distance."
541
542	return best_alpha, message, elapsed
543
544	except:
545	message = "Invertor.estimate_alpha: %s" % sys.exc_value
546	return 0, message, elapsed
547
548
549	def to_file(self, path, npts=100):
550	"""
551	Save the state to a file that will be readable
552	by SliceView.
553	@param path: path of the file to write
554	@param npts: number of P(r) points to be written
555	"""
556	import pylab
557
558	file = open(path, 'w')
559	file.write("#d_max=%g\n" % self.d_max)
560	file.write("#nfunc=%g\n" % self.nfunc)
561	file.write("#alpha=%g\n" % self.alpha)
562	file.write("#chi2=%g\n" % self.chi2)
563	file.write("#elapsed=%g\n" % self.elapsed)
564	file.write("#qmin=%s\n" % str(self.q_min))
565	file.write("#qmax=%s\n" % str(self.q_max))
566	file.write("#slit_height=%g\n" % self.slit_height)
567	file.write("#slit_width=%g\n" % self.slit_width)
568	file.write("#background=%g\n" % self.background)
569	if self.has_bck==True:
570	file.write("#has_bck=1\n")
571	else:
572	file.write("#has_bck=0\n")
573	file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
574	if not self.out==None:
575	if len(self.out)==len(self.cov):
576	for i in range(len(self.out)):
577	file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]), str(self.cov[i][i])))
578	file.write("<r> <Pr> <dPr>\n")
579	r = pylab.arange(0.0, self.d_max, self.d_max/npts)
580
581	for r_i in r:
582	(value, err) = self.pr_err(self.out, self.cov, r_i)
583	file.write("%g %g %g\n" % (r_i, value, err))
584
585	file.close()
586
587
588	def from_file(self, path):
589	"""
590	Load the state of the Invertor from a file,
591	to be able to generate P(r) from a set of
592	parameters.
593	@param path: path of the file to load
594	"""
595	import os
596	import re
597	if os.path.isfile(path):
598	try:
599	fd = open(path, 'r')
600
601	buff = fd.read()
602	lines = buff.split('\n')
603	for line in lines:
604	if line.startswith('#d_max='):
605	toks = line.split('=')
606	self.d_max = float(toks[1])
607	elif line.startswith('#nfunc='):
608	toks = line.split('=')
609	self.nfunc = int(toks[1])
610	self.out = numpy.zeros(self.nfunc)
611	self.cov = numpy.zeros([self.nfunc, self.nfunc])
612	elif line.startswith('#alpha='):
613	toks = line.split('=')
614	self.alpha = float(toks[1])
615	elif line.startswith('#chi2='):
616	toks = line.split('=')
617	self.chi2 = float(toks[1])
618	elif line.startswith('#elapsed='):
619	toks = line.split('=')
620	self.elapsed = float(toks[1])
621	elif line.startswith('#alpha_estimate='):
622	toks = line.split('=')
623	self.suggested_alpha = float(toks[1])
624	elif line.startswith('#qmin='):
625	toks = line.split('=')
626	try:
627	self.q_min = float(toks[1])
628	except:
629	self.q_min = None
630	elif line.startswith('#qmax='):
631	toks = line.split('=')
632	try:
633	self.q_max = float(toks[1])
634	except:
635	self.q_max = None
636	elif line.startswith('#slit_height='):
637	toks = line.split('=')
638	self.slit_height = float(toks[1])
639	elif line.startswith('#slit_width='):
640	toks = line.split('=')
641	self.slit_width = float(toks[1])
642	elif line.startswith('#background='):
643	toks = line.split('=')
644	self.background = float(toks[1])
645	elif line.startswith('#has_bck='):
646	toks = line.split('=')
647	if int(toks[1])==1:
648	self.has_bck=True
649	else:
650	self.has_bck=False
651
652	# Now read in the parameters
653	elif line.startswith('#C_'):
654	toks = line.split('=')
655	p = re.compile('#C_([0-9]+)')
656	m = p.search(toks[0])
657	toks2 = toks[1].split('+-')
658	i = int(m.group(1))
659	self.out[i] = float(toks2[0])
660
661	self.cov[i][i] = float(toks2[1])
662
663	except:
664	raise RuntimeError, "Invertor.from_file: corrupted file\n%s" % sys.exc_value
665	else:
666	raise RuntimeError, "Invertor.from_file: '%s' is not a file" % str(path)
667
668
669
670
671	if __name__ == "__main__":
672	o = Invertor()
673
674
675
676
677

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasview/pr_inversion/invertor.py @ 792db7d5

Download in other formats: