invertor.py @ 9e85792

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalccostrafo411magnetic_scattrelease-4.1.1release-4.1.2release-4.2.2release_4.0.1ticket-1009ticket-1094-headlessticket-1242-2d-resolutionticket-1243ticket-1249ticket885unittest-saveload

Last change on this file since 9e85792 was f168d02, checked in by Mathieu Doucet <doucetm@…>, 16 years ago
Added slit smearing (still slow)
Property mode set to `100644`
File size: 26.4 KB

Line
1	"""
2	Module to perform P(r) inversion.
3	The module contains the Invertor class.
4	"""
5	from sans.pr.core.pr_inversion import Cinvertor
6	import numpy
7	import sys
8
9	def help():
10	"""
11	Provide general online help text
12	Future work: extend this function to allow topic selection
13	"""
14	info_txt = "The inversion approach is based on Moore, J. Appl. Cryst. (1980) 13, 168-175.\n\n"
15	info_txt += "P(r) is set to be equal to an expansion of base functions of the type "
16	info_txt += "phi_n(r) = 2rsin(pinr/D_max). The coefficient of each base functions "
17	info_txt += "in the expansion is found by performing a least square fit with the "
18	info_txt += "following fit function:\n\n"
19	info_txt += "chi2 = sum_i[ I_meas(q_i) - I_th(q_i) ]2/error**2 + Reg_term\n\n"
20	info_txt += "where I_meas(q) is the measured scattering intensity and I_th(q) is "
21	info_txt += "the prediction from the Fourier transform of the P(r) expansion. "
22	info_txt += "The Reg_term term is a regularization term set to the second derivative "
23	info_txt += "d2P(r)/dr2 integrated over r. It is used to produce a smooth P(r) output.\n\n"
24	info_txt += "The following are user inputs:\n\n"
25	info_txt += " - Number of terms: the number of base functions in the P(r) expansion.\n\n"
26	info_txt += " - Regularization constant: a multiplicative constant to set the size of "
27	info_txt += "the regularization term.\n\n"
28	info_txt += " - Maximum distance: the maximum distance between any two points in the system.\n"
29
30	return info_txt
31
32
33	class Invertor(Cinvertor):
34	"""
35	Invertor class to perform P(r) inversion
36
37	The problem is solved by posing the problem as Ax = b,
38	where x is the set of coefficients we are looking for.
39
40	Npts is the number of points.
41
42	In the following i refers to the ith base function coefficient.
43	The matrix has its entries j in its first Npts rows set to
44	A[j][i] = (Fourier transformed base function for point j)
45
46	We them choose a number of r-points, n_r, to evaluate the second
47	derivative of P(r) at. This is used as our regularization term.
48	For a vector r of length n_r, the following n_r rows are set to
49	A[j+Npts][i] = (2nd derivative of P(r), d2(P(r))/d(r)2, evaluated at r[j])
50
51	The vector b has its first Npts entries set to
52	b[j] = (I(q) observed for point j)
53
54	The following n_r entries are set to zero.
55
56	The result is found by using scipy.linalg.basic.lstsq to invert
57	the matrix and find the coefficients x.
58
59	Methods inherited from Cinvertor:
60	- get_peaks(pars): returns the number of P(r) peaks
61	- oscillations(pars): returns the oscillation parameters for the output P(r)
62	- get_positive(pars): returns the fraction of P(r) that is above zero
63	- get_pos_err(pars): returns the fraction of P(r) that is 1-sigma above zero
64	"""
65	#TODO: Allow for slit smearing. Smear each base function once before filling
66	# the A matrix.
67
68	## Chisqr of the last computation
69	chi2 = 0
70	## Time elapsed for last computation
71	elapsed = 0
72	## Alpha to get the reg term the same size as the signal
73	suggested_alpha = 0
74	## Last number of base functions used
75	nfunc = 10
76	## Last output values
77	out = None
78	## Last errors on output values
79	cov = None
80	## Flag to allow I(q) data with constant background
81	#has_bck = False
82	## Background value
83	background = 0
84
85
86	def __init__(self):
87	Cinvertor.__init__(self)
88
89	def __setattr__(self, name, value):
90	"""
91	Set the value of an attribute.
92	Access the parent class methods for
93	x, y, err, d_max, q_min, q_max and alpha
94	"""
95	if name=='x':
96	if 0.0 in value:
97	raise ValueError, "Invertor: one of your q-values is zero. Delete that entry before proceeding"
98	return self.set_x(value)
99	elif name=='y':
100	return self.set_y(value)
101	elif name=='err':
102	value2 = abs(value)
103	return self.set_err(value2)
104	elif name=='d_max':
105	return self.set_dmax(value)
106	elif name=='q_min':
107	if value==None:
108	return self.set_qmin(-1.0)
109	return self.set_qmin(value)
110	elif name=='q_max':
111	if value==None:
112	return self.set_qmax(-1.0)
113	return self.set_qmax(value)
114	elif name=='alpha':
115	return self.set_alpha(value)
116	elif name=='slit_height':
117	return self.set_slit_height(value)
118	elif name=='slit_width':
119	return self.set_slit_width(value)
120	elif name=='has_bck':
121	if value==True:
122	return self.set_has_bck(1)
123	elif value==False:
124	return self.set_has_bck(0)
125	else:
126	raise ValueError, "Invertor: has_bck can only be True or False"
127
128	return Cinvertor.__setattr__(self, name, value)
129
130	def __getattr__(self, name):
131	"""
132	Return the value of an attribute
133	For the moment x, y, err and d_max are write-only
134	TODO: change that!
135	"""
136	import numpy
137	if name=='x':
138	out = numpy.ones(self.get_nx())
139	self.get_x(out)
140	return out
141	elif name=='y':
142	out = numpy.ones(self.get_ny())
143	self.get_y(out)
144	return out
145	elif name=='err':
146	out = numpy.ones(self.get_nerr())
147	self.get_err(out)
148	return out
149	elif name=='d_max':
150	return self.get_dmax()
151	elif name=='q_min':
152	qmin = self.get_qmin()
153	if qmin<0:
154	return None
155	return qmin
156	elif name=='q_max':
157	qmax = self.get_qmax()
158	if qmax<0:
159	return None
160	return qmax
161	elif name=='alpha':
162	return self.get_alpha()
163	elif name=='slit_height':
164	return self.get_slit_height()
165	elif name=='slit_width':
166	return self.get_slit_width()
167	elif name=='has_bck':
168	value = self.get_has_bck()
169	if value==1:
170	return True
171	else:
172	return False
173	elif name in self.__dict__:
174	return self.__dict__[name]
175	return None
176
177	def clone(self):
178	"""
179	Return a clone of this instance
180	"""
181	invertor = Invertor()
182	invertor.chi2 = self.chi2
183	invertor.elapsed = self.elapsed
184	invertor.alpha = self.alpha
185	invertor.d_max = self.d_max
186	invertor.q_min = self.q_min
187	invertor.q_max = self.q_max
188
189	invertor.x = self.x
190	invertor.y = self.y
191	invertor.err = self.err
192	invertor.has_bck = self.has_bck
193	invertor.slit_height = self.slit_height
194	invertor.slit_width = self.slit_width
195
196	return invertor
197
198	def invert(self, nfunc=10, nr=20):
199	"""
200	Perform inversion to P(r)
201
202	The problem is solved by posing the problem as Ax = b,
203	where x is the set of coefficients we are looking for.
204
205	Npts is the number of points.
206
207	In the following i refers to the ith base function coefficient.
208	The matrix has its entries j in its first Npts rows set to
209	A[i][j] = (Fourier transformed base function for point j)
210
211	We them choose a number of r-points, n_r, to evaluate the second
212	derivative of P(r) at. This is used as our regularization term.
213	For a vector r of length n_r, the following n_r rows are set to
214	A[i+Npts][j] = (2nd derivative of P(r), d2(P(r))/d(r)2, evaluated at r[j])
215
216	The vector b has its first Npts entries set to
217	b[j] = (I(q) observed for point j)
218
219	The following n_r entries are set to zero.
220
221	The result is found by using scipy.linalg.basic.lstsq to invert
222	the matrix and find the coefficients x.
223
224	@param nfunc: number of base functions to use.
225	@param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
226	@return: c_out, c_cov - the coefficients with covariance matrix
227	"""
228	# Reset the background value before proceeding
229	self.background = 0.0
230	return self.lstsq(nfunc, nr=nr)
231
232	def iq(self, out, q):
233	"""
234	Function to call to evaluate the scattering intensity
235	@param args: c-parameters, and q
236	@return: I(q)
237	"""
238	return Cinvertor.iq(self, out, q)+self.background
239
240	def invert_optimize(self, nfunc=10, nr=20):
241	"""
242	Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
243
244	This probably produce more reliable results, but is much slower.
245	The minimization function is set to sum_i[ (I_obs(q_i) - I_theo(q_i))/err*2 ] + alpha reg_term,
246	where the reg_term is given by Svergun: it is the integral of the square of the first derivative
247	of P(r), d(P(r))/dr, integrated over the full range of r.
248
249	@param nfunc: number of base functions to use.
250	@param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
251	@return: c_out, c_cov - the coefficients with covariance matrix
252	"""
253
254	from scipy import optimize
255	import time
256
257	self.nfunc = nfunc
258	# First, check that the current data is valid
259	if self.is_valid()<=0:
260	raise RuntimeError, "Invertor.invert: Data array are of different length"
261
262	p = numpy.ones(nfunc)
263	t_0 = time.time()
264	out, cov_x, info, mesg, success = optimize.leastsq(self.residuals, p, full_output=1, warning=True)
265
266	# Compute chi^2
267	res = self.residuals(out)
268	chisqr = 0
269	for i in range(len(res)):
270	chisqr += res[i]
271
272	self.chi2 = chisqr
273
274	# Store computation time
275	self.elapsed = time.time() - t_0
276
277	return out, cov_x
278
279	def pr_fit(self, nfunc=5):
280	"""
281	This is a direct fit to a given P(r). It assumes that the y data
282	is set to some P(r) distribution that we are trying to reproduce
283	with a set of base functions.
284
285	This method is provided as a test.
286	"""
287	from scipy import optimize
288
289	# First, check that the current data is valid
290	if self.is_valid()<=0:
291	raise RuntimeError, "Invertor.invert: Data arrays are of different length"
292
293	p = numpy.ones(nfunc)
294	t_0 = time.time()
295	out, cov_x, info, mesg, success = optimize.leastsq(self.pr_residuals, p, full_output=1, warning=True)
296
297	# Compute chi^2
298	res = self.pr_residuals(out)
299	chisqr = 0
300	for i in range(len(res)):
301	chisqr += res[i]
302
303	self.chisqr = chisqr
304
305	# Store computation time
306	self.elapsed = time.time() - t_0
307
308	return out, cov_x
309
310	def pr_err(self, c, c_cov, r):
311	"""
312	Returns the value of P(r) for a given r, and base function
313	coefficients, with error.
314
315	@param c: base function coefficients
316	@param c_cov: covariance matrice of the base function coefficients
317	@param r: r-value to evaluate P(r) at
318	@return: P(r)
319	"""
320	return self.get_pr_err(c, c_cov, r)
321
322	def _accept_q(self, q):
323	"""
324	Check q-value against user-defined range
325	"""
326	if not self.q_min==None and q<self.q_min:
327	return False
328	if not self.q_max==None and q>self.q_max:
329	return False
330	return True
331
332	def lstsq(self, nfunc=5, nr=20):
333	#TODO: do this on the C side
334	#
335	# To make sure an array is contiguous:
336	# blah = numpy.ascontiguousarray(blah_original)
337	# ... before passing it to C
338	"""
339	The problem is solved by posing the problem as Ax = b,
340	where x is the set of coefficients we are looking for.
341
342	Npts is the number of points.
343
344	In the following i refers to the ith base function coefficient.
345	The matrix has its entries j in its first Npts rows set to
346	A[i][j] = (Fourier transformed base function for point j)
347
348	We them choose a number of r-points, n_r, to evaluate the second
349	derivative of P(r) at. This is used as our regularization term.
350	For a vector r of length n_r, the following n_r rows are set to
351	A[i+Npts][j] = (2nd derivative of P(r), d2(P(r))/d(r)2, evaluated at r[j])
352
353	The vector b has its first Npts entries set to
354	b[j] = (I(q) observed for point j)
355
356	The following n_r entries are set to zero.
357
358	The result is found by using scipy.linalg.basic.lstsq to invert
359	the matrix and find the coefficients x.
360
361	@param nfunc: number of base functions to use.
362	@param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
363
364	If the result does not allow us to compute the covariance matrix,
365	a matrix filled with zeros will be returned.
366
367	"""
368	#TODO: Allow for background by starting at n=0 (since the base function
369	# is zero for n=0).
370	import math, time
371	from scipy.linalg.basic import lstsq
372
373	if self.is_valid()<0:
374	raise RuntimeError, "Invertor: invalid data; incompatible data lengths."
375
376	self.nfunc = nfunc
377	# a -- An M x N matrix.
378	# b -- An M x nrhs matrix or M vector.
379	npts = len(self.x)
380	nq = nr
381	sqrt_alpha = math.sqrt(math.fabs(self.alpha))
382	if sqrt_alpha<0.0:
383	nq = 0
384
385	# If we need to fit the background, add a term
386	if self.has_bck==True:
387	nfunc_0 = nfunc
388	nfunc += 1
389
390	a = numpy.zeros([npts+nq, nfunc])
391	b = numpy.zeros(npts+nq)
392	err = numpy.zeros([nfunc, nfunc])
393
394	# Construct the a matrix and b vector that represent the problem
395	t_0 = time.time()
396	self._get_matrix(nfunc, nq, a, b)
397	#print "elasped: ", time.time()-t_0
398
399	# Perform the inversion (least square fit)
400	c, chi2, rank, n = lstsq(a, b)
401	# Sanity check
402	try:
403	float(chi2)
404	except:
405	chi2 = -1.0
406	self.chi2 = chi2
407
408	inv_cov = numpy.zeros([nfunc,nfunc])
409	# Get the covariance matrix, defined as inv_cov = a_transposed * a
410	self._get_invcov_matrix(nfunc, nr, a, inv_cov)
411
412	# Compute the reg term size for the output
413	sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
414
415	if math.fabs(self.alpha)>0:
416	new_alpha = sum_sig/(sum_reg/self.alpha)
417	else:
418	new_alpha = 0.0
419	self.suggested_alpha = new_alpha
420
421	try:
422	cov = numpy.linalg.pinv(inv_cov)
423	err = math.fabs(chi2/float(npts-nfunc)) * cov
424	except:
425	# We were not able to estimate the errors,
426	# returns an empty covariance matrix
427	print "lstsq:", sys.exc_value
428	print chi2
429	pass
430
431	# Keep a copy of the last output
432	if self.has_bck==False:
433	self.background = 0
434	self.out = c
435	self.cov = err
436	else:
437	self.background = c[0]
438
439	err_0 = numpy.zeros([nfunc, nfunc])
440	c_0 = numpy.zeros(nfunc)
441
442	for i in range(nfunc_0):
443	c_0[i] = c[i+1]
444	for j in range(nfunc_0):
445	err_0[i][j] = err[i+1][j+1]
446
447	self.out = c_0
448	self.cov = err_0
449
450	return self.out, self.cov
451
452	def lstsq_bck(self, nfunc=5, nr=20):
453	#TODO: Allow for background by starting at n=0 (since the base function
454	# is zero for n=0).
455	import math
456	from scipy.linalg.basic import lstsq
457
458	if self.is_valid()<0:
459	raise RuntimeError, "Invertor: invalid data; incompatible data lengths."
460
461	self.nfunc = nfunc
462	# a -- An M x N matrix.
463	# b -- An M x nrhs matrix or M vector.
464	npts = len(self.x)
465	nq = nr
466	sqrt_alpha = math.sqrt(math.fabs(self.alpha))
467	if sqrt_alpha<0.0:
468	nq = 0
469
470	err_0 = numpy.zeros([nfunc, nfunc])
471	c_0 = numpy.zeros(nfunc)
472	nfunc_0 = nfunc
473	nfunc += 1
474
475	a = numpy.zeros([npts+nq, nfunc])
476	b = numpy.zeros(npts+nq)
477	err = numpy.zeros([nfunc, nfunc])
478
479	# Construct the a matrix and b vector that represent the problem
480	self._get_matrix(nfunc, nq, a, b)
481
482	c, chi2, rank, n = lstsq(a, b)
483	# Sanity check
484	try:
485	float(chi2)
486	except:
487	chi2 = -1.0
488	self.chi2 = chi2
489
490	inv_cov = numpy.zeros([nfunc,nfunc])
491	# Get the covariance matrix, defined as inv_cov = a_transposed * a
492	self._get_invcov_matrix(nfunc, nr, a, inv_cov)
493
494	# Compute the reg term size for the output
495	sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
496
497	if math.fabs(self.alpha)>0:
498	new_alpha = sum_sig/(sum_reg/self.alpha)
499	else:
500	new_alpha = 0.0
501	self.suggested_alpha = new_alpha
502
503	try:
504	cov = numpy.linalg.pinv(inv_cov)
505	err = math.fabs(chi2/float(npts-nfunc)) * cov
506	except:
507	# We were not able to estimate the errors,
508	# returns an empty covariance matrix
509	print "lstsq:", sys.exc_value
510	print chi2
511	pass
512
513	# Keep a copy of the last output
514
515	print "BACKGROUND =", c[0]
516	self.background = c[0]
517
518	for i in range(nfunc_0):
519	c_0[i] = c[i+1]
520	for j in range(nfunc_0):
521	err_0[i][j] = err[i+1][j+1]
522
523	self.out = c_0
524	self.cov = err_0
525
526	return c_0, err_0
527
528	def estimate_numterms(self, isquit_func=None):
529	"""
530	Returns a reasonable guess for the
531	number of terms
532
533	@return: number of terms, alpha, message
534	"""
535	from num_term import Num_terms
536	estimator = Num_terms(self.clone())
537	try:
538	return estimator.num_terms(isquit_func)
539	except:
540	# If we fail, estimate alpha and return the default
541	# number of terms
542	best_alpha, message, elapsed =self.estimate_alpha(self.nfunc)
543	return self.nfunc, best_alpha, "Could not estimate number of terms"
544
545	def estimate_alpha(self, nfunc):
546	"""
547	Returns a reasonable guess for the
548	regularization constant alpha
549
550	@return: alpha, message, elapsed
551
552	where alpha is the estimate for alpha,
553	message is a message for the user,
554	elapsed is the computation time
555	"""
556	import time
557	try:
558	pr = self.clone()
559
560	# T_0 for computation time
561	starttime = time.time()
562	elapsed = 0
563
564	# If the current alpha is zero, try
565	# another value
566	if pr.alpha<=0:
567	pr.alpha = 0.0001
568
569	# Perform inversion to find the largest alpha
570	out, cov = pr.invert(nfunc)
571	elapsed = time.time()-starttime
572	initial_alpha = pr.alpha
573	initial_peaks = pr.get_peaks(out)
574
575	# Try the inversion with the estimated alpha
576	pr.alpha = pr.suggested_alpha
577	out, cov = pr.invert(nfunc)
578
579	npeaks = pr.get_peaks(out)
580	# if more than one peak to start with
581	# just return the estimate
582	if npeaks>1:
583	#message = "Your P(r) is not smooth, please check your inversion parameters"
584	message = None
585	return pr.suggested_alpha, message, elapsed
586	else:
587
588	# Look at smaller values
589	# We assume that for the suggested alpha, we have 1 peak
590	# if not, send a message to change parameters
591	alpha = pr.suggested_alpha
592	best_alpha = pr.suggested_alpha
593	found = False
594	for i in range(10):
595	pr.alpha = (0.33)*(i+1)alpha
596	out, cov = pr.invert(nfunc)
597
598	peaks = pr.get_peaks(out)
599	if peaks>1:
600	found = True
601	break
602	best_alpha = pr.alpha
603
604	# If we didn't find a turning point for alpha and
605	# the initial alpha already had only one peak,
606	# just return that
607	if not found and initial_peaks==1 and initial_alpha<best_alpha:
608	best_alpha = initial_alpha
609
610	# Check whether the size makes sense
611	message=''
612
613	if not found:
614	message = "None"
615	elif best_alpha>=0.5*pr.suggested_alpha:
616	# best alpha is too big, return a
617	# reasonable value
618	message = "The estimated alpha for your system is too large. "
619	message += "Try increasing your maximum distance."
620
621	return best_alpha, message, elapsed
622
623	except:
624	message = "Invertor.estimate_alpha: %s" % sys.exc_value
625	return 0, message, elapsed
626
627
628	def to_file(self, path, npts=100):
629	"""
630	Save the state to a file that will be readable
631	by SliceView.
632	@param path: path of the file to write
633	@param npts: number of P(r) points to be written
634	"""
635	import pylab
636
637	file = open(path, 'w')
638	file.write("#d_max=%g\n" % self.d_max)
639	file.write("#nfunc=%g\n" % self.nfunc)
640	file.write("#alpha=%g\n" % self.alpha)
641	file.write("#chi2=%g\n" % self.chi2)
642	file.write("#elapsed=%g\n" % self.elapsed)
643	file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
644	if not self.out==None:
645	if len(self.out)==len(self.cov):
646	for i in range(len(self.out)):
647	file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]), str(self.cov[i][i])))
648	file.write("<r> <Pr> <dPr>\n")
649	r = pylab.arange(0.0, self.d_max, self.d_max/npts)
650
651	for r_i in r:
652	(value, err) = self.pr_err(self.out, self.cov, r_i)
653	file.write("%g %g %g\n" % (r_i, value, err))
654
655	file.close()
656
657
658	def from_file(self, path):
659	"""
660	Load the state of the Invertor from a file,
661	to be able to generate P(r) from a set of
662	parameters.
663	@param path: path of the file to load
664	"""
665	import os
666	import re
667	if os.path.isfile(path):
668	try:
669	fd = open(path, 'r')
670
671	buff = fd.read()
672	lines = buff.split('\n')
673	for line in lines:
674	if line.startswith('#d_max='):
675	toks = line.split('=')
676	self.d_max = float(toks[1])
677	elif line.startswith('#nfunc='):
678	toks = line.split('=')
679	self.nfunc = int(toks[1])
680	self.out = numpy.zeros(self.nfunc)
681	self.cov = numpy.zeros([self.nfunc, self.nfunc])
682	elif line.startswith('#alpha='):
683	toks = line.split('=')
684	self.alpha = float(toks[1])
685	elif line.startswith('#chi2='):
686	toks = line.split('=')
687	self.chi2 = float(toks[1])
688	elif line.startswith('#elapsed='):
689	toks = line.split('=')
690	self.elapsed = float(toks[1])
691	elif line.startswith('#alpha_estimate='):
692	toks = line.split('=')
693	self.suggested_alpha = float(toks[1])
694
695	# Now read in the parameters
696	elif line.startswith('#C_'):
697	toks = line.split('=')
698	p = re.compile('#C_([0-9]+)')
699	m = p.search(toks[0])
700	toks2 = toks[1].split('+-')
701	i = int(m.group(1))
702	self.out[i] = float(toks2[0])
703
704	self.cov[i][i] = float(toks2[1])
705
706	except:
707	raise RuntimeError, "Invertor.from_file: corrupted file\n%s" % sys.exc_value
708	else:
709	raise RuntimeError, "Invertor.from_file: '%s' is not a file" % str(path)
710
711
712
713
714	if __name__ == "__main__":
715	o = Invertor()
716
717
718
719
720

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasview/pr_inversion/invertor.py @ 9e85792

Download in other formats: