invertor.py @ a17ffdf

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalccostrafo411magnetic_scattrelease-4.1.1release-4.1.2release-4.2.2release_4.0.1ticket-1009ticket-1094-headlessticket-1242-2d-resolutionticket-1243ticket-1249ticket885unittest-saveload

Last change on this file since a17ffdf was e96a852, checked in by Mathieu Doucet <doucetm@…>, 16 years ago
From Raiza: n_term estimator
Property mode set to `100644`
File size: 25.9 KB

Line
1	"""
2	Module to perform P(r) inversion.
3	The module contains the Invertor class.
4	"""
5	from sans.pr.core.pr_inversion import Cinvertor
6	import numpy
7	import sys
8
9	def help():
10	"""
11	Provide general online help text
12	Future work: extend this function to allow topic selection
13	"""
14	info_txt = "The inversion approach is based on Moore, J. Appl. Cryst. (1980) 13, 168-175.\n\n"
15	info_txt += "P(r) is set to be equal to an expansion of base functions of the type "
16	info_txt += "phi_n(r) = 2rsin(pinr/D_max). The coefficient of each base functions "
17	info_txt += "in the expansion is found by performing a least square fit with the "
18	info_txt += "following fit function:\n\n"
19	info_txt += "chi2 = sum_i[ I_meas(q_i) - I_th(q_i) ]2/error**2 + Reg_term\n\n"
20	info_txt += "where I_meas(q) is the measured scattering intensity and I_th(q) is "
21	info_txt += "the prediction from the Fourier transform of the P(r) expansion. "
22	info_txt += "The Reg_term term is a regularization term set to the second derivative "
23	info_txt += "d2P(r)/dr2 integrated over r. It is used to produce a smooth P(r) output.\n\n"
24	info_txt += "The following are user inputs:\n\n"
25	info_txt += " - Number of terms: the number of base functions in the P(r) expansion.\n\n"
26	info_txt += " - Regularization constant: a multiplicative constant to set the size of "
27	info_txt += "the regularization term.\n\n"
28	info_txt += " - Maximum distance: the maximum distance between any two points in the system.\n"
29
30	return info_txt
31
32
33	class Invertor(Cinvertor):
34	"""
35	Invertor class to perform P(r) inversion
36
37	The problem is solved by posing the problem as Ax = b,
38	where x is the set of coefficients we are looking for.
39
40	Npts is the number of points.
41
42	In the following i refers to the ith base function coefficient.
43	The matrix has its entries j in its first Npts rows set to
44	A[j][i] = (Fourier transformed base function for point j)
45
46	We them choose a number of r-points, n_r, to evaluate the second
47	derivative of P(r) at. This is used as our regularization term.
48	For a vector r of length n_r, the following n_r rows are set to
49	A[j+Npts][i] = (2nd derivative of P(r), d2(P(r))/d(r)2, evaluated at r[j])
50
51	The vector b has its first Npts entries set to
52	b[j] = (I(q) observed for point j)
53
54	The following n_r entries are set to zero.
55
56	The result is found by using scipy.linalg.basic.lstsq to invert
57	the matrix and find the coefficients x.
58
59	Methods inherited from Cinvertor:
60	- get_peaks(pars): returns the number of P(r) peaks
61	- oscillations(pars): returns the oscillation parameters for the output P(r)
62	- get_positive(pars): returns the fraction of P(r) that is above zero
63	- get_pos_err(pars): returns the fraction of P(r) that is 1-sigma above zero
64	"""
65	#TODO: Allow for slit smearing. Smear each base function once before filling
66	# the A matrix.
67
68	## Chisqr of the last computation
69	chi2 = 0
70	## Time elapsed for last computation
71	elapsed = 0
72	## Alpha to get the reg term the same size as the signal
73	suggested_alpha = 0
74	## Last number of base functions used
75	nfunc = 0
76	## Last output values
77	out = None
78	## Last errors on output values
79	cov = None
80	## Flag to allow I(q) data with constant background
81	#has_bck = False
82	## Background value
83	background = 0
84
85
86	def __init__(self):
87	Cinvertor.__init__(self)
88
89	def __setattr__(self, name, value):
90	"""
91	Set the value of an attribute.
92	Access the parent class methods for
93	x, y, err, d_max, q_min, q_max and alpha
94	"""
95	if name=='x':
96	if 0.0 in value:
97	raise ValueError, "Invertor: one of your q-values is zero. Delete that entry before proceeding"
98	return self.set_x(value)
99	elif name=='y':
100	return self.set_y(value)
101	elif name=='err':
102	value2 = abs(value)
103	return self.set_err(value2)
104	elif name=='d_max':
105	return self.set_dmax(value)
106	elif name=='q_min':
107	if value==None:
108	return self.set_qmin(-1.0)
109	return self.set_qmin(value)
110	elif name=='q_max':
111	if value==None:
112	return self.set_qmax(-1.0)
113	return self.set_qmax(value)
114	elif name=='alpha':
115	return self.set_alpha(value)
116	elif name=='slit_height':
117	return self.set_slit_height(value)
118	elif name=='slit_width':
119	return self.set_slit_width(value)
120	elif name=='has_bck':
121	if value==True:
122	return self.set_has_bck(1)
123	elif value==False:
124	return self.set_has_bck(0)
125	else:
126	raise ValueError, "Invertor: has_bck can only be True or False"
127
128	return Cinvertor.__setattr__(self, name, value)
129
130	def __getattr__(self, name):
131	"""
132	Return the value of an attribute
133	For the moment x, y, err and d_max are write-only
134	TODO: change that!
135	"""
136	import numpy
137	if name=='x':
138	out = numpy.ones(self.get_nx())
139	self.get_x(out)
140	return out
141	elif name=='y':
142	out = numpy.ones(self.get_ny())
143	self.get_y(out)
144	return out
145	elif name=='err':
146	out = numpy.ones(self.get_nerr())
147	self.get_err(out)
148	return out
149	elif name=='d_max':
150	return self.get_dmax()
151	elif name=='q_min':
152	qmin = self.get_qmin()
153	if qmin<0:
154	return None
155	return qmin
156	elif name=='q_max':
157	qmax = self.get_qmax()
158	if qmax<0:
159	return None
160	return qmax
161	elif name=='alpha':
162	return self.get_alpha()
163	elif name=='slit_height':
164	return self.get_slit_height()
165	elif name=='slit_width':
166	return self.get_slit_width()
167	elif name=='has_bck':
168	value = self.get_has_bck()
169	if value==1:
170	return True
171	else:
172	return False
173	elif name in self.__dict__:
174	return self.__dict__[name]
175	return None
176
177	def clone(self):
178	"""
179	Return a clone of this instance
180	"""
181	invertor = Invertor()
182	invertor.chi2 = self.chi2
183	invertor.elapsed = self.elapsed
184	invertor.alpha = self.alpha
185	invertor.d_max = self.d_max
186	invertor.q_min = self.q_min
187	invertor.q_max = self.q_max
188
189	invertor.x = self.x
190	invertor.y = self.y
191	invertor.err = self.err
192	invertor.has_bck = self.has_bck
193
194	return invertor
195
196	def invert(self, nfunc=10, nr=20):
197	"""
198	Perform inversion to P(r)
199
200	The problem is solved by posing the problem as Ax = b,
201	where x is the set of coefficients we are looking for.
202
203	Npts is the number of points.
204
205	In the following i refers to the ith base function coefficient.
206	The matrix has its entries j in its first Npts rows set to
207	A[i][j] = (Fourier transformed base function for point j)
208
209	We them choose a number of r-points, n_r, to evaluate the second
210	derivative of P(r) at. This is used as our regularization term.
211	For a vector r of length n_r, the following n_r rows are set to
212	A[i+Npts][j] = (2nd derivative of P(r), d2(P(r))/d(r)2, evaluated at r[j])
213
214	The vector b has its first Npts entries set to
215	b[j] = (I(q) observed for point j)
216
217	The following n_r entries are set to zero.
218
219	The result is found by using scipy.linalg.basic.lstsq to invert
220	the matrix and find the coefficients x.
221
222	@param nfunc: number of base functions to use.
223	@param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
224	@return: c_out, c_cov - the coefficients with covariance matrix
225	"""
226	# Reset the background value before proceeding
227	self.background = 0.0
228	return self.lstsq(nfunc, nr=nr)
229
230	def iq(self, out, q):
231	"""
232	Function to call to evaluate the scattering intensity
233	@param args: c-parameters, and q
234	@return: I(q)
235	"""
236	return Cinvertor.iq(self, out, q)+self.background
237
238	def invert_optimize(self, nfunc=10, nr=20):
239	"""
240	Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
241
242	This probably produce more reliable results, but is much slower.
243	The minimization function is set to sum_i[ (I_obs(q_i) - I_theo(q_i))/err*2 ] + alpha reg_term,
244	where the reg_term is given by Svergun: it is the integral of the square of the first derivative
245	of P(r), d(P(r))/dr, integrated over the full range of r.
246
247	@param nfunc: number of base functions to use.
248	@param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
249	@return: c_out, c_cov - the coefficients with covariance matrix
250	"""
251
252	from scipy import optimize
253	import time
254
255	self.nfunc = nfunc
256	# First, check that the current data is valid
257	if self.is_valid()<=0:
258	raise RuntimeError, "Invertor.invert: Data array are of different length"
259
260	p = numpy.ones(nfunc)
261	t_0 = time.time()
262	out, cov_x, info, mesg, success = optimize.leastsq(self.residuals, p, full_output=1, warning=True)
263
264	# Compute chi^2
265	res = self.residuals(out)
266	chisqr = 0
267	for i in range(len(res)):
268	chisqr += res[i]
269
270	self.chi2 = chisqr
271
272	# Store computation time
273	self.elapsed = time.time() - t_0
274
275	return out, cov_x
276
277	def pr_fit(self, nfunc=5):
278	"""
279	This is a direct fit to a given P(r). It assumes that the y data
280	is set to some P(r) distribution that we are trying to reproduce
281	with a set of base functions.
282
283	This method is provided as a test.
284	"""
285	from scipy import optimize
286
287	# First, check that the current data is valid
288	if self.is_valid()<=0:
289	raise RuntimeError, "Invertor.invert: Data arrays are of different length"
290
291	p = numpy.ones(nfunc)
292	t_0 = time.time()
293	out, cov_x, info, mesg, success = optimize.leastsq(self.pr_residuals, p, full_output=1, warning=True)
294
295	# Compute chi^2
296	res = self.pr_residuals(out)
297	chisqr = 0
298	for i in range(len(res)):
299	chisqr += res[i]
300
301	self.chisqr = chisqr
302
303	# Store computation time
304	self.elapsed = time.time() - t_0
305
306	return out, cov_x
307
308	def pr_err(self, c, c_cov, r):
309	"""
310	Returns the value of P(r) for a given r, and base function
311	coefficients, with error.
312
313	@param c: base function coefficients
314	@param c_cov: covariance matrice of the base function coefficients
315	@param r: r-value to evaluate P(r) at
316	@return: P(r)
317	"""
318	return self.get_pr_err(c, c_cov, r)
319
320	def _accept_q(self, q):
321	"""
322	Check q-value against user-defined range
323	"""
324	if not self.q_min==None and q<self.q_min:
325	return False
326	if not self.q_max==None and q>self.q_max:
327	return False
328	return True
329
330	def lstsq(self, nfunc=5, nr=20):
331	#TODO: do this on the C side
332	#
333	# To make sure an array is contiguous:
334	# blah = numpy.ascontiguousarray(blah_original)
335	# ... before passing it to C
336	"""
337	The problem is solved by posing the problem as Ax = b,
338	where x is the set of coefficients we are looking for.
339
340	Npts is the number of points.
341
342	In the following i refers to the ith base function coefficient.
343	The matrix has its entries j in its first Npts rows set to
344	A[i][j] = (Fourier transformed base function for point j)
345
346	We them choose a number of r-points, n_r, to evaluate the second
347	derivative of P(r) at. This is used as our regularization term.
348	For a vector r of length n_r, the following n_r rows are set to
349	A[i+Npts][j] = (2nd derivative of P(r), d2(P(r))/d(r)2, evaluated at r[j])
350
351	The vector b has its first Npts entries set to
352	b[j] = (I(q) observed for point j)
353
354	The following n_r entries are set to zero.
355
356	The result is found by using scipy.linalg.basic.lstsq to invert
357	the matrix and find the coefficients x.
358
359	@param nfunc: number of base functions to use.
360	@param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
361
362	If the result does not allow us to compute the covariance matrix,
363	a matrix filled with zeros will be returned.
364
365	"""
366	#TODO: Allow for background by starting at n=0 (since the base function
367	# is zero for n=0).
368	import math, time
369	from scipy.linalg.basic import lstsq
370
371	if self.is_valid()<0:
372	raise RuntimeError, "Invertor: invalid data; incompatible data lengths."
373
374	self.nfunc = nfunc
375	# a -- An M x N matrix.
376	# b -- An M x nrhs matrix or M vector.
377	npts = len(self.x)
378	nq = nr
379	sqrt_alpha = math.sqrt(math.fabs(self.alpha))
380	if sqrt_alpha<0.0:
381	nq = 0
382
383	# If we need to fit the background, add a term
384	if self.has_bck==True:
385	nfunc_0 = nfunc
386	nfunc += 1
387
388	a = numpy.zeros([npts+nq, nfunc])
389	b = numpy.zeros(npts+nq)
390	err = numpy.zeros([nfunc, nfunc])
391
392	# Construct the a matrix and b vector that represent the problem
393	self._get_matrix(nfunc, nq, a, b)
394
395	# Perform the inversion (least square fit)
396	c, chi2, rank, n = lstsq(a, b)
397	# Sanity check
398	try:
399	float(chi2)
400	except:
401	chi2 = -1.0
402	self.chi2 = chi2
403
404	inv_cov = numpy.zeros([nfunc,nfunc])
405	# Get the covariance matrix, defined as inv_cov = a_transposed * a
406	self._get_invcov_matrix(nfunc, nr, a, inv_cov)
407
408	# Compute the reg term size for the output
409	sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
410
411	if math.fabs(self.alpha)>0:
412	new_alpha = sum_sig/(sum_reg/self.alpha)
413	else:
414	new_alpha = 0.0
415	self.suggested_alpha = new_alpha
416
417	try:
418	cov = numpy.linalg.pinv(inv_cov)
419	err = math.fabs(chi2/float(npts-nfunc)) * cov
420	except:
421	# We were not able to estimate the errors,
422	# returns an empty covariance matrix
423	print "lstsq:", sys.exc_value
424	print chi2
425	pass
426
427	# Keep a copy of the last output
428	if self.has_bck==False:
429	self.background = 0
430	self.out = c
431	self.cov = err
432	else:
433	self.background = c[0]
434
435	err_0 = numpy.zeros([nfunc, nfunc])
436	c_0 = numpy.zeros(nfunc)
437
438	for i in range(nfunc_0):
439	c_0[i] = c[i+1]
440	for j in range(nfunc_0):
441	err_0[i][j] = err[i+1][j+1]
442
443	self.out = c_0
444	self.cov = err_0
445
446	return self.out, self.cov
447
448	def lstsq_bck(self, nfunc=5, nr=20):
449	#TODO: Allow for background by starting at n=0 (since the base function
450	# is zero for n=0).
451	import math
452	from scipy.linalg.basic import lstsq
453
454	if self.is_valid()<0:
455	raise RuntimeError, "Invertor: invalid data; incompatible data lengths."
456
457	self.nfunc = nfunc
458	# a -- An M x N matrix.
459	# b -- An M x nrhs matrix or M vector.
460	npts = len(self.x)
461	nq = nr
462	sqrt_alpha = math.sqrt(math.fabs(self.alpha))
463	if sqrt_alpha<0.0:
464	nq = 0
465
466	err_0 = numpy.zeros([nfunc, nfunc])
467	c_0 = numpy.zeros(nfunc)
468	nfunc_0 = nfunc
469	nfunc += 1
470
471	a = numpy.zeros([npts+nq, nfunc])
472	b = numpy.zeros(npts+nq)
473	err = numpy.zeros([nfunc, nfunc])
474
475	# Construct the a matrix and b vector that represent the problem
476	self._get_matrix(nfunc, nq, a, b)
477
478	c, chi2, rank, n = lstsq(a, b)
479	# Sanity check
480	try:
481	float(chi2)
482	except:
483	chi2 = -1.0
484	self.chi2 = chi2
485
486	inv_cov = numpy.zeros([nfunc,nfunc])
487	# Get the covariance matrix, defined as inv_cov = a_transposed * a
488	self._get_invcov_matrix(nfunc, nr, a, inv_cov)
489
490	# Compute the reg term size for the output
491	sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
492
493	if math.fabs(self.alpha)>0:
494	new_alpha = sum_sig/(sum_reg/self.alpha)
495	else:
496	new_alpha = 0.0
497	self.suggested_alpha = new_alpha
498
499	try:
500	cov = numpy.linalg.pinv(inv_cov)
501	err = math.fabs(chi2/float(npts-nfunc)) * cov
502	except:
503	# We were not able to estimate the errors,
504	# returns an empty covariance matrix
505	print "lstsq:", sys.exc_value
506	print chi2
507	pass
508
509	# Keep a copy of the last output
510
511	print "BACKGROUND =", c[0]
512	self.background = c[0]
513
514	for i in range(nfunc_0):
515	c_0[i] = c[i+1]
516	for j in range(nfunc_0):
517	err_0[i][j] = err[i+1][j+1]
518
519	self.out = c_0
520	self.cov = err_0
521
522	return c_0, err_0
523
524	def estimate_numterms(self, isquit_func=None):
525	"""
526	Returns a reasonable guess for the
527	number of terms
528
529	@return: number of terms, alpha, message
530	"""
531	from num_term import Num_terms
532	estimator = Num_terms(self.clone())
533
534	return estimator.num_terms(isquit_func)
535
536	def estimate_alpha(self, nfunc):
537	"""
538	Returns a reasonable guess for the
539	regularization constant alpha
540
541	@return: alpha, message, elapsed
542
543	where alpha is the estimate for alpha,
544	message is a message for the user,
545	elapsed is the computation time
546	"""
547	import time
548	try:
549	pr = self.clone()
550
551	# T_0 for computation time
552	starttime = time.time()
553	elapsed = 0
554
555	# If the current alpha is zero, try
556	# another value
557	if pr.alpha<=0:
558	pr.alpha = 0.0001
559
560	# Perform inversion to find the largest alpha
561	out, cov = pr.invert(nfunc)
562	elapsed = time.time()-starttime
563	initial_alpha = pr.alpha
564	initial_peaks = pr.get_peaks(out)
565
566	# Try the inversion with the estimated alpha
567	pr.alpha = pr.suggested_alpha
568	out, cov = pr.invert(nfunc)
569
570	npeaks = pr.get_peaks(out)
571	# if more than one peak to start with
572	# just return the estimate
573	if npeaks>1:
574	message = "Your P(r) is not smooth, please check your inversion parameters"
575	return pr.suggested_alpha, message, elapsed
576	else:
577
578	# Look at smaller values
579	# We assume that for the suggested alpha, we have 1 peak
580	# if not, send a message to change parameters
581	alpha = pr.suggested_alpha
582	best_alpha = pr.suggested_alpha
583	found = False
584	for i in range(10):
585	pr.alpha = (0.33)*(i+1)alpha
586	out, cov = pr.invert(nfunc)
587
588	peaks = pr.get_peaks(out)
589	if peaks>1:
590	found = True
591	break
592	best_alpha = pr.alpha
593
594	# If we didn't find a turning point for alpha and
595	# the initial alpha already had only one peak,
596	# just return that
597	if not found and initial_peaks==1 and initial_alpha<best_alpha:
598	best_alpha = initial_alpha
599
600	# Check whether the size makes sense
601	message=''
602
603	if not found:
604	message = "None"
605	elif best_alpha>=0.5*pr.suggested_alpha:
606	# best alpha is too big, return a
607	# reasonable value
608	message = "The estimated alpha for your system is too large. "
609	message += "Try increasing your maximum distance."
610
611	return best_alpha, message, elapsed
612
613	except:
614	message = "Invertor.estimate_alpha: %s" % sys.exc_value
615	return 0, message, elapsed
616
617
618	def to_file(self, path, npts=100):
619	"""
620	Save the state to a file that will be readable
621	by SliceView.
622	@param path: path of the file to write
623	@param npts: number of P(r) points to be written
624	"""
625	import pylab
626
627	file = open(path, 'w')
628	file.write("#d_max=%g\n" % self.d_max)
629	file.write("#nfunc=%g\n" % self.nfunc)
630	file.write("#alpha=%g\n" % self.alpha)
631	file.write("#chi2=%g\n" % self.chi2)
632	file.write("#elapsed=%g\n" % self.elapsed)
633	file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
634	if not self.out==None:
635	if len(self.out)==len(self.cov):
636	for i in range(len(self.out)):
637	file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]), str(self.cov[i][i])))
638	file.write("<r> <Pr> <dPr>\n")
639	r = pylab.arange(0.0, self.d_max, self.d_max/npts)
640
641	for r_i in r:
642	(value, err) = self.pr_err(self.out, self.cov, r_i)
643	file.write("%g %g %g\n" % (r_i, value, err))
644
645	file.close()
646
647
648	def from_file(self, path):
649	"""
650	Load the state of the Invertor from a file,
651	to be able to generate P(r) from a set of
652	parameters.
653	@param path: path of the file to load
654	"""
655	import os
656	import re
657	if os.path.isfile(path):
658	try:
659	fd = open(path, 'r')
660
661	buff = fd.read()
662	lines = buff.split('\n')
663	for line in lines:
664	if line.startswith('#d_max='):
665	toks = line.split('=')
666	self.d_max = float(toks[1])
667	elif line.startswith('#nfunc='):
668	toks = line.split('=')
669	self.nfunc = int(toks[1])
670	self.out = numpy.zeros(self.nfunc)
671	self.cov = numpy.zeros([self.nfunc, self.nfunc])
672	elif line.startswith('#alpha='):
673	toks = line.split('=')
674	self.alpha = float(toks[1])
675	elif line.startswith('#chi2='):
676	toks = line.split('=')
677	self.chi2 = float(toks[1])
678	elif line.startswith('#elapsed='):
679	toks = line.split('=')
680	self.elapsed = float(toks[1])
681	elif line.startswith('#alpha_estimate='):
682	toks = line.split('=')
683	self.suggested_alpha = float(toks[1])
684
685	# Now read in the parameters
686	elif line.startswith('#C_'):
687	toks = line.split('=')
688	p = re.compile('#C_([0-9]+)')
689	m = p.search(toks[0])
690	toks2 = toks[1].split('+-')
691	i = int(m.group(1))
692	self.out[i] = float(toks2[0])
693
694	self.cov[i][i] = float(toks2[1])
695
696	except:
697	raise RuntimeError, "Invertor.from_file: corrupted file\n%s" % sys.exc_value
698	else:
699	raise RuntimeError, "Invertor.from_file: '%s' is not a file" % str(path)
700
701
702
703
704	if __name__ == "__main__":
705	o = Invertor()
706
707
708
709
710

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasview/pr_inversion/invertor.py @ a17ffdf

Download in other formats: