invertor.py @ a01c743

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalccostrafo411magnetic_scattrelease-4.1.1release-4.1.2release-4.2.2release_4.0.1ticket-1009ticket-1094-headlessticket-1242-2d-resolutionticket-1243ticket-1249ticket885unittest-saveload

Last change on this file since a01c743 was 1db4a53, checked in by Gervaise Alina <gervyh@…>, 14 years ago
working on pylint
Property mode set to `100644`
File size: 26.2 KB

Line
1	"""
2	Module to perform P(r) inversion.
3	The module contains the Invertor class.
4	"""
5
6	import numpy
7	import sys
8	import math
9	import time
10	import copy
11	import os
12	import re
13	from numpy.linalg import lstsq
14	from scipy import optimize
15	from sans.pr.core.pr_inversion import Cinvertor
16
17	def help():
18	"""
19	Provide general online help text
20	Future work: extend this function to allow topic selection
21	"""
22	info_txt = "The inversion approach is based on Moore, J. Appl. Cryst. "
23	info_txt += "(1980) 13, 168-175.\n\n"
24	info_txt += "P(r) is set to be equal to an expansion of base functions "
25	info_txt += "of the type "
26	info_txt += "phi_n(r) = 2rsin(pinr/D_max). The coefficient of each "
27	info_txt += "base functions "
28	info_txt += "in the expansion is found by performing a least square fit "
29	info_txt += "with the "
30	info_txt += "following fit function:\n\n"
31	info_txt += "chi2 = sum_i[ I_meas(q_i) - I_th(q_i) ]2/error**2 +"
32	info_txt += "Reg_term\n\n"
33	info_txt += "where I_meas(q) is the measured scattering intensity and "
34	info_txt += "I_th(q) is "
35	info_txt += "the prediction from the Fourier transform of the P(r) "
36	info_txt += "expansion. "
37	info_txt += "The Reg_term term is a regularization term set to the second"
38	info_txt += " derivative "
39	info_txt += "d2P(r)/dr2 integrated over r. It is used to produce "
40	info_txt += "a smooth P(r) output.\n\n"
41	info_txt += "The following are user inputs:\n\n"
42	info_txt += " - Number of terms: the number of base functions in the P(r)"
43	info_txt += " expansion.\n\n"
44	info_txt += " - Regularization constant: a multiplicative constant "
45	info_txt += "to set the size of "
46	info_txt += "the regularization term.\n\n"
47	info_txt += " - Maximum distance: the maximum distance between any "
48	info_txt += "two points in the system.\n"
49
50	return info_txt
51
52
53	class Invertor(Cinvertor):
54	"""
55	Invertor class to perform P(r) inversion
56
57	The problem is solved by posing the problem as Ax = b,
58	where x is the set of coefficients we are looking for.
59
60	Npts is the number of points.
61
62	In the following i refers to the ith base function coefficient.
63	The matrix has its entries j in its first Npts rows set to
64	A[j][i] = (Fourier transformed base function for point j)
65
66	We them choose a number of r-points, n_r, to evaluate the second
67	derivative of P(r) at. This is used as our regularization term.
68	For a vector r of length n_r, the following n_r rows are set to
69	A[j+Npts][i] = (2nd derivative of P(r), d2(P(r))/d(r)2,
70	evaluated at r[j])
71
72	The vector b has its first Npts entries set to
73	b[j] = (I(q) observed for point j)
74
75	The following n_r entries are set to zero.
76
77	The result is found by using scipy.linalg.basic.lstsq to invert
78	the matrix and find the coefficients x.
79
80	Methods inherited from Cinvertor:
81	- get_peaks(pars): returns the number of P(r) peaks
82	- oscillations(pars): returns the oscillation parameters for the output P(r)
83	- get_positive(pars): returns the fraction of P(r) that is above zero
84	- get_pos_err(pars): returns the fraction of P(r) that is 1-sigma above zero
85	"""
86	## Chisqr of the last computation
87	chi2 = 0
88	## Time elapsed for last computation
89	elapsed = 0
90	## Alpha to get the reg term the same size as the signal
91	suggested_alpha = 0
92	## Last number of base functions used
93	nfunc = 10
94	## Last output values
95	out = None
96	## Last errors on output values
97	cov = None
98	## Background value
99	background = 0
100	## Information dictionary for application use
101	info = {}
102
103
104	def __init__(self):
105	Cinvertor.__init__(self)
106
107	def __setattr__(self, name, value):
108	"""
109	Set the value of an attribute.
110	Access the parent class methods for
111	x, y, err, d_max, q_min, q_max and alpha
112	"""
113	if name == 'x':
114	if 0.0 in value:
115	msg = "Invertor: one of your q-values is zero. "
116	msg += "Delete that entry before proceeding"
117	raise ValueError, msg
118	return self.set_x(value)
119	elif name == 'y':
120	return self.set_y(value)
121	elif name == 'err':
122	value2 = abs(value)
123	return self.set_err(value2)
124	elif name == 'd_max':
125	return self.set_dmax(value)
126	elif name == 'q_min':
127	if value == None:
128	return self.set_qmin(-1.0)
129	return self.set_qmin(value)
130	elif name == 'q_max':
131	if value == None:
132	return self.set_qmax(-1.0)
133	return self.set_qmax(value)
134	elif name == 'alpha':
135	return self.set_alpha(value)
136	elif name == 'slit_height':
137	return self.set_slit_height(value)
138	elif name == 'slit_width':
139	return self.set_slit_width(value)
140	elif name == 'has_bck':
141	if value == True:
142	return self.set_has_bck(1)
143	elif value == False:
144	return self.set_has_bck(0)
145	else:
146	raise ValueError, "Invertor: has_bck can only be True or False"
147
148	return Cinvertor.__setattr__(self, name, value)
149
150	def __getattr__(self, name):
151	"""
152	Return the value of an attribute
153	"""
154	#import numpy
155	if name == 'x':
156	out = numpy.ones(self.get_nx())
157	self.get_x(out)
158	return out
159	elif name == 'y':
160	out = numpy.ones(self.get_ny())
161	self.get_y(out)
162	return out
163	elif name == 'err':
164	out = numpy.ones(self.get_nerr())
165	self.get_err(out)
166	return out
167	elif name == 'd_max':
168	return self.get_dmax()
169	elif name == 'q_min':
170	qmin = self.get_qmin()
171	if qmin < 0:
172	return None
173	return qmin
174	elif name == 'q_max':
175	qmax = self.get_qmax()
176	if qmax < 0:
177	return None
178	return qmax
179	elif name == 'alpha':
180	return self.get_alpha()
181	elif name == 'slit_height':
182	return self.get_slit_height()
183	elif name == 'slit_width':
184	return self.get_slit_width()
185	elif name == 'has_bck':
186	value = self.get_has_bck()
187	if value == 1:
188	return True
189	else:
190	return False
191	elif name in self.__dict__:
192	return self.__dict__[name]
193	return None
194
195	def clone(self):
196	"""
197	Return a clone of this instance
198	"""
199	#import copy
200
201	invertor = Invertor()
202	invertor.chi2 = self.chi2
203	invertor.elapsed = self.elapsed
204	invertor.nfunc = self.nfunc
205	invertor.alpha = self.alpha
206	invertor.d_max = self.d_max
207	invertor.q_min = self.q_min
208	invertor.q_max = self.q_max
209
210	invertor.x = self.x
211	invertor.y = self.y
212	invertor.err = self.err
213	invertor.has_bck = self.has_bck
214	invertor.slit_height = self.slit_height
215	invertor.slit_width = self.slit_width
216
217	invertor.info = copy.deepcopy(self.info)
218
219	return invertor
220
221	def invert(self, nfunc=10, nr=20):
222	"""
223	Perform inversion to P(r)
224
225	The problem is solved by posing the problem as Ax = b,
226	where x is the set of coefficients we are looking for.
227
228	Npts is the number of points.
229
230	In the following i refers to the ith base function coefficient.
231	The matrix has its entries j in its first Npts rows set to
232	A[i][j] = (Fourier transformed base function for point j)
233
234	We them choose a number of r-points, n_r, to evaluate the second
235	derivative of P(r) at. This is used as our regularization term.
236	For a vector r of length n_r, the following n_r rows are set to
237	A[i+Npts][j] = (2nd derivative of P(r), d2(P(r))/d(r)2, evaluated at r[j])
238
239	The vector b has its first Npts entries set to
240	b[j] = (I(q) observed for point j)
241
242	The following n_r entries are set to zero.
243
244	The result is found by using scipy.linalg.basic.lstsq to invert
245	the matrix and find the coefficients x.
246
247	:param nfunc: number of base functions to use.
248	:param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
249	:return: c_out, c_cov - the coefficients with covariance matrix
250
251	"""
252	# Reset the background value before proceeding
253	self.background = 0.0
254	return self.lstsq(nfunc, nr=nr)
255
256	def iq(self, out, q):
257	"""
258	Function to call to evaluate the scattering intensity
259
260	:param args: c-parameters, and q
261	:return: I(q)
262
263	"""
264	return Cinvertor.iq(self, out, q) + self.background
265
266	def invert_optimize(self, nfunc=10, nr=20):
267	"""
268	Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
269
270	This probably produce more reliable results, but is much slower.
271	The minimization function is set to
272	sum_i[ (I_obs(q_i) - I_theo(q_i))/err*2 ] + alpha reg_term,
273	where the reg_term is given by Svergun: it is the integral of
274	the square of the first derivative
275	of P(r), d(P(r))/dr, integrated over the full range of r.
276
277	:param nfunc: number of base functions to use.
278	:param nr: number of r points to evaluate the 2nd derivative at
279	for the reg. term.
280
281	:return: c_out, c_cov - the coefficients with covariance matrix
282
283	"""
284
285	#from scipy import optimize
286	#import time
287
288	self.nfunc = nfunc
289	# First, check that the current data is valid
290	if self.is_valid() <= 0:
291	msg = "Invertor.invert: Data array are of different length"
292	raise RuntimeError, msg
293
294	p = numpy.ones(nfunc)
295	t_0 = time.time()
296	out, cov_x, _, _, _ = optimize.leastsq(self.residuals,
297	p, full_output=1,
298	warning=True)
299
300	# Compute chi^2
301	res = self.residuals(out)
302	chisqr = 0
303	for i in range(len(res)):
304	chisqr += res[i]
305
306	self.chi2 = chisqr
307
308	# Store computation time
309	self.elapsed = time.time() - t_0
310
311	return out, cov_x
312
313	def pr_fit(self, nfunc=5):
314	"""
315	This is a direct fit to a given P(r). It assumes that the y data
316	is set to some P(r) distribution that we are trying to reproduce
317	with a set of base functions.
318
319	This method is provided as a test.
320	"""
321	#from scipy import optimize
322
323	# First, check that the current data is valid
324	if self.is_valid() <= 0:
325	msg = "Invertor.invert: Data arrays are of different length"
326	raise RuntimeError, msg
327
328	p = numpy.ones(nfunc)
329	t_0 = time.time()
330	out, cov_x, info, mesg, success = optimize.leastsq(self.pr_residuals, p,
331	full_output=1,
332	warning=True)
333
334	# Compute chi^2
335	res = self.pr_residuals(out)
336	chisqr = 0
337	for i in range(len(res)):
338	chisqr += res[i]
339
340	self.chisqr = chisqr
341
342	# Store computation time
343	self.elapsed = time.time() - t_0
344
345	return out, cov_x
346
347	def pr_err(self, c, c_cov, r):
348	"""
349	Returns the value of P(r) for a given r, and base function
350	coefficients, with error.
351
352	:param c: base function coefficients
353	:param c_cov: covariance matrice of the base function coefficients
354	:param r: r-value to evaluate P(r) at
355
356	:return: P(r)
357
358	"""
359	return self.get_pr_err(c, c_cov, r)
360
361	def _accept_q(self, q):
362	"""
363	Check q-value against user-defined range
364	"""
365	if not self.q_min == None and q < self.q_min:
366	return False
367	if not self.q_max == None and q > self.q_max:
368	return False
369	return True
370
371	def lstsq(self, nfunc=5, nr=20):
372	"""
373	The problem is solved by posing the problem as Ax = b,
374	where x is the set of coefficients we are looking for.
375
376	Npts is the number of points.
377
378	In the following i refers to the ith base function coefficient.
379	The matrix has its entries j in its first Npts rows set to
380	A[i][j] = (Fourier transformed base function for point j)
381
382	We them choose a number of r-points, n_r, to evaluate the second
383	derivative of P(r) at. This is used as our regularization term.
384	For a vector r of length n_r, the following n_r rows are set to
385	A[i+Npts][j] = (2nd derivative of P(r), d2(P(r))/d(r)2,
386	evaluated at r[j])
387
388	The vector b has its first Npts entries set to
389	b[j] = (I(q) observed for point j)
390
391	The following n_r entries are set to zero.
392
393	The result is found by using scipy.linalg.basic.lstsq to invert
394	the matrix and find the coefficients x.
395
396	:param nfunc: number of base functions to use.
397	:param nr: number of r points to evaluate the 2nd derivative at
398	for the reg. term.
399
400	If the result does not allow us to compute the covariance matrix,
401	a matrix filled with zeros will be returned.
402
403	"""
404	# Note: To make sure an array is contiguous:
405	# blah = numpy.ascontiguousarray(blah_original)
406	# ... before passing it to C
407
408	if self.is_valid() < 0:
409	msg = "Invertor: invalid data; incompatible data lengths."
410	raise RuntimeError, msg
411
412	self.nfunc = nfunc
413	# a -- An M x N matrix.
414	# b -- An M x nrhs matrix or M vector.
415	npts = len(self.x)
416	nq = nr
417	sqrt_alpha = math.sqrt(math.fabs(self.alpha))
418	if sqrt_alpha < 0.0:
419	nq = 0
420
421	# If we need to fit the background, add a term
422	if self.has_bck == True:
423	nfunc_0 = nfunc
424	nfunc += 1
425
426	a = numpy.zeros([npts+nq, nfunc])
427	b = numpy.zeros(npts+nq)
428	err = numpy.zeros([nfunc, nfunc])
429
430	# Construct the a matrix and b vector that represent the problem
431	t_0 = time.time()
432	self._get_matrix(nfunc, nq, a, b)
433
434	# Perform the inversion (least square fit)
435	c, chi2, rank, n = lstsq(a, b)
436	# Sanity check
437	try:
438	float(chi2)
439	except:
440	chi2 = -1.0
441	self.chi2 = chi2
442
443	inv_cov = numpy.zeros([nfunc,nfunc])
444	# Get the covariance matrix, defined as inv_cov = a_transposed * a
445	self._get_invcov_matrix(nfunc, nr, a, inv_cov)
446
447	# Compute the reg term size for the output
448	sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
449
450	if math.fabs(self.alpha) > 0:
451	new_alpha = sum_sig/(sum_reg/self.alpha)
452	else:
453	new_alpha = 0.0
454	self.suggested_alpha = new_alpha
455
456	try:
457	cov = numpy.linalg.pinv(inv_cov)
458	err = math.fabs(chi2/float(npts-nfunc)) * cov
459	except:
460	# We were not able to estimate the errors
461	# Return an empty error matrix
462	pass
463
464	# Keep a copy of the last output
465	if self.has_bck == False:
466	self.background = 0
467	self.out = c
468	self.cov = err
469	else:
470	self.background = c[0]
471
472	err_0 = numpy.zeros([nfunc, nfunc])
473	c_0 = numpy.zeros(nfunc)
474
475	for i in range(nfunc_0):
476	c_0[i] = c[i+1]
477	for j in range(nfunc_0):
478	err_0[i][j] = err[i+1][j+1]
479
480	self.out = c_0
481	self.cov = err_0
482
483	return self.out, self.cov
484
485	def estimate_numterms(self, isquit_func=None):
486	"""
487	Returns a reasonable guess for the
488	number of terms
489
490	:param isquit_func: reference to thread function to call to
491	check whether the computation needs to
492	be stopped.
493
494	:return: number of terms, alpha, message
495
496	"""
497	from num_term import Num_terms
498	estimator = Num_terms(self.clone())
499	try:
500	return estimator.num_terms(isquit_func)
501	except:
502	# If we fail, estimate alpha and return the default
503	# number of terms
504	best_alpha, message, elapsed =self.estimate_alpha(self.nfunc)
505	return self.nfunc, best_alpha, "Could not estimate number of terms"
506
507	def estimate_alpha(self, nfunc):
508	"""
509	Returns a reasonable guess for the
510	regularization constant alpha
511
512	:param nfunc: number of terms to use in the expansion.
513
514	:return: alpha, message, elapsed
515
516	where alpha is the estimate for alpha,
517	message is a message for the user,
518	elapsed is the computation time
519	"""
520	#import time
521	try:
522	pr = self.clone()
523
524	# T_0 for computation time
525	starttime = time.time()
526	elapsed = 0
527
528	# If the current alpha is zero, try
529	# another value
530	if pr.alpha <= 0:
531	pr.alpha = 0.0001
532
533	# Perform inversion to find the largest alpha
534	out, cov = pr.invert(nfunc)
535	elapsed = time.time() - starttime
536	initial_alpha = pr.alpha
537	initial_peaks = pr.get_peaks(out)
538
539	# Try the inversion with the estimated alpha
540	pr.alpha = pr.suggested_alpha
541	out, cov = pr.invert(nfunc)
542
543	npeaks = pr.get_peaks(out)
544	# if more than one peak to start with
545	# just return the estimate
546	if npeaks > 1:
547	#message = "Your P(r) is not smooth,
548	#please check your inversion parameters"
549	message = None
550	return pr.suggested_alpha, message, elapsed
551	else:
552
553	# Look at smaller values
554	# We assume that for the suggested alpha, we have 1 peak
555	# if not, send a message to change parameters
556	alpha = pr.suggested_alpha
557	best_alpha = pr.suggested_alpha
558	found = False
559	for i in range(10):
560	pr.alpha = (0.33)*(i+1) alpha
561	out, cov = pr.invert(nfunc)
562
563	peaks = pr.get_peaks(out)
564	if peaks > 1:
565	found = True
566	break
567	best_alpha = pr.alpha
568
569	# If we didn't find a turning point for alpha and
570	# the initial alpha already had only one peak,
571	# just return that
572	if not found and initial_peaks == 1 and \
573	initial_alpha<best_alpha:
574	best_alpha = initial_alpha
575
576	# Check whether the size makes sense
577	message = ''
578
579	if not found:
580	message = None
581	elif best_alpha >= 0.5 * pr.suggested_alpha:
582	# best alpha is too big, return a
583	# reasonable value
584	message = "The estimated alpha for your system is too "
585	messsage += "large. "
586	message += "Try increasing your maximum distance."
587
588	return best_alpha, message, elapsed
589
590	except:
591	message = "Invertor.estimate_alpha: %s" % sys.exc_value
592	return 0, message, elapsed
593
594
595	def to_file(self, path, npts=100):
596	"""
597	Save the state to a file that will be readable
598	by SliceView.
599
600	:param path: path of the file to write
601	:param npts: number of P(r) points to be written
602
603	"""
604	file = open(path, 'w')
605	file.write("#d_max=%g\n" % self.d_max)
606	file.write("#nfunc=%g\n" % self.nfunc)
607	file.write("#alpha=%g\n" % self.alpha)
608	file.write("#chi2=%g\n" % self.chi2)
609	file.write("#elapsed=%g\n" % self.elapsed)
610	file.write("#qmin=%s\n" % str(self.q_min))
611	file.write("#qmax=%s\n" % str(self.q_max))
612	file.write("#slit_height=%g\n" % self.slit_height)
613	file.write("#slit_width=%g\n" % self.slit_width)
614	file.write("#background=%g\n" % self.background)
615	if self.has_bck == True:
616	file.write("#has_bck=1\n")
617	else:
618	file.write("#has_bck=0\n")
619	file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
620	if not self.out == None:
621	if len(self.out) == len(self.cov):
622	for i in range(len(self.out)):
623	file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]),
624	str(self.cov[i][i])))
625	file.write("<r> <Pr> <dPr>\n")
626	r = numpy.arange(0.0, self.d_max, self.d_max/npts)
627
628	for r_i in r:
629	(value, err) = self.pr_err(self.out, self.cov, r_i)
630	file.write("%g %g %g\n" % (r_i, value, err))
631
632	file.close()
633
634
635	def from_file(self, path):
636	"""
637	Load the state of the Invertor from a file,
638	to be able to generate P(r) from a set of
639	parameters.
640
641	:param path: path of the file to load
642
643	"""
644	#import os
645	#import re
646	if os.path.isfile(path):
647	try:
648	fd = open(path, 'r')
649
650	buff = fd.read()
651	lines = buff.split('\n')
652	for line in lines:
653	if line.startswith('#d_max='):
654	toks = line.split('=')
655	self.d_max = float(toks[1])
656	elif line.startswith('#nfunc='):
657	toks = line.split('=')
658	self.nfunc = int(toks[1])
659	self.out = numpy.zeros(self.nfunc)
660	self.cov = numpy.zeros([self.nfunc, self.nfunc])
661	elif line.startswith('#alpha='):
662	toks = line.split('=')
663	self.alpha = float(toks[1])
664	elif line.startswith('#chi2='):
665	toks = line.split('=')
666	self.chi2 = float(toks[1])
667	elif line.startswith('#elapsed='):
668	toks = line.split('=')
669	self.elapsed = float(toks[1])
670	elif line.startswith('#alpha_estimate='):
671	toks = line.split('=')
672	self.suggested_alpha = float(toks[1])
673	elif line.startswith('#qmin='):
674	toks = line.split('=')
675	try:
676	self.q_min = float(toks[1])
677	except:
678	self.q_min = None
679	elif line.startswith('#qmax='):
680	toks = line.split('=')
681	try:
682	self.q_max = float(toks[1])
683	except:
684	self.q_max = None
685	elif line.startswith('#slit_height='):
686	toks = line.split('=')
687	self.slit_height = float(toks[1])
688	elif line.startswith('#slit_width='):
689	toks = line.split('=')
690	self.slit_width = float(toks[1])
691	elif line.startswith('#background='):
692	toks = line.split('=')
693	self.background = float(toks[1])
694	elif line.startswith('#has_bck='):
695	toks = line.split('=')
696	if int(toks[1]) == 1:
697	self.has_bck = True
698	else:
699	self.has_bck = False
700
701	# Now read in the parameters
702	elif line.startswith('#C_'):
703	toks = line.split('=')
704	p = re.compile('#C_([0-9]+)')
705	m = p.search(toks[0])
706	toks2 = toks[1].split('+-')
707	i = int(m.group(1))
708	self.out[i] = float(toks2[0])
709
710	self.cov[i][i] = float(toks2[1])
711
712	except:
713	msg = "Invertor.from_file: corrupted file\n%s" % sys.exc_value
714	raise RuntimeError, msg
715	else:
716	msg = "Invertor.from_file: '%s' is not a file" % str(path)
717	raise RuntimeError, msg
718
719
720	if __name__ == "__main__":
721	o = Invertor()
722
723
724
725
726

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasview/pr_inversion/invertor.py @ a01c743

Download in other formats: