invertor.py @ 0765c00

Last change on this file since 0765c00 was 9a5097c, checked in by andyfaff, 8 years ago
MAINT: import numpy as np
Property mode set to `100644`
File size: 25.3 KB

Line
1	# pylint: disable=invalid-name
2	"""
3	Module to perform P(r) inversion.
4	The module contains the Invertor class.
5
6	FIXME: The way the Invertor interacts with its C component should be cleaned up
7	"""
8
9	import numpy as np
10	import sys
11	import math
12	import time
13	import copy
14	import os
15	import re
16	import logging
17	from numpy.linalg import lstsq
18	from scipy import optimize
19	from sas.sascalc.pr.core.pr_inversion import Cinvertor
20
21	def help():
22	"""
23	Provide general online help text
24	Future work: extend this function to allow topic selection
25	"""
26	info_txt = "The inversion approach is based on Moore, J. Appl. Cryst. "
27	info_txt += "(1980) 13, 168-175.\n\n"
28	info_txt += "P(r) is set to be equal to an expansion of base functions "
29	info_txt += "of the type "
30	info_txt += "phi_n(r) = 2rsin(pinr/D_max). The coefficient of each "
31	info_txt += "base functions "
32	info_txt += "in the expansion is found by performing a least square fit "
33	info_txt += "with the "
34	info_txt += "following fit function:\n\n"
35	info_txt += "chi2 = sum_i[ I_meas(q_i) - I_th(q_i) ]2/error**2 +"
36	info_txt += "Reg_term\n\n"
37	info_txt += "where I_meas(q) is the measured scattering intensity and "
38	info_txt += "I_th(q) is "
39	info_txt += "the prediction from the Fourier transform of the P(r) "
40	info_txt += "expansion. "
41	info_txt += "The Reg_term term is a regularization term set to the second"
42	info_txt += " derivative "
43	info_txt += "d2P(r)/dr2 integrated over r. It is used to produce "
44	info_txt += "a smooth P(r) output.\n\n"
45	info_txt += "The following are user inputs:\n\n"
46	info_txt += " - Number of terms: the number of base functions in the P(r)"
47	info_txt += " expansion.\n\n"
48	info_txt += " - Regularization constant: a multiplicative constant "
49	info_txt += "to set the size of "
50	info_txt += "the regularization term.\n\n"
51	info_txt += " - Maximum distance: the maximum distance between any "
52	info_txt += "two points in the system.\n"
53
54	return info_txt
55
56
57	class Invertor(Cinvertor):
58	"""
59	Invertor class to perform P(r) inversion
60
61	The problem is solved by posing the problem as Ax = b,
62	where x is the set of coefficients we are looking for.
63
64	Npts is the number of points.
65
66	In the following i refers to the ith base function coefficient.
67	The matrix has its entries j in its first Npts rows set to ::
68
69	A[j][i] = (Fourier transformed base function for point j)
70
71	We them choose a number of r-points, n_r, to evaluate the second
72	derivative of P(r) at. This is used as our regularization term.
73	For a vector r of length n_r, the following n_r rows are set to ::
74
75	A[j+Npts][i] = (2nd derivative of P(r), d2(P(r))/d(r)2,
76	evaluated at r[j])
77
78	The vector b has its first Npts entries set to ::
79
80	b[j] = (I(q) observed for point j)
81
82	The following n_r entries are set to zero.
83
84	The result is found by using scipy.linalg.basic.lstsq to invert
85	the matrix and find the coefficients x.
86
87	Methods inherited from Cinvertor:
88
89	* ``get_peaks(pars)``: returns the number of P(r) peaks
90	* ``oscillations(pars)``: returns the oscillation parameters for the output P(r)
91	* ``get_positive(pars)``: returns the fraction of P(r) that is above zero
92	* ``get_pos_err(pars)``: returns the fraction of P(r) that is 1-sigma above zero
93	"""
94	## Chisqr of the last computation
95	chi2 = 0
96	## Time elapsed for last computation
97	elapsed = 0
98	## Alpha to get the reg term the same size as the signal
99	suggested_alpha = 0
100	## Last number of base functions used
101	nfunc = 10
102	## Last output values
103	out = None
104	## Last errors on output values
105	cov = None
106	## Background value
107	background = 0
108	## Information dictionary for application use
109	info = {}
110
111	def __init__(self):
112	Cinvertor.__init__(self)
113
114	def __setstate__(self, state):
115	"""
116	restore the state of invertor for pickle
117	"""
118	(self.__dict__, self.alpha, self.d_max,
119	self.q_min, self.q_max,
120	self.x, self.y,
121	self.err, self.has_bck,
122	self.slit_height, self.slit_width) = state
123
124	def __reduce_ex__(self, proto):
125	"""
126	Overwrite the __reduce_ex__
127	"""
128
129	state = (self.__dict__,
130	self.alpha, self.d_max,
131	self.q_min, self.q_max,
132	self.x, self.y,
133	self.err, self.has_bck,
134	self.slit_height, self.slit_width,
135	)
136	return (Invertor, tuple(), state, None, None)
137
138	def __setattr__(self, name, value):
139	"""
140	Set the value of an attribute.
141	Access the parent class methods for
142	x, y, err, d_max, q_min, q_max and alpha
143	"""
144	if name == 'x':
145	if 0.0 in value:
146	msg = "Invertor: one of your q-values is zero. "
147	msg += "Delete that entry before proceeding"
148	raise ValueError, msg
149	return self.set_x(value)
150	elif name == 'y':
151	return self.set_y(value)
152	elif name == 'err':
153	value2 = abs(value)
154	return self.set_err(value2)
155	elif name == 'd_max':
156	if value <= 0.0:
157	msg = "Invertor: d_max must be greater than zero."
158	msg += "Correct that entry before proceeding"
159	raise ValueError, msg
160	return self.set_dmax(value)
161	elif name == 'q_min':
162	if value == None:
163	return self.set_qmin(-1.0)
164	return self.set_qmin(value)
165	elif name == 'q_max':
166	if value == None:
167	return self.set_qmax(-1.0)
168	return self.set_qmax(value)
169	elif name == 'alpha':
170	return self.set_alpha(value)
171	elif name == 'slit_height':
172	return self.set_slit_height(value)
173	elif name == 'slit_width':
174	return self.set_slit_width(value)
175	elif name == 'has_bck':
176	if value == True:
177	return self.set_has_bck(1)
178	elif value == False:
179	return self.set_has_bck(0)
180	else:
181	raise ValueError, "Invertor: has_bck can only be True or False"
182
183	return Cinvertor.__setattr__(self, name, value)
184
185	def __getattr__(self, name):
186	"""
187	Return the value of an attribute
188	"""
189	#import numpy
190	if name == 'x':
191	out = np.ones(self.get_nx())
192	self.get_x(out)
193	return out
194	elif name == 'y':
195	out = np.ones(self.get_ny())
196	self.get_y(out)
197	return out
198	elif name == 'err':
199	out = np.ones(self.get_nerr())
200	self.get_err(out)
201	return out
202	elif name == 'd_max':
203	return self.get_dmax()
204	elif name == 'q_min':
205	qmin = self.get_qmin()
206	if qmin < 0:
207	return None
208	return qmin
209	elif name == 'q_max':
210	qmax = self.get_qmax()
211	if qmax < 0:
212	return None
213	return qmax
214	elif name == 'alpha':
215	return self.get_alpha()
216	elif name == 'slit_height':
217	return self.get_slit_height()
218	elif name == 'slit_width':
219	return self.get_slit_width()
220	elif name == 'has_bck':
221	value = self.get_has_bck()
222	if value == 1:
223	return True
224	else:
225	return False
226	elif name in self.__dict__:
227	return self.__dict__[name]
228	return None
229
230	def clone(self):
231	"""
232	Return a clone of this instance
233	"""
234	#import copy
235
236	invertor = Invertor()
237	invertor.chi2 = self.chi2
238	invertor.elapsed = self.elapsed
239	invertor.nfunc = self.nfunc
240	invertor.alpha = self.alpha
241	invertor.d_max = self.d_max
242	invertor.q_min = self.q_min
243	invertor.q_max = self.q_max
244
245	invertor.x = self.x
246	invertor.y = self.y
247	invertor.err = self.err
248	invertor.has_bck = self.has_bck
249	invertor.slit_height = self.slit_height
250	invertor.slit_width = self.slit_width
251
252	invertor.info = copy.deepcopy(self.info)
253
254	return invertor
255
256	def invert(self, nfunc=10, nr=20):
257	"""
258	Perform inversion to P(r)
259
260	The problem is solved by posing the problem as Ax = b,
261	where x is the set of coefficients we are looking for.
262
263	Npts is the number of points.
264
265	In the following i refers to the ith base function coefficient.
266	The matrix has its entries j in its first Npts rows set to ::
267
268	A[i][j] = (Fourier transformed base function for point j)
269
270	We them choose a number of r-points, n_r, to evaluate the second
271	derivative of P(r) at. This is used as our regularization term.
272	For a vector r of length n_r, the following n_r rows are set to ::
273
274	A[i+Npts][j] = (2nd derivative of P(r), d2(P(r))/d(r)2, evaluated at r[j])
275
276	The vector b has its first Npts entries set to ::
277
278	b[j] = (I(q) observed for point j)
279
280	The following n_r entries are set to zero.
281
282	The result is found by using scipy.linalg.basic.lstsq to invert
283	the matrix and find the coefficients x.
284
285	:param nfunc: number of base functions to use.
286	:param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
287	:return: c_out, c_cov - the coefficients with covariance matrix
288	"""
289	# Reset the background value before proceeding
290	self.background = 0.0
291	return self.lstsq(nfunc, nr=nr)
292
293	def iq(self, out, q):
294	"""
295	Function to call to evaluate the scattering intensity
296
297	:param args: c-parameters, and q
298	:return: I(q)
299
300	"""
301	return Cinvertor.iq(self, out, q) + self.background
302
303	def invert_optimize(self, nfunc=10, nr=20):
304	"""
305	Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
306
307	This probably produce more reliable results, but is much slower.
308	The minimization function is set to
309	sum_i[ (I_obs(q_i) - I_theo(q_i))/err*2 ] + alpha reg_term,
310	where the reg_term is given by Svergun: it is the integral of
311	the square of the first derivative
312	of P(r), d(P(r))/dr, integrated over the full range of r.
313
314	:param nfunc: number of base functions to use.
315	:param nr: number of r points to evaluate the 2nd derivative at
316	for the reg. term.
317
318	:return: c_out, c_cov - the coefficients with covariance matrix
319
320	"""
321	self.nfunc = nfunc
322	# First, check that the current data is valid
323	if self.is_valid() <= 0:
324	msg = "Invertor.invert: Data array are of different length"
325	raise RuntimeError, msg
326
327	p = np.ones(nfunc)
328	t_0 = time.time()
329	out, cov_x, _, _, _ = optimize.leastsq(self.residuals, p, full_output=1)
330
331	# Compute chi^2
332	res = self.residuals(out)
333	chisqr = 0
334	for i in range(len(res)):
335	chisqr += res[i]
336
337	self.chi2 = chisqr
338
339	# Store computation time
340	self.elapsed = time.time() - t_0
341
342	if cov_x is None:
343	cov_x = np.ones([nfunc, nfunc])
344	cov_x *= math.fabs(chisqr)
345	return out, cov_x
346
347	def pr_fit(self, nfunc=5):
348	"""
349	This is a direct fit to a given P(r). It assumes that the y data
350	is set to some P(r) distribution that we are trying to reproduce
351	with a set of base functions.
352
353	This method is provided as a test.
354	"""
355	# First, check that the current data is valid
356	if self.is_valid() <= 0:
357	msg = "Invertor.invert: Data arrays are of different length"
358	raise RuntimeError, msg
359
360	p = np.ones(nfunc)
361	t_0 = time.time()
362	out, cov_x, _, _, _ = optimize.leastsq(self.pr_residuals, p, full_output=1)
363
364	# Compute chi^2
365	res = self.pr_residuals(out)
366	chisqr = 0
367	for i in range(len(res)):
368	chisqr += res[i]
369
370	self.chisqr = chisqr
371
372	# Store computation time
373	self.elapsed = time.time() - t_0
374
375	return out, cov_x
376
377	def pr_err(self, c, c_cov, r):
378	"""
379	Returns the value of P(r) for a given r, and base function
380	coefficients, with error.
381
382	:param c: base function coefficients
383	:param c_cov: covariance matrice of the base function coefficients
384	:param r: r-value to evaluate P(r) at
385
386	:return: P(r)
387
388	"""
389	return self.get_pr_err(c, c_cov, r)
390
391	def _accept_q(self, q):
392	"""
393	Check q-value against user-defined range
394	"""
395	if not self.q_min == None and q < self.q_min:
396	return False
397	if not self.q_max == None and q > self.q_max:
398	return False
399	return True
400
401	def lstsq(self, nfunc=5, nr=20):
402	"""
403	The problem is solved by posing the problem as Ax = b,
404	where x is the set of coefficients we are looking for.
405
406	Npts is the number of points.
407
408	In the following i refers to the ith base function coefficient.
409	The matrix has its entries j in its first Npts rows set to ::
410
411	A[i][j] = (Fourier transformed base function for point j)
412
413	We them choose a number of r-points, n_r, to evaluate the second
414	derivative of P(r) at. This is used as our regularization term.
415	For a vector r of length n_r, the following n_r rows are set to ::
416
417	A[i+Npts][j] = (2nd derivative of P(r), d2(P(r))/d(r)2,
418	evaluated at r[j])
419
420	The vector b has its first Npts entries set to ::
421
422	b[j] = (I(q) observed for point j)
423
424	The following n_r entries are set to zero.
425
426	The result is found by using scipy.linalg.basic.lstsq to invert
427	the matrix and find the coefficients x.
428
429	:param nfunc: number of base functions to use.
430	:param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
431
432	If the result does not allow us to compute the covariance matrix,
433	a matrix filled with zeros will be returned.
434
435	"""
436	# Note: To make sure an array is contiguous:
437	# blah = np.ascontiguousarray(blah_original)
438	# ... before passing it to C
439
440	if self.is_valid() < 0:
441	msg = "Invertor: invalid data; incompatible data lengths."
442	raise RuntimeError, msg
443
444	self.nfunc = nfunc
445	# a -- An M x N matrix.
446	# b -- An M x nrhs matrix or M vector.
447	npts = len(self.x)
448	nq = nr
449	sqrt_alpha = math.sqrt(math.fabs(self.alpha))
450	if sqrt_alpha < 0.0:
451	nq = 0
452
453	# If we need to fit the background, add a term
454	if self.has_bck == True:
455	nfunc_0 = nfunc
456	nfunc += 1
457
458	a = np.zeros([npts + nq, nfunc])
459	b = np.zeros(npts + nq)
460	err = np.zeros([nfunc, nfunc])
461
462	# Construct the a matrix and b vector that represent the problem
463	t_0 = time.time()
464	try:
465	self._get_matrix(nfunc, nq, a, b)
466	except:
467	raise RuntimeError, "Invertor: could not invert I(Q)\n %s" % sys.exc_value
468
469	# Perform the inversion (least square fit)
470	c, chi2, _, _ = lstsq(a, b)
471	# Sanity check
472	try:
473	float(chi2)
474	except:
475	chi2 = -1.0
476	self.chi2 = chi2
477
478	inv_cov = np.zeros([nfunc, nfunc])
479	# Get the covariance matrix, defined as inv_cov = a_transposed * a
480	self._get_invcov_matrix(nfunc, nr, a, inv_cov)
481
482	# Compute the reg term size for the output
483	sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
484
485	if math.fabs(self.alpha) > 0:
486	new_alpha = sum_sig / (sum_reg / self.alpha)
487	else:
488	new_alpha = 0.0
489	self.suggested_alpha = new_alpha
490
491	try:
492	cov = np.linalg.pinv(inv_cov)
493	err = math.fabs(chi2 / float(npts - nfunc)) * cov
494	except:
495	# We were not able to estimate the errors
496	# Return an empty error matrix
497	logging.error(sys.exc_value)
498
499	# Keep a copy of the last output
500	if self.has_bck == False:
501	self.background = 0
502	self.out = c
503	self.cov = err
504	else:
505	self.background = c[0]
506
507	err_0 = np.zeros([nfunc, nfunc])
508	c_0 = np.zeros(nfunc)
509
510	for i in range(nfunc_0):
511	c_0[i] = c[i + 1]
512	for j in range(nfunc_0):
513	err_0[i][j] = err[i + 1][j + 1]
514
515	self.out = c_0
516	self.cov = err_0
517
518	# Store computation time
519	self.elapsed = time.time() - t_0
520
521	return self.out, self.cov
522
523	def estimate_numterms(self, isquit_func=None):
524	"""
525	Returns a reasonable guess for the
526	number of terms
527
528	:param isquit_func:
529	reference to thread function to call to check whether the computation needs to
530	be stopped.
531
532	:return: number of terms, alpha, message
533
534	"""
535	from num_term import NTermEstimator
536	estimator = NTermEstimator(self.clone())
537	try:
538	return estimator.num_terms(isquit_func)
539	except:
540	# If we fail, estimate alpha and return the default
541	# number of terms
542	best_alpha, _, _ = self.estimate_alpha(self.nfunc)
543	logging.warning("Invertor.estimate_numterms: %s" % sys.exc_value)
544	return self.nfunc, best_alpha, "Could not estimate number of terms"
545
546	def estimate_alpha(self, nfunc):
547	"""
548	Returns a reasonable guess for the
549	regularization constant alpha
550
551	:param nfunc: number of terms to use in the expansion.
552
553	:return: alpha, message, elapsed
554
555	where alpha is the estimate for alpha,
556	message is a message for the user,
557	elapsed is the computation time
558	"""
559	#import time
560	try:
561	pr = self.clone()
562
563	# T_0 for computation time
564	starttime = time.time()
565	elapsed = 0
566
567	# If the current alpha is zero, try
568	# another value
569	if pr.alpha <= 0:
570	pr.alpha = 0.0001
571
572	# Perform inversion to find the largest alpha
573	out, _ = pr.invert(nfunc)
574	elapsed = time.time() - starttime
575	initial_alpha = pr.alpha
576	initial_peaks = pr.get_peaks(out)
577
578	# Try the inversion with the estimated alpha
579	pr.alpha = pr.suggested_alpha
580	out, _ = pr.invert(nfunc)
581
582	npeaks = pr.get_peaks(out)
583	# if more than one peak to start with
584	# just return the estimate
585	if npeaks > 1:
586	#message = "Your P(r) is not smooth,
587	#please check your inversion parameters"
588	message = None
589	return pr.suggested_alpha, message, elapsed
590	else:
591
592	# Look at smaller values
593	# We assume that for the suggested alpha, we have 1 peak
594	# if not, send a message to change parameters
595	alpha = pr.suggested_alpha
596	best_alpha = pr.suggested_alpha
597	found = False
598	for i in range(10):
599	pr.alpha = (0.33) ** (i + 1) * alpha
600	out, _ = pr.invert(nfunc)
601
602	peaks = pr.get_peaks(out)
603	if peaks > 1:
604	found = True
605	break
606	best_alpha = pr.alpha
607
608	# If we didn't find a turning point for alpha and
609	# the initial alpha already had only one peak,
610	# just return that
611	if not found and initial_peaks == 1 and \
612	initial_alpha < best_alpha:
613	best_alpha = initial_alpha
614
615	# Check whether the size makes sense
616	message = ''
617
618	if not found:
619	message = None
620	elif best_alpha >= 0.5 * pr.suggested_alpha:
621	# best alpha is too big, return a
622	# reasonable value
623	message = "The estimated alpha for your system is too "
624	message += "large. "
625	message += "Try increasing your maximum distance."
626
627	return best_alpha, message, elapsed
628
629	except:
630	message = "Invertor.estimate_alpha: %s" % sys.exc_value
631	return 0, message, elapsed
632
633	def to_file(self, path, npts=100):
634	"""
635	Save the state to a file that will be readable
636	by SliceView.
637
638	:param path: path of the file to write
639	:param npts: number of P(r) points to be written
640
641	"""
642	file = open(path, 'w')
643	file.write("#d_max=%g\n" % self.d_max)
644	file.write("#nfunc=%g\n" % self.nfunc)
645	file.write("#alpha=%g\n" % self.alpha)
646	file.write("#chi2=%g\n" % self.chi2)
647	file.write("#elapsed=%g\n" % self.elapsed)
648	file.write("#qmin=%s\n" % str(self.q_min))
649	file.write("#qmax=%s\n" % str(self.q_max))
650	file.write("#slit_height=%g\n" % self.slit_height)
651	file.write("#slit_width=%g\n" % self.slit_width)
652	file.write("#background=%g\n" % self.background)
653	if self.has_bck == True:
654	file.write("#has_bck=1\n")
655	else:
656	file.write("#has_bck=0\n")
657	file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
658	if not self.out == None:
659	if len(self.out) == len(self.cov):
660	for i in range(len(self.out)):
661	file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]),
662	str(self.cov[i][i])))
663	file.write("<r> <Pr> <dPr>\n")
664	r = np.arange(0.0, self.d_max, self.d_max / npts)
665
666	for r_i in r:
667	(value, err) = self.pr_err(self.out, self.cov, r_i)
668	file.write("%g %g %g\n" % (r_i, value, err))
669
670	file.close()
671
672	def from_file(self, path):
673	"""
674	Load the state of the Invertor from a file,
675	to be able to generate P(r) from a set of
676	parameters.
677
678	:param path: path of the file to load
679
680	"""
681	#import os
682	#import re
683	if os.path.isfile(path):
684	try:
685	fd = open(path, 'r')
686
687	buff = fd.read()
688	lines = buff.split('\n')
689	for line in lines:
690	if line.startswith('#d_max='):
691	toks = line.split('=')
692	self.d_max = float(toks[1])
693	elif line.startswith('#nfunc='):
694	toks = line.split('=')
695	self.nfunc = int(toks[1])
696	self.out = np.zeros(self.nfunc)
697	self.cov = np.zeros([self.nfunc, self.nfunc])
698	elif line.startswith('#alpha='):
699	toks = line.split('=')
700	self.alpha = float(toks[1])
701	elif line.startswith('#chi2='):
702	toks = line.split('=')
703	self.chi2 = float(toks[1])
704	elif line.startswith('#elapsed='):
705	toks = line.split('=')
706	self.elapsed = float(toks[1])
707	elif line.startswith('#alpha_estimate='):
708	toks = line.split('=')
709	self.suggested_alpha = float(toks[1])
710	elif line.startswith('#qmin='):
711	toks = line.split('=')
712	try:
713	self.q_min = float(toks[1])
714	except:
715	self.q_min = None
716	elif line.startswith('#qmax='):
717	toks = line.split('=')
718	try:
719	self.q_max = float(toks[1])
720	except:
721	self.q_max = None
722	elif line.startswith('#slit_height='):
723	toks = line.split('=')
724	self.slit_height = float(toks[1])
725	elif line.startswith('#slit_width='):
726	toks = line.split('=')
727	self.slit_width = float(toks[1])
728	elif line.startswith('#background='):
729	toks = line.split('=')
730	self.background = float(toks[1])
731	elif line.startswith('#has_bck='):
732	toks = line.split('=')
733	if int(toks[1]) == 1:
734	self.has_bck = True
735	else:
736	self.has_bck = False
737
738	# Now read in the parameters
739	elif line.startswith('#C_'):
740	toks = line.split('=')
741	p = re.compile('#C_([0-9]+)')
742	m = p.search(toks[0])
743	toks2 = toks[1].split('+-')
744	i = int(m.group(1))
745	self.out[i] = float(toks2[0])
746
747	self.cov[i][i] = float(toks2[1])
748
749	except:
750	msg = "Invertor.from_file: corrupted file\n%s" % sys.exc_value
751	raise RuntimeError, msg
752	else:
753	msg = "Invertor.from_file: '%s' is not a file" % str(path)
754	raise RuntimeError, msg

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasview/src/sas/sascalc/pr/invertor.py @ 0765c00

Download in other formats: