invertor.py @ b9d74f3

Last change on this file since b9d74f3 was b9d74f3, checked in by andyfaff, 8 years ago
MAINT: use raise Exception() not raise Exception
Property mode set to `100644`
File size: 25.3 KB

Line
1	# pylint: disable=invalid-name
2	"""
3	Module to perform P(r) inversion.
4	The module contains the Invertor class.
5
6	FIXME: The way the Invertor interacts with its C component should be cleaned up
7	"""
8
9	import numpy as np
10	import sys
11	import math
12	import time
13	import copy
14	import os
15	import re
16	import logging
17	from numpy.linalg import lstsq
18	from scipy import optimize
19	from sas.sascalc.pr.core.pr_inversion import Cinvertor
20
21	logger = logging.getLogger(__name__)
22
23	def help():
24	"""
25	Provide general online help text
26	Future work: extend this function to allow topic selection
27	"""
28	info_txt = "The inversion approach is based on Moore, J. Appl. Cryst. "
29	info_txt += "(1980) 13, 168-175.\n\n"
30	info_txt += "P(r) is set to be equal to an expansion of base functions "
31	info_txt += "of the type "
32	info_txt += "phi_n(r) = 2rsin(pinr/D_max). The coefficient of each "
33	info_txt += "base functions "
34	info_txt += "in the expansion is found by performing a least square fit "
35	info_txt += "with the "
36	info_txt += "following fit function:\n\n"
37	info_txt += "chi2 = sum_i[ I_meas(q_i) - I_th(q_i) ]2/error**2 +"
38	info_txt += "Reg_term\n\n"
39	info_txt += "where I_meas(q) is the measured scattering intensity and "
40	info_txt += "I_th(q) is "
41	info_txt += "the prediction from the Fourier transform of the P(r) "
42	info_txt += "expansion. "
43	info_txt += "The Reg_term term is a regularization term set to the second"
44	info_txt += " derivative "
45	info_txt += "d2P(r)/dr2 integrated over r. It is used to produce "
46	info_txt += "a smooth P(r) output.\n\n"
47	info_txt += "The following are user inputs:\n\n"
48	info_txt += " - Number of terms: the number of base functions in the P(r)"
49	info_txt += " expansion.\n\n"
50	info_txt += " - Regularization constant: a multiplicative constant "
51	info_txt += "to set the size of "
52	info_txt += "the regularization term.\n\n"
53	info_txt += " - Maximum distance: the maximum distance between any "
54	info_txt += "two points in the system.\n"
55
56	return info_txt
57
58
59	class Invertor(Cinvertor):
60	"""
61	Invertor class to perform P(r) inversion
62
63	The problem is solved by posing the problem as Ax = b,
64	where x is the set of coefficients we are looking for.
65
66	Npts is the number of points.
67
68	In the following i refers to the ith base function coefficient.
69	The matrix has its entries j in its first Npts rows set to ::
70
71	A[j][i] = (Fourier transformed base function for point j)
72
73	We them choose a number of r-points, n_r, to evaluate the second
74	derivative of P(r) at. This is used as our regularization term.
75	For a vector r of length n_r, the following n_r rows are set to ::
76
77	A[j+Npts][i] = (2nd derivative of P(r), d2(P(r))/d(r)2,
78	evaluated at r[j])
79
80	The vector b has its first Npts entries set to ::
81
82	b[j] = (I(q) observed for point j)
83
84	The following n_r entries are set to zero.
85
86	The result is found by using scipy.linalg.basic.lstsq to invert
87	the matrix and find the coefficients x.
88
89	Methods inherited from Cinvertor:
90
91	* ``get_peaks(pars)``: returns the number of P(r) peaks
92	* ``oscillations(pars)``: returns the oscillation parameters for the output P(r)
93	* ``get_positive(pars)``: returns the fraction of P(r) that is above zero
94	* ``get_pos_err(pars)``: returns the fraction of P(r) that is 1-sigma above zero
95	"""
96	## Chisqr of the last computation
97	chi2 = 0
98	## Time elapsed for last computation
99	elapsed = 0
100	## Alpha to get the reg term the same size as the signal
101	suggested_alpha = 0
102	## Last number of base functions used
103	nfunc = 10
104	## Last output values
105	out = None
106	## Last errors on output values
107	cov = None
108	## Background value
109	background = 0
110	## Information dictionary for application use
111	info = {}
112
113	def __init__(self):
114	Cinvertor.__init__(self)
115
116	def __setstate__(self, state):
117	"""
118	restore the state of invertor for pickle
119	"""
120	(self.__dict__, self.alpha, self.d_max,
121	self.q_min, self.q_max,
122	self.x, self.y,
123	self.err, self.has_bck,
124	self.slit_height, self.slit_width) = state
125
126	def __reduce_ex__(self, proto):
127	"""
128	Overwrite the __reduce_ex__
129	"""
130
131	state = (self.__dict__,
132	self.alpha, self.d_max,
133	self.q_min, self.q_max,
134	self.x, self.y,
135	self.err, self.has_bck,
136	self.slit_height, self.slit_width,
137	)
138	return (Invertor, tuple(), state, None, None)
139
140	def __setattr__(self, name, value):
141	"""
142	Set the value of an attribute.
143	Access the parent class methods for
144	x, y, err, d_max, q_min, q_max and alpha
145	"""
146	if name == 'x':
147	if 0.0 in value:
148	msg = "Invertor: one of your q-values is zero. "
149	msg += "Delete that entry before proceeding"
150	raise ValueError(msg)
151	return self.set_x(value)
152	elif name == 'y':
153	return self.set_y(value)
154	elif name == 'err':
155	value2 = abs(value)
156	return self.set_err(value2)
157	elif name == 'd_max':
158	if value <= 0.0:
159	msg = "Invertor: d_max must be greater than zero."
160	msg += "Correct that entry before proceeding"
161	raise ValueError(msg)
162	return self.set_dmax(value)
163	elif name == 'q_min':
164	if value is None:
165	return self.set_qmin(-1.0)
166	return self.set_qmin(value)
167	elif name == 'q_max':
168	if value is None:
169	return self.set_qmax(-1.0)
170	return self.set_qmax(value)
171	elif name == 'alpha':
172	return self.set_alpha(value)
173	elif name == 'slit_height':
174	return self.set_slit_height(value)
175	elif name == 'slit_width':
176	return self.set_slit_width(value)
177	elif name == 'has_bck':
178	if value == True:
179	return self.set_has_bck(1)
180	elif value == False:
181	return self.set_has_bck(0)
182	else:
183	raise ValueError("Invertor: has_bck can only be True or False")
184
185	return Cinvertor.__setattr__(self, name, value)
186
187	def __getattr__(self, name):
188	"""
189	Return the value of an attribute
190	"""
191	#import numpy
192	if name == 'x':
193	out = np.ones(self.get_nx())
194	self.get_x(out)
195	return out
196	elif name == 'y':
197	out = np.ones(self.get_ny())
198	self.get_y(out)
199	return out
200	elif name == 'err':
201	out = np.ones(self.get_nerr())
202	self.get_err(out)
203	return out
204	elif name == 'd_max':
205	return self.get_dmax()
206	elif name == 'q_min':
207	qmin = self.get_qmin()
208	if qmin < 0:
209	return None
210	return qmin
211	elif name == 'q_max':
212	qmax = self.get_qmax()
213	if qmax < 0:
214	return None
215	return qmax
216	elif name == 'alpha':
217	return self.get_alpha()
218	elif name == 'slit_height':
219	return self.get_slit_height()
220	elif name == 'slit_width':
221	return self.get_slit_width()
222	elif name == 'has_bck':
223	value = self.get_has_bck()
224	if value == 1:
225	return True
226	else:
227	return False
228	elif name in self.__dict__:
229	return self.__dict__[name]
230	return None
231
232	def clone(self):
233	"""
234	Return a clone of this instance
235	"""
236	#import copy
237
238	invertor = Invertor()
239	invertor.chi2 = self.chi2
240	invertor.elapsed = self.elapsed
241	invertor.nfunc = self.nfunc
242	invertor.alpha = self.alpha
243	invertor.d_max = self.d_max
244	invertor.q_min = self.q_min
245	invertor.q_max = self.q_max
246
247	invertor.x = self.x
248	invertor.y = self.y
249	invertor.err = self.err
250	invertor.has_bck = self.has_bck
251	invertor.slit_height = self.slit_height
252	invertor.slit_width = self.slit_width
253
254	invertor.info = copy.deepcopy(self.info)
255
256	return invertor
257
258	def invert(self, nfunc=10, nr=20):
259	"""
260	Perform inversion to P(r)
261
262	The problem is solved by posing the problem as Ax = b,
263	where x is the set of coefficients we are looking for.
264
265	Npts is the number of points.
266
267	In the following i refers to the ith base function coefficient.
268	The matrix has its entries j in its first Npts rows set to ::
269
270	A[i][j] = (Fourier transformed base function for point j)
271
272	We them choose a number of r-points, n_r, to evaluate the second
273	derivative of P(r) at. This is used as our regularization term.
274	For a vector r of length n_r, the following n_r rows are set to ::
275
276	A[i+Npts][j] = (2nd derivative of P(r), d2(P(r))/d(r)2, evaluated at r[j])
277
278	The vector b has its first Npts entries set to ::
279
280	b[j] = (I(q) observed for point j)
281
282	The following n_r entries are set to zero.
283
284	The result is found by using scipy.linalg.basic.lstsq to invert
285	the matrix and find the coefficients x.
286
287	:param nfunc: number of base functions to use.
288	:param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
289	:return: c_out, c_cov - the coefficients with covariance matrix
290	"""
291	# Reset the background value before proceeding
292	self.background = 0.0
293	return self.lstsq(nfunc, nr=nr)
294
295	def iq(self, out, q):
296	"""
297	Function to call to evaluate the scattering intensity
298
299	:param args: c-parameters, and q
300	:return: I(q)
301
302	"""
303	return Cinvertor.iq(self, out, q) + self.background
304
305	def invert_optimize(self, nfunc=10, nr=20):
306	"""
307	Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
308
309	This probably produce more reliable results, but is much slower.
310	The minimization function is set to
311	sum_i[ (I_obs(q_i) - I_theo(q_i))/err*2 ] + alpha reg_term,
312	where the reg_term is given by Svergun: it is the integral of
313	the square of the first derivative
314	of P(r), d(P(r))/dr, integrated over the full range of r.
315
316	:param nfunc: number of base functions to use.
317	:param nr: number of r points to evaluate the 2nd derivative at
318	for the reg. term.
319
320	:return: c_out, c_cov - the coefficients with covariance matrix
321
322	"""
323	self.nfunc = nfunc
324	# First, check that the current data is valid
325	if self.is_valid() <= 0:
326	msg = "Invertor.invert: Data array are of different length"
327	raise RuntimeError(msg)
328
329	p = np.ones(nfunc)
330	t_0 = time.time()
331	out, cov_x, _, _, _ = optimize.leastsq(self.residuals, p, full_output=1)
332
333	# Compute chi^2
334	res = self.residuals(out)
335	chisqr = 0
336	for i in range(len(res)):
337	chisqr += res[i]
338
339	self.chi2 = chisqr
340
341	# Store computation time
342	self.elapsed = time.time() - t_0
343
344	if cov_x is None:
345	cov_x = np.ones([nfunc, nfunc])
346	cov_x *= math.fabs(chisqr)
347	return out, cov_x
348
349	def pr_fit(self, nfunc=5):
350	"""
351	This is a direct fit to a given P(r). It assumes that the y data
352	is set to some P(r) distribution that we are trying to reproduce
353	with a set of base functions.
354
355	This method is provided as a test.
356	"""
357	# First, check that the current data is valid
358	if self.is_valid() <= 0:
359	msg = "Invertor.invert: Data arrays are of different length"
360	raise RuntimeError(msg)
361
362	p = np.ones(nfunc)
363	t_0 = time.time()
364	out, cov_x, _, _, _ = optimize.leastsq(self.pr_residuals, p, full_output=1)
365
366	# Compute chi^2
367	res = self.pr_residuals(out)
368	chisqr = 0
369	for i in range(len(res)):
370	chisqr += res[i]
371
372	self.chisqr = chisqr
373
374	# Store computation time
375	self.elapsed = time.time() - t_0
376
377	return out, cov_x
378
379	def pr_err(self, c, c_cov, r):
380	"""
381	Returns the value of P(r) for a given r, and base function
382	coefficients, with error.
383
384	:param c: base function coefficients
385	:param c_cov: covariance matrice of the base function coefficients
386	:param r: r-value to evaluate P(r) at
387
388	:return: P(r)
389
390	"""
391	return self.get_pr_err(c, c_cov, r)
392
393	def _accept_q(self, q):
394	"""
395	Check q-value against user-defined range
396	"""
397	if self.q_min is not None and q < self.q_min:
398	return False
399	if self.q_max is not None and q > self.q_max:
400	return False
401	return True
402
403	def lstsq(self, nfunc=5, nr=20):
404	"""
405	The problem is solved by posing the problem as Ax = b,
406	where x is the set of coefficients we are looking for.
407
408	Npts is the number of points.
409
410	In the following i refers to the ith base function coefficient.
411	The matrix has its entries j in its first Npts rows set to ::
412
413	A[i][j] = (Fourier transformed base function for point j)
414
415	We them choose a number of r-points, n_r, to evaluate the second
416	derivative of P(r) at. This is used as our regularization term.
417	For a vector r of length n_r, the following n_r rows are set to ::
418
419	A[i+Npts][j] = (2nd derivative of P(r), d2(P(r))/d(r)2,
420	evaluated at r[j])
421
422	The vector b has its first Npts entries set to ::
423
424	b[j] = (I(q) observed for point j)
425
426	The following n_r entries are set to zero.
427
428	The result is found by using scipy.linalg.basic.lstsq to invert
429	the matrix and find the coefficients x.
430
431	:param nfunc: number of base functions to use.
432	:param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
433
434	If the result does not allow us to compute the covariance matrix,
435	a matrix filled with zeros will be returned.
436
437	"""
438	# Note: To make sure an array is contiguous:
439	# blah = np.ascontiguousarray(blah_original)
440	# ... before passing it to C
441
442	if self.is_valid() < 0:
443	msg = "Invertor: invalid data; incompatible data lengths."
444	raise RuntimeError(msg)
445
446	self.nfunc = nfunc
447	# a -- An M x N matrix.
448	# b -- An M x nrhs matrix or M vector.
449	npts = len(self.x)
450	nq = nr
451	sqrt_alpha = math.sqrt(math.fabs(self.alpha))
452	if sqrt_alpha < 0.0:
453	nq = 0
454
455	# If we need to fit the background, add a term
456	if self.has_bck == True:
457	nfunc_0 = nfunc
458	nfunc += 1
459
460	a = np.zeros([npts + nq, nfunc])
461	b = np.zeros(npts + nq)
462	err = np.zeros([nfunc, nfunc])
463
464	# Construct the a matrix and b vector that represent the problem
465	t_0 = time.time()
466	try:
467	self._get_matrix(nfunc, nq, a, b)
468	except:
469	raise RuntimeError("Invertor: could not invert I(Q)\n %s" % sys.exc_value)
470
471	# Perform the inversion (least square fit)
472	c, chi2, _, _ = lstsq(a, b)
473	# Sanity check
474	try:
475	float(chi2)
476	except:
477	chi2 = -1.0
478	self.chi2 = chi2
479
480	inv_cov = np.zeros([nfunc, nfunc])
481	# Get the covariance matrix, defined as inv_cov = a_transposed * a
482	self._get_invcov_matrix(nfunc, nr, a, inv_cov)
483
484	# Compute the reg term size for the output
485	sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
486
487	if math.fabs(self.alpha) > 0:
488	new_alpha = sum_sig / (sum_reg / self.alpha)
489	else:
490	new_alpha = 0.0
491	self.suggested_alpha = new_alpha
492
493	try:
494	cov = np.linalg.pinv(inv_cov)
495	err = math.fabs(chi2 / float(npts - nfunc)) * cov
496	except:
497	# We were not able to estimate the errors
498	# Return an empty error matrix
499	logger.error(sys.exc_value)
500
501	# Keep a copy of the last output
502	if self.has_bck == False:
503	self.background = 0
504	self.out = c
505	self.cov = err
506	else:
507	self.background = c[0]
508
509	err_0 = np.zeros([nfunc, nfunc])
510	c_0 = np.zeros(nfunc)
511
512	for i in range(nfunc_0):
513	c_0[i] = c[i + 1]
514	for j in range(nfunc_0):
515	err_0[i][j] = err[i + 1][j + 1]
516
517	self.out = c_0
518	self.cov = err_0
519
520	# Store computation time
521	self.elapsed = time.time() - t_0
522
523	return self.out, self.cov
524
525	def estimate_numterms(self, isquit_func=None):
526	"""
527	Returns a reasonable guess for the
528	number of terms
529
530	:param isquit_func:
531	reference to thread function to call to check whether the computation needs to
532	be stopped.
533
534	:return: number of terms, alpha, message
535
536	"""
537	from num_term import NTermEstimator
538	estimator = NTermEstimator(self.clone())
539	try:
540	return estimator.num_terms(isquit_func)
541	except:
542	# If we fail, estimate alpha and return the default
543	# number of terms
544	best_alpha, _, _ = self.estimate_alpha(self.nfunc)
545	logger.warning("Invertor.estimate_numterms: %s" % sys.exc_value)
546	return self.nfunc, best_alpha, "Could not estimate number of terms"
547
548	def estimate_alpha(self, nfunc):
549	"""
550	Returns a reasonable guess for the
551	regularization constant alpha
552
553	:param nfunc: number of terms to use in the expansion.
554
555	:return: alpha, message, elapsed
556
557	where alpha is the estimate for alpha,
558	message is a message for the user,
559	elapsed is the computation time
560	"""
561	#import time
562	try:
563	pr = self.clone()
564
565	# T_0 for computation time
566	starttime = time.time()
567	elapsed = 0
568
569	# If the current alpha is zero, try
570	# another value
571	if pr.alpha <= 0:
572	pr.alpha = 0.0001
573
574	# Perform inversion to find the largest alpha
575	out, _ = pr.invert(nfunc)
576	elapsed = time.time() - starttime
577	initial_alpha = pr.alpha
578	initial_peaks = pr.get_peaks(out)
579
580	# Try the inversion with the estimated alpha
581	pr.alpha = pr.suggested_alpha
582	out, _ = pr.invert(nfunc)
583
584	npeaks = pr.get_peaks(out)
585	# if more than one peak to start with
586	# just return the estimate
587	if npeaks > 1:
588	#message = "Your P(r) is not smooth,
589	#please check your inversion parameters"
590	message = None
591	return pr.suggested_alpha, message, elapsed
592	else:
593
594	# Look at smaller values
595	# We assume that for the suggested alpha, we have 1 peak
596	# if not, send a message to change parameters
597	alpha = pr.suggested_alpha
598	best_alpha = pr.suggested_alpha
599	found = False
600	for i in range(10):
601	pr.alpha = (0.33) ** (i + 1) * alpha
602	out, _ = pr.invert(nfunc)
603
604	peaks = pr.get_peaks(out)
605	if peaks > 1:
606	found = True
607	break
608	best_alpha = pr.alpha
609
610	# If we didn't find a turning point for alpha and
611	# the initial alpha already had only one peak,
612	# just return that
613	if not found and initial_peaks == 1 and \
614	initial_alpha < best_alpha:
615	best_alpha = initial_alpha
616
617	# Check whether the size makes sense
618	message = ''
619
620	if not found:
621	message = None
622	elif best_alpha >= 0.5 * pr.suggested_alpha:
623	# best alpha is too big, return a
624	# reasonable value
625	message = "The estimated alpha for your system is too "
626	message += "large. "
627	message += "Try increasing your maximum distance."
628
629	return best_alpha, message, elapsed
630
631	except:
632	message = "Invertor.estimate_alpha: %s" % sys.exc_value
633	return 0, message, elapsed
634
635	def to_file(self, path, npts=100):
636	"""
637	Save the state to a file that will be readable
638	by SliceView.
639
640	:param path: path of the file to write
641	:param npts: number of P(r) points to be written
642
643	"""
644	file = open(path, 'w')
645	file.write("#d_max=%g\n" % self.d_max)
646	file.write("#nfunc=%g\n" % self.nfunc)
647	file.write("#alpha=%g\n" % self.alpha)
648	file.write("#chi2=%g\n" % self.chi2)
649	file.write("#elapsed=%g\n" % self.elapsed)
650	file.write("#qmin=%s\n" % str(self.q_min))
651	file.write("#qmax=%s\n" % str(self.q_max))
652	file.write("#slit_height=%g\n" % self.slit_height)
653	file.write("#slit_width=%g\n" % self.slit_width)
654	file.write("#background=%g\n" % self.background)
655	if self.has_bck == True:
656	file.write("#has_bck=1\n")
657	else:
658	file.write("#has_bck=0\n")
659	file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
660	if self.out is not None:
661	if len(self.out) == len(self.cov):
662	for i in range(len(self.out)):
663	file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]),
664	str(self.cov[i][i])))
665	file.write("<r> <Pr> <dPr>\n")
666	r = np.arange(0.0, self.d_max, self.d_max / npts)
667
668	for r_i in r:
669	(value, err) = self.pr_err(self.out, self.cov, r_i)
670	file.write("%g %g %g\n" % (r_i, value, err))
671
672	file.close()
673
674	def from_file(self, path):
675	"""
676	Load the state of the Invertor from a file,
677	to be able to generate P(r) from a set of
678	parameters.
679
680	:param path: path of the file to load
681
682	"""
683	#import os
684	#import re
685	if os.path.isfile(path):
686	try:
687	fd = open(path, 'r')
688
689	buff = fd.read()
690	lines = buff.split('\n')
691	for line in lines:
692	if line.startswith('#d_max='):
693	toks = line.split('=')
694	self.d_max = float(toks[1])
695	elif line.startswith('#nfunc='):
696	toks = line.split('=')
697	self.nfunc = int(toks[1])
698	self.out = np.zeros(self.nfunc)
699	self.cov = np.zeros([self.nfunc, self.nfunc])
700	elif line.startswith('#alpha='):
701	toks = line.split('=')
702	self.alpha = float(toks[1])
703	elif line.startswith('#chi2='):
704	toks = line.split('=')
705	self.chi2 = float(toks[1])
706	elif line.startswith('#elapsed='):
707	toks = line.split('=')
708	self.elapsed = float(toks[1])
709	elif line.startswith('#alpha_estimate='):
710	toks = line.split('=')
711	self.suggested_alpha = float(toks[1])
712	elif line.startswith('#qmin='):
713	toks = line.split('=')
714	try:
715	self.q_min = float(toks[1])
716	except:
717	self.q_min = None
718	elif line.startswith('#qmax='):
719	toks = line.split('=')
720	try:
721	self.q_max = float(toks[1])
722	except:
723	self.q_max = None
724	elif line.startswith('#slit_height='):
725	toks = line.split('=')
726	self.slit_height = float(toks[1])
727	elif line.startswith('#slit_width='):
728	toks = line.split('=')
729	self.slit_width = float(toks[1])
730	elif line.startswith('#background='):
731	toks = line.split('=')
732	self.background = float(toks[1])
733	elif line.startswith('#has_bck='):
734	toks = line.split('=')
735	if int(toks[1]) == 1:
736	self.has_bck = True
737	else:
738	self.has_bck = False
739
740	# Now read in the parameters
741	elif line.startswith('#C_'):
742	toks = line.split('=')
743	p = re.compile('#C_([0-9]+)')
744	m = p.search(toks[0])
745	toks2 = toks[1].split('+-')
746	i = int(m.group(1))
747	self.out[i] = float(toks2[0])
748
749	self.cov[i][i] = float(toks2[1])
750
751	except:
752	msg = "Invertor.from_file: corrupted file\n%s" % sys.exc_value
753	raise RuntimeError(msg)
754	else:
755	msg = "Invertor.from_file: '%s' is not a file" % str(path)
756	raise RuntimeError(msg)

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasview/src/sas/sascalc/pr/invertor.py @ b9d74f3

Download in other formats: