invertor.py @ afb311e

Last change on this file since afb311e was 2469df7, checked in by Paul Kienzle <pkienzle@…>, 7 years ago
lint: update 'if x==True/False?' to 'if x/not x:'
Property mode set to `100644`
File size: 25.3 KB

Line
1	# pylint: disable=invalid-name
2	"""
3	Module to perform P(r) inversion.
4	The module contains the Invertor class.
5
6	FIXME: The way the Invertor interacts with its C component should be cleaned up
7	"""
8
9	import numpy as np
10	import sys
11	import math
12	import time
13	import copy
14	import os
15	import re
16	import logging
17	from numpy.linalg import lstsq
18	from scipy import optimize
19	from sas.sascalc.pr.core.pr_inversion import Cinvertor
20
21	logger = logging.getLogger(__name__)
22
def help():
    """
    Return the general help text for the P(r) inversion.

    The text summarizes the inversion approach, the fit function that is
    minimized and the user inputs (number of terms, regularization
    constant and maximum distance).

    Future work: extend this function to allow topic selection

    :return: the help text as a single string
    """
    # The fragments are kept exactly as they were so that the assembled
    # text is unchanged; they are simply joined in one pass.
    fragments = (
        "The inversion approach is based on Moore, J. Appl. Cryst. ",
        "(1980) 13, 168-175.\n\n",
        "P(r) is set to be equal to an expansion of base functions ",
        "of the type ",
        "phi_n(r) = 2rsin(pinr/D_max). The coefficient of each ",
        "base functions ",
        "in the expansion is found by performing a least square fit ",
        "with the ",
        "following fit function:\n\n",
        "chi2 = sum_i[ I_meas(q_i) - I_th(q_i) ]2/error**2 +",
        "Reg_term\n\n",
        "where I_meas(q) is the measured scattering intensity and ",
        "I_th(q) is ",
        "the prediction from the Fourier transform of the P(r) ",
        "expansion. ",
        "The Reg_term term is a regularization term set to the second",
        " derivative ",
        "d2P(r)/dr2 integrated over r. It is used to produce ",
        "a smooth P(r) output.\n\n",
        "The following are user inputs:\n\n",
        " - Number of terms: the number of base functions in the P(r)",
        " expansion.\n\n",
        " - Regularization constant: a multiplicative constant ",
        "to set the size of ",
        "the regularization term.\n\n",
        " - Maximum distance: the maximum distance between any ",
        "two points in the system.\n",
    )
    return "".join(fragments)
57
58
class Invertor(Cinvertor):
    """
    Invertor class to perform P(r) inversion

    The problem is solved by posing the problem as Ax = b,
    where x is the set of coefficients we are looking for.

    Npts is the number of points.

    In the following i refers to the ith base function coefficient.
    The matrix has its entries j in its first Npts rows set to ::

        A[j][i] = (Fourier transformed base function for point j)

    We then choose a number of r-points, n_r, to evaluate the second
    derivative of P(r) at. This is used as our regularization term.
    For a vector r of length n_r, the following n_r rows are set to ::

        A[j+Npts][i] = (2nd derivative of P(r), d2(P(r))/d(r)2,
        evaluated at r[j])

    The vector b has its first Npts entries set to ::

        b[j] = (I(q) observed for point j)

    The following n_r entries are set to zero.

    The result is found by using numpy.linalg.lstsq to solve
    the system and find the coefficients x.

    The attributes x, y, err, d_max, q_min, q_max, alpha, slit_height,
    slit_width and est_bck are not stored on the Python object: reads and
    writes are forwarded to the Cinvertor getters/setters by
    ``__getattr__`` and ``__setattr__``.

    Methods inherited from Cinvertor:

    * ``get_peaks(pars)``: returns the number of P(r) peaks
    * ``oscillations(pars)``: returns the oscillation parameters for the output P(r)
    * ``get_positive(pars)``: returns the fraction of P(r) that is above zero
    * ``get_pos_err(pars)``: returns the fraction of P(r) that is 1-sigma above zero
    """
    ## Chisqr of the last computation
    chi2 = 0
    ## Time elapsed for last computation
    elapsed = 0
    ## Alpha to get the reg term the same size as the signal
    suggested_alpha = 0
    ## Last number of base functions used
    nfunc = 10
    ## Last output values
    out = None
    ## Last errors on output values
    cov = None
    ## Background value
    background = 0
    ## Information dictionary for application use
    ## (class-level default; each instance gets its own dict in __init__)
    info = {}

    def __init__(self):
        """
        Initialize the underlying Cinvertor and the per-instance state.
        """
        Cinvertor.__init__(self)
        # A mutable class attribute would be shared (and mutated) across
        # every Invertor; give each instance its own dictionary.
        self.info = {}

    def __setstate__(self, state):
        """
        Restore the state of the invertor for pickle.

        :param state: tuple in the layout produced by ``__reduce_ex__``:
            the instance ``__dict__`` followed by the values of the
            attributes held by the C component.
        """
        (self.__dict__, self.alpha, self.d_max,
         self.q_min, self.q_max,
         self.x, self.y,
         self.err, self.est_bck,
         self.slit_height, self.slit_width) = state

    def __reduce_ex__(self, proto):
        """
        Overwrite ``__reduce_ex__`` so that the attributes held by the
        C component are pickled together with the instance ``__dict__``.

        :param proto: pickle protocol number (not used)
        :return: the reduce tuple (callable, args, state, None, None)
        """
        state = (self.__dict__,
                 self.alpha, self.d_max,
                 self.q_min, self.q_max,
                 self.x, self.y,
                 self.err, self.est_bck,
                 self.slit_height, self.slit_width,
                )
        return (Invertor, tuple(), state, None, None)

    def __setattr__(self, name, value):
        """
        Set the value of an attribute.

        The names x, y, err, d_max, q_min, q_max, alpha, slit_height,
        slit_width and est_bck are forwarded to the parent class setters;
        any other name is stored as a regular attribute.

        :raise ValueError: if a q-value in ``x`` is zero, if ``d_max`` is
            not strictly positive, or if ``est_bck`` is neither True
            nor False.
        """
        if name == 'x':
            if 0.0 in value:
                msg = "Invertor: one of your q-values is zero. "
                msg += "Delete that entry before proceeding"
                raise ValueError(msg)
            return self.set_x(value)
        elif name == 'y':
            return self.set_y(value)
        elif name == 'err':
            # Only the magnitude of the uncertainty is passed on
            return self.set_err(abs(value))
        elif name == 'd_max':
            if value <= 0.0:
                msg = "Invertor: d_max must be greater than zero. "
                msg += "Correct that entry before proceeding"
                raise ValueError(msg)
            return self.set_dmax(value)
        elif name == 'q_min':
            # None (no limit) is passed to the setter as -1.0
            if value is None:
                return self.set_qmin(-1.0)
            return self.set_qmin(value)
        elif name == 'q_max':
            # None (no limit) is passed to the setter as -1.0
            if value is None:
                return self.set_qmax(-1.0)
            return self.set_qmax(value)
        elif name == 'alpha':
            return self.set_alpha(value)
        elif name == 'slit_height':
            return self.set_slit_height(value)
        elif name == 'slit_width':
            return self.set_slit_width(value)
        elif name == 'est_bck':
            # Explicit comparisons are intentional: anything that is not
            # equal to True or False must be rejected, so a plain
            # truthiness test would not be equivalent.
            if value == True:
                return self.set_est_bck(1)
            elif value == False:
                return self.set_est_bck(0)
            else:
                raise ValueError("Invertor: est_bck can only be True or False")

        return Cinvertor.__setattr__(self, name, value)

    def __getattr__(self, name):
        """
        Return the value of an attribute that was not found by the
        normal lookup.

        The names handled by ``__setattr__`` are read back through the
        parent class getters. Negative q_min/q_max values are reported
        as None.

        NOTE(review): unknown names return None instead of raising
        AttributeError. This is kept as-is because callers may rely on it.
        """
        if name == 'x':
            out = np.ones(self.get_nx())
            self.get_x(out)
            return out
        elif name == 'y':
            out = np.ones(self.get_ny())
            self.get_y(out)
            return out
        elif name == 'err':
            out = np.ones(self.get_nerr())
            self.get_err(out)
            return out
        elif name == 'd_max':
            return self.get_dmax()
        elif name == 'q_min':
            qmin = self.get_qmin()
            if qmin < 0:
                return None
            return qmin
        elif name == 'q_max':
            qmax = self.get_qmax()
            if qmax < 0:
                return None
            return qmax
        elif name == 'alpha':
            return self.get_alpha()
        elif name == 'slit_height':
            return self.get_slit_height()
        elif name == 'slit_width':
            return self.get_slit_width()
        elif name == 'est_bck':
            return self.get_est_bck() == 1
        elif name in self.__dict__:
            return self.__dict__[name]
        return None

    def clone(self):
        """
        Return a clone of this instance.

        The inversion parameters, the data and a deep copy of ``info``
        are copied; the last outputs (``out``, ``cov``) and
        ``suggested_alpha`` are not.
        """
        invertor = Invertor()
        invertor.chi2 = self.chi2
        invertor.elapsed = self.elapsed
        invertor.nfunc = self.nfunc
        invertor.alpha = self.alpha
        invertor.d_max = self.d_max
        invertor.q_min = self.q_min
        invertor.q_max = self.q_max

        invertor.x = self.x
        invertor.y = self.y
        invertor.err = self.err
        invertor.est_bck = self.est_bck
        invertor.background = self.background
        invertor.slit_height = self.slit_height
        invertor.slit_width = self.slit_width

        invertor.info = copy.deepcopy(self.info)

        return invertor

    def invert(self, nfunc=10, nr=20):
        """
        Perform inversion to P(r)

        The linear problem that is solved is described in ``lstsq``.
        When the background is not being estimated, the current
        ``background`` value is subtracted from the y data for the
        duration of the fit and added back afterwards.

        :param nfunc: number of base functions to use.
        :param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
        :return: c_out, c_cov - the coefficients with covariance matrix
        """
        if self.est_bck:
            return self.lstsq(nfunc, nr=nr)

        background = self.background
        self.y -= background
        try:
            return self.lstsq(nfunc, nr=nr)
        finally:
            # Always restore the data, even when the inversion fails
            self.y += background

    def iq(self, out, q):
        """
        Function to call to evaluate the scattering intensity

        :param out: c-parameters (base function coefficients)
        :param q: q-value to evaluate I(q) at
        :return: I(q), including the current background value

        """
        return Cinvertor.iq(self, out, q) + self.background

    def invert_optimize(self, nfunc=10, nr=20):
        """
        Slower version of the P(r) inversion that uses scipy.optimize.leastsq.

        This probably produces more reliable results, but is much slower.
        The minimization function is set to
        sum_i[ (I_obs(q_i) - I_theo(q_i))/err**2 ] + alpha reg_term,
        where the reg_term is given by Svergun: it is the integral of
        the square of the first derivative
        of P(r), d(P(r))/dr, integrated over the full range of r.

        :param nfunc: number of base functions to use.
        :param nr: number of r points to evaluate the 2nd derivative at
            for the reg. term (not used by this method).

        :return: c_out, c_cov - the coefficients with covariance matrix
        :raise RuntimeError: if the data arrays are not valid

        """
        self.nfunc = nfunc
        # First, check that the current data is valid
        if self.is_valid() <= 0:
            msg = "Invertor.invert: Data array are of different length"
            raise RuntimeError(msg)

        p = np.ones(nfunc)
        t_0 = time.time()
        out, cov_x, _, _, _ = optimize.leastsq(self.residuals, p, full_output=1)

        # Compute chi^2 as the sum of the values returned by residuals()
        chisqr = sum(self.residuals(out))
        self.chi2 = chisqr

        # Store computation time
        self.elapsed = time.time() - t_0

        # leastsq returns None when it cannot estimate the covariance
        if cov_x is None:
            cov_x = np.ones([nfunc, nfunc])
        cov_x *= math.fabs(chisqr)
        return out, cov_x

    def pr_fit(self, nfunc=5):
        """
        This is a direct fit to a given P(r). It assumes that the y data
        is set to some P(r) distribution that we are trying to reproduce
        with a set of base functions.

        This method is provided as a test.

        :param nfunc: number of base functions to use.
        :return: c_out, c_cov as returned by scipy.optimize.leastsq
        :raise RuntimeError: if the data arrays are not valid
        """
        # First, check that the current data is valid
        if self.is_valid() <= 0:
            msg = "Invertor.invert: Data arrays are of different length"
            raise RuntimeError(msg)

        p = np.ones(nfunc)
        t_0 = time.time()
        out, cov_x, _, _, _ = optimize.leastsq(self.pr_residuals, p, full_output=1)

        # Compute chi^2 as the sum of the values returned by pr_residuals()
        chisqr = sum(self.pr_residuals(out))

        # Store the result on the documented chi2 attribute; 'chisqr' is
        # also kept because earlier versions of this method set that name.
        self.chi2 = chisqr
        self.chisqr = chisqr

        # Store computation time
        self.elapsed = time.time() - t_0

        return out, cov_x

    def pr_err(self, c, c_cov, r):
        """
        Returns the value of P(r) for a given r, and base function
        coefficients, with error.

        :param c: base function coefficients
        :param c_cov: covariance matrix of the base function coefficients
        :param r: r-value to evaluate P(r) at

        :return: P(r) and its error, as returned by ``get_pr_err``

        """
        return self.get_pr_err(c, c_cov, r)

    def _accept_q(self, q):
        """
        Check q-value against user-defined range

        :param q: q-value to test
        :return: True if q lies within [q_min, q_max]; a limit that is
            None is not applied.
        """
        if self.q_min is not None and q < self.q_min:
            return False
        if self.q_max is not None and q > self.q_max:
            return False
        return True

    def lstsq(self, nfunc=5, nr=20):
        """
        The problem is solved by posing the problem as Ax = b,
        where x is the set of coefficients we are looking for.

        Npts is the number of points.

        In the following i refers to the ith base function coefficient.
        The matrix has its entries j in its first Npts rows set to ::

            A[i][j] = (Fourier transformed base function for point j)

        We then choose a number of r-points, n_r, to evaluate the second
        derivative of P(r) at. This is used as our regularization term.
        For a vector r of length n_r, the following n_r rows are set to ::

            A[i+Npts][j] = (2nd derivative of P(r), d2(P(r))/d(r)2,
            evaluated at r[j])

        The vector b has its first Npts entries set to ::

            b[j] = (I(q) observed for point j)

        The following n_r entries are set to zero.

        The result is found by using numpy.linalg.lstsq to solve
        the system and find the coefficients x.

        :param nfunc: number of base functions to use.
        :param nr: number of r points to evaluate the 2nd derivative at for the reg. term.

        :return: c_out, c_cov - the coefficients with covariance matrix
        :raise RuntimeError: if the data is invalid or the matrix
            could not be built

        If the result does not allow us to compute the covariance matrix,
        a matrix filled with zeros will be returned.

        """
        # Note: To make sure an array is contiguous:
        # blah = np.ascontiguousarray(blah_original)
        # ... before passing it to C

        # Same validity test as invert_optimize() and pr_fit()
        if self.is_valid() <= 0:
            msg = "Invertor: invalid data; incompatible data lengths."
            raise RuntimeError(msg)

        self.nfunc = nfunc
        # a -- An M x N matrix.
        # b -- An M x nrhs matrix or M vector.
        npts = len(self.x)
        # The regularization rows are always included. An earlier test,
        # "sqrt(|alpha|) < 0", could never be true and has been removed.
        nq = nr

        # If we need to fit the background, add a term
        est_bck = self.est_bck
        nfunc_0 = nfunc
        if est_bck:
            nfunc += 1

        a = np.zeros([npts + nq, nfunc])
        b = np.zeros(npts + nq)
        err = np.zeros([nfunc, nfunc])

        # Construct the a matrix and b vector that represent the problem
        t_0 = time.time()
        try:
            self._get_matrix(nfunc, nq, a, b)
        except Exception as exc:
            raise RuntimeError("Invertor: could not invert I(Q)\n %s" % str(exc))

        # Perform the inversion (least square fit)
        c, chi2, _, _ = lstsq(a, b)
        # Sanity check: the residuals returned by lstsq may not be
        # convertible to a single number (e.g. an empty array)
        try:
            float(chi2)
        except (TypeError, ValueError):
            chi2 = -1.0
        self.chi2 = chi2

        inv_cov = np.zeros([nfunc, nfunc])
        # Get the covariance matrix, defined as inv_cov = a_transposed * a
        self._get_invcov_matrix(nfunc, nr, a, inv_cov)

        # Compute the reg term size for the output
        sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)

        if math.fabs(self.alpha) > 0:
            new_alpha = sum_sig / (sum_reg / self.alpha)
        else:
            new_alpha = 0.0
        self.suggested_alpha = new_alpha

        try:
            cov = np.linalg.pinv(inv_cov)
            err = math.fabs(chi2 / float(npts - nfunc)) * cov
        except Exception as exc:
            # We were not able to estimate the errors
            # Return an empty error matrix
            logger.error(exc)

        # Keep a copy of the last output
        if not est_bck:
            self.out = c
            self.cov = err
        else:
            # The first coefficient is the background term; the remaining
            # ones are shifted down by one. The output keeps the enlarged
            # size, so its last entry/row/column stays at zero.
            self.background = c[0]

            err_0 = np.zeros([nfunc, nfunc])
            c_0 = np.zeros(nfunc)

            c_0[:nfunc_0] = c[1:nfunc_0 + 1]
            err_0[:nfunc_0, :nfunc_0] = err[1:nfunc_0 + 1, 1:nfunc_0 + 1]

            self.out = c_0
            self.cov = err_0

        # Store computation time
        self.elapsed = time.time() - t_0

        return self.out, self.cov

    def estimate_numterms(self, isquit_func=None):
        """
        Returns a reasonable guess for the
        number of terms

        :param isquit_func:
          reference to thread function to call to check whether the computation needs to
          be stopped.

        :return: number of terms, alpha, message

        """
        # Explicit relative import; assumes num_term lives in the same
        # package as this module (the implicit form only works on Python 2).
        from .num_term import NTermEstimator
        estimator = NTermEstimator(self.clone())
        try:
            return estimator.num_terms(isquit_func)
        except Exception as exc:
            # If we fail, estimate alpha and return the default
            # number of terms
            best_alpha, _, _ = self.estimate_alpha(self.nfunc)
            logger.warning("Invertor.estimate_numterms: %s", exc)
            return self.nfunc, best_alpha, "Could not estimate number of terms"

    def estimate_alpha(self, nfunc):
        """
        Returns a reasonable guess for the
        regularization constant alpha

        :param nfunc: number of terms to use in the expansion.

        :return: alpha, message, elapsed

        where alpha is the estimate for alpha,
        message is a message for the user,
        elapsed is the computation time

        On any failure, ``(0, <error message>, elapsed)`` is returned
        instead of raising.
        """
        # Defined before the try block so that the error path can always
        # report it, even if cloning fails.
        elapsed = 0
        try:
            pr = self.clone()

            # T_0 for computation time
            starttime = time.time()

            # If the current alpha is zero, try
            # another value
            if pr.alpha <= 0:
                pr.alpha = 0.0001

            # Perform inversion to find the largest alpha
            out, _ = pr.invert(nfunc)
            elapsed = time.time() - starttime
            initial_alpha = pr.alpha
            initial_peaks = pr.get_peaks(out)

            # Try the inversion with the estimated alpha
            pr.alpha = pr.suggested_alpha
            out, _ = pr.invert(nfunc)

            # if more than one peak to start with
            # just return the estimate
            if pr.get_peaks(out) > 1:
                return pr.suggested_alpha, None, elapsed

            # Look at smaller values
            # We assume that for the suggested alpha, we have 1 peak
            # if not, send a message to change parameters
            alpha = pr.suggested_alpha
            best_alpha = pr.suggested_alpha
            found = False
            for i in range(10):
                pr.alpha = (0.33) ** (i + 1) * alpha
                out, _ = pr.invert(nfunc)

                if pr.get_peaks(out) > 1:
                    found = True
                    break
                best_alpha = pr.alpha

            # If we didn't find a turning point for alpha and
            # the initial alpha already had only one peak,
            # just return that
            if not found and initial_peaks == 1 and \
                    initial_alpha < best_alpha:
                best_alpha = initial_alpha

            # Check whether the size makes sense
            message = ''

            if not found:
                message = None
            elif best_alpha >= 0.5 * pr.suggested_alpha:
                # best alpha is too big, return a
                # reasonable value
                message = "The estimated alpha for your system is too "
                message += "large. "
                message += "Try increasing your maximum distance."

            return best_alpha, message, elapsed

        except Exception as exc:
            message = "Invertor.estimate_alpha: %s" % str(exc)
            return 0, message, elapsed

    def to_file(self, path, npts=100):
        """
        Save the state to a file that will be readable
        by SliceView.

        :param path: path of the file to write
        :param npts: number of P(r) points to be written

        """
        # The context manager closes the file even if a write fails
        with open(path, 'w') as fd:
            fd.write("#d_max=%g\n" % self.d_max)
            fd.write("#nfunc=%g\n" % self.nfunc)
            fd.write("#alpha=%g\n" % self.alpha)
            fd.write("#chi2=%g\n" % self.chi2)
            fd.write("#elapsed=%g\n" % self.elapsed)
            fd.write("#qmin=%s\n" % str(self.q_min))
            fd.write("#qmax=%s\n" % str(self.q_max))
            fd.write("#slit_height=%g\n" % self.slit_height)
            fd.write("#slit_width=%g\n" % self.slit_width)
            fd.write("#background=%g\n" % self.background)
            if self.est_bck:
                fd.write("#has_bck=1\n")
            else:
                fd.write("#has_bck=0\n")
            fd.write("#alpha_estimate=%g\n" % self.suggested_alpha)
            if self.out is not None:
                if len(self.out) == len(self.cov):
                    # Only the diagonal of the covariance is written
                    for i in range(len(self.out)):
                        fd.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]),
                                                     str(self.cov[i][i])))
            fd.write("<r> <Pr> <dPr>\n")
            r = np.arange(0.0, self.d_max, self.d_max / npts)

            for r_i in r:
                (value, err) = self.pr_err(self.out, self.cov, r_i)
                fd.write("%g %g %g\n" % (r_i, value, err))

    def from_file(self, path):
        """
        Load the state of the Invertor from a file,
        to be able to generate P(r) from a set of
        parameters.

        :param path: path of the file to load
        :raise RuntimeError: if path is not a file or the file
            cannot be parsed

        """
        if not os.path.isfile(path):
            msg = "Invertor.from_file: '%s' is not a file" % str(path)
            raise RuntimeError(msg)

        # Compiled once rather than for every coefficient line
        coeff_pattern = re.compile(r'#C_([0-9]+)')
        try:
            # The context manager closes the file even on a parse error
            with open(path, 'r') as fd:
                buff = fd.read()

            lines = buff.split('\n')
            for line in lines:
                if line.startswith('#d_max='):
                    toks = line.split('=')
                    self.d_max = float(toks[1])
                elif line.startswith('#nfunc='):
                    toks = line.split('=')
                    self.nfunc = int(toks[1])
                    # Allocate the outputs that the '#C_' lines fill in
                    self.out = np.zeros(self.nfunc)
                    self.cov = np.zeros([self.nfunc, self.nfunc])
                elif line.startswith('#alpha='):
                    toks = line.split('=')
                    self.alpha = float(toks[1])
                elif line.startswith('#chi2='):
                    toks = line.split('=')
                    self.chi2 = float(toks[1])
                elif line.startswith('#elapsed='):
                    toks = line.split('=')
                    self.elapsed = float(toks[1])
                elif line.startswith('#alpha_estimate='):
                    toks = line.split('=')
                    self.suggested_alpha = float(toks[1])
                elif line.startswith('#qmin='):
                    toks = line.split('=')
                    # to_file() writes str(None) when no limit is set
                    try:
                        self.q_min = float(toks[1])
                    except ValueError:
                        self.q_min = None
                elif line.startswith('#qmax='):
                    toks = line.split('=')
                    try:
                        self.q_max = float(toks[1])
                    except ValueError:
                        self.q_max = None
                elif line.startswith('#slit_height='):
                    toks = line.split('=')
                    self.slit_height = float(toks[1])
                elif line.startswith('#slit_width='):
                    toks = line.split('=')
                    self.slit_width = float(toks[1])
                elif line.startswith('#background='):
                    toks = line.split('=')
                    self.background = float(toks[1])
                elif line.startswith('#has_bck='):
                    toks = line.split('=')
                    self.est_bck = int(toks[1]) == 1

                # Now read in the parameters
                elif line.startswith('#C_'):
                    toks = line.split('=')
                    m = coeff_pattern.search(toks[0])
                    toks2 = toks[1].split('+-')
                    i = int(m.group(1))
                    self.out[i] = float(toks2[0])

                    self.cov[i][i] = float(toks2[1])

        except Exception as exc:
            msg = "Invertor.from_file: corrupted file\n%s" % str(exc)
            raise RuntimeError(msg)

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasview/src/sas/sascalc/pr/invertor.py @ afb311e

Download in other formats: