invertor.py @ 0e0c645

ticket-1243

Last change on this file since 0e0c645 was 57e48ca, checked in by Paul Kienzle <pkienzle@…>, 6 years ago
fix rcond requires float error in pr
Property mode set to `100644`
File size: 25.4 KB

Line
1	# pylint: disable=invalid-name
2	"""
3	Module to perform P(r) inversion.
4	The module contains the Invertor class.
5
6	FIXME: The way the Invertor interacts with its C component should be cleaned up
7	"""
8	from __future__ import division
9
10	import numpy as np
11	import sys
12	import math
13	import time
14	import copy
15	import os
16	import re
17	import logging
18	from numpy.linalg import lstsq
19	from scipy import optimize
20	from sas.sascalc.pr._pr_inversion import Cinvertor
21
22	logger = logging.getLogger(__name__)
23
def help():
    """
    Return the general online help text for the P(r) inversion.

    The text names the reference the method is based on, describes the
    fit function that is minimised and lists the user inputs.
    Future work: extend this function to allow topic selection.

    :return: the help text as a single string
    """
    # Adjacent literals inside each tuple entry are concatenated by the
    # parser; the entries themselves are glued together without separator.
    sections = (
        # Reference
        "The inversion approach is based on Moore, J. Appl. Cryst. "
        "(1980) 13, 168-175.\n\n",
        # Expansion and fit
        "P(r) is set to be equal to an expansion of base functions "
        "of the type phi_n(r) = 2rsin(pinr/D_max). "
        "The coefficient of each base functions "
        "in the expansion is found by performing a least square fit "
        "with the following fit function:\n\n",
        # Fit function
        "chi2 = sum_i[ I_meas(q_i) - I_th(q_i) ]2/error**2 +"
        "Reg_term\n\n",
        # Meaning of the terms
        "where I_meas(q) is the measured scattering intensity and "
        "I_th(q) is the prediction from the Fourier transform of "
        "the P(r) expansion. "
        "The Reg_term term is a regularization term set to the second "
        "derivative d2P(r)/dr2 integrated over r. "
        "It is used to produce a smooth P(r) output.\n\n",
        # User inputs
        "The following are user inputs:\n\n",
        " - Number of terms: the number of base functions in the P(r) "
        "expansion.\n\n",
        " - Regularization constant: a multiplicative constant "
        "to set the size of the regularization term.\n\n",
        " - Maximum distance: the maximum distance between any "
        "two points in the system.\n",
    )
    return "".join(sections)
58
59
class Invertor(Cinvertor):
    """
    Invertor class to perform P(r) inversion

    The problem is solved by posing the problem as Ax = b,
    where x is the set of coefficients we are looking for.

    Npts is the number of points.

    In the following i refers to the ith base function coefficient.
    The matrix has its entries j in its first Npts rows set to ::

        A[j][i] = (Fourier transformed base function for point j)

    We then choose a number of r-points, n_r, to evaluate the second
    derivative of P(r) at. This is used as our regularization term.
    For a vector r of length n_r, the following n_r rows are set to ::

        A[j+Npts][i] = (2nd derivative of P(r), d2(P(r))/d(r)2,
        evaluated at r[j])

    The vector b has its first Npts entries set to ::

        b[j] = (I(q) observed for point j)

    The following n_r entries are set to zero.

    The result is found by using numpy.linalg.lstsq to solve the
    system and find the coefficients x.

    The data and fit settings (x, y, err, d_max, q_min, q_max, alpha,
    slit_height, slit_width, est_bck) are not stored on the Python
    object: ``__setattr__`` and ``__getattr__`` forward them to the
    setter/getter methods of the parent class.

    Methods inherited from Cinvertor:

    * ``get_peaks(pars)``: returns the number of P(r) peaks
    * ``oscillations(pars)``: returns the oscillation parameters for the output P(r)
    * ``get_positive(pars)``: returns the fraction of P(r) that is above zero
    * ``get_pos_err(pars)``: returns the fraction of P(r) that is 1-sigma above zero
    """
    ## Chisqr of the last computation
    chi2 = 0
    ## Time elapsed for last computation
    elapsed = 0
    ## Alpha to get the reg term the same size as the signal
    suggested_alpha = 0
    ## Last number of base functions used
    nfunc = 10
    ## Last output values
    out = None
    ## Last errors on output values
    cov = None
    ## Background value
    background = 0
    ## Information dictionary for application use
    ## (class-level default; every instance gets its own copy in __init__)
    info = {}

    def __init__(self):
        """
        Initialise the parent class and the per-instance bookkeeping.
        """
        Cinvertor.__init__(self)
        # A dict defined only at class level would be shared (and mutated)
        # by every Invertor; give each instance its own dictionary.
        self.info = {}

    def __setstate__(self, state):
        """
        Restore the state of the invertor for pickle.

        :param state: tuple as produced by ``__reduce_ex__``: the instance
            dictionary followed by the values forwarded to the parent class
        """
        (instance_dict, alpha, d_max, q_min, q_max,
         x, y, err, est_bck, slit_height, slit_width) = state
        # Same assignment order as the layout of the state tuple
        self.__dict__ = instance_dict
        self.alpha = alpha
        self.d_max = d_max
        self.q_min = q_min
        self.q_max = q_max
        self.x = x
        self.y = y
        self.err = err
        self.est_bck = est_bck
        self.slit_height = slit_height
        self.slit_width = slit_width

    def __reduce_ex__(self, proto):
        """
        Overwrite __reduce_ex__ so that the values held by the parent
        class are pickled along with the instance dictionary.

        :param proto: pickle protocol (unused)
        :return: (callable, args, state, None, None) as expected by pickle
        """
        state = (self.__dict__,
                 self.alpha, self.d_max,
                 self.q_min, self.q_max,
                 self.x, self.y,
                 self.err, self.est_bck,
                 self.slit_height, self.slit_width,
                )
        return (Invertor, tuple(), state, None, None)

    def __setattr__(self, name, value):
        """
        Set the value of an attribute.

        x, y, err, d_max, q_min, q_max, alpha, slit_height, slit_width
        and est_bck are forwarded to the parent class setters; any other
        name is set through the parent class ``__setattr__``.

        :raises ValueError: if x contains a zero, d_max is not strictly
            positive, or est_bck is neither true nor false
        """
        if name == 'x':
            if 0.0 in value:
                msg = "Invertor: one of your q-values is zero. "
                msg += "Delete that entry before proceeding"
                raise ValueError(msg)
            return self.set_x(value)
        if name == 'y':
            return self.set_y(value)
        if name == 'err':
            # Only the magnitude of the uncertainty is passed on
            return self.set_err(abs(value))
        if name == 'd_max':
            if value <= 0.0:
                msg = "Invertor: d_max must be greater than zero. "
                msg += "Correct that entry before proceeding"
                raise ValueError(msg)
            return self.set_dmax(value)
        if name == 'q_min':
            # None ("no limit") is passed to the parent class as -1
            return self.set_qmin(-1.0 if value is None else value)
        if name == 'q_max':
            return self.set_qmax(-1.0 if value is None else value)
        if name == 'alpha':
            return self.set_alpha(value)
        if name == 'slit_height':
            return self.set_slit_height(value)
        if name == 'slit_width':
            return self.set_slit_width(value)
        if name == 'est_bck':
            # Equality (not identity) test, so 1 and 0 are accepted as well
            if value in (True, False):
                return self.set_est_bck(1 if value else 0)
            raise ValueError("Invertor: est_bck can only be True or False")

        return Cinvertor.__setattr__(self, name, value)

    def __getattr__(self, name):
        """
        Return the value of an attribute.

        Only called when the normal attribute lookup fails. The values
        kept by the parent class are read back through its getters.

        NOTE(review): unknown names return None instead of raising
        AttributeError (so ``hasattr`` is always true); kept as is because
        existing callers may rely on it.
        """
        if name == 'x':
            out = np.ones(self.get_nx())
            self.get_x(out)
            return out
        if name == 'y':
            out = np.ones(self.get_ny())
            self.get_y(out)
            return out
        if name == 'err':
            out = np.ones(self.get_nerr())
            self.get_err(out)
            return out
        if name == 'd_max':
            return self.get_dmax()
        if name == 'q_min':
            # A negative value stands for "no limit" (see __setattr__)
            qmin = self.get_qmin()
            return None if qmin < 0 else qmin
        if name == 'q_max':
            qmax = self.get_qmax()
            return None if qmax < 0 else qmax
        if name == 'alpha':
            return self.get_alpha()
        if name == 'slit_height':
            return self.get_slit_height()
        if name == 'slit_width':
            return self.get_slit_width()
        if name == 'est_bck':
            return self.get_est_bck() == 1
        if name in self.__dict__:
            return self.__dict__[name]
        return None

    def clone(self):
        """
        Return a clone of this instance.

        The data, the fit settings, chi2, elapsed, nfunc, background and
        a deep copy of ``info`` are carried over; the last output
        (``out``, ``cov``) and ``suggested_alpha`` are not.
        """
        invertor = Invertor()
        invertor.chi2 = self.chi2
        invertor.elapsed = self.elapsed
        invertor.nfunc = self.nfunc
        invertor.alpha = self.alpha
        invertor.d_max = self.d_max
        invertor.q_min = self.q_min
        invertor.q_max = self.q_max

        invertor.x = self.x
        invertor.y = self.y
        invertor.err = self.err
        invertor.est_bck = self.est_bck
        invertor.background = self.background
        invertor.slit_height = self.slit_height
        invertor.slit_width = self.slit_width

        invertor.info = copy.deepcopy(self.info)

        return invertor

    def invert(self, nfunc=10, nr=20):
        """
        Perform inversion to P(r)

        When the background is not estimated by the fit, the current
        ``background`` value is subtracted from the data for the duration
        of the fit and added back afterwards, also if the fit fails.
        The fit itself is done by :meth:`lstsq`.

        :param nfunc: number of base functions to use.
        :param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
        :return: c_out, c_cov - the coefficients with covariance matrix
        """
        subtract_background = not self.est_bck
        if subtract_background:
            self.y -= self.background
        try:
            out, cov = self.lstsq(nfunc, nr=nr)
        finally:
            # Always restore the data, otherwise a failed fit would leave
            # y permanently shifted by the background.
            if subtract_background:
                self.y += self.background
        return out, cov

    def iq(self, out, q):
        """
        Function to call to evaluate the scattering intensity

        :param out: c-parameters
        :param q: q-value to evaluate I(q) at
        :return: I(q), i.e. the parent class value plus the background
        """
        return Cinvertor.iq(self, out, q) + self.background

    def invert_optimize(self, nfunc=10, nr=20):
        """
        Slower version of the P(r) inversion that uses scipy.optimize.leastsq.

        This probably produce more reliable results, but is much slower.
        The minimization function is set to
        sum_i[ (I_obs(q_i) - I_theo(q_i))/err**2 ] + alpha reg_term,
        where the reg_term is given by Svergun: it is the integral of
        the square of the first derivative
        of P(r), d(P(r))/dr, integrated over the full range of r.

        :param nfunc: number of base functions to use.
        :param nr: number of r points to evaluate the 2nd derivative at
            for the reg. term (not used by this method).

        :return: c_out, c_cov - the coefficients with covariance matrix
        :raises RuntimeError: if the data arrays are not valid
        """
        self.nfunc = nfunc
        # First, check that the current data is valid
        if self.is_valid() <= 0:
            msg = "Invertor.invert: Data array are of different length"
            raise RuntimeError(msg)

        p = np.ones(nfunc)
        t_0 = time.time()
        out, cov_x, _, _, _ = optimize.leastsq(self.residuals, p, full_output=1)

        # chi^2 is the sum of the values returned by residuals()
        chisqr = sum(self.residuals(out))
        self.chi2 = chisqr

        # Store computation time
        self.elapsed = time.time() - t_0

        if cov_x is None:
            # leastsq could not estimate the covariance; use a placeholder
            cov_x = np.ones([nfunc, nfunc])
            cov_x *= math.fabs(chisqr)
        return out, cov_x

    def pr_fit(self, nfunc=5):
        """
        This is a direct fit to a given P(r). It assumes that the y data
        is set to some P(r) distribution that we are trying to reproduce
        with a set of base functions.

        This method is provided as a test.

        :param nfunc: number of base functions to use.
        :return: c_out, c_cov as returned by scipy.optimize.leastsq
        :raises RuntimeError: if the data arrays are not valid
        """
        # First, check that the current data is valid
        if self.is_valid() <= 0:
            msg = "Invertor.invert: Data arrays are of different length"
            raise RuntimeError(msg)

        p = np.ones(nfunc)
        t_0 = time.time()
        out, cov_x, _, _, _ = optimize.leastsq(self.pr_residuals, p, full_output=1)

        # chi^2 is the sum of the values returned by pr_residuals()
        chisqr = sum(self.pr_residuals(out))

        # chi2 is the attribute used by the rest of the class; chisqr is
        # kept for code that read the old attribute name.
        self.chi2 = chisqr
        self.chisqr = chisqr

        # Store computation time
        self.elapsed = time.time() - t_0

        return out, cov_x

    def pr_err(self, c, c_cov, r):
        """
        Returns the value of P(r) for a given r, and base function
        coefficients, with error.

        :param c: base function coefficients
        :param c_cov: covariance matrice of the base function coefficients
        :param r: r-value to evaluate P(r) at

        :return: the result of the parent class ``get_pr_err``
        """
        return self.get_pr_err(c, c_cov, r)

    def _accept_q(self, q):
        """
        Check q-value against user-defined range

        :param q: q-value to test
        :return: False if q is below q_min or above q_max, True otherwise;
            a limit that is None is not applied
        """
        q_min = self.q_min
        if q_min is not None and q < q_min:
            return False
        q_max = self.q_max
        if q_max is not None and q > q_max:
            return False
        return True

    def lstsq(self, nfunc=5, nr=20):
        """
        The problem is solved by posing the problem as Ax = b,
        where x is the set of coefficients we are looking for.

        Npts is the number of points.

        In the following i refers to the ith base function coefficient.
        The matrix has its entries j in its first Npts rows set to ::

            A[j][i] = (Fourier transformed base function for point j)

        We then choose a number of r-points, n_r, to evaluate the second
        derivative of P(r) at. This is used as our regularization term.
        For a vector r of length n_r, the following n_r rows are set to ::

            A[j+Npts][i] = (2nd derivative of P(r), d2(P(r))/d(r)2,
            evaluated at r[j])

        The vector b has its first Npts entries set to ::

            b[j] = (I(q) observed for point j)

        The following n_r entries are set to zero.

        The result is found by using numpy.linalg.lstsq to solve the
        system and find the coefficients x.

        When the background is estimated, one extra column is fitted; its
        coefficient is stored in ``background`` and removed from the
        returned coefficients.

        :param nfunc: number of base functions to use.
        :param nr: number of r points to evaluate the 2nd derivative at for the reg. term.

        :return: c_out, c_cov - the coefficients with covariance matrix
        :raises RuntimeError: if the data is not valid or the matrix
            could not be built

        If the result does not allow us to compute the covariance matrix,
        a matrix filled with zeros will be returned.

        """
        # Note: To make sure an array is contiguous:
        # blah = np.ascontiguousarray(blah_original)
        # ... before passing it to C

        # Same validity test as invert_optimize() and pr_fit()
        if self.is_valid() <= 0:
            msg = "Invertor: invalid data; incompatible data lengths."
            raise RuntimeError(msg)

        self.nfunc = nfunc
        # a -- An M x N matrix.
        # b -- An M x nrhs matrix or M vector.
        npts = len(self.x)
        # The regularization rows are always present. (The former
        # "sqrt(|alpha|) < 0" test that dropped them could never be true.)
        nq = nr

        # If we need to fit the background, add a term
        est_bck = self.est_bck
        nfunc_0 = nfunc
        if est_bck:
            nfunc += 1

        a = np.zeros([npts + nq, nfunc])
        b = np.zeros(npts + nq)
        err = np.zeros([nfunc, nfunc])

        # Construct the a matrix and b vector that represent the problem
        t_0 = time.time()
        try:
            self._get_matrix(nfunc, nq, a, b)
        except Exception as exc:
            raise RuntimeError("Invertor: could not invert I(Q)\n %s" % str(exc))

        # Perform the inversion (least square fit)
        c, residuals, _, _ = lstsq(a, b, rcond=-1)
        # For a 1-D b the residuals come back as a one-element array, or
        # as an empty array when the system is rank deficient or not
        # over-determined; the latter is reported as chi2 = -1.
        if np.size(residuals) == 1:
            chi2 = float(residuals[0])
        else:
            chi2 = -1.0
        self.chi2 = chi2

        inv_cov = np.zeros([nfunc, nfunc])
        # Get the covariance matrix, defined as inv_cov = a_transposed * a
        self._get_invcov_matrix(nfunc, nr, a, inv_cov)

        # Compute the reg term size for the output
        sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)

        if math.fabs(self.alpha) > 0:
            new_alpha = sum_sig / (sum_reg / self.alpha)
        else:
            new_alpha = 0.0
        self.suggested_alpha = new_alpha

        try:
            cov = np.linalg.pinv(inv_cov)
            err = math.fabs(chi2 / (npts - nfunc)) * cov
        except Exception as exc:
            # We were not able to estimate the errors
            # Return an empty error matrix
            logger.error(exc)

        # Keep a copy of the last output
        if not est_bck:
            self.out = c
            self.cov = err
        else:
            # The first coefficient is the background; shift the others
            # down by one. The arrays keep the extended size, so their
            # last entries stay zero.
            self.background = c[0]

            err_0 = np.zeros([nfunc, nfunc])
            c_0 = np.zeros(nfunc)
            c_0[:nfunc_0] = c[1:nfunc_0 + 1]
            err_0[:nfunc_0, :nfunc_0] = err[1:nfunc_0 + 1, 1:nfunc_0 + 1]

            self.out = c_0
            self.cov = err_0

        # Store computation time
        self.elapsed = time.time() - t_0

        return self.out, self.cov

    def estimate_numterms(self, isquit_func=None):
        """
        Returns a reasonable guess for the
        number of terms

        :param isquit_func:
          reference to thread function to call to check whether the computation needs to
          be stopped.

        :return: number of terms, alpha, message

        """
        from .num_term import NTermEstimator
        estimator = NTermEstimator(self.clone())
        try:
            return estimator.num_terms(isquit_func)
        except Exception as exc:
            # If we fail, estimate alpha and return the default
            # number of terms
            best_alpha, _, _ = self.estimate_alpha(self.nfunc)
            logger.warning("Invertor.estimate_numterms: %s", exc)
            return self.nfunc, best_alpha, "Could not estimate number of terms"

    def estimate_alpha(self, nfunc):
        """
        Returns a reasonable guess for the
        regularization constant alpha

        :param nfunc: number of terms to use in the expansion.

        :return: alpha, message, elapsed

        where alpha is the estimate for alpha,
        message is a message for the user,
        elapsed is the computation time

        Any exception is caught and reported through the message, with
        an alpha of 0.
        """
        # Defined before the try block so that the error path can always
        # return it, even if clone() itself fails.
        elapsed = 0
        try:
            pr = self.clone()

            # T_0 for computation time
            starttime = time.time()

            # If the current alpha is zero, try
            # another value
            if pr.alpha <= 0:
                pr.alpha = 0.0001

            # Perform inversion to find the largest alpha
            out, _ = pr.invert(nfunc)
            elapsed = time.time() - starttime
            initial_alpha = pr.alpha
            initial_peaks = pr.get_peaks(out)

            # Try the inversion with the estimated alpha
            pr.alpha = pr.suggested_alpha
            out, _ = pr.invert(nfunc)

            # if more than one peak to start with
            # just return the estimate
            if pr.get_peaks(out) > 1:
                return pr.suggested_alpha, None, elapsed

            # Look at smaller values
            # We assume that for the suggested alpha, we have 1 peak
            # if not, send a message to change parameters
            alpha = pr.suggested_alpha
            best_alpha = pr.suggested_alpha
            found = False
            for i in range(10):
                pr.alpha = (0.33) ** (i + 1) * alpha
                out, _ = pr.invert(nfunc)

                if pr.get_peaks(out) > 1:
                    found = True
                    break
                best_alpha = pr.alpha

            # If we didn't find a turning point for alpha and
            # the initial alpha already had only one peak,
            # just return that
            if not found and initial_peaks == 1 and \
                    initial_alpha < best_alpha:
                best_alpha = initial_alpha

            # Check whether the size makes sense
            message = ''

            if not found:
                message = None
            elif best_alpha >= 0.5 * pr.suggested_alpha:
                # best alpha is too big, return a
                # reasonable value
                message = "The estimated alpha for your system is too "
                message += "large. "
                message += "Try increasing your maximum distance."

            return best_alpha, message, elapsed

        except Exception as exc:
            message = "Invertor.estimate_alpha: %s" % exc
            return 0, message, elapsed

    def to_file(self, path, npts=100):
        """
        Save the state to a file that will be readable
        by SliceView.

        The settings and coefficients are written as ``#key=value``
        header lines, followed by a table of r, P(r) and its error.

        :param path: path of the file to write
        :param npts: number of P(r) points to be written

        """
        # The context manager closes the file even if a write fails
        with open(path, 'w') as fd:
            fd.write("#d_max=%g\n" % self.d_max)
            fd.write("#nfunc=%g\n" % self.nfunc)
            fd.write("#alpha=%g\n" % self.alpha)
            fd.write("#chi2=%g\n" % self.chi2)
            fd.write("#elapsed=%g\n" % self.elapsed)
            fd.write("#qmin=%s\n" % str(self.q_min))
            fd.write("#qmax=%s\n" % str(self.q_max))
            fd.write("#slit_height=%g\n" % self.slit_height)
            fd.write("#slit_width=%g\n" % self.slit_width)
            fd.write("#background=%g\n" % self.background)
            if self.est_bck:
                fd.write("#has_bck=1\n")
            else:
                fd.write("#has_bck=0\n")
            fd.write("#alpha_estimate=%g\n" % self.suggested_alpha)
            if self.out is not None:
                if len(self.out) == len(self.cov):
                    # Only the diagonal of the covariance is saved
                    for i, coeff in enumerate(self.out):
                        fd.write("#C_%i=%s+-%s\n" % (i, str(coeff),
                                                     str(self.cov[i][i])))
            fd.write("<r> <Pr> <dPr>\n")
            r = np.arange(0.0, self.d_max, self.d_max / npts)

            for r_i in r:
                (value, err) = self.pr_err(self.out, self.cov, r_i)
                fd.write("%g %g %g\n" % (r_i, value, err))

    def from_file(self, path):
        """
        Load the state of the Invertor from a file,
        to be able to generate P(r) from a set of
        parameters.

        Only the ``#key=value`` header lines are interpreted; all other
        lines are ignored.

        :param path: path of the file to load
        :raises RuntimeError: if path is not a file or its content could
            not be parsed

        """
        if not os.path.isfile(path):
            msg = "Invertor.from_file: '%s' is not a file" % str(path)
            raise RuntimeError(msg)

        # Header prefixes whose value is a plain float, with the attribute
        # each one is stored in
        float_fields = (
            ('#d_max=', 'd_max'),
            ('#alpha=', 'alpha'),
            ('#chi2=', 'chi2'),
            ('#elapsed=', 'elapsed'),
            ('#alpha_estimate=', 'suggested_alpha'),
            ('#slit_height=', 'slit_height'),
            ('#slit_width=', 'slit_width'),
            ('#background=', 'background'),
        )
        # Limits that are written as "None" when they are not set
        optional_fields = (
            ('#qmin=', 'q_min'),
            ('#qmax=', 'q_max'),
        )
        coeff_pattern = re.compile('#C_([0-9]+)')

        try:
            with open(path, 'r') as fd:
                buff = fd.read()

            for line in buff.split('\n'):
                if not line.startswith('#'):
                    continue
                toks = line.split('=')

                matched = False
                for prefix, attr in float_fields:
                    if line.startswith(prefix):
                        setattr(self, attr, float(toks[1]))
                        matched = True
                        break
                if matched:
                    continue

                for prefix, attr in optional_fields:
                    if line.startswith(prefix):
                        try:
                            setattr(self, attr, float(toks[1]))
                        except ValueError:
                            # Not a number (e.g. "None"): no limit
                            setattr(self, attr, None)
                        matched = True
                        break
                if matched:
                    continue

                if line.startswith('#nfunc='):
                    # Also allocates the arrays filled by the #C_ lines
                    self.nfunc = int(toks[1])
                    self.out = np.zeros(self.nfunc)
                    self.cov = np.zeros([self.nfunc, self.nfunc])
                elif line.startswith('#has_bck='):
                    self.est_bck = int(toks[1]) == 1
                elif line.startswith('#C_'):
                    # Now read in the parameters: "#C_<i>=<value>+-<error>"
                    m = coeff_pattern.search(toks[0])
                    toks2 = toks[1].split('+-')
                    i = int(m.group(1))
                    self.out[i] = float(toks2[0])
                    self.cov[i][i] = float(toks2[1])

        except Exception as exc:
            msg = "Invertor.from_file: corrupted file\n%s" % exc
            raise RuntimeError(msg)

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasview/src/sas/sascalc/pr/invertor.py @ 0e0c645

Download in other formats: