invertor.py @ 0dde203

ticket-1249

Last change on this file since 0dde203 was 7af652d, checked in by GitHub <noreply@…>, 6 years ago
Merge branch 'master' into py37-sascalc
Property mode set to `100644`
File size: 25.5 KB

Line
1	# pylint: disable=invalid-name
2	"""
3	Module to perform P(r) inversion.
4	The module contains the Invertor class.
5
6	FIXME: The way the Invertor interacts with its C component should be cleaned up
7	"""
8	from __future__ import division
9
10	import numpy as np
11	import sys
12	import math
13	import time
14	import copy
15	import os
16	import re
17	import logging
18	from numpy.linalg import lstsq
19	from scipy import optimize
20	from sas.sascalc.pr._pr_inversion import Cinvertor
21
22	logger = logging.getLogger(__name__)
23
24	def help():
25	"""
26	Provide general online help text
27	Future work: extend this function to allow topic selection
28	"""
29	info_txt = "The inversion approach is based on Moore, J. Appl. Cryst. "
30	info_txt += "(1980) 13, 168-175.\n\n"
31	info_txt += "P(r) is set to be equal to an expansion of base functions "
32	info_txt += "of the type "
33	info_txt += "phi_n(r) = 2rsin(pinr/D_max). The coefficient of each "
34	info_txt += "base functions "
35	info_txt += "in the expansion is found by performing a least square fit "
36	info_txt += "with the "
37	info_txt += "following fit function:\n\n"
38	info_txt += "chi2 = sum_i[ I_meas(q_i) - I_th(q_i) ]2/error**2 +"
39	info_txt += "Reg_term\n\n"
40	info_txt += "where I_meas(q) is the measured scattering intensity and "
41	info_txt += "I_th(q) is "
42	info_txt += "the prediction from the Fourier transform of the P(r) "
43	info_txt += "expansion. "
44	info_txt += "The Reg_term term is a regularization term set to the second"
45	info_txt += " derivative "
46	info_txt += "d2P(r)/dr2 integrated over r. It is used to produce "
47	info_txt += "a smooth P(r) output.\n\n"
48	info_txt += "The following are user inputs:\n\n"
49	info_txt += " - Number of terms: the number of base functions in the P(r)"
50	info_txt += " expansion.\n\n"
51	info_txt += " - Regularization constant: a multiplicative constant "
52	info_txt += "to set the size of "
53	info_txt += "the regularization term.\n\n"
54	info_txt += " - Maximum distance: the maximum distance between any "
55	info_txt += "two points in the system.\n"
56
57	return info_txt
58
59
60	class Invertor(Cinvertor):
61	"""
62	Invertor class to perform P(r) inversion
63
64	The problem is solved by posing the problem as Ax = b,
65	where x is the set of coefficients we are looking for.
66
67	Npts is the number of points.
68
69	In the following i refers to the ith base function coefficient.
70	The matrix has its entries j in its first Npts rows set to ::
71
72	A[j][i] = (Fourier transformed base function for point j)
73
74	We then choose a number of r-points, n_r, to evaluate the second
75	derivative of P(r) at. This is used as our regularization term.
76	For a vector r of length n_r, the following n_r rows are set to ::
77
78	A[j+Npts][i] = (2nd derivative of P(r), d2(P(r))/d(r)2,
79	evaluated at r[j])
80
81	The vector b has its first Npts entries set to ::
82
83	b[j] = (I(q) observed for point j)
84
85	The following n_r entries are set to zero.
86
87	The result is found by using scipy.linalg.basic.lstsq to invert
88	the matrix and find the coefficients x.
89
90	Methods inherited from Cinvertor:
91
92	* ``get_peaks(pars)``: returns the number of P(r) peaks
93	* ``oscillations(pars)``: returns the oscillation parameters for the output P(r)
94	* ``get_positive(pars)``: returns the fraction of P(r) that is above zero
95	* ``get_pos_err(pars)``: returns the fraction of P(r) that is 1-sigma above zero
96	"""
97	## Chisqr of the last computation
98	chi2 = 0
99	## Time elapsed for last computation
100	elapsed = 0
101	## Alpha to get the reg term the same size as the signal
102	suggested_alpha = 0
103	## Last number of base functions used
104	nfunc = 10
105	## Last output values
106	out = None
107	## Last errors on output values
108	cov = None
109	## Background value
110	background = 0
111	## Information dictionary for application use
112	info = {}
113
114	def __init__(self):
115	Cinvertor.__init__(self)
116
117	def __setstate__(self, state):
118	"""
119	restore the state of invertor for pickle
120	"""
121	(self.__dict__, self.alpha, self.d_max,
122	self.q_min, self.q_max,
123	self.x, self.y,
124	self.err, self.est_bck,
125	self.slit_height, self.slit_width) = state
126
127	def __reduce_ex__(self, proto):
128	"""
129	Overwrite the __reduce_ex__
130	"""
131
132	state = (self.__dict__,
133	self.alpha, self.d_max,
134	self.q_min, self.q_max,
135	self.x, self.y,
136	self.err, self.est_bck,
137	self.slit_height, self.slit_width,
138	)
139	return (Invertor, tuple(), state, None, None)
140
141	def __setattr__(self, name, value):
142	"""
143	Set the value of an attribute.
144	Access the parent class methods for
145	x, y, err, d_max, q_min, q_max and alpha
146	"""
147	if name == 'x':
148	if 0.0 in value:
149	msg = "Invertor: one of your q-values is zero. "
150	msg += "Delete that entry before proceeding"
151	raise ValueError(msg)
152	return self.set_x(value)
153	elif name == 'y':
154	return self.set_y(value)
155	elif name == 'err':
156	value2 = abs(value)
157	return self.set_err(value2)
158	elif name == 'd_max':
159	if value <= 0.0:
160	msg = "Invertor: d_max must be greater than zero."
161	msg += "Correct that entry before proceeding"
162	raise ValueError(msg)
163	return self.set_dmax(value)
164	elif name == 'q_min':
165	if value is None:
166	return self.set_qmin(-1.0)
167	return self.set_qmin(value)
168	elif name == 'q_max':
169	if value is None:
170	return self.set_qmax(-1.0)
171	return self.set_qmax(value)
172	elif name == 'alpha':
173	return self.set_alpha(value)
174	elif name == 'slit_height':
175	return self.set_slit_height(value)
176	elif name == 'slit_width':
177	return self.set_slit_width(value)
178	elif name == 'est_bck':
179	if value == True:
180	return self.set_est_bck(1)
181	elif value == False:
182	return self.set_est_bck(0)
183	else:
184	raise ValueError("Invertor: est_bck can only be True or False")
185
186	return Cinvertor.__setattr__(self, name, value)
187
188	def __getattr__(self, name):
189	"""
190	Return the value of an attribute
191	"""
192	#import numpy
193	if name == 'x':
194	out = np.ones(self.get_nx())
195	self.get_x(out)
196	return out
197	elif name == 'y':
198	out = np.ones(self.get_ny())
199	self.get_y(out)
200	return out
201	elif name == 'err':
202	out = np.ones(self.get_nerr())
203	self.get_err(out)
204	return out
205	elif name == 'd_max':
206	return self.get_dmax()
207	elif name == 'q_min':
208	qmin = self.get_qmin()
209	if qmin < 0:
210	return None
211	return qmin
212	elif name == 'q_max':
213	qmax = self.get_qmax()
214	if qmax < 0:
215	return None
216	return qmax
217	elif name == 'alpha':
218	return self.get_alpha()
219	elif name == 'slit_height':
220	return self.get_slit_height()
221	elif name == 'slit_width':
222	return self.get_slit_width()
223	elif name == 'est_bck':
224	value = self.get_est_bck()
225	return value == 1
226	elif name in self.__dict__:
227	return self.__dict__[name]
228	return None
229
230	def clone(self):
231	"""
232	Return a clone of this instance
233	"""
234	#import copy
235
236	invertor = Invertor()
237	invertor.chi2 = self.chi2
238	invertor.elapsed = self.elapsed
239	invertor.nfunc = self.nfunc
240	invertor.alpha = self.alpha
241	invertor.d_max = self.d_max
242	invertor.q_min = self.q_min
243	invertor.q_max = self.q_max
244
245	invertor.x = self.x
246	invertor.y = self.y
247	invertor.err = self.err
248	invertor.est_bck = self.est_bck
249	invertor.background = self.background
250	invertor.slit_height = self.slit_height
251	invertor.slit_width = self.slit_width
252
253	invertor.info = copy.deepcopy(self.info)
254
255	return invertor
256
257	def invert(self, nfunc=10, nr=20):
258	"""
259	Perform inversion to P(r)
260
261	The problem is solved by posing the problem as Ax = b,
262	where x is the set of coefficients we are looking for.
263
264	Npts is the number of points.
265
266	In the following i refers to the ith base function coefficient.
267	The matrix has its entries j in its first Npts rows set to ::
268
269	A[i][j] = (Fourier transformed base function for point j)
270
271	We then choose a number of r-points, n_r, to evaluate the second
272	derivative of P(r) at. This is used as our regularization term.
273	For a vector r of length n_r, the following n_r rows are set to ::
274
275	A[i+Npts][j] = (2nd derivative of P(r), d2(P(r))/d(r)2, evaluated at r[j])
276
277	The vector b has its first Npts entries set to ::
278
279	b[j] = (I(q) observed for point j)
280
281	The following n_r entries are set to zero.
282
283	The result is found by using scipy.linalg.basic.lstsq to invert
284	the matrix and find the coefficients x.
285
286	:param nfunc: number of base functions to use.
287	:param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
288	:return: c_out, c_cov - the coefficients with covariance matrix
289	"""
290	# Reset the background value before proceeding
291	# self.background = 0.0
292	if not self.est_bck:
293	self.y -= self.background
294	out, cov = self.lstsq(nfunc, nr=nr)
295	if not self.est_bck:
296	self.y += self.background
297	return out, cov
298
299	def iq(self, out, q):
300	"""
301	Function to call to evaluate the scattering intensity
302
303	:param args: c-parameters, and q
304	:return: I(q)
305
306	"""
307	return Cinvertor.iq(self, out, q) + self.background
308
309	def invert_optimize(self, nfunc=10, nr=20):
310	"""
311	Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
312
313	This probably produce more reliable results, but is much slower.
314	The minimization function is set to
315	sum_i[ (I_obs(q_i) - I_theo(q_i))/err*2 ] + alpha reg_term,
316	where the reg_term is given by Svergun: it is the integral of
317	the square of the first derivative
318	of P(r), d(P(r))/dr, integrated over the full range of r.
319
320	:param nfunc: number of base functions to use.
321	:param nr: number of r points to evaluate the 2nd derivative at
322	for the reg. term.
323
324	:return: c_out, c_cov - the coefficients with covariance matrix
325
326	"""
327	self.nfunc = nfunc
328	# First, check that the current data is valid
329	if self.is_valid() <= 0:
330	msg = "Invertor.invert: Data array are of different length"
331	raise RuntimeError(msg)
332
333	p = np.ones(nfunc)
334	t_0 = time.time()
335	out, cov_x, _, _, _ = optimize.leastsq(self.residuals, p, full_output=1)
336
337	# Compute chi^2
338	res = self.residuals(out)
339	chisqr = 0
340	for i in range(len(res)):
341	chisqr += res[i]
342
343	self.chi2 = chisqr
344
345	# Store computation time
346	self.elapsed = time.time() - t_0
347
348	if cov_x is None:
349	cov_x = np.ones([nfunc, nfunc])
350	cov_x *= math.fabs(chisqr)
351	return out, cov_x
352
353	def pr_fit(self, nfunc=5):
354	"""
355	This is a direct fit to a given P(r). It assumes that the y data
356	is set to some P(r) distribution that we are trying to reproduce
357	with a set of base functions.
358
359	This method is provided as a test.
360	"""
361	# First, check that the current data is valid
362	if self.is_valid() <= 0:
363	msg = "Invertor.invert: Data arrays are of different length"
364	raise RuntimeError(msg)
365
366	p = np.ones(nfunc)
367	t_0 = time.time()
368	out, cov_x, _, _, _ = optimize.leastsq(self.pr_residuals, p, full_output=1)
369
370	# Compute chi^2
371	res = self.pr_residuals(out)
372	chisqr = 0
373	for i in range(len(res)):
374	chisqr += res[i]
375
376	self.chisqr = chisqr
377
378	# Store computation time
379	self.elapsed = time.time() - t_0
380
381	return out, cov_x
382
383	def pr_err(self, c, c_cov, r):
384	"""
385	Returns the value of P(r) for a given r, and base function
386	coefficients, with error.
387
388	:param c: base function coefficients
389	:param c_cov: covariance matrice of the base function coefficients
390	:param r: r-value to evaluate P(r) at
391
392	:return: P(r)
393
394	"""
395	return self.get_pr_err(c, c_cov, r)
396
397	def _accept_q(self, q):
398	"""
399	Check q-value against user-defined range
400	"""
401	if self.q_min is not None and q < self.q_min:
402	return False
403	if self.q_max is not None and q > self.q_max:
404	return False
405	return True
406
407	def lstsq(self, nfunc=5, nr=20):
408	"""
409	The problem is solved by posing the problem as Ax = b,
410	where x is the set of coefficients we are looking for.
411
412	Npts is the number of points.
413
414	In the following i refers to the ith base function coefficient.
415	The matrix has its entries j in its first Npts rows set to ::
416
417	A[i][j] = (Fourier transformed base function for point j)
418
419	We then choose a number of r-points, n_r, to evaluate the second
420	derivative of P(r) at. This is used as our regularization term.
421	For a vector r of length n_r, the following n_r rows are set to ::
422
423	A[i+Npts][j] = (2nd derivative of P(r), d2(P(r))/d(r)2,
424	evaluated at r[j])
425
426	The vector b has its first Npts entries set to ::
427
428	b[j] = (I(q) observed for point j)
429
430	The following n_r entries are set to zero.
431
432	The result is found by using scipy.linalg.basic.lstsq to invert
433	the matrix and find the coefficients x.
434
435	:param nfunc: number of base functions to use.
436	:param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
437
438	If the result does not allow us to compute the covariance matrix,
439	a matrix filled with zeros will be returned.
440
441	"""
442	# Note: To make sure an array is contiguous:
443	# blah = np.ascontiguousarray(blah_original)
444	# ... before passing it to C
445
446	if self.is_valid() < 0:
447	msg = "Invertor: invalid data; incompatible data lengths."
448	raise RuntimeError(msg)
449
450	self.nfunc = nfunc
451	# a -- An M x N matrix.
452	# b -- An M x nrhs matrix or M vector.
453	npts = len(self.x)
454	nq = nr
455	sqrt_alpha = math.sqrt(math.fabs(self.alpha))
456	if sqrt_alpha < 0.0:
457	nq = 0
458
459	# If we need to fit the background, add a term
460	if self.est_bck:
461	nfunc_0 = nfunc
462	nfunc += 1
463
464	a = np.zeros([npts + nq, nfunc])
465	b = np.zeros(npts + nq)
466	err = np.zeros([nfunc, nfunc])
467
468	# Construct the a matrix and b vector that represent the problem
469	t_0 = time.time()
470	try:
471	self._get_matrix(nfunc, nq, a, b)
472	except Exception as exc:
473	raise RuntimeError("Invertor: could not invert I(Q)\n %s" % str(exc))
474
475	# Perform the inversion (least square fit)
476	# CRUFT: numpy>=1.14.0 allows rcond=None for the following default
477	rcond = np.finfo(float).eps * max(a.shape)
478	c, chi2, _, _ = lstsq(a, b, rcond=rcond)
479	# Sanity check
480	try:
481	float(chi2)
482	except:
483	chi2 = -1.0
484	self.chi2 = chi2
485
486	inv_cov = np.zeros([nfunc, nfunc])
487	# Get the covariance matrix, defined as inv_cov = a_transposed * a
488	self._get_invcov_matrix(nfunc, nr, a, inv_cov)
489
490	# Compute the reg term size for the output
491	sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
492
493	if math.fabs(self.alpha) > 0:
494	new_alpha = sum_sig / (sum_reg / self.alpha)
495	else:
496	new_alpha = 0.0
497	self.suggested_alpha = new_alpha
498
499	try:
500	cov = np.linalg.pinv(inv_cov)
501	err = math.fabs(chi2 / (npts - nfunc)) * cov
502	except Exception as exc:
503	# We were not able to estimate the errors
504	# Return an empty error matrix
505	logger.error(exc)
506
507	# Keep a copy of the last output
508	if not self.est_bck:
509	self.out = c
510	self.cov = err
511	else:
512	self.background = c[0]
513
514	err_0 = np.zeros([nfunc, nfunc])
515	c_0 = np.zeros(nfunc)
516
517	for i in range(nfunc_0):
518	c_0[i] = c[i + 1]
519	for j in range(nfunc_0):
520	err_0[i][j] = err[i + 1][j + 1]
521
522	self.out = c_0
523	self.cov = err_0
524
525	# Store computation time
526	self.elapsed = time.time() - t_0
527
528	return self.out, self.cov
529
530	def estimate_numterms(self, isquit_func=None):
531	"""
532	Returns a reasonable guess for the
533	number of terms
534
535	:param isquit_func:
536	reference to thread function to call to check whether the computation needs to
537	be stopped.
538
539	:return: number of terms, alpha, message
540
541	"""
542	from .num_term import NTermEstimator
543	estimator = NTermEstimator(self.clone())
544	try:
545	return estimator.num_terms(isquit_func)
546	except Exception as exc:
547	# If we fail, estimate alpha and return the default
548	# number of terms
549	best_alpha, _, _ = self.estimate_alpha(self.nfunc)
550	logger.warning("Invertor.estimate_numterms: %s" % exc)
551	return self.nfunc, best_alpha, "Could not estimate number of terms"
552
553	def estimate_alpha(self, nfunc):
554	"""
555	Returns a reasonable guess for the
556	regularization constant alpha
557
558	:param nfunc: number of terms to use in the expansion.
559
560	:return: alpha, message, elapsed
561
562	where alpha is the estimate for alpha,
563	message is a message for the user,
564	elapsed is the computation time
565	"""
566	#import time
567	try:
568	pr = self.clone()
569
570	# T_0 for computation time
571	starttime = time.time()
572	elapsed = 0
573
574	# If the current alpha is zero, try
575	# another value
576	if pr.alpha <= 0:
577	pr.alpha = 0.0001
578
579	# Perform inversion to find the largest alpha
580	out, _ = pr.invert(nfunc)
581	elapsed = time.time() - starttime
582	initial_alpha = pr.alpha
583	initial_peaks = pr.get_peaks(out)
584
585	# Try the inversion with the estimated alpha
586	pr.alpha = pr.suggested_alpha
587	out, _ = pr.invert(nfunc)
588
589	npeaks = pr.get_peaks(out)
590	# if more than one peak to start with
591	# just return the estimate
592	if npeaks > 1:
593	#message = "Your P(r) is not smooth,
594	#please check your inversion parameters"
595	message = None
596	return pr.suggested_alpha, message, elapsed
597	else:
598
599	# Look at smaller values
600	# We assume that for the suggested alpha, we have 1 peak
601	# if not, send a message to change parameters
602	alpha = pr.suggested_alpha
603	best_alpha = pr.suggested_alpha
604	found = False
605	for i in range(10):
606	pr.alpha = (0.33) ** (i + 1) * alpha
607	out, _ = pr.invert(nfunc)
608
609	peaks = pr.get_peaks(out)
610	if peaks > 1:
611	found = True
612	break
613	best_alpha = pr.alpha
614
615	# If we didn't find a turning point for alpha and
616	# the initial alpha already had only one peak,
617	# just return that
618	if not found and initial_peaks == 1 and \
619	initial_alpha < best_alpha:
620	best_alpha = initial_alpha
621
622	# Check whether the size makes sense
623	message = ''
624
625	if not found:
626	message = None
627	elif best_alpha >= 0.5 * pr.suggested_alpha:
628	# best alpha is too big, return a
629	# reasonable value
630	message = "The estimated alpha for your system is too "
631	message += "large. "
632	message += "Try increasing your maximum distance."
633
634	return best_alpha, message, elapsed
635
636	except Exception as exc:
637	message = "Invertor.estimate_alpha: %s" % exc
638	return 0, message, elapsed
639
640	def to_file(self, path, npts=100):
641	"""
642	Save the state to a file that will be readable
643	by SliceView.
644
645	:param path: path of the file to write
646	:param npts: number of P(r) points to be written
647
648	"""
649	file = open(path, 'w')
650	file.write("#d_max=%g\n" % self.d_max)
651	file.write("#nfunc=%g\n" % self.nfunc)
652	file.write("#alpha=%g\n" % self.alpha)
653	file.write("#chi2=%g\n" % self.chi2)
654	file.write("#elapsed=%g\n" % self.elapsed)
655	file.write("#qmin=%s\n" % str(self.q_min))
656	file.write("#qmax=%s\n" % str(self.q_max))
657	file.write("#slit_height=%g\n" % self.slit_height)
658	file.write("#slit_width=%g\n" % self.slit_width)
659	file.write("#background=%g\n" % self.background)
660	if self.est_bck:
661	file.write("#has_bck=1\n")
662	else:
663	file.write("#has_bck=0\n")
664	file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
665	if self.out is not None:
666	if len(self.out) == len(self.cov):
667	for i in range(len(self.out)):
668	file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]),
669	str(self.cov[i][i])))
670	file.write("<r> <Pr> <dPr>\n")
671	r = np.arange(0.0, self.d_max, self.d_max / npts)
672
673	for r_i in r:
674	(value, err) = self.pr_err(self.out, self.cov, r_i)
675	file.write("%g %g %g\n" % (r_i, value, err))
676
677	file.close()
678
679	def from_file(self, path):
680	"""
681	Load the state of the Invertor from a file,
682	to be able to generate P(r) from a set of
683	parameters.
684
685	:param path: path of the file to load
686
687	"""
688	#import os
689	#import re
690	if os.path.isfile(path):
691	try:
692	fd = open(path, 'r')
693
694	buff = fd.read()
695	lines = buff.split('\n')
696	for line in lines:
697	if line.startswith('#d_max='):
698	toks = line.split('=')
699	self.d_max = float(toks[1])
700	elif line.startswith('#nfunc='):
701	toks = line.split('=')
702	self.nfunc = int(toks[1])
703	self.out = np.zeros(self.nfunc)
704	self.cov = np.zeros([self.nfunc, self.nfunc])
705	elif line.startswith('#alpha='):
706	toks = line.split('=')
707	self.alpha = float(toks[1])
708	elif line.startswith('#chi2='):
709	toks = line.split('=')
710	self.chi2 = float(toks[1])
711	elif line.startswith('#elapsed='):
712	toks = line.split('=')
713	self.elapsed = float(toks[1])
714	elif line.startswith('#alpha_estimate='):
715	toks = line.split('=')
716	self.suggested_alpha = float(toks[1])
717	elif line.startswith('#qmin='):
718	toks = line.split('=')
719	try:
720	self.q_min = float(toks[1])
721	except:
722	self.q_min = None
723	elif line.startswith('#qmax='):
724	toks = line.split('=')
725	try:
726	self.q_max = float(toks[1])
727	except:
728	self.q_max = None
729	elif line.startswith('#slit_height='):
730	toks = line.split('=')
731	self.slit_height = float(toks[1])
732	elif line.startswith('#slit_width='):
733	toks = line.split('=')
734	self.slit_width = float(toks[1])
735	elif line.startswith('#background='):
736	toks = line.split('=')
737	self.background = float(toks[1])
738	elif line.startswith('#has_bck='):
739	toks = line.split('=')
740	self.est_bck = int(toks[1]) == 1
741
742	# Now read in the parameters
743	elif line.startswith('#C_'):
744	toks = line.split('=')
745	p = re.compile('#C_([0-9]+)')
746	m = p.search(toks[0])
747	toks2 = toks[1].split('+-')
748	i = int(m.group(1))
749	self.out[i] = float(toks2[0])
750
751	self.cov[i][i] = float(toks2[1])
752
753	except Exception as exc:
754	msg = "Invertor.from_file: corrupted file\n%s" % exc
755	raise RuntimeError(msg)
756	else:
757	msg = "Invertor.from_file: '%s' is not a file" % str(path)
758	raise RuntimeError(msg)

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasview/src/sas/sascalc/pr/invertor.py @ 0dde203

Download in other formats: