# This program is public domain
"""
Park 1-D data objects.

The class Data1D represents simple 1-D data objects, along with
an ascii file loader. This format will work well for many
uses, but it is likely that more specialized models will have
their own data file formats.

The minimal data format for park must supply the following
methods:

    residuals(fn)
        returns the residuals vector for the model function.
    residuals_deriv(fn_deriv,par)
        returns the residuals vector for the model function, and
        for the derivatives with respect to the given parameters.

The function passed is going to be model.eval or, in the case
where derivatives are available, model.eval_deriv. Normally
this will take a vector of independent variables and return the
theory function for that vector, but this is only a convention.
The fitting service only uses the parameter set and the residuals
method from the model.
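
A minimal sketch of that use (model stands for a hypothetical park
model object; it is not defined in this module, and the file name
is made up)::

    import numpy
    data = Data1D('mydata.dat')
    chisq = numpy.sum(data.residuals(model.eval)**2)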

The park GUI will make more demands on the interface, but the
details are not yet resolved.
"""
from __future__ import with_statement  # Used only in test()

__all__ = ['Data1D']

import numpy

try:
    from park._modeling import convolve as _convolve
    def convolve(xi, yi, x, dx):
        """
        Return convolution y of width dx at points x based on the
        sampled input function yi = f(xi).
        """
        y = numpy.empty(x.shape, 'd')
        xi, yi, x, dx = [numpy.ascontiguousarray(v, 'd') for v in (xi, yi, x, dx)]
        _convolve(xi, yi, x, dx, y)
        return y
except ImportError:
    def convolve(*args, **kw):
        """
        Return convolution y of width dx at points x based on the
        sampled input function yi = f(xi).

        Note: the C version is not available in this build, and the
        python version is not implemented.
        """
        raise NotImplementedError("convolve is a compiled function")
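
# A hedged, pure-python sketch of the brute-force smearing described in the
# convolve docstring above: a Gaussian-weighted average of yi around each
# point x with 1-sigma width dx.  It ignores the non-uniform spacing of xi
# and is illustrative only; it is not used by convolve() or by the rest of
# this module.
def _convolve_py_sketch(xi, yi, x, dx):
    """Approximate Gaussian smearing of yi = f(xi) at points x, widths dx."""
    xi, yi = numpy.asarray(xi, 'd'), numpy.asarray(yi, 'd')
    x = numpy.asarray(x, 'd')
    dx = numpy.asarray(dx, 'd')*numpy.ones(x.shape)   # allow scalar dx
    y = numpy.empty(x.shape, 'd')
    for k in range(len(x)):
        if dx[k] <= 0:
            # No resolution at this point; fall back to interpolation.
            y[k] = numpy.interp(x[k], xi, yi)
        else:
            w = numpy.exp(-0.5*((xi - x[k])/dx[k])**2)
            y[k] = numpy.sum(w*yi)/numpy.sum(w)
    return y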


class Data1D(object):
    """
    Data representation for 1-D fitting.

    Attributes
    ==========

    filename
        The source of the data. This may be the empty string if the
        data is simulation data.
    x,y,dy
        The data values.
        x is the measurement points of data to be fitted. x must be sorted.
        y is the measured value.
        dy is the measurement uncertainty.
    dx
        Resolution at the measured points. The resolution may be 0,
        constant, or defined for each data point. dx is the 1-sigma
        width of the Gaussian resolution function at point x. Note that
        dx_FWHM = sqrt(8 ln 2) dx_sigma, so scale dx appropriately.
    fit_x,fit_dx,fit_y,fit_dy
        The points used in evaluating the residuals.
    calc_x
        The points at which to evaluate the theory function. This may be
        different from the measured points for a number of reasons, such
        as a resolution function which suggests over or under sampling of
        the points (see below). By default calc_x is x, but it can be set
        explicitly by the user.
    calc_y, fx
        The value of the function at the theory points, and the value of
        the function after resolution has been applied. These values are
        computed by a call to residuals.

    Notes on calc_x
    ===============

    The contribution of Q to a resolution of width dQo at point Qo is::

        p(Q) = 1/sqrt(2 pi dQo**2) exp( -(Q-Qo)**2/(2 dQo**2) )

    We are approximating the convolution at Qo using a numerical
    approximation to the integral over the measured points, with the
    integral limited to p(Q_i)/p(0) >= 0.001.

    Sometimes the function we are convolving is rapidly changing.
    That means the correct convolution should uniformly sample across
    the entire width of the Gaussian. This is not possible at the
    end points unless you calculate the theory function beyond what is
    strictly needed for the data. For a given dQ and step size,
    you need enough steps that the following is true::

        (n*step)**2 > -2 dQ**2 * ln 0.001
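
    For example, solving for n gives n > sqrt(-2 ln 0.001) dQ/step,
    or about 3.72 dQ/step; with dQ = 0.01 and a step of 0.005
    (illustrative numbers only), roughly 8 calculation points are
    needed beyond each end of the data.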

    The choice of sampling density is particularly important near
    critical points where the shape of the function changes. In
    reflectometry, the function goes from flat below the critical
    edge to O(Q**4) above. In one particular model, calculating every
    0.005 rather than every 0.02 changed a value above the critical
    edge by 15%. In a fitting program, this would lead to a somewhat
    larger estimate of the critical edge for this sample.

    Sometimes the theory function is oscillating more rapidly than
    the instrument can resolve. This happens for example in reflectivity
    measurements involving thick layers. In these systems, the theory
    function should be oversampled around the measured points Q. With
    a single thick layer, oversampling can be limited to just one
    period 2 pi/d. With multiple thick layers, oscillations will
    show interference patterns and it will be necessary to oversample
    uniformly through the entire width of the resolution. If this is
    not accommodated, then aliasing effects make it difficult to
    compute the correct model.
    """
    filename = ""
    x,y = None,None
    dx,dy = 0,1
    calc_x,calc_y = None,None
    fit_x,fit_y = None,None
    fit_dx,fit_dy = 0,1
    fx = None

    def __init__(self, filename="", x=None, y=None, dx=0, dy=1):
        """
        Define the fitting data.

        Data can be loaded from a file using filename or
        specified directly using x,y,dx,dy. File loading
        happens after assignment of x,y,dx,dy.
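
        A hypothetical direct construction, without a file::

            data = Data1D(x=[0.1,0.2,0.3], y=[9.,11.,10.], dy=[1.,1.,1.])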
        """
        self.x,self.y,self.dx,self.dy = x,y,dx,dy
        self.select(None)
        if filename: self.load(filename)

    def resample(self, minstep=None):
        """
        Over/under sampling support.

        Compute the calc_x points required to adequately sample
        the function y=f(x) so that the value reported for each
        measured point is supported by the resolution. minstep
        is the minimum allowed sampling density that should be
        used.
        """
        self.calc_x = resample(self.x, self.dx, minstep)

    def load(self, filename, **kw):
        """
        Load a multicolumn datafile.

        Data should be in columns, with the following defaults::

            x,y or x,y,dy or x,dx,y,dy

        Note that this resets the selected fitting points calc_x and the
        computed results calc_y and fx.

        Data is sorted after loading.

        Any extra keyword arguments are passed to the numpy loadtxt
        function. This allows you to select the columns you want,
        skip rows, set the column separator, change the comment character,
        amongst other things.
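
        For example, to read a hypothetical comma separated file,
        keeping only the first and third columns (delimiter and
        usecols are standard numpy.loadtxt arguments)::

            data.load('mydata.csv', delimiter=',', usecols=(0,2))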
        """
        self.dx,self.dy = 0,1
        self.calc_x,self.calc_y,self.fx = None,None,None
        if filename:
            columns = numpy.loadtxt(filename, **kw)
            self.x = columns[:,0]
            if columns.shape[1]==4:
                self.dx = columns[:,1]
                self.y = columns[:,2]
                self.dy = columns[:,3]
            elif columns.shape[1]==3:
                self.dx = 0
                self.y = columns[:,1]
                self.dy = columns[:,2]
            elif columns.shape[1]==2:
                self.dx = 0
                self.y = columns[:,1]
                self.dy = 1
            else:
                raise IOError("Unexpected number of columns in "+filename)
            idx = numpy.argsort(self.x)
            self.x,self.y = self.x[idx],self.y[idx]
            if not numpy.isscalar(self.dx): self.dx = self.dx[idx]
            if not numpy.isscalar(self.dy): self.dy = self.dy[idx]
        else:
            self.x,self.dx,self.y,self.dy = None,None,None,None

        self.filename = filename
        self.select(None)

    def select(self, idx):
        """
        Select the points to use when evaluating the residuals. idx is
        an index or boolean selection vector into the data points, or
        None if all points are to be used.
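
        For example, a hypothetical fit restricted to x > 0.1::

            data.select(data.x > 0.1)
            data.select(None)    # restore the full data set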
        """
        if idx is not None:
            self.fit_x = self.x[idx]
            self.fit_y = self.y[idx]
            if not numpy.isscalar(self.dx): self.fit_dx = self.dx[idx]
            if not numpy.isscalar(self.dy): self.fit_dy = self.dy[idx]
        else:
            self.fit_x = self.x
            self.fit_dx = self.dx
            self.fit_y = self.y
            self.fit_dy = self.dy

    def residuals(self, fn):
        """
        Compute the residuals of the data with respect to the given
        function.

        fn should be a callable accepting a vector of points at which
        to calculate the function and returning the values at those
        points.

        Any resolution function will be applied after the theory points
        are calculated.
        """
        calc_x = self.calc_x if self.calc_x is not None else self.fit_x
        self.calc_y = fn(calc_x)
        self.fx = resolution(calc_x, self.calc_y, self.fit_x, self.fit_dx)
        return (self.fit_y - self.fx)/self.fit_dy

    def residuals_deriv(self, fn, pars=[]):
        """
        Compute residuals and derivatives with respect to the given
        parameters.

        fdf = fn(x,pars=pars) should be a callable accepting a list
        of points at which to calculate the function and a keyword
        argument listing the parameters for which the derivatives will
        be calculated; it should return the function value followed by
        the derivative with respect to each requested parameter.

        Returns a list containing the residuals followed by the
        derivative with respect to each parameter.

        Any resolution function will be applied after the theory points
        and derivatives are calculated.
        """
        calc_x = self.calc_x if self.calc_x is not None else self.fit_x

        # Compute f and derivatives
        fdf = fn(calc_x, pars=pars)
        self.calc_y = fdf[0]

        # Apply resolution
        fdf = [resolution(calc_x, y, self.fit_x, self.fit_dx) for y in fdf]
        self.fx = fdf[0]
        delta = (self.fx - self.fit_y)/self.fit_dy
        raise RuntimeError('check whether we want df/dp or dR/dp where R=residuals^2')

        # R = (F(x;p)-y)/sigma => dR/dp = 1/sigma dF(x;p)/dp
        # dR^2/dp = 2 R /sigma dF(x;p)/dp
        df = [v/self.fit_dy for v in fdf[1:]]

        return [delta] + df

def equal_spaced(x, tol=1e-5):
    """
    Return True if the data are regularly spaced within tolerance.
    Tolerance uses relative error.
    """
    step = numpy.diff(x)
    step_bar = numpy.mean(step)
    return (abs(step - step_bar) < tol*step_bar).all()

def resample(x, dx, minstep):
    """
    Define the minimum support basis.

    Compute the calc_x points required to adequately sample
    the function y=f(x) so that the value reported for each
    measured point is supported by the resolution. minstep
    is the minimum allowed sampling density that should be used.
    """
    raise NotImplementedError

def resolution(calcx, calcy, fitx, fitdx):
    """
    Apply resolution function. If there is no resolution function, then
    interpolate from the calculated points to the desired theory points.
    If the data are irregular, use a brute force convolution function.

    If the data are regular and the resolution is fixed, then you can
    deconvolute the data before fitting, saving time and complexity.
    """
    if numpy.any(fitdx != 0):
        if numpy.isscalar(fitdx):
            fitdx = numpy.ones(fitx.shape)*fitdx
        fx = convolve(calcx, calcy, fitx, fitdx)
    elif calcx is fitx:
        fx = calcy
    else:
        fx = numpy.interp(fitx, calcx, calcy)
    return fx


class TempData:
    """
    Create a temporary file with a given data set and remove it when done.

    Example::

        from __future__ import with_statement
        ...
        with TempData("1 2 3\n4 5 6") as filename:
            # run tests of loading from filename

    This class is useful for testing data file readers.
    """
    def __init__(self, data, suffix='.dat', prefix='', text=True):
        import os, tempfile
        fid, self.filename = tempfile.mkstemp(suffix=suffix, prefix=prefix, text=text)
        os.write(fid, data)
        os.close(fid)
    def __enter__(self):
        return self.filename
    def __exit__(self, exc_type, exc_value, traceback):
        import os
        os.unlink(self.filename)

D2 = "# x y\n1 1\n2 2\n3 4\n2 5\n"
"""x,y dataset for TempData"""
D3 = "# x y dy\n1 1 .1\n2 2 .2\n3 4 .4\n2 5 .5\n"
"""x,y,dy dataset for TempData"""
D4 = "# x dx y dy\n1 .1 1 .1\n2 .2 2 .2\n3 .3 4 .4\n2 .3 5 .5\n"
"""x,dx,y,dy dataset for TempData"""

def test():
    import os
    import numpy

    # Check that two column data loading works
    with TempData(D2) as filename:
        data = Data1D(filename=filename)
    assert numpy.all(data.x == [1,2,2,3])
    assert numpy.all(data.y == [1,2,5,4])
    assert data.dx == 0
    assert data.dy == 1
    assert data.calc_x is None
    assert data.residuals(lambda x: x)[3] == 1

    # Check that interpolation works
    data.calc_x = [1,1.5,3]
    assert data.residuals(lambda x: x)[1] == 0
    assert numpy.all(data.calc_y == data.calc_x)
    # Note: calc_y is updated by data.residuals, so be careful with this test

    # Check that three column data loading works
    with TempData(D3) as filename:
        data = Data1D(filename=filename)
    assert numpy.all(data.x == [1,2,2,3])
    assert numpy.all(data.y == [1,2,5,4])
    assert numpy.all(data.dy == [.1,.2,.5,.4])
    assert data.dx == 0
    assert data.calc_x is None
    assert data.residuals(lambda x: x)[3] == 1/.4

    # Check that four column data loading works
    with TempData(D4) as filename:
        data = Data1D(filename=filename)
    assert numpy.all(data.x == [1,2,2,3])
    assert numpy.all(data.dx == [.1,.2,.3,.3])
    assert numpy.all(data.y == [1,2,5,4])
    assert numpy.all(data.dy == [.1,.2,.5,.4])

    # Test residuals
    print "Fix the convolution function!"
    print data.residuals(lambda x: x)
    assert data.calc_x is None

if __name__ == "__main__": test()