1 | # This program is public domain |
---|
2 | """ |
---|
3 | Park 1-D data objects. |
---|
4 | |
---|
5 | The class Data1D represents simple 1-D data objects, along with |
---|
6 | an ascii file loader. This format will work well for many |
---|
7 | uses, but it is likely that more specialized models will have |
---|
8 | their own data file formats. |
---|
9 | |
---|
10 | The minimal data format for park must supply the following |
---|
11 | methods: |
---|
12 | |
---|
13 | residuals(fn) |
---|
14 | returns the residuals vector for the model function. |
---|
15 | residuals_deriv(fn_deriv,par) |
---|
16 | returns the residuals vector for the model function, and |
---|
17 | for the derivatives with respect to the given parameters. |
---|
18 | |
---|
19 | The function passed is going to be model.eval or in the case |
---|
20 | where derivatives are available, model.eval_deriv. Normally |
---|
21 | this will take a vector of dependent variables and return the |
---|
22 | theory function for that vector but this is only convention. |
---|
23 | The fitting service only uses the parameter set and the residuals |
---|
24 | method from the model. |
---|
25 | |
---|
26 | The park GUI will make more demands on the interface, but the |
---|
27 | details are not yet resolved. |
---|
28 | """ |
---|
29 | from __future__ import with_statement # Used only in test() |
---|
30 | |
---|
31 | __all__ = ['Data1D'] |
---|
32 | |
---|
33 | import numpy |
---|
34 | |
---|
try:
    from park._modeling import convolve as _convolve
    def convolve(xi,yi,x,dx):
        """
        Return convolution y of width dx at points x based on the
        sampled input function yi = f(xi).
        """
        y = numpy.empty(x.shape,'d')
        # The C routine requires contiguous double-precision arrays;
        # parenthesize the tuple so the comprehension is valid python 3.
        xi,yi,x,dx = [numpy.ascontiguousarray(v,'d') for v in (xi,yi,x,dx)]
        _convolve(xi,yi,x,dx,y)
        return y
except ImportError:
    # Only catch a missing compiled extension; a bare except would also
    # hide genuine errors raised while importing park._modeling.
    def convolve(*args,**kw):
        """
        Return convolution y of width dx at points x based on the
        sampled input function yi = f(xi).

        Note: C version is not available in this build, and python
        version is not implemented.
        """
        raise NotImplementedError("convolve is a compiled function")
59 | |
---|
class Data1D(object):
    """
    Data representation for 1-D fitting.

    Attributes

    filename
        The source of the data.  This may be the empty string if the
        data is simulation data.
    x,y,dy
        The data values.
        x is the measurement points of data to be fitted. x must be sorted.
        y is the measured value
        dy is the measurement uncertainty.
    dx
        Resolution at the measured points.  The resolution may be 0,
        constant, or defined for each data point.  dx is the 1-sigma
        width of the Gaussian resolution function at point x.  Note that
        dx_FWHM = sqrt(8 ln 2) dx_sigma, so scale dx appropriately.

    fit_x,fit_dx,fit_y,fit_dy
        The points used in evaluating the residuals.
    calc_x
        The points at which to evaluate the theory function.  This may be
        different from the measured points for a number of reasons, such
        as a resolution function which suggests over or under sampling of
        the points (see below).  By default calc_x is x, but it can be set
        explicitly by the user.
    calc_y, fx
        The value of the function at the theory points, and the value of
        the function after resolution has been applied.  These values are
        computed by a call to residuals.

    Notes on calc_x

    The contribution of Q to a resolution of width dQo at point Qo is::

        p(Q) = 1/sqrt(2 pi dQo**2) exp ( (Q-Qo)**2/(2 dQo**2) )

    We are approximating the convolution at Qo using a numerical
    approximation to the integral over the measured points, with the
    integral limited to p(Q_i)/p(0)>=0.001.

    Sometimes the function we are convolving is rapidly changing.
    That means the correct convolution should uniformly sample across
    the entire width of the Gaussian.  This is not possible at the
    end points unless you calculate the theory function beyond what is
    strictly needed for the data.  For a given dQ and step size,
    you need enough steps that the following is true::

        (n*step)**2 > -2 dQ**2 * ln 0.001

    The choice of sampling density is particularly important near
    critical points where the shape of the function changes.  In
    reflectometry, the function goes from flat below the critical
    edge to O(Q**4) above.  In one particular model, calculating every
    0.005 rather than every 0.02 changed a value above the critical
    edge by 15%.  In a fitting program, this would lead to a somewhat
    larger estimate of the critical edge for this sample.

    Sometimes the theory function is oscillating more rapidly than
    the instrument can resolve.  This happens for example in reflectivity
    measurements involving thick layers.  In these systems, the theory
    function should be oversampled around the measured points Q.  With
    a single thick layer, oversampling can be limited to just one
    period 2 pi/d.  With multiple thick layers, oscillations will
    show interference patterns and it will be necessary to oversample
    uniformly through the entire width of the resolution.  If this is
    not accommodated, then aliasing effects make it difficult to
    compute the correct model.
    """
    # Class-level defaults; instances overwrite these in __init__/load.
    filename = ""
    x,y = None,None
    dx,dy = 0,1
    calc_x,calc_y = None,None
    fit_x,fit_y = None,None
    fit_dx,fit_dy = 0,1
    fx = None

    def __init__(self,filename="",x=None,y=None,dx=0,dy=1):
        """
        Define the fitting data.

        Data can be loaded from a file using filename or
        specified directly using x,y,dx,dy.  File loading
        happens after assignment of x,y,dx,dy.
        """
        self.x,self.y,self.dx,self.dy = x,y,dx,dy
        self.select(None)
        if filename: self.load(filename)

    def resample(self,minstep=None):
        """
        Over/under sampling support.

        Compute the calc_x points required to adequately sample
        the function y=f(x) so that the value reported for each
        measured point is supported by the resolution.  minstep
        is the minimum allowed sampling density that should be
        used.
        """
        self.calc_x = resample(self.x,self.dx,minstep)

    def load(self, filename, **kw):
        """
        Load a multicolumn datafile.

        Data should be in columns, with the following defaults::

            x,y or x,y,dy or x,dx,y,dy

        Note that this resets the selected fitting points calc_x and the
        computed results calc_y and fx.

        Data is sorted after loading.

        Any extra keyword arguments are passed to the numpy loadtxt
        function.  This allows you to select the columns you want,
        skip rows, set the column separator, change the comment character,
        amongst other things.
        """
        self.dx,self.dy = 0,1
        self.calc_x,self.calc_y,self.fx = None,None,None
        if filename:
            columns = numpy.loadtxt(filename, **kw)
            self.x = columns[:,0]
            if columns.shape[1]==4:
                self.dx = columns[:,1]
                self.y = columns[:,2]
                self.dy = columns[:,3]
            elif columns.shape[1]==3:
                self.dx = 0
                self.y = columns[:,1]
                self.dy = columns[:,2]
            elif columns.shape[1]==2:
                self.dx = 0
                self.y = columns[:,1]
                self.dy = 1
            else:
                # Bug fix: use the call form of raise; the python 2-only
                # "raise IOError, msg" statement is a syntax error on python 3.
                raise IOError("Unexpected number of columns in "+filename)
            # Sort by x, carrying dx/dy along when they are per-point arrays.
            idx = numpy.argsort(self.x)
            self.x,self.y = self.x[idx],self.y[idx]
            if not numpy.isscalar(self.dx): self.dx = self.dx[idx]
            if not numpy.isscalar(self.dy): self.dy = self.dy[idx]
        else:
            self.x,self.dx,self.y,self.dy = None,None,None,None

        self.filename = filename
        self.select(None)

    def select(self, idx):
        """
        A selection vector for points to use in the evaluation of the
        residuals, or None if all points are to be used.
        """
        if idx is not None:
            self.fit_x = self.x[idx]
            self.fit_y = self.y[idx]
            # Scalar dx/dy apply to every point and need no selection.
            if not numpy.isscalar(self.dx): self.fit_dx = self.dx[idx]
            if not numpy.isscalar(self.dy): self.fit_dy = self.dy[idx]
        else:
            self.fit_x = self.x
            self.fit_dx = self.dx
            self.fit_y = self.y
            self.fit_dy = self.dy

    def residuals(self, fn):
        """
        Compute the residuals of the data wrt to the given function.

        y = fn(x) should be a callable accepting a list of points at which
        to calculate the function, returning the values at those
        points.

        Any resolution function will be applied after the theory points
        are calculated.
        """
        # Bug fix: compare against None explicitly; "if self.calc_x"
        # raises ValueError when calc_x is a multi-element numpy array.
        calc_x = self.calc_x if self.calc_x is not None else self.fit_x
        self.calc_y = fn(calc_x)
        self.fx = resolution(calc_x,self.calc_y,self.fit_x,self.fit_dx)
        return (self.fit_y - self.fx)/self.fit_dy

    def residuals_deriv(self, fn, pars=[]):
        """
        Compute residuals and derivatives wrt the given parameters.

        fdf = fn(x,pars=pars) should be a callable accepting a list
        of points at which to calculate the function and a keyword
        argument listing the parameters for which the derivative will
        be calculated.

        Returns a list of the residuals and the derivative wrt the
        parameter for each parameter.

        Any resolution function will be applied after the theory points
        and derivatives are calculated.
        """
        # Bug fix: None comparison, as in residuals().
        calc_x = self.calc_x if self.calc_x is not None else self.fit_x

        # Compute f and derivatives
        fdf = fn(calc_x,pars=pars)
        self.calc_y = fdf[0]

        # Apply resolution to the function value and each derivative.
        fdf = [resolution(calc_x,y,self.fit_x,self.fit_dx) for y in fdf]
        self.fx = fdf[0]
        # Bug fix: residuals compare against the measured y values, not
        # the measurement positions (was self.fit_x).
        delta = (self.fx-self.fit_y)/self.fit_dy
        # Deliberate marker left by the original author: the derivative
        # convention below is unresolved, so this path is unreachable.
        raise RuntimeError('check whether we want df/dp or dR/dp where R=residuals^2')

        # R = (F(x;p)-y)/sigma => dR/dp = 1/sigma dF(x;p)/dp
        # dR^2/dp = 2 R /sigma dF(x;p)/dp
        # Bug fix: was the undefined name fdf_res (NameError if reached).
        df = [ v/self.fit_dy for v in fdf[1:] ]

        return [delta]+df
275 | |
---|
def equal_spaced(x,tol=1e-5):
    """
    Return true if the points of x are uniformly spaced, to within a
    relative tolerance of tol.
    """
    gaps = numpy.diff(x)
    average = numpy.mean(gaps)
    # Every gap must be within tol (relative to the mean gap) of the mean.
    return numpy.all(abs(gaps - average) < tol*average)
284 | |
---|
def resample(x,dx,minstep):
    """
    Define the minimum support basis.

    Compute the calc_x points required to adequately sample the
    function y=f(x) so that the value reported for each measured
    point is supported by the resolution.  minstep is the minimum
    allowed sampling density that should be used.
    """
    # Not yet written; Data1D.resample depends on this stub.
    raise NotImplementedError
295 | |
---|
def resolution(calcx,calcy,fitx,fitdx):
    """
    Apply resolution function.  If there is no resolution function, then
    interpolate from the calculated points to the desired theory points.
    If the data are irregular, use a brute force convolution function.

    If the data are regular and the resolution is fixed, then you can
    deconvolute the data before fitting, saving time and complexity.
    """
    if not numpy.any(fitdx != 0):
        # No resolution to apply: reuse the calculated values directly,
        # interpolating only when the calculation grid differs.
        if calcx is fitx:
            return calcy
        return numpy.interp(fitx,calcx,calcy)
    # Expand a constant width to one value per fitted point before
    # handing off to the compiled convolution.
    if numpy.isscalar(fitdx):
        fitdx = fitdx*numpy.ones(fitx.shape)
    return convolve(calcx, calcy, fitx, fitdx)
314 | |
---|
315 | |
---|
class TempData:
    """
    Create a temporary file with a given data set and remove it when done.

    Example::

        from __future__ import with_statement
        ...
        with TempData("1 2 3\n4 5 6") as filename:
            # run tests of loading from filename

    This class is useful for testing data file readers.
    """
    def __init__(self,data,suffix='.dat',prefix='',text=True):
        """
        Write data to a fresh temporary file; suffix, prefix and text
        are forwarded to tempfile.mkstemp.
        """
        import os,tempfile
        # Bug fix: suffix and text were previously ignored (hard-coded
        # to '.dat' and True); forward the caller's arguments.
        fid,self.filename = tempfile.mkstemp(suffix=suffix,prefix=prefix,text=text)
        if isinstance(data, str):
            # os.write requires bytes on python 3; encoding ascii text
            # is a no-op change on python 2.
            data = data.encode('utf-8')
        os.write(fid,data)
        os.close(fid)
    def __enter__(self):
        # Hand the filename to the with-block body.
        return self.filename
    def __exit__(self, exc_type, exc_value, traceback):
        # Always remove the file, even when the body raised.
        import os
        os.unlink(self.filename)
339 | |
---|
# Canned column files (as raw text) for feeding TempData in test().
D2 = "# x y\n1 1\n2 2\n3 4\n2 5\n"
"""x,y dataset for TempData"""
D3 = "# x y dy\n1 1 .1\n2 2 .2\n3 4 .4\n2 5 .5\n"
"""x,y,dy dataset for TempData"""
D4 = "# x dx y dy\n1 .1 1 .1\n2 .2 2 .2\n3 .3 4 .4\n2 .3 5 .5\n"
"""x,dx,y,dy dataset for TempData"""
346 | |
---|
def test():
    """Self-test for Data1D loading, selection and residuals."""
    import numpy

    # Check that two column data loading works
    with TempData(D2) as filename:
        data = Data1D(filename=filename)
        assert numpy.all(data.x == [1,2,2,3])
        assert numpy.all(data.y == [1,2,5,4])
        assert data.dx == 0
        assert data.dy == 1
        assert data.calc_x is None
        assert data.residuals(lambda x: x)[3] == 1

    # Check that interpolation works
    data.calc_x = [1,1.5,3]
    assert data.residuals(lambda x: x)[1] == 0
    assert numpy.all(data.calc_y == data.calc_x)
    # Note: calc_y is updated by data.residuals, so be careful with this test

    # Check that three column data loading works
    with TempData(D3) as filename:
        data = Data1D(filename=filename)
        assert numpy.all(data.x == [1,2,2,3])
        assert numpy.all(data.y == [1,2,5,4])
        assert numpy.all(data.dy == [.1,.2,.5,.4])
        assert data.dx == 0
        assert data.calc_x is None
        assert data.residuals(lambda x: x)[3] == 1/.4

    # Check that four column data loading works
    with TempData(D4) as filename:
        data = Data1D(filename=filename)
        assert numpy.all(data.x == [1,2,2,3])
        assert numpy.all(data.dx == [.1,.2,.3,.3])
        assert numpy.all(data.y == [1,2,5,4])
        assert numpy.all(data.dy == [.1,.2,.5,.4])

    # Test residuals.  Single-argument print calls are valid on both
    # python 2 (parenthesized expression) and python 3.
    print("Fix the convolution function!")
    print(data.residuals(lambda x: x))
    assert data.calc_x is None

if __name__ == "__main__": test()