source: sasview/park-1.2.1/park/data.py @ 4a47244

ESS_GUIESS_GUI_DocsESS_GUI_batch_fittingESS_GUI_bumps_abstractionESS_GUI_iss1116ESS_GUI_iss879ESS_GUI_iss959ESS_GUI_openclESS_GUI_orderingESS_GUI_sync_sascalccostrafo411magnetic_scattrelease-4.1.1release-4.1.2release-4.2.2release_4.0.1ticket-1009ticket-1094-headlessticket-1242-2d-resolutionticket-1243ticket-1249ticket885unittest-saveload
Last change on this file since 4a47244 was 3570545, checked in by Mathieu Doucet <doucetm@…>, 13 years ago

Adding park Part 2

  • Property mode set to 100644
File size: 13.6 KB
Line 
1# This program is public domain
2"""
3Park 1-D data objects.
4
5The class Data1D represents simple 1-D data objects, along with
6an ascii file loader.  This format will work well for many
7uses, but it is likely that more specialized models will have
8their own data file formats.
9
10The minimal data format for park must supply the following
11methods:
12
13    residuals(fn)
14        returns the residuals vector for the model function.
15    residuals_deriv(fn_deriv,par)
16        returns the residuals vector for the model function, and
17        for the derivatives with respect to the given parameters.
18
19The function passed is going to be model.eval or in the case
20where derivatives are available, model.eval_deriv.  Normally
21this will take a vector of dependent variables and return the
22theory function for that vector but this is only convention.
23The fitting service only uses the parameter set and the residuals
24method from the model.
25
26The park GUI will make more demands on the interface, but the
27details are not yet resolved.
28"""
29from __future__ import with_statement  # Used only in test()
30
31__all__ = ['Data1D']
32
33import numpy
34
35try:
36    from park._modeling import convolve as _convolve
37    def convolve(xi,yi,x,dx):
38        """
39        Return convolution y of width dx at points x based on the
40        sampled input function yi = f(xi).
41        """
42        y = numpy.empty(x.shape,'d')
43        xi,yi,x,dx = [numpy.ascontiguousarray(v,'d') for v in xi,yi,x,dx]
44        _convolve(xi,yi,x,dx,y)
45        return y
46except:
47    def convolve(*args,**kw):
48        """
49        Return convolution y of width dx at points x based on the
50        sampled input function yi = f(xi).
51       
52        Note: C version is not available in this build, and python
53        version is not implemented.
54        """
55        raise NotImplementedError("convolve is a compiled function")
56   
57
58__all__ = ['Data1D']
59   
60class Data1D(object):
61    """
62    Data representation for 1-D fitting.
63
64    Attributes
65    ==========
66   
67    filename
68        The source of the data.  This may be the empty string if the
69        data is simulation data.
70    x,y,dy
71        The data values.
72        x is the measurement points of data to be fitted. x must be sorted.
73        y is the measured value
74        dy is the measurement uncertainty.
75    dx
76        Resolution at the the measured points.  The resolution may be 0,
77        constant, or defined for each data point.  dx is the 1-sigma
78        width of the Gaussian resolution function at point x.  Note that
79        dx_FWHM = sqrt(8 ln 2) dx_sigma, so scale dx appropriately.
80
81
82    fit_x,fit_dx,fit_y,fit_dy
83        The points used in evaluating the residuals.
84    calc_x
85        The points at which to evaluate the theory function.  This may be
86        different from the measured points for a number of reasons, such
87        as a resolution function which suggests over or under sampling of
88        the points (see below).  By default calc_x is x, but it can be set
89        explicitly by the user.
90    calc_y, fx
91        The value of the function at the theory points, and the value of
92        the function after resolution has been applied.  These values are
93        computed by a call to residuals.
94
95    Notes on calc_x
96    ===============   
97   
98    The contribution of Q to a resolution of width dQo at point Qo is::
99
100       p(Q) = 1/sqrt(2 pi dQo**2) exp ( (Q-Qo)**2/(2 dQo**2) )
101
102    We are approximating the convolution at Qo using a numerical
103    approximation to the integral over the measured points, with the
104    integral is limited to p(Q_i)/p(0)>=0.001. 
105
106    Sometimes the function we are convoluting is rapidly changing.
107    That means the correct convolution should uniformly sample across
108    the entire width of the Gaussian.  This is not possible at the
109    end points unless you calculate the theory function beyond what is
110    strictly needed for the data. For a given dQ and step size,
111    you need enough steps that the following is true::
112
113        (n*step)**2 > -2 dQ**2 * ln 0.001
114
115    The choice of sampling density is particularly important near
116    critical points where the shape of the function changes.  In
117    reflectometry, the function goes from flat below the critical
118    edge to O(Q**4) above.  In one particular model, calculating every
119    0.005 rather than every 0.02 changed a value above the critical
120    edge by 15%.  In a fitting program, this would lead to a somewhat
121    larger estimate of the critical edge for this sample.
122
123    Sometimes the theory function is oscillating more rapidly than
124    the instrument can resolve.  This happens for example in reflectivity
125    measurements involving thick layers.  In these systems, the theory
126    function should be oversampled around the measured points Q.  With
127    a single thick layer, oversampling can be limited to just one
128    period 2 pi/d.  With multiple thick layers, oscillations will
129    show interference patterns and it will be necessary to oversample
130    uniformly through the entire width of the resolution.  If this is
131    not accommodated, then aliasing effects make it difficult to
132    compute the correct model.
133    """
134    filename = ""
135    x,y = None,None
136    dx,dy = 0,1
137    calc_x,calc_y = None,None
138    fit_x,fit_y = None,None
139    fit_dx,fit_dy = 0,1
140    fx = None
141   
142    def __init__(self,filename="",x=None,y=None,dx=0,dy=1):
143        """
144        Define the fitting data.
145       
146        Data can be loaded from a file using filename or
147        specified directly using x,y,dx,dy.  File loading
148        happens after assignment of x,y,dx,dy.
149        """
150        self.x,self.y,self.dx,self.dy = x,y,dx,dy
151        self.select(None)
152        if filename: self.load(filename)
153   
154    def resample(self,minstep=None):
155        """
156        Over/under sampling support.
157
158        Compute the calc_x points required to adequately sample
159        the function y=f(x) so that the value reported for each
160        measured point is supported by the resolution.  minstep
161        is the minimum allowed sampling density that should be
162        used.
163        """
164        self.calc_x = resample(self.x,self.dx,minstep)
165
166    def load(self, filename, **kw):
167        """
168        Load a multicolumn datafile.
169       
170        Data should be in columns, with the following defaults::
171       
172            x,y or x,y,dy or x,dx,y,dy
173           
174        Note that this resets the selected fitting points calc_x and the
175        computed results calc_y and fx.
176
177        Data is sorted after loading.
178       
179        Any extra keyword arguments are passed to the numpy loadtxt
180        function.  This allows you to select the columns you want,
181        skip rows, set the column separator, change the comment character,
182        amongst other things.
183        """
184        self.dx,self.dy = 0,1
185        self.calc_x,self.calc_y,self.fx = None,None,None
186        if filename:
187            columns = numpy.loadtxt(filename, **kw)
188            self.x = columns[:,0]
189            if columns.shape[1]==4:
190                self.dx = columns[:,1]
191                self.y = columns[:,2]
192                self.dy = columns[:,3]
193            elif columns.shape[1]==3:
194                self.dx = 0
195                self.y = columns[:,1]
196                self.dy = columns[:,2]
197            elif columns.shape[1]==2:
198                self.dx = 0
199                self.y = columns[:,1]
200                self.dy = 1
201            else:
202                raise IOError,"Unexpected number of columns in "+filename
203            idx = numpy.argsort(self.x)
204            self.x,self.y = self.x[idx],self.y[idx]
205            if not numpy.isscalar(self.dx): self.dx = self.dx[idx]
206            if not numpy.isscalar(self.dy): self.dy = self.dy[idx]
207        else:
208            self.x,self.dx,self.y,self.dy = None,None,None,None
209       
210        self.filename = filename
211        self.select(None)
212
213    def select(self, idx):
214        """
215        A selection vector for points to use in the evaluation of the
216        residuals, or None if all points are to be used.
217        """
218        if idx is not None:
219            self.fit_x = self.x[idx]
220            self.fit_y = self.y[idx]
221            if not numpy.isscalar(self.dx): self.fit_dx = self.dx[idx]
222            if not numpy.isscalar(self.dy): self.fit_dy = self.dy[idx]
223        else:
224            self.fit_x = self.x
225            self.fit_dx = self.dx
226            self.fit_y = self.y
227            self.fit_dy = self.dy
228 
229    def residuals(self, fn):
230        """
231        Compute the residuals of the data wrt to the given function.
232
233        y = fn(x) should be a callable accepting a list of points at which
234        to calculate the function, returning the values at those
235        points.
236
237        Any resolution function will be applied after the theory points
238        are calculated.
239        """
240        calc_x = self.calc_x if self.calc_x else self.fit_x
241        self.calc_y = fn(calc_x)
242        self.fx = resolution(calc_x,self.calc_y,self.fit_x,self.fit_dx)
243        return (self.fit_y - self.fx)/self.fit_dy
244
245    def residuals_deriv(self, fn, pars=[]):
246        """
247        Compute residuals and derivatives wrt the given parameters.
248
249        fdf = fn(x,pars=pars) should be a callable accepting a list
250        of points at which to calculate the function and a keyword
251        argument listing the parameters for which the derivative will
252        be calculated.
253
254        Returns a list of the residuals and the derivative wrt the
255        parameter for each parameter.
256
257        Any resolution function will be applied after the theory points
258        and derivatives are calculated.
259        """
260        calc_x = self.calc_x if self.calc_x else self.fit_x
261
262        # Compute f and derivatives
263        fdf = fn(calc_x,pars=pars)
264        self.calc_y = fdf[0]
265
266        # Apply resolution
267        fdf = [resolution(calc_x,y,self.fit_x,self.fit_dx) for y in fdf]
268        self.fx = fdf[0]
269        delta = (self.fx-self.fit_x)/self.fit_dy
270        raise RuntimeError('check whether we want df/dp or dR/dp where R=residuals^2')
271
272        # R = (F(x;p)-y)/sigma => dR/dp  = 1/sigma dF(x;p)/dp
273        # dR^2/dp = 2 R /sigma dF(x;p)/dp
274        df = [ v/self.fit_dy for v in fdf_res[1:] ]
275
276        return [delta]+df
277
278def equal_spaced(x,tol=1e-5):
279    """
280    Return true if data is regularly spaced within tolerance.  Tolerance
281    uses relative error.
282    """
283    step = numpy.diff(x)
284    step_bar = numpy.mean(step)
285    return (abs(step-step_bar) < tol*step_bar).all()
286
287def resample(x,dx,minstep):
288    """
289    Defining the minimum support basis.
290
291    Compute the calc_x points required to adequately sample
292    the function y=f(x) so that the value reported for each
293    measured point is supported by the resolution.  minstep
294    is the minimum allowed sampling density that should be used.
295    """
296    raise NotImplementedError
297
298def resolution(calcx,calcy,fitx,fitdx):
299    """
300    Apply resolution function.  If there is no resolution function, then
301    interpolate from the calculated points to the desired theory points.
302    If the data are irregular, use a brute force convolution function.
303   
304    If the data are regular and the resolution is fixed, then you can
305    deconvolute the data before fitting, saving time and complexity.
306    """
307    if numpy.any(fitdx != 0):
308        if numpy.isscalar(fitdx):
309            fitdx = numpy.ones(fitx.shape)*fitdx
310        fx = convolve(calcx, calcy, fitx, fitdx)
311    elif calcx is fitx:
312        fx = calcy
313    else:
314        fx = numpy.interp(fitx,calcx,calcy)
315    return fx
316
317
318class TempData:
319    """
320    Create a temporary file with a given data set and remove it when done.
321
322    Example::
323
324        from __future__ import with_statement
325        ...
326        with TempData("1 2 3\n4 5 6") as filename:
327            # run tests of loading from filename
328
329    This class is useful for testing data file readers.
330    """
331    def __init__(self,data,suffix='.dat',prefix='',text=True):
332        import os,tempfile
333        fid,self.filename = tempfile.mkstemp('.dat',prefix,text=True)
334        os.write(fid,data)
335        os.close(fid)
336    def __enter__(self):
337        return self.filename
338    def __exit__(self, exc_type, exc_value, traceback):
339        import os
340        os.unlink(self.filename)
341
342D2 = "# x y\n1 1\n2 2\n3 4\n2 5\n"
343"""x,y dataset for TempData"""
344D3 = "# x y dy\n1 1 .1\n2 2 .2\n3 4 .4\n2 5 .5\n"
345"""x,y,dy dataset for TempData"""
346D4 = "# x dx y dy\n1 .1 1 .1\n2 .2 2 .2\n3 .3 4 .4\n2 .3 5 .5\n"
347"""x,dx,y,dy dataset for TempData"""
348   
349def test():
350    import os
351    import numpy
352
353    # Check that two column data loading works
354    with TempData(D2) as filename:
355        data = Data1D(filename=filename)
356    assert numpy.all(data.x == [1,2,2,3])
357    assert numpy.all(data.y == [1,2,5,4])
358    assert data.dx == 0
359    assert data.dy == 1
360    assert data.calc_x is None
361    assert data.residuals(lambda x: x)[3] == 1
362   
363    # Check that interpolation works
364    data.calc_x = [1,1.5,3]
365    assert data.residuals(lambda x: x)[1] == 0
366    assert numpy.all(data.calc_y == data.calc_x) 
367    # Note: calc_y is updated by data.residuals, so be careful with this test
368
369    # Check that three column data loading works
370    with TempData(D3) as filename:
371        data = Data1D(filename=filename)
372    assert numpy.all(data.x == [1,2,2,3])
373    assert numpy.all(data.y == [1,2,5,4])
374    assert numpy.all(data.dy == [.1,.2,.5,.4])
375    assert data.dx == 0
376    assert data.calc_x is None
377    assert data.residuals(lambda x: x)[3] == 1/.4
378
379    # Check that four column data loading works
380    with TempData(D4) as filename:
381        data = Data1D(filename=filename)
382    assert numpy.all(data.x == [1,2,2,3])
383    assert numpy.all(data.dx == [.1,.2,.3,.3])
384    assert numpy.all(data.y == [1,2,5,4])
385    assert numpy.all(data.dy == [.1,.2,.5,.4])
386
387    # Test residuals
388    print "Fix the convolution function!"
389    print data.residuals(lambda x: x)
390    assert data.calc_x is None
391
392if __name__ == "__main__": test()
Note: See TracBrowser for help on using the repository browser.