source: sasview/park-1.2.1/park/data.py @ 5ab5cae

# This program is public domain
"""
Park 1-D data objects.

The class Data1D represents simple 1-D data objects, along with
an ascii file loader.  This format will work well for many
uses, but it is likely that more specialized models will have
their own data file formats.

The minimal data format for park must supply the following
methods:

    residuals(fn)
        returns the residuals vector for the model function.
    residuals_deriv(fn_deriv,par)
        returns the residuals vector for the model function, and
        for the derivatives with respect to the given parameters.

The function passed is going to be model.eval or, in the case
where derivatives are available, model.eval_deriv.  Normally
this will take a vector of independent variables and return the
theory function for that vector, but this is only convention.
The fitting service only uses the parameter set and the residuals
method from the model.

The park GUI will make more demands on the interface, but the
details are not yet resolved.
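
For illustration, a least-squares loop might use this interface as
follows (a sketch only; ``model`` and the file name are hypothetical)::

    data = Data1D(filename="mydata.dat")
    resid = data.residuals(model.eval)   # weighted residuals vector
    chisq = numpy.sum(resid**2)          # cost for the optimizer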
28"""
from __future__ import with_statement  # Used only in test()

__all__ = ['Data1D']

import numpy

try:
    from park._modeling import convolve as _convolve
    def convolve(xi,yi,x,dx):
        """
        Return convolution y of width dx at points x based on the
        sampled input function yi = f(xi).
        """
        y = numpy.empty(x.shape,'d')
        xi,yi,x,dx = [numpy.ascontiguousarray(v,'d') for v in (xi,yi,x,dx)]
        _convolve(xi,yi,x,dx,y)
        return y
except ImportError:
    def convolve(*args,**kw):
        """
        Return convolution y of width dx at points x based on the
        sampled input function yi = f(xi).

        Note: C version is not available in this build, and the python
        version is not implemented.
        """
        raise NotImplementedError("convolve is a compiled function")

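# For reference, a minimal pure-Python sketch of the resolution smearing
# described above (not used by the module; it omits the p(Q)/p(0) >= 0.001
# truncation and assumes dx is a per-point array of positive widths):
def _convolve_py(xi, yi, x, dx):
    """
    Gaussian-weighted average of yi = f(xi) at points x with 1-sigma
    widths dx, integrated numerically over the sampled points.
    """
    xi, yi = numpy.asarray(xi, 'd'), numpy.asarray(yi, 'd')
    y = numpy.empty(len(x), 'd')
    for j, (xj, dxj) in enumerate(zip(x, dx)):
        w = numpy.exp(-0.5*((xi-xj)/dxj)**2)   # unnormalized Gaussian weights
        y[j] = numpy.trapz(w*yi, xi)/numpy.trapz(w, xi)
    return y
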
class Data1D(object):
    """
    Data representation for 1-D fitting.

    Attributes

    filename
        The source of the data.  This may be the empty string if the
        data is simulation data.
    x,y,dy
        The data values.
        x is the measurement points of data to be fitted. x must be sorted.
        y is the measured value.
        dy is the measurement uncertainty.
    dx
        Resolution at the measured points.  The resolution may be 0,
        constant, or defined for each data point.  dx is the 1-sigma
        width of the Gaussian resolution function at point x.  Note that
        dx_FWHM = sqrt(8 ln 2) dx_sigma, so scale dx appropriately.

    fit_x,fit_dx,fit_y,fit_dy
        The points used in evaluating the residuals.
    calc_x
        The points at which to evaluate the theory function.  This may be
        different from the measured points for a number of reasons, such
        as a resolution function which suggests over or under sampling of
        the points (see below).  By default calc_x is x, but it can be set
        explicitly by the user.
    calc_y, fx
        The value of the function at the theory points, and the value of
        the function after resolution has been applied.  These values are
        computed by a call to residuals.

    Notes on calc_x

    The contribution of Q to a resolution of width dQo at point Qo is::

       p(Q) = 1/sqrt(2 pi dQo**2) exp ( -(Q-Qo)**2/(2 dQo**2) )

    We are approximating the convolution at Qo using a numerical
    approximation to the integral over the measured points, with the
    integral limited to p(Q_i)/p(0) >= 0.001.

    Sometimes the function we are convoluting is rapidly changing.
    That means the correct convolution should uniformly sample across
    the entire width of the Gaussian.  This is not possible at the
    end points unless you calculate the theory function beyond what is
    strictly needed for the data.  For a given dQ and step size,
    you need enough steps that the following is true::

        (n*step)**2 > -2 dQ**2 * ln 0.001
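
    For example, with dQ = 0.01 and a step size of 0.002 (values chosen
    only for illustration), -2 dQ**2 ln 0.001 is about 0.037**2, so
    n*step must exceed 0.037 and about 19 extra steps are needed at
    each end.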

    The choice of sampling density is particularly important near
    critical points where the shape of the function changes.  In
    reflectometry, the function goes from flat below the critical
    edge to O(Q**-4) above.  In one particular model, calculating every
    0.005 rather than every 0.02 changed a value above the critical
    edge by 15%.  In a fitting program, this would lead to a somewhat
    larger estimate of the critical edge for this sample.

    Sometimes the theory function is oscillating more rapidly than
    the instrument can resolve.  This happens for example in reflectivity
    measurements involving thick layers.  In these systems, the theory
    function should be oversampled around the measured points Q.  With
    a single thick layer, oversampling can be limited to just one
    period 2 pi/d.  With multiple thick layers, oscillations will
    show interference patterns and it will be necessary to oversample
    uniformly through the entire width of the resolution.  If this is
    not accommodated, then aliasing effects make it difficult to
    compute the correct model.
    """
    filename = ""
    x,y = None,None
    dx,dy = 0,1
    calc_x,calc_y = None,None
    fit_x,fit_y = None,None
    fit_dx,fit_dy = 0,1
    fx = None

    def __init__(self,filename="",x=None,y=None,dx=0,dy=1):
        """
        Define the fitting data.

        Data can be loaded from a file using filename or
        specified directly using x,y,dx,dy.  File loading
        happens after assignment of x,y,dx,dy.
        """
        self.x,self.y,self.dx,self.dy = x,y,dx,dy
        self.select(None)
        if filename: self.load(filename)

    def resample(self,minstep=None):
        """
        Over/under sampling support.

        Compute the calc_x points required to adequately sample
        the function y=f(x) so that the value reported for each
        measured point is supported by the resolution.  minstep
        is the minimum allowed sampling density that should be
        used.
        """
        self.calc_x = resample(self.x,self.dx,minstep)

    def load(self, filename, **kw):
        """
        Load a multicolumn datafile.

        Data should be in columns, with the following defaults::

            x,y or x,y,dy or x,dx,y,dy

        Note that this resets the selected fitting points, the
        calculation points calc_x, and the computed results calc_y
        and fx.

        Data is sorted after loading.

        Any extra keyword arguments are passed to the numpy loadtxt
        function.  This allows you to select the columns you want,
        skip rows, set the column separator, change the comment character,
        amongst other things.
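
        For example, to read only the first and third columns of a file
        that uses '%' for comments (file name illustrative)::

            data.load('mydata.dat', usecols=(0,2), comments='%')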
181        """
182        self.dx,self.dy = 0,1
183        self.calc_x,self.calc_y,self.fx = None,None,None
184        if filename:
185            columns = numpy.loadtxt(filename, **kw)
186            self.x = columns[:,0]
187            if columns.shape[1]==4:
188                self.dx = columns[:,1]
189                self.y = columns[:,2]
190                self.dy = columns[:,3]
191            elif columns.shape[1]==3:
192                self.dx = 0
193                self.y = columns[:,1]
194                self.dy = columns[:,2]
195            elif columns.shape[1]==2:
196                self.dx = 0
197                self.y = columns[:,1]
198                self.dy = 1
199            else:
200                raise IOError,"Unexpected number of columns in "+filename
201            idx = numpy.argsort(self.x)
202            self.x,self.y = self.x[idx],self.y[idx]
203            if not numpy.isscalar(self.dx): self.dx = self.dx[idx]
204            if not numpy.isscalar(self.dy): self.dy = self.dy[idx]
205        else:
206            self.x,self.dx,self.y,self.dy = None,None,None,None
207       
208        self.filename = filename
209        self.select(None)

    def select(self, idx):
        """
        Select the points to use in the evaluation of the residuals.
        idx is an index vector or boolean mask, or None to use all points.
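
        For example, to restrict the fit to the low-x points (cutoff
        value illustrative)::

            data.select(data.x < 2.5)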
215        """
216        if idx is not None:
217            self.fit_x = self.x[idx]
218            self.fit_y = self.y[idx]
219            if not numpy.isscalar(self.dx): self.fit_dx = self.dx[idx]
220            if not numpy.isscalar(self.dy): self.fit_dy = self.dy[idx]
221        else:
222            self.fit_x = self.x
223            self.fit_dx = self.dx
224            self.fit_y = self.y
225            self.fit_dy = self.dy
226 
    def residuals(self, fn):
        """
        Compute the residuals of the data with respect to the given
        function.

        y = fn(x) should be a callable accepting a list of points at which
        to calculate the function, returning the values at those
        points.

        Any resolution function will be applied after the theory points
        are calculated.
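
        For example, a fitter might form a chi-squared statistic from
        the returned vector (model.eval as described in the module
        docstring)::

            chisq = numpy.sum(data.residuals(model.eval)**2)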
        """
        calc_x = self.calc_x if self.calc_x is not None else self.fit_x
        self.calc_y = fn(calc_x)
        self.fx = resolution(calc_x,self.calc_y,self.fit_x,self.fit_dx)
        return (self.fit_y - self.fx)/self.fit_dy

    def residuals_deriv(self, fn, pars=[]):
        """
        Compute residuals and derivatives with respect to the given
        parameters.

        fdf = fn(x,pars=pars) should be a callable accepting a list
        of points at which to calculate the function and a keyword
        argument listing the parameters for which the derivative will
        be calculated.

        Returns a list of the residuals and the derivative with respect
        to the parameter for each parameter.

        Any resolution function will be applied after the theory points
        and derivatives are calculated.
        """
        calc_x = self.calc_x if self.calc_x is not None else self.fit_x

        # Compute f and derivatives
        fdf = fn(calc_x,pars=pars)
        self.calc_y = fdf[0]

        # Apply resolution
        fdf = [resolution(calc_x,y,self.fit_x,self.fit_dx) for y in fdf]
        self.fx = fdf[0]
        delta = (self.fx-self.fit_y)/self.fit_dy
        # TODO: check whether we want df/dp or dR/dp where R=residuals^2

        # R = (F(x;p)-y)/sigma => dR/dp  = 1/sigma dF(x;p)/dp
        # dR^2/dp = 2 R /sigma dF(x;p)/dp
        df = [ v/self.fit_dy for v in fdf[1:] ]

        return [delta]+df

def equal_spaced(x,tol=1e-5):
    """
    Return true if data is regularly spaced within tolerance.  Tolerance
    uses relative error.
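
    For example (illustrative)::

        equal_spaced(numpy.linspace(0., 1., 20))  # => True
        equal_spaced([1., 2., 4.])                # => False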
    """
    step = numpy.diff(x)
    step_bar = numpy.mean(step)
    return (abs(step-step_bar) < tol*step_bar).all()

def resample(x,dx,minstep):
    """
    Define the minimum support basis.

    Compute the calc_x points required to adequately sample
    the function y=f(x) so that the value reported for each
    measured point is supported by the resolution.  minstep
    is the minimum allowed sampling density that should be used.
    """
    raise NotImplementedError

def resolution(calcx,calcy,fitx,fitdx):
    """
    Apply resolution function.  If there is no resolution function, then
    interpolate from the calculated points to the desired theory points.
    If the data are irregular, use a brute force convolution function.

    If the data are regular and the resolution is fixed, then you can
    deconvolute the data before fitting, saving time and complexity.
    """
    if numpy.any(fitdx != 0):
        if numpy.isscalar(fitdx):
            fitdx = numpy.ones(fitx.shape)*fitdx
        fx = convolve(calcx, calcy, fitx, fitdx)
    elif calcx is fitx:
        fx = calcy
    else:
        fx = numpy.interp(fitx,calcx,calcy)
    return fx


class TempData:
    """
    Create a temporary file with a given data set and remove it when done.

    Example::

        from __future__ import with_statement
        ...
        with TempData("1 2 3\n4 5 6") as filename:
            # run tests of loading from filename

    This class is useful for testing data file readers.
    """
    def __init__(self,data,suffix='.dat',prefix='',text=True):
        import os,tempfile
        # Honour the suffix and text arguments rather than hardcoding them
        fid,self.filename = tempfile.mkstemp(suffix=suffix,prefix=prefix,text=text)
        os.write(fid,data)
        os.close(fid)
    def __enter__(self):
        return self.filename
    def __exit__(self, exc_type, exc_value, traceback):
        import os
        os.unlink(self.filename)
D2 = "# x y\n1 1\n2 2\n3 4\n2 5\n"
"""x,y dataset for TempData"""
D3 = "# x y dy\n1 1 .1\n2 2 .2\n3 4 .4\n2 5 .5\n"
"""x,y,dy dataset for TempData"""
D4 = "# x dx y dy\n1 .1 1 .1\n2 .2 2 .2\n3 .3 4 .4\n2 .3 5 .5\n"
"""x,dx,y,dy dataset for TempData"""

def test():
    import os
    import numpy

    # Check that two column data loading works
    with TempData(D2) as filename:
        data = Data1D(filename=filename)
    assert numpy.all(data.x == [1,2,2,3])
    assert numpy.all(data.y == [1,2,5,4])
    assert data.dx == 0
    assert data.dy == 1
    assert data.calc_x is None
    assert data.residuals(lambda x: x)[3] == 1

    # Check that interpolation works
    data.calc_x = [1,1.5,3]
    assert data.residuals(lambda x: x)[1] == 0
    assert numpy.all(data.calc_y == data.calc_x)
    # Note: calc_y is updated by data.residuals, so be careful with this test

    # Check that three column data loading works
    with TempData(D3) as filename:
        data = Data1D(filename=filename)
    assert numpy.all(data.x == [1,2,2,3])
    assert numpy.all(data.y == [1,2,5,4])
    assert numpy.all(data.dy == [.1,.2,.5,.4])
    assert data.dx == 0
    assert data.calc_x is None
    assert data.residuals(lambda x: x)[3] == 1/.4

    # Check that four column data loading works
    with TempData(D4) as filename:
        data = Data1D(filename=filename)
    assert numpy.all(data.x == [1,2,2,3])
    assert numpy.all(data.dx == [.1,.2,.3,.3])
    assert numpy.all(data.y == [1,2,5,4])
    assert numpy.all(data.dy == [.1,.2,.5,.4])

    # Test residuals
    print "Fix the convolution function!"
    print data.residuals(lambda x: x)
    assert data.calc_x is None

if __name__ == "__main__": test()