[3570545] | 1 | # This program is public domain |
---|
| 2 | """ |
---|
| 3 | Park 1-D data objects. |
---|
| 4 | |
---|
| 5 | The class Data1D represents simple 1-D data objects, along with |
---|
| 6 | an ascii file loader. This format will work well for many |
---|
| 7 | uses, but it is likely that more specialized models will have |
---|
| 8 | their own data file formats. |
---|
| 9 | |
---|
| 10 | The minimal data format for park must supply the following |
---|
| 11 | methods: |
---|
| 12 | |
---|
| 13 | residuals(fn) |
---|
| 14 | returns the residuals vector for the model function. |
---|
| 15 | residuals_deriv(fn_deriv,par) |
---|
| 16 | returns the residuals vector for the model function, and |
---|
| 17 | for the derivatives with respect to the given parameters. |
---|
| 18 | |
---|
| 19 | The function passed is going to be model.eval or in the case |
---|
| 20 | where derivatives are available, model.eval_deriv. Normally |
---|
| 21 | this will take a vector of dependent variables and return the |
---|
| 22 | theory function for that vector but this is only convention. |
---|
| 23 | The fitting service only uses the parameter set and the residuals |
---|
| 24 | method from the model. |
---|
| 25 | |
---|
| 26 | The park GUI will make more demands on the interface, but the |
---|
| 27 | details are not yet resolved. |
---|
| 28 | """ |
---|
| 29 | from __future__ import with_statement # Used only in test() |
---|
| 30 | |
---|
| 31 | __all__ = ['Data1D'] |
---|
| 32 | |
---|
| 33 | import numpy |
---|
| 34 | |
---|
| 35 | try: |
---|
| 36 | from park._modeling import convolve as _convolve |
---|
| 37 | def convolve(xi,yi,x,dx): |
---|
| 38 | """ |
---|
| 39 | Return convolution y of width dx at points x based on the |
---|
| 40 | sampled input function yi = f(xi). |
---|
| 41 | """ |
---|
| 42 | y = numpy.empty(x.shape,'d') |
---|
| 43 | xi,yi,x,dx = [numpy.ascontiguousarray(v,'d') for v in xi,yi,x,dx] |
---|
| 44 | _convolve(xi,yi,x,dx,y) |
---|
| 45 | return y |
---|
| 46 | except: |
---|
| 47 | def convolve(*args,**kw): |
---|
| 48 | """ |
---|
| 49 | Return convolution y of width dx at points x based on the |
---|
| 50 | sampled input function yi = f(xi). |
---|
| 51 | |
---|
| 52 | Note: C version is not available in this build, and python |
---|
| 53 | version is not implemented. |
---|
| 54 | """ |
---|
| 55 | raise NotImplementedError("convolve is a compiled function") |
---|
| 56 | |
---|
| 57 | |
---|
| 58 | __all__ = ['Data1D'] |
---|
| 59 | |
---|
| 60 | class Data1D(object): |
---|
| 61 | """ |
---|
| 62 | Data representation for 1-D fitting. |
---|
| 63 | |
---|
| 64 | Attributes |
---|
| 65 | |
---|
| 66 | filename |
---|
| 67 | The source of the data. This may be the empty string if the |
---|
| 68 | data is simulation data. |
---|
| 69 | x,y,dy |
---|
| 70 | The data values. |
---|
| 71 | x is the measurement points of data to be fitted. x must be sorted. |
---|
| 72 | y is the measured value |
---|
| 73 | dy is the measurement uncertainty. |
---|
| 74 | dx |
---|
| 75 | Resolution at the the measured points. The resolution may be 0, |
---|
| 76 | constant, or defined for each data point. dx is the 1-sigma |
---|
| 77 | width of the Gaussian resolution function at point x. Note that |
---|
| 78 | dx_FWHM = sqrt(8 ln 2) dx_sigma, so scale dx appropriately. |
---|
| 79 | |
---|
| 80 | |
---|
| 81 | fit_x,fit_dx,fit_y,fit_dy |
---|
| 82 | The points used in evaluating the residuals. |
---|
| 83 | calc_x |
---|
| 84 | The points at which to evaluate the theory function. This may be |
---|
| 85 | different from the measured points for a number of reasons, such |
---|
| 86 | as a resolution function which suggests over or under sampling of |
---|
| 87 | the points (see below). By default calc_x is x, but it can be set |
---|
| 88 | explicitly by the user. |
---|
| 89 | calc_y, fx |
---|
| 90 | The value of the function at the theory points, and the value of |
---|
| 91 | the function after resolution has been applied. These values are |
---|
| 92 | computed by a call to residuals. |
---|
| 93 | |
---|
| 94 | Notes on calc_x |
---|
| 95 | |
---|
| 96 | The contribution of Q to a resolution of width dQo at point Qo is:: |
---|
| 97 | |
---|
| 98 | p(Q) = 1/sqrt(2 pi dQo**2) exp ( (Q-Qo)**2/(2 dQo**2) ) |
---|
| 99 | |
---|
| 100 | We are approximating the convolution at Qo using a numerical |
---|
| 101 | approximation to the integral over the measured points, with the |
---|
| 102 | integral is limited to p(Q_i)/p(0)>=0.001. |
---|
| 103 | |
---|
| 104 | Sometimes the function we are convoluting is rapidly changing. |
---|
| 105 | That means the correct convolution should uniformly sample across |
---|
| 106 | the entire width of the Gaussian. This is not possible at the |
---|
| 107 | end points unless you calculate the theory function beyond what is |
---|
| 108 | strictly needed for the data. For a given dQ and step size, |
---|
| 109 | you need enough steps that the following is true:: |
---|
| 110 | |
---|
| 111 | (n*step)**2 > -2 dQ**2 * ln 0.001 |
---|
| 112 | |
---|
| 113 | The choice of sampling density is particularly important near |
---|
| 114 | critical points where the shape of the function changes. In |
---|
| 115 | reflectometry, the function goes from flat below the critical |
---|
| 116 | edge to O(Q**4) above. In one particular model, calculating every |
---|
| 117 | 0.005 rather than every 0.02 changed a value above the critical |
---|
| 118 | edge by 15%. In a fitting program, this would lead to a somewhat |
---|
| 119 | larger estimate of the critical edge for this sample. |
---|
| 120 | |
---|
| 121 | Sometimes the theory function is oscillating more rapidly than |
---|
| 122 | the instrument can resolve. This happens for example in reflectivity |
---|
| 123 | measurements involving thick layers. In these systems, the theory |
---|
| 124 | function should be oversampled around the measured points Q. With |
---|
| 125 | a single thick layer, oversampling can be limited to just one |
---|
| 126 | period 2 pi/d. With multiple thick layers, oscillations will |
---|
| 127 | show interference patterns and it will be necessary to oversample |
---|
| 128 | uniformly through the entire width of the resolution. If this is |
---|
| 129 | not accommodated, then aliasing effects make it difficult to |
---|
| 130 | compute the correct model. |
---|
| 131 | """ |
---|
| 132 | filename = "" |
---|
| 133 | x,y = None,None |
---|
| 134 | dx,dy = 0,1 |
---|
| 135 | calc_x,calc_y = None,None |
---|
| 136 | fit_x,fit_y = None,None |
---|
| 137 | fit_dx,fit_dy = 0,1 |
---|
| 138 | fx = None |
---|
| 139 | |
---|
| 140 | def __init__(self,filename="",x=None,y=None,dx=0,dy=1): |
---|
| 141 | """ |
---|
| 142 | Define the fitting data. |
---|
| 143 | |
---|
| 144 | Data can be loaded from a file using filename or |
---|
| 145 | specified directly using x,y,dx,dy. File loading |
---|
| 146 | happens after assignment of x,y,dx,dy. |
---|
| 147 | """ |
---|
| 148 | self.x,self.y,self.dx,self.dy = x,y,dx,dy |
---|
| 149 | self.select(None) |
---|
| 150 | if filename: self.load(filename) |
---|
| 151 | |
---|
| 152 | def resample(self,minstep=None): |
---|
| 153 | """ |
---|
| 154 | Over/under sampling support. |
---|
| 155 | |
---|
| 156 | Compute the calc_x points required to adequately sample |
---|
| 157 | the function y=f(x) so that the value reported for each |
---|
| 158 | measured point is supported by the resolution. minstep |
---|
| 159 | is the minimum allowed sampling density that should be |
---|
| 160 | used. |
---|
| 161 | """ |
---|
| 162 | self.calc_x = resample(self.x,self.dx,minstep) |
---|
| 163 | |
---|
| 164 | def load(self, filename, **kw): |
---|
| 165 | """ |
---|
| 166 | Load a multicolumn datafile. |
---|
| 167 | |
---|
| 168 | Data should be in columns, with the following defaults:: |
---|
| 169 | |
---|
| 170 | x,y or x,y,dy or x,dx,y,dy |
---|
| 171 | |
---|
| 172 | Note that this resets the selected fitting points calc_x and the |
---|
| 173 | computed results calc_y and fx. |
---|
| 174 | |
---|
| 175 | Data is sorted after loading. |
---|
| 176 | |
---|
| 177 | Any extra keyword arguments are passed to the numpy loadtxt |
---|
| 178 | function. This allows you to select the columns you want, |
---|
| 179 | skip rows, set the column separator, change the comment character, |
---|
| 180 | amongst other things. |
---|
| 181 | """ |
---|
| 182 | self.dx,self.dy = 0,1 |
---|
| 183 | self.calc_x,self.calc_y,self.fx = None,None,None |
---|
| 184 | if filename: |
---|
| 185 | columns = numpy.loadtxt(filename, **kw) |
---|
| 186 | self.x = columns[:,0] |
---|
| 187 | if columns.shape[1]==4: |
---|
| 188 | self.dx = columns[:,1] |
---|
| 189 | self.y = columns[:,2] |
---|
| 190 | self.dy = columns[:,3] |
---|
| 191 | elif columns.shape[1]==3: |
---|
| 192 | self.dx = 0 |
---|
| 193 | self.y = columns[:,1] |
---|
| 194 | self.dy = columns[:,2] |
---|
| 195 | elif columns.shape[1]==2: |
---|
| 196 | self.dx = 0 |
---|
| 197 | self.y = columns[:,1] |
---|
| 198 | self.dy = 1 |
---|
| 199 | else: |
---|
| 200 | raise IOError,"Unexpected number of columns in "+filename |
---|
| 201 | idx = numpy.argsort(self.x) |
---|
| 202 | self.x,self.y = self.x[idx],self.y[idx] |
---|
| 203 | if not numpy.isscalar(self.dx): self.dx = self.dx[idx] |
---|
| 204 | if not numpy.isscalar(self.dy): self.dy = self.dy[idx] |
---|
| 205 | else: |
---|
| 206 | self.x,self.dx,self.y,self.dy = None,None,None,None |
---|
| 207 | |
---|
| 208 | self.filename = filename |
---|
| 209 | self.select(None) |
---|
| 210 | |
---|
| 211 | def select(self, idx): |
---|
| 212 | """ |
---|
| 213 | A selection vector for points to use in the evaluation of the |
---|
| 214 | residuals, or None if all points are to be used. |
---|
| 215 | """ |
---|
| 216 | if idx is not None: |
---|
| 217 | self.fit_x = self.x[idx] |
---|
| 218 | self.fit_y = self.y[idx] |
---|
| 219 | if not numpy.isscalar(self.dx): self.fit_dx = self.dx[idx] |
---|
| 220 | if not numpy.isscalar(self.dy): self.fit_dy = self.dy[idx] |
---|
| 221 | else: |
---|
| 222 | self.fit_x = self.x |
---|
| 223 | self.fit_dx = self.dx |
---|
| 224 | self.fit_y = self.y |
---|
| 225 | self.fit_dy = self.dy |
---|
| 226 | |
---|
| 227 | def residuals(self, fn): |
---|
| 228 | """ |
---|
| 229 | Compute the residuals of the data wrt to the given function. |
---|
| 230 | |
---|
| 231 | y = fn(x) should be a callable accepting a list of points at which |
---|
| 232 | to calculate the function, returning the values at those |
---|
| 233 | points. |
---|
| 234 | |
---|
| 235 | Any resolution function will be applied after the theory points |
---|
| 236 | are calculated. |
---|
| 237 | """ |
---|
| 238 | calc_x = self.calc_x if self.calc_x else self.fit_x |
---|
| 239 | self.calc_y = fn(calc_x) |
---|
| 240 | self.fx = resolution(calc_x,self.calc_y,self.fit_x,self.fit_dx) |
---|
| 241 | return (self.fit_y - self.fx)/self.fit_dy |
---|
| 242 | |
---|
| 243 | def residuals_deriv(self, fn, pars=[]): |
---|
| 244 | """ |
---|
| 245 | Compute residuals and derivatives wrt the given parameters. |
---|
| 246 | |
---|
| 247 | fdf = fn(x,pars=pars) should be a callable accepting a list |
---|
| 248 | of points at which to calculate the function and a keyword |
---|
| 249 | argument listing the parameters for which the derivative will |
---|
| 250 | be calculated. |
---|
| 251 | |
---|
| 252 | Returns a list of the residuals and the derivative wrt the |
---|
| 253 | parameter for each parameter. |
---|
| 254 | |
---|
| 255 | Any resolution function will be applied after the theory points |
---|
| 256 | and derivatives are calculated. |
---|
| 257 | """ |
---|
| 258 | calc_x = self.calc_x if self.calc_x else self.fit_x |
---|
| 259 | |
---|
| 260 | # Compute f and derivatives |
---|
| 261 | fdf = fn(calc_x,pars=pars) |
---|
| 262 | self.calc_y = fdf[0] |
---|
| 263 | |
---|
| 264 | # Apply resolution |
---|
| 265 | fdf = [resolution(calc_x,y,self.fit_x,self.fit_dx) for y in fdf] |
---|
| 266 | self.fx = fdf[0] |
---|
| 267 | delta = (self.fx-self.fit_x)/self.fit_dy |
---|
| 268 | raise RuntimeError('check whether we want df/dp or dR/dp where R=residuals^2') |
---|
| 269 | |
---|
| 270 | # R = (F(x;p)-y)/sigma => dR/dp = 1/sigma dF(x;p)/dp |
---|
| 271 | # dR^2/dp = 2 R /sigma dF(x;p)/dp |
---|
| 272 | df = [ v/self.fit_dy for v in fdf_res[1:] ] |
---|
| 273 | |
---|
| 274 | return [delta]+df |
---|
| 275 | |
---|
| 276 | def equal_spaced(x,tol=1e-5): |
---|
| 277 | """ |
---|
| 278 | Return true if data is regularly spaced within tolerance. Tolerance |
---|
| 279 | uses relative error. |
---|
| 280 | """ |
---|
| 281 | step = numpy.diff(x) |
---|
| 282 | step_bar = numpy.mean(step) |
---|
| 283 | return (abs(step-step_bar) < tol*step_bar).all() |
---|
| 284 | |
---|
| 285 | def resample(x,dx,minstep): |
---|
| 286 | """ |
---|
| 287 | Defining the minimum support basis. |
---|
| 288 | |
---|
| 289 | Compute the calc_x points required to adequately sample |
---|
| 290 | the function y=f(x) so that the value reported for each |
---|
| 291 | measured point is supported by the resolution. minstep |
---|
| 292 | is the minimum allowed sampling density that should be used. |
---|
| 293 | """ |
---|
| 294 | raise NotImplementedError |
---|
| 295 | |
---|
| 296 | def resolution(calcx,calcy,fitx,fitdx): |
---|
| 297 | """ |
---|
| 298 | Apply resolution function. If there is no resolution function, then |
---|
| 299 | interpolate from the calculated points to the desired theory points. |
---|
| 300 | If the data are irregular, use a brute force convolution function. |
---|
| 301 | |
---|
| 302 | If the data are regular and the resolution is fixed, then you can |
---|
| 303 | deconvolute the data before fitting, saving time and complexity. |
---|
| 304 | """ |
---|
| 305 | if numpy.any(fitdx != 0): |
---|
| 306 | if numpy.isscalar(fitdx): |
---|
| 307 | fitdx = numpy.ones(fitx.shape)*fitdx |
---|
| 308 | fx = convolve(calcx, calcy, fitx, fitdx) |
---|
| 309 | elif calcx is fitx: |
---|
| 310 | fx = calcy |
---|
| 311 | else: |
---|
| 312 | fx = numpy.interp(fitx,calcx,calcy) |
---|
| 313 | return fx |
---|
| 314 | |
---|
| 315 | |
---|
| 316 | class TempData: |
---|
| 317 | """ |
---|
| 318 | Create a temporary file with a given data set and remove it when done. |
---|
| 319 | |
---|
| 320 | Example:: |
---|
| 321 | |
---|
| 322 | from __future__ import with_statement |
---|
| 323 | ... |
---|
| 324 | with TempData("1 2 3\n4 5 6") as filename: |
---|
| 325 | # run tests of loading from filename |
---|
| 326 | |
---|
| 327 | This class is useful for testing data file readers. |
---|
| 328 | """ |
---|
| 329 | def __init__(self,data,suffix='.dat',prefix='',text=True): |
---|
| 330 | import os,tempfile |
---|
| 331 | fid,self.filename = tempfile.mkstemp('.dat',prefix,text=True) |
---|
| 332 | os.write(fid,data) |
---|
| 333 | os.close(fid) |
---|
| 334 | def __enter__(self): |
---|
| 335 | return self.filename |
---|
| 336 | def __exit__(self, exc_type, exc_value, traceback): |
---|
| 337 | import os |
---|
| 338 | os.unlink(self.filename) |
---|
| 339 | |
---|
| 340 | D2 = "# x y\n1 1\n2 2\n3 4\n2 5\n" |
---|
| 341 | """x,y dataset for TempData""" |
---|
| 342 | D3 = "# x y dy\n1 1 .1\n2 2 .2\n3 4 .4\n2 5 .5\n" |
---|
| 343 | """x,y,dy dataset for TempData""" |
---|
| 344 | D4 = "# x dx y dy\n1 .1 1 .1\n2 .2 2 .2\n3 .3 4 .4\n2 .3 5 .5\n" |
---|
| 345 | """x,dx,y,dy dataset for TempData""" |
---|
| 346 | |
---|
| 347 | def test(): |
---|
| 348 | import os |
---|
| 349 | import numpy |
---|
| 350 | |
---|
| 351 | # Check that two column data loading works |
---|
| 352 | with TempData(D2) as filename: |
---|
| 353 | data = Data1D(filename=filename) |
---|
| 354 | assert numpy.all(data.x == [1,2,2,3]) |
---|
| 355 | assert numpy.all(data.y == [1,2,5,4]) |
---|
| 356 | assert data.dx == 0 |
---|
| 357 | assert data.dy == 1 |
---|
| 358 | assert data.calc_x is None |
---|
| 359 | assert data.residuals(lambda x: x)[3] == 1 |
---|
| 360 | |
---|
| 361 | # Check that interpolation works |
---|
| 362 | data.calc_x = [1,1.5,3] |
---|
| 363 | assert data.residuals(lambda x: x)[1] == 0 |
---|
| 364 | assert numpy.all(data.calc_y == data.calc_x) |
---|
| 365 | # Note: calc_y is updated by data.residuals, so be careful with this test |
---|
| 366 | |
---|
| 367 | # Check that three column data loading works |
---|
| 368 | with TempData(D3) as filename: |
---|
| 369 | data = Data1D(filename=filename) |
---|
| 370 | assert numpy.all(data.x == [1,2,2,3]) |
---|
| 371 | assert numpy.all(data.y == [1,2,5,4]) |
---|
| 372 | assert numpy.all(data.dy == [.1,.2,.5,.4]) |
---|
| 373 | assert data.dx == 0 |
---|
| 374 | assert data.calc_x is None |
---|
| 375 | assert data.residuals(lambda x: x)[3] == 1/.4 |
---|
| 376 | |
---|
| 377 | # Check that four column data loading works |
---|
| 378 | with TempData(D4) as filename: |
---|
| 379 | data = Data1D(filename=filename) |
---|
| 380 | assert numpy.all(data.x == [1,2,2,3]) |
---|
| 381 | assert numpy.all(data.dx == [.1,.2,.3,.3]) |
---|
| 382 | assert numpy.all(data.y == [1,2,5,4]) |
---|
| 383 | assert numpy.all(data.dy == [.1,.2,.5,.4]) |
---|
| 384 | |
---|
| 385 | # Test residuals |
---|
| 386 | print "Fix the convolution function!" |
---|
| 387 | print data.residuals(lambda x: x) |
---|
| 388 | assert data.calc_x is None |
---|
| 389 | |
---|
| 390 | if __name__ == "__main__": test() |
---|