# This program is public domain
"""
Park 1-D data objects.

The class Data1D represents simple 1-D data objects, along with
an ascii file loader. This format will work well for many
uses, but it is likely that more specialized models will have
their own data file formats.

The minimal data format for park must supply the following
methods:

    residuals(fn)
        returns the residuals vector for the model function.
    residuals_deriv(fn_deriv,par)
        returns the residuals vector for the model function, and
        for the derivatives with respect to the given parameters.

The function passed is going to be model.eval or, in the case
where derivatives are available, model.eval_deriv. Normally
this will take a vector of independent variables and return the
theory function for that vector, but this is only a convention.
The fitting service only uses the parameter set and the residuals
method from the model.
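
A minimal sketch of that use (model stands for a hypothetical park
model object; it is not defined in this module, and the file name
is made up)::

    import numpy
    data = Data1D('mydata.dat')
    chisq = numpy.sum(data.residuals(model.eval)**2)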

The park GUI will make more demands on the interface, but the
details are not yet resolved.
"""
from __future__ import with_statement  # Used only in test()

__all__ = ['Data1D']

import numpy

try:
    from park._modeling import convolve as _convolve
    def convolve(xi, yi, x, dx):
        """
        Return convolution y of width dx at points x based on the
        sampled input function yi = f(xi).
        """
        y = numpy.empty(x.shape, 'd')
        xi, yi, x, dx = [numpy.ascontiguousarray(v, 'd') for v in (xi, yi, x, dx)]
        _convolve(xi, yi, x, dx, y)
        return y
except ImportError:
    def convolve(*args, **kw):
        """
        Return convolution y of width dx at points x based on the
        sampled input function yi = f(xi).

        Note: the C version is not available in this build, and the
        python version is not implemented.
        """
        raise NotImplementedError("convolve is a compiled function")
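
# A hedged, pure-python sketch of the brute-force smearing described in the
# convolve docstring above: a Gaussian-weighted average of yi around each
# point x with 1-sigma width dx.  It ignores the non-uniform spacing of xi
# and is illustrative only; it is not used by convolve() or by the rest of
# this module.
def _convolve_py_sketch(xi, yi, x, dx):
    """Approximate Gaussian smearing of yi = f(xi) at points x, widths dx."""
    xi, yi = numpy.asarray(xi, 'd'), numpy.asarray(yi, 'd')
    x = numpy.asarray(x, 'd')
    dx = numpy.asarray(dx, 'd')*numpy.ones(x.shape)   # allow scalar dx
    y = numpy.empty(x.shape, 'd')
    for k in range(len(x)):
        if dx[k] <= 0:
            # No resolution at this point; fall back to interpolation.
            y[k] = numpy.interp(x[k], xi, yi)
        else:
            w = numpy.exp(-0.5*((xi - x[k])/dx[k])**2)
            y[k] = numpy.sum(w*yi)/numpy.sum(w)
    return y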


class Data1D(object):
    """
    Data representation for 1-D fitting.

    Attributes
    ==========

    filename
        The source of the data. This may be the empty string if the
        data is simulation data.
    x,y,dy
        The data values.
        x is the measurement points of data to be fitted. x must be sorted.
        y is the measured value.
        dy is the measurement uncertainty.
    dx
        Resolution at the measured points. The resolution may be 0,
        constant, or defined for each data point. dx is the 1-sigma
        width of the Gaussian resolution function at point x. Note that
        dx_FWHM = sqrt(8 ln 2) dx_sigma, so scale dx appropriately.
    fit_x,fit_dx,fit_y,fit_dy
        The points used in evaluating the residuals.
    calc_x
        The points at which to evaluate the theory function. This may be
        different from the measured points for a number of reasons, such
        as a resolution function which suggests over or under sampling of
        the points (see below). By default calc_x is x, but it can be set
        explicitly by the user.
    calc_y, fx
        The value of the function at the theory points, and the value of
        the function after resolution has been applied. These values are
        computed by a call to residuals.

    Notes on calc_x
    ===============

    The contribution of Q to a resolution of width dQo at point Qo is::

        p(Q) = 1/sqrt(2 pi dQo**2) exp( -(Q-Qo)**2/(2 dQo**2) )

    We are approximating the convolution at Qo using a numerical
    approximation to the integral over the measured points, with the
    integral limited to p(Q_i)/p(0) >= 0.001.

    Sometimes the function we are convolving is rapidly changing.
    That means the correct convolution should uniformly sample across
    the entire width of the Gaussian. This is not possible at the
    end points unless you calculate the theory function beyond what is
    strictly needed for the data. For a given dQ and step size,
    you need enough steps that the following is true::

        (n*step)**2 > -2 dQ**2 * ln 0.001
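
    For example, solving for n gives n > sqrt(-2 ln 0.001) dQ/step,
    or about 3.72 dQ/step; with dQ = 0.01 and a step of 0.005
    (illustrative numbers only), roughly 8 calculation points are
    needed beyond each end of the data.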

    The choice of sampling density is particularly important near
    critical points where the shape of the function changes. In
    reflectometry, the function goes from flat below the critical
    edge to O(Q**4) above. In one particular model, calculating every
    0.005 rather than every 0.02 changed a value above the critical
    edge by 15%. In a fitting program, this would lead to a somewhat
    larger estimate of the critical edge for this sample.

    Sometimes the theory function is oscillating more rapidly than
    the instrument can resolve. This happens for example in reflectivity
    measurements involving thick layers. In these systems, the theory
    function should be oversampled around the measured points Q. With
    a single thick layer, oversampling can be limited to just one
    period 2 pi/d. With multiple thick layers, oscillations will
    show interference patterns and it will be necessary to oversample
    uniformly through the entire width of the resolution. If this is
    not accommodated, then aliasing effects make it difficult to
    compute the correct model.
    """
    filename = ""
    x,y = None,None
    dx,dy = 0,1
    calc_x,calc_y = None,None
    fit_x,fit_y = None,None
    fit_dx,fit_dy = 0,1
    fx = None

    def __init__(self, filename="", x=None, y=None, dx=0, dy=1):
        """
        Define the fitting data.

        Data can be loaded from a file using filename or
        specified directly using x,y,dx,dy. File loading
        happens after assignment of x,y,dx,dy.
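
        A hypothetical direct construction, without a file::

            data = Data1D(x=[0.1,0.2,0.3], y=[9.,11.,10.], dy=[1.,1.,1.])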
        """
        self.x,self.y,self.dx,self.dy = x,y,dx,dy
        self.select(None)
        if filename: self.load(filename)

    def resample(self, minstep=None):
        """
        Over/under sampling support.

        Compute the calc_x points required to adequately sample
        the function y=f(x) so that the value reported for each
        measured point is supported by the resolution. minstep
        is the minimum allowed sampling density that should be
        used.
        """
        self.calc_x = resample(self.x, self.dx, minstep)

    def load(self, filename, **kw):
        """
        Load a multicolumn datafile.

        Data should be in columns, with the following defaults::

            x,y or x,y,dy or x,dx,y,dy

        Note that this resets the selected fitting points calc_x and the
        computed results calc_y and fx.

        Data is sorted after loading.

        Any extra keyword arguments are passed to the numpy loadtxt
        function. This allows you to select the columns you want,
        skip rows, set the column separator, change the comment character,
        amongst other things.
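
        For example, to read a hypothetical comma separated file,
        keeping only the first and third columns (delimiter and
        usecols are standard numpy.loadtxt arguments)::

            data.load('mydata.csv', delimiter=',', usecols=(0,2))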
        """
        self.dx,self.dy = 0,1
        self.calc_x,self.calc_y,self.fx = None,None,None
        if filename:
            columns = numpy.loadtxt(filename, **kw)
            self.x = columns[:,0]
            if columns.shape[1]==4:
                self.dx = columns[:,1]
                self.y = columns[:,2]
                self.dy = columns[:,3]
            elif columns.shape[1]==3:
                self.dx = 0
                self.y = columns[:,1]
                self.dy = columns[:,2]
            elif columns.shape[1]==2:
                self.dx = 0
                self.y = columns[:,1]
                self.dy = 1
            else:
                raise IOError("Unexpected number of columns in "+filename)
            idx = numpy.argsort(self.x)
            self.x,self.y = self.x[idx],self.y[idx]
            if not numpy.isscalar(self.dx): self.dx = self.dx[idx]
            if not numpy.isscalar(self.dy): self.dy = self.dy[idx]
        else:
            self.x,self.dx,self.y,self.dy = None,None,None,None

        self.filename = filename
        self.select(None)

    def select(self, idx):
        """
        Select the points to use when evaluating the residuals. idx is
        an index or boolean selection vector into the data points, or
        None if all points are to be used.
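
        For example, a hypothetical fit restricted to x > 0.1::

            data.select(data.x > 0.1)
            data.select(None)    # restore the full data set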
        """
        if idx is not None:
            self.fit_x = self.x[idx]
            self.fit_y = self.y[idx]
            if not numpy.isscalar(self.dx): self.fit_dx = self.dx[idx]
            if not numpy.isscalar(self.dy): self.fit_dy = self.dy[idx]
        else:
            self.fit_x = self.x
            self.fit_dx = self.dx
            self.fit_y = self.y
            self.fit_dy = self.dy

    def residuals(self, fn):
        """
        Compute the residuals of the data with respect to the given
        function.

        fn should be a callable accepting a vector of points at which
        to calculate the function and returning the values at those
        points.

        Any resolution function will be applied after the theory points
        are calculated.
        """
        calc_x = self.calc_x if self.calc_x is not None else self.fit_x
        self.calc_y = fn(calc_x)
        self.fx = resolution(calc_x, self.calc_y, self.fit_x, self.fit_dx)
        return (self.fit_y - self.fx)/self.fit_dy

    def residuals_deriv(self, fn, pars=[]):
        """
        Compute residuals and derivatives with respect to the given
        parameters.

        fdf = fn(x,pars=pars) should be a callable accepting a list
        of points at which to calculate the function and a keyword
        argument listing the parameters for which the derivatives will
        be calculated; it should return the function value followed by
        the derivative with respect to each requested parameter.

        Returns a list containing the residuals followed by the
        derivative with respect to each parameter.

        Any resolution function will be applied after the theory points
        and derivatives are calculated.
        """
        calc_x = self.calc_x if self.calc_x is not None else self.fit_x

        # Compute f and derivatives
        fdf = fn(calc_x, pars=pars)
        self.calc_y = fdf[0]

        # Apply resolution
        fdf = [resolution(calc_x, y, self.fit_x, self.fit_dx) for y in fdf]
        self.fx = fdf[0]
        delta = (self.fx - self.fit_y)/self.fit_dy
        raise RuntimeError('check whether we want df/dp or dR/dp where R=residuals^2')

        # R = (F(x;p)-y)/sigma => dR/dp = 1/sigma dF(x;p)/dp
        # dR^2/dp = 2 R /sigma dF(x;p)/dp
        df = [v/self.fit_dy for v in fdf[1:]]

        return [delta] + df

def equal_spaced(x, tol=1e-5):
    """
    Return True if the data are regularly spaced within tolerance.
    Tolerance uses relative error.
    """
    step = numpy.diff(x)
    step_bar = numpy.mean(step)
    return (abs(step - step_bar) < tol*step_bar).all()

def resample(x, dx, minstep):
    """
    Define the minimum support basis.

    Compute the calc_x points required to adequately sample
    the function y=f(x) so that the value reported for each
    measured point is supported by the resolution. minstep
    is the minimum allowed sampling density that should be used.
    """
    raise NotImplementedError

def resolution(calcx, calcy, fitx, fitdx):
    """
    Apply resolution function. If there is no resolution function, then
    interpolate from the calculated points to the desired theory points.
    If the data are irregular, use a brute force convolution function.

    If the data are regular and the resolution is fixed, then you can
    deconvolute the data before fitting, saving time and complexity.
    """
    if numpy.any(fitdx != 0):
        if numpy.isscalar(fitdx):
            fitdx = numpy.ones(fitx.shape)*fitdx
        fx = convolve(calcx, calcy, fitx, fitdx)
    elif calcx is fitx:
        fx = calcy
    else:
        fx = numpy.interp(fitx, calcx, calcy)
    return fx


class TempData:
    """
    Create a temporary file with a given data set and remove it when done.

    Example::

        from __future__ import with_statement
        ...
        with TempData("1 2 3\n4 5 6") as filename:
            # run tests of loading from filename

    This class is useful for testing data file readers.
    """
    def __init__(self, data, suffix='.dat', prefix='', text=True):
        import os, tempfile
        fid, self.filename = tempfile.mkstemp(suffix=suffix, prefix=prefix, text=text)
        os.write(fid, data)
        os.close(fid)
    def __enter__(self):
        return self.filename
    def __exit__(self, exc_type, exc_value, traceback):
        import os
        os.unlink(self.filename)

D2 = "# x y\n1 1\n2 2\n3 4\n2 5\n"
"""x,y dataset for TempData"""
D3 = "# x y dy\n1 1 .1\n2 2 .2\n3 4 .4\n2 5 .5\n"
"""x,y,dy dataset for TempData"""
D4 = "# x dx y dy\n1 .1 1 .1\n2 .2 2 .2\n3 .3 4 .4\n2 .3 5 .5\n"
"""x,dx,y,dy dataset for TempData"""

def test():
    import os
    import numpy

    # Check that two column data loading works
    with TempData(D2) as filename:
        data = Data1D(filename=filename)
    assert numpy.all(data.x == [1,2,2,3])
    assert numpy.all(data.y == [1,2,5,4])
    assert data.dx == 0
    assert data.dy == 1
    assert data.calc_x is None
    assert data.residuals(lambda x: x)[3] == 1

    # Check that interpolation works
    data.calc_x = [1,1.5,3]
    assert data.residuals(lambda x: x)[1] == 0
    assert numpy.all(data.calc_y == data.calc_x)
    # Note: calc_y is updated by data.residuals, so be careful with this test

    # Check that three column data loading works
    with TempData(D3) as filename:
        data = Data1D(filename=filename)
    assert numpy.all(data.x == [1,2,2,3])
    assert numpy.all(data.y == [1,2,5,4])
    assert numpy.all(data.dy == [.1,.2,.5,.4])
    assert data.dx == 0
    assert data.calc_x is None
    assert data.residuals(lambda x: x)[3] == 1/.4

    # Check that four column data loading works
    with TempData(D4) as filename:
        data = Data1D(filename=filename)
    assert numpy.all(data.x == [1,2,2,3])
    assert numpy.all(data.dx == [.1,.2,.3,.3])
    assert numpy.all(data.y == [1,2,5,4])
    assert numpy.all(data.dy == [.1,.2,.5,.4])

    # Test residuals
    print "Fix the convolution function!"
    print data.residuals(lambda x: x)
    assert data.calc_x is None

if __name__ == "__main__": test()