[eb2946f] | 1 | #!/usr/bin/env python |
---|
| 2 | r""" |
---|
[57eb6a4] | 3 | Show numerical precision of various expressions. |
---|
| 4 | |
---|
| 5 | Evaluates the same function(s) in single and double precision and compares |
---|
| 6 | the results to a 500-bit mpmath evaluation of the same function. |
---|
| 7 | |
---|
| 8 | Note: a quick way to generate C and python code for taylor series |
---|
| 9 | expansions from sympy: |
---|
| 10 | |
---|
| 11 | import sympy as sp |
---|
| 12 | x = sp.var("x") |
---|
| 13 | f = sp.sin(x)/x |
---|
| 14 | t = sp.series(f, n=12).removeO() # taylor series with no O(x^n) term |
---|
| 15 | p = sp.horner(t) # Horner representation |
---|
| 16 | p = p.replace(x**2, sp.var("xsq")) # simplify if alternate terms are zero |
---|
| 17 | p = p.n(15) # evaluate coefficients to 15 digits (optional) |
---|
| 18 | c_code = sp.ccode(p, assign_to=sp.var("p")) # convert to c code |
---|
| 19 | py_code = c_code[:-1] # strip semicolon to convert c to python |
---|
| 20 | |
---|
| 21 | # mpmath has pade() rational function approximation, which might work |
---|
| 22 | # better than the taylor series for some functions: |
---|
| 23 | P, Q = mp.pade(sp.Poly(t.n(15),x).coeffs(), L, M) |
---|
| 24 | P = sum(a*x**n for n,a in enumerate(reversed(P))) |
---|
| 25 | Q = sum(a*x**n for n,a in enumerate(reversed(Q))) |
---|
| 26 | c_code = sp.ccode(sp.horner(P)/sp.horner(Q), assign_to=sp.var("p")) |
---|
| 27 | |
---|
| 28 | # There are richardson and shanks series accelerators in both sympy |
---|
| 29 | # and mpmath that may be helpful. |
---|
[eb2946f] | 30 | """ |
---|
| 31 | from __future__ import division, print_function |
---|
| 32 | |
---|
| 33 | import sys |
---|
| 34 | import os |
---|
| 35 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) |
---|
| 36 | |
---|
| 37 | import numpy as np |
---|
| 38 | from numpy import pi, inf |
---|
| 39 | import scipy.special |
---|
| 40 | try: |
---|
| 41 | from mpmath import mp |
---|
| 42 | except ImportError: |
---|
| 43 | # CRUFT: mpmath split out into its own package |
---|
| 44 | from sympy.mpmath import mp |
---|
| 45 | #import matplotlib; matplotlib.use('TkAgg') |
---|
| 46 | import pylab |
---|
| 47 | |
---|
| 48 | from sasmodels import core, data, direct_model, modelinfo |
---|
| 49 | |
---|
class Comparator(object):
    """
    Compare single and double precision evaluations of one function against
    a high precision mpmath reference.

    *name* is the display name used in the repr and in the plot title.

    *mp_function* evaluates the function for a single mpmath value.

    *np_function* evaluates the function for a numpy array.

    *ocl_function* is the model definition handed to ``core.build_model``.

    *xaxis* is the label for the x axis of the plots.

    *limits* is the *(low, high)* interval used to clip the requested x range.
    """
    # Predefined x ranges as (start, stop, steps, linear).  For the log
    # ranges start and stop are powers of ten.
    _NAMED_RANGES = {
        "zoom": (1000, 1010, 2000, True),
        "neg": (-100.1, 100.1, 2000, True),
        "linear": (1, 1000, 2000, True),
        "log": (-3, 5, 400, False),
        "logq": (-4, 1, 400, False),
    }

    def __init__(self, name, mp_function, np_function, ocl_function, xaxis, limits):
        self.name = name
        self.mp_function = mp_function
        self.np_function = np_function
        self.ocl_function = ocl_function
        self.xaxis = xaxis
        self.limits = limits

    def __repr__(self):
        return "Comparator(%s)"%self.name

    def call_mpmath(self, vec, bits=500):
        """
        Direct calculation using mpmath extended precision library.

        Each value in *vec* is converted to ``mp.mpf`` and evaluated with a
        working precision of *bits* bits.  Returns a list of results.
        """
        with mp.workprec(bits):
            return [self.mp_function(mp.mpf(x)) for x in vec]

    def call_numpy(self, x, dtype):
        """
        Direct calculation using numpy/scipy.

        *x* is converted to an array of type *dtype* before the call.
        """
        x = np.asarray(x, dtype)
        return self.np_function(x)

    def call_ocl(self, x, dtype, platform='ocl'):
        """
        Calculation using sasmodels ocl libraries.

        The model is built for *dtype* and evaluated on *x* with zero
        background.  *platform* is accepted but not used.
        """
        x = np.asarray(x, dtype)
        model = core.build_model(self.ocl_function, dtype=dtype)
        calculator = direct_model.DirectModel(data.empty_data1D(x), model)
        return calculator(background=0)

    @staticmethod
    def _parse_xrange(xrange):
        """
        Convert a range specification into *(start, stop, steps, linear)*.

        Raises ValueError if *xrange* is neither a predefined range name nor
        a "start:stop[:steps[:scale]]" string.
        """
        if xrange in Comparator._NAMED_RANGES:
            return Comparator._NAMED_RANGES[xrange]
        if ':' in xrange:
            parts = xrange.split(':')
            start, stop = float(parts[0]), float(parts[1])
            steps = int(parts[2]) if len(parts) > 2 else 400
            # Only an explicit fourth field equal to "log" selects log spacing.
            linear = parts[3] != "log" if len(parts) == 4 else True
            return start, stop, steps, linear
        raise ValueError("unknown range "+xrange)

    def run(self, xrange="log", diff="relative"):
        r"""
        Compare accuracy of different methods for computing f.

        *xrange* is::

            log:    [10^-3,10^5]
            logq:   [10^-4, 10^1]
            linear: [1,1000]
            zoom:   [1000,1010]
            neg:    [-100.1,100.1]

        For arbitrary range use "start:stop:steps:scale" where scale is
        one of log, lin, or linear.  For log scale, start and stop are
        powers of ten.

        *diff* is "relative", "absolute" or "none"

        The requested range is clipped to the *limits* of the comparator.
        Single precision results are drawn in the left panel and double
        precision results in the right panel.

        Raises ValueError if *xrange* is not recognized.
        """
        start, stop, steps, linear = self._parse_xrange(xrange)
        with mp.workprec(500):
            # Note: we make sure that we are comparing apples to apples...
            # The x points are set using single precision so that we are
            # examining the accuracy of the transformation from x to f(x)
            # rather than x to f(nearest(x)) where nearest(x) is the nearest
            # value to x in the given precision.
            if linear:
                start = max(start, self.limits[0])
                stop = min(stop, self.limits[1])
                qrf = np.linspace(start, stop, steps, dtype='single')
            else:
                start = np.log10(max(10**start, self.limits[0]))
                stop = np.log10(min(10**stop, self.limits[1]))
                qrf = np.logspace(start, stop, steps, dtype='single')
            qr = [mp.mpf(float(v)) for v in qrf]

        target = self.call_mpmath(qr, bits=500)
        pylab.subplot(121)
        self.compare(qr, 'single', target, linear, diff)
        pylab.legend(loc='best')
        pylab.subplot(122)
        self.compare(qr, 'double', target, linear, diff)
        pylab.legend(loc='best')
        pylab.suptitle(self.name + " compared to 500-bit mpmath")

    def compare(self, x, precision, target, linear=False, diff="relative"):
        r"""
        Compare the different computation methods using the given precision.

        Plots the numpy and OpenCL results for *x* at *precision* ('single'
        or 'double') against *target* in the current axes.  When *diff* is
        neither "relative" nor "absolute" the target itself is plotted as
        well.  The x axis is switched to linear scale if *linear* is true.
        """
        # To see the effect of reduced precision in mpmath itself, plot
        # self.call_mpmath(x, n) here, e.g. with n=11 or 23 bits for single
        # precision and n=53 or 83 bits for double precision.
        plotdiff(x, target, self.call_numpy(x, precision), 'numpy '+precision, diff=diff)
        plotdiff(x, target, self.call_ocl(x, precision, 0), 'OpenCL '+precision, diff=diff)
        pylab.xlabel(self.xaxis)
        if diff == "relative":
            pylab.ylabel("relative error")
        elif diff == "absolute":
            pylab.ylabel("absolute error")
        else:
            pylab.ylabel(self.name)
            pylab.semilogx(x, target, '-', label="true value")
        if linear:
            pylab.xscale('linear')
---|
| 180 | |
---|
def plotdiff(x, target, actual, label, diff):
    """
    Plot the computed value.

    *x* are the evaluation points, *target* the reference values and
    *actual* the values being checked; *label* is the legend entry.

    If *diff* is "relative", plot abs((target-actual)/target) on log-log
    axes.  Where the target is exactly zero the relative error is undefined,
    so abs(actual) is plotted instead.  If *diff* is "absolute", plot
    abs(target-actual) on log-log axes.  Otherwise plot the value directly on
    a log x axis, clipped to the range spanned by *target*.
    """
    if diff == "relative":
        # abs() in the fallback keeps the value plottable on a log axis.
        err = np.array([(abs((t-a)/t) if t != 0 else abs(a))
                        for t, a in zip(target, actual)], 'd')
        pylab.loglog(x, err, '-', label=label)
    elif diff == "absolute":
        err = np.array([abs(t-a) for t, a in zip(target, actual)], 'd')
        pylab.loglog(x, err, '-', label=label)
    else:
        limits = np.min(target), np.max(target)
        pylab.semilogx(x, np.clip(actual, *limits), '-', label=label)
---|
| 197 | |
---|
def make_ocl(function, name, source=None):
    """
    Build the model info for a kernel which evaluates *function*.

    *function* is the source text assigned to the kernel's ``Iq`` attribute,
    *name* is the kernel name (also used to form its ``__file__`` as
    ``name + ".py"``) and *source* is an optional list of source files the
    kernel depends on.

    Returns the result of ``modelinfo.make_model_info`` for the kernel.
    """
    class Kernel(object):
        pass
    Kernel.__file__ = name+".py"
    Kernel.name = name
    Kernel.parameters = []
    # Give every kernel its own list so that kernels never share (and cannot
    # accidentally modify) a common default.
    Kernel.source = [] if source is None else list(source)
    Kernel.Iq = function
    model_info = modelinfo.make_model_info(Kernel)
    return model_info
---|
| 208 | |
---|
# Hack to allow second parameter A in two parameter functions
A = 1
def parse_extra_pars():
    """
    Pull any "A=value" arguments out of sys.argv and store the value in A.

    The last "A=value" argument wins.  Matching arguments are removed from
    sys.argv; sys.argv is left untouched when there are none.  A is always
    stored as a float, even when no "A=value" argument is given.
    """
    global A

    text = str(A)
    remaining = sys.argv[:1]
    for arg in sys.argv[1:]:
        if arg.startswith("A="):
            text = arg[2:]
        else:
            remaining.append(arg)
    if len(remaining) != len(sys.argv):
        sys.argv = remaining
    A = float(text)

parse_extra_pars()
---|
| 225 | |
---|
[eb2946f] | 226 | |
---|
| 227 | # =============== FUNCTION DEFINITIONS ================ |
---|
| 228 | |
---|
FUNCTIONS = {}
def add_function(name, mp_function, np_function, ocl_function,
                 shortname=None, xaxis="x", limits=(-inf, inf)):
    """
    Register a Comparator for the function in FUNCTIONS.

    The entry is stored under *shortname*.  When *shortname* is None the key
    is *name* with every "(x)" and every space removed.  An existing entry
    with the same key is replaced.  The remaining arguments are passed to
    the Comparator unchanged.
    """
    key = shortname
    if key is None:
        key = name.replace('(x)', '').replace(' ', '')
    comparator = Comparator(name, mp_function, np_function, ocl_function,
                            xaxis, limits)
    FUNCTIONS[key] = comparator
---|
| 235 | |
---|
# Bessel functions of the first kind and the sine integral.  Each entry pairs
# the mpmath reference with the scipy implementation and the sasmodels kernel.
# Note: "J1(x)" is registered exactly once; registering it a second time with
# the same arguments would only replace the entry with an identical one.
add_function(
    name="J0(x)",
    mp_function=mp.j0,
    np_function=scipy.special.j0,
    ocl_function=make_ocl("return sas_J0(q);", "sas_J0", ["lib/polevl.c", "lib/sas_J0.c"]),
)
add_function(
    name="J1(x)",
    mp_function=mp.j1,
    np_function=scipy.special.j1,
    ocl_function=make_ocl("return sas_J1(q);", "sas_J1", ["lib/polevl.c", "lib/sas_J1.c"]),
)
# Integer order Bessel functions need an explicit shortname since the display
# name does not contain the "(x)" pattern stripped by add_function.
add_function(
    name="JN(-3, x)",
    mp_function=lambda x: mp.besselj(-3, x),
    np_function=lambda x: scipy.special.jn(-3, x),
    ocl_function=make_ocl("return sas_JN(-3, q);", "sas_JN",
                          ["lib/polevl.c", "lib/sas_J0.c", "lib/sas_J1.c", "lib/sas_JN.c"]),
    shortname="J-3",
)
add_function(
    name="JN(3, x)",
    mp_function=lambda x: mp.besselj(3, x),
    np_function=lambda x: scipy.special.jn(3, x),
    ocl_function=make_ocl("return sas_JN(3, q);", "sas_JN",
                          ["lib/polevl.c", "lib/sas_J0.c", "lib/sas_J1.c", "lib/sas_JN.c"]),
    shortname="J3",
)
add_function(
    name="JN(2, x)",
    mp_function=lambda x: mp.besselj(2, x),
    np_function=lambda x: scipy.special.jn(2, x),
    ocl_function=make_ocl("return sas_JN(2, q);", "sas_JN",
                          ["lib/polevl.c", "lib/sas_J0.c", "lib/sas_J1.c", "lib/sas_JN.c"]),
    shortname="J2",
)
add_function(
    name="2 J1(x)/x",
    mp_function=lambda x: 2*mp.j1(x)/x,
    np_function=lambda x: 2*scipy.special.j1(x)/x,
    ocl_function=make_ocl("return sas_2J1x_x(q);", "sas_2J1x_x", ["lib/polevl.c", "lib/sas_J1.c"]),
)
add_function(
    name="Si(x)",
    mp_function=mp.si,
    # scipy returns the pair (Si, Ci); keep only the sine integral.
    np_function=lambda x: scipy.special.sici(x)[0],
    ocl_function=make_ocl("return sas_Si(q);", "sas_Si", ["lib/sas_Si.c"]),
)
---|
# Disabled comparison against an external "fnlib" implementation of J1.
#import fnlib
#add_function(
#    name="fnlibJ1",
#    mp_function=mp.j1,
#    np_function=fnlib.J1,
#    ocl_function=make_ocl("return sas_J1(q);", "sas_J1", ["lib/polevl.c", "lib/sas_J1.c"]),
#)

# Trigonometric functions.  The kernels call the C math functions directly.
add_function(
    name="sin(x)",
    mp_function=mp.sin,
    np_function=np.sin,
    #ocl_function=make_ocl("double sn, cn; SINCOS(q,sn,cn); return sn;", "sas_sin"),
    ocl_function=make_ocl("return sin(q);", "sas_sin"),
)
add_function(
    name="sin(x)/x",
    # The mpmath reference returns the limiting value 1 at x=0.
    mp_function=lambda x: mp.sin(x)/x if x != 0 else 1,
    ## scipy sinc function is inaccurate and has an implied pi*x term
    #np_function=lambda x: scipy.special.sinc(x/pi),
    ## numpy sin(x)/x needs to check for x=0
    np_function=lambda x: np.sin(x)/x,
    ocl_function=make_ocl("return sas_sinx_x(q);", "sas_sinc"),
)
add_function(
    name="cos(x)",
    mp_function=mp.cos,
    np_function=np.cos,
    #ocl_function=make_ocl("double sn, cn; SINCOS(q,sn,cn); return cn;", "sas_cos"),
    ocl_function=make_ocl("return cos(q);", "sas_cos"),
)

# Gamma function family.  The x range for gamma is clipped to (-3.1, 10).
add_function(
    name="gamma(x)",
    mp_function=mp.gamma,
    np_function=scipy.special.gamma,
    ocl_function=make_ocl("return sas_gamma(q);", "sas_gamma", ["lib/sas_gamma.c"]),
    limits=(-3.1, 10),
)
add_function(
    name="gammaln(x)",
    mp_function=mp.loggamma,
    np_function=scipy.special.gammaln,
    ocl_function=make_ocl("return sas_gammaln(q);", "sas_gammaln", ["lib/sas_gammainc.c"]),
    #ocl_function=make_ocl("return lgamma(q);", "sas_gammaln"),
)
# The incomplete gamma functions take their first argument from the module
# level value A.  A is captured when this statement runs: as the lambda
# default "a=A" and as a literal formatted into the kernel source.
add_function(
    name="gammainc(x)",
    mp_function=lambda x, a=A: mp.gammainc(a, a=0, b=x)/mp.gamma(a),
    np_function=lambda x, a=A: scipy.special.gammainc(a, x),
    ocl_function=make_ocl("return sas_gammainc(%.15g,q);"%A, "sas_gammainc", ["lib/sas_gammainc.c"]),
)
add_function(
    name="gammaincc(x)",
    mp_function=lambda x, a=A: mp.gammainc(a, a=x, b=mp.inf)/mp.gamma(a),
    np_function=lambda x, a=A: scipy.special.gammaincc(a, x),
    ocl_function=make_ocl("return sas_gammaincc(%.15g,q);"%A, "sas_gammaincc", ["lib/sas_gammainc.c"]),
)

# Error functions and expm1, with the x range clipped to (-5, 5).
add_function(
    name="erf(x)",
    mp_function=mp.erf,
    np_function=scipy.special.erf,
    ocl_function=make_ocl("return sas_erf(q);", "sas_erf", ["lib/polevl.c", "lib/sas_erf.c"]),
    limits=(-5., 5.),
)
add_function(
    name="erfc(x)",
    mp_function=mp.erfc,
    np_function=scipy.special.erfc,
    ocl_function=make_ocl("return sas_erfc(q);", "sas_erfc", ["lib/polevl.c", "lib/sas_erf.c"]),
    limits=(-5., 5.),
)
add_function(
    name="expm1(x)",
    mp_function=mp.expm1,
    np_function=np.expm1,
    ocl_function=make_ocl("return expm1(q);", "sas_expm1"),
    limits=(-5., 5.),
)
add_function(
    name="arctan(x)",
    mp_function=mp.atan,
    np_function=np.arctan,
    ocl_function=make_ocl("return atan(q);", "sas_arctan"),
)

# Expressions which subtract nearly equal terms for small x.  The mpmath and
# numpy versions evaluate the formula as written.
add_function(
    name="3 j1(x)/x",
    mp_function=lambda x: 3*(mp.sin(x)/x - mp.cos(x))/(x*x),
    # Note: no taylor expansion near 0
    np_function=lambda x: 3*(np.sin(x)/x - np.cos(x))/(x*x),
    ocl_function=make_ocl("return sas_3j1x_x(q);", "sas_j1c", ["lib/sas_3j1x_x.c"]),
)
add_function(
    name="(1-cos(x))/x^2",
    mp_function=lambda x: (1 - mp.cos(x))/(x*x),
    np_function=lambda x: (1 - np.cos(x))/(x*x),
    ocl_function=make_ocl("return (1-cos(q))/q/q;", "sas_1mcosx_x2"),
)
add_function(
    name="(1-sin(x)/x)/x",
    mp_function=lambda x: 1/x - mp.sin(x)/(x*x),
    np_function=lambda x: 1/x - np.sin(x)/(x*x),
    ocl_function=make_ocl("return (1-sas_sinx_x(q))/q;", "sas_1msinx_x_x"),
)
add_function(
    name="(1/2-sin(x)/x+(1-cos(x))/x^2)/x",
    mp_function=lambda x: (0.5 - mp.sin(x)/x + (1-mp.cos(x))/(x*x))/x,
    np_function=lambda x: (0.5 - np.sin(x)/x + (1-np.cos(x))/(x*x))/x,
    ocl_function=make_ocl("return (0.5-sin(q)/q + (1-cos(q))/q/q)/q;", "sas_T2"),
)
# Remainder of x with respect to 2 pi.
add_function(
    name="fmod_2pi",
    mp_function=lambda x: mp.fmod(x, 2*mp.pi),
    np_function=lambda x: np.fmod(x, 2*np.pi),
    ocl_function=make_ocl("return fmod(q, 2*M_PI);", "sas_fmod"),
)
---|
# Debye function for a gaussian coil, 2*(exp(-x^2) + x^2 - 1)/x^4.
# The mpmath reference evaluates the formula directly, while the numpy
# version uses the equivalent expm1 form.
# The kernel works with qsq = q*q.  Its first branch tests "qsq < 0.0", which
# a square of a real number never satisfies, so the Pade approximants inside
# it are not used.  The O(7) Taylor polynomial is used for qsq < 0.8 and the
# expm1 form otherwise.
add_function(
    name="gauss_coil",
    mp_function=lambda x: 2*(mp.exp(-x**2) + x**2 - 1)/x**4,
    np_function=lambda x: 2*(np.expm1(-x**2) + x**2)/x**4,
    ocl_function=make_ocl("""
    const double qsq = q*q;
    // For double: use O(5) Pade with 0.5 cutoff (10 mad + 1 divide)
    // For single: use O(7) Taylor with 0.8 cutoff (7 mad)
    if (qsq < 0.0) {
        const double x = qsq;
        if (0) { // 0.36 single
            // PadeApproximant[2*Exp[-x^2] + x^2-1)/x^4, {x, 0, 4}]
            return (x*x/180. + 1.)/((1./30.*x + 1./3.)*x + 1);
        } else if (0) { // 1.0 for single
            // padeapproximant[2*exp[-x^2] + x^2-1)/x^4, {x, 0, 6}]
            const double A1=1./24., A2=1./84, A3=-1./3360;
            const double B1=3./8., B2=3./56., B3=1./336.;
            return (((A3*x + A2)*x + A1)*x + 1.)/(((B3*x + B2)*x + B1)*x + 1.);
        } else if (0) { // 1.0 for single, 0.25 for double
            // PadeApproximant[2*Exp[-x^2] + x^2-1)/x^4, {x, 0, 8}]
            const double A1=1./15., A2=1./60, A3=0., A4=1./75600.;
            const double B1=2./5., B2=1./15., B3=1./180., B4=1./5040.;
            return ((((A4*x + A3)*x + A2)*x + A1)*x + 1.)
                  /((((B4*x + B3)*x + B2)*x + B1)*x + 1.);
        } else { // 1.0 for single, 0.5 for double
            // PadeApproximant[2*Exp[-x^2] + x^2-1)/x^4, {x, 0, 8}]
            const double A1=1./12., A2=2./99., A3=1./2640., A4=1./23760., A5=-1./1995840.;
            const double B1=5./12., B2=5./66., B3=1./132., B4=1./2376., B5=1./95040.;
            return (((((A5*x + A4)*x + A3)*x + A2)*x + A1)*x + 1.)
                  /(((((B5*x + B4)*x + B3)*x + B2)*x + B1)*x + 1.);
        }
    } else if (qsq < 0.8) {
        const double x = qsq;
        const double C0 = +1.;
        const double C1 = -1./3.;
        const double C2 = +1./12.;
        const double C3 = -1./60.;
        const double C4 = +1./360.;
        const double C5 = -1./2520.;
        const double C6 = +1./20160.;
        const double C7 = -1./181440.;
        //return ((((C5*x + C4)*x + C3)*x + C2)*x + C1)*x + C0;
        //return (((((C6*x + C5)*x + C4)*x + C3)*x + C2)*x + C1)*x + C0;
        return ((((((C7*x + C6)*x + C5)*x + C4)*x + C3)*x + C2)*x + C1)*x + C0;
    } else {
        return 2.*(expm1(-qsq) + qsq)/(qsq*qsq);
    }
""", "sas_debye"),
)
---|
[eb2946f] | 453 | |
---|
# Cylinder used for the full form factor comparison: radius, length and the
# angle THETA (converted from degrees to radians before use).
RADIUS = 3000
LENGTH = 30
THETA = 45

def mp_cyl(x):
    """
    Reference cylinder intensity at the mpmath value *x*.

    Every constant is converted to mp.mpf before use so that the whole
    expression is evaluated with mpmath numbers.
    """
    num = mp.mpf
    angle = num(THETA)*mp.pi/num(180)
    q_radial = x * num(RADIUS)*mp.sin(angle)
    q_axial = x * num(LENGTH)/num(2)*mp.cos(angle)
    bessel_term = num(2)*mp.j1(q_radial)/q_radial
    sinc_term = mp.sin(q_axial)/q_axial
    volume = mp.pi*num(RADIUS)**num(2)*num(LENGTH)
    contrast = num(5)
    scale = num(1)/num(10000)
    background = num(0)
    amplitude = volume*contrast*bessel_term*sinc_term
    return scale*amplitude**num(2)/volume + background

def np_cyl(x):
    """
    Cylinder intensity for the numpy array *x*.

    Constants are created as float64 when *x* is float64 and as float32
    otherwise, so the arithmetic is carried out with scalars of that type.
    """
    if x.dtype == np.float64:
        num = np.float64
    else:
        num = np.float32
    angle = num(THETA)*num(np.pi)/num(180)
    q_radial = x * num(RADIUS)*np.sin(angle)
    q_axial = x * num(LENGTH)/num(2)*np.cos(angle)
    bessel_term = num(2)*scipy.special.j1(q_radial)/q_radial
    sinc_term = np.sin(q_axial)/q_axial
    volume = num(np.pi)*num(RADIUS)**2*num(LENGTH)
    contrast = num(5)
    scale = num(1)/num(10000)
    background = num(0)
    amplitude = volume*contrast*bessel_term*sinc_term
    return scale*amplitude**num(2)/volume + background
---|
# Kernel source for the cylinder.  The values of LENGTH, RADIUS and THETA
# are formatted into the text as "%.15e" literals when this statement runs.
ocl_cyl = """\
    double THETA = %(THETA).15e*M_PI_180;
    double qr = q*%(RADIUS).15e*sin(THETA);
    double qh = q*0.5*%(LENGTH).15e*cos(THETA);
    double be = sas_2J1x_x(qr);
    double si = sas_sinx_x(qh);
    double background = 0;
    //double background = 0.001;
    double volume = M_PI*square(%(RADIUS).15e)*%(LENGTH).15e;
    double contrast = 5.0;
    double units = 1e-4;
    //return be;
    //return si;
    return units*square(volume*contrast*be*si)/volume + background;
    """%{"LENGTH":LENGTH, "RADIUS": RADIUS, "THETA": THETA}
# Register the cylinder under the short name "cylinder"; the display name
# shows the parameter values.
add_function(
    name="cylinder(r=%g, l=%g, theta=%g)"%(RADIUS, LENGTH, THETA),
    mp_function=mp_cyl,
    np_function=np_cyl,
    ocl_function=make_ocl(ocl_cyl, "ocl_cyl", ["lib/polevl.c", "lib/sas_J1.c"]),
    shortname="cylinder",
    xaxis="$q/A^{-1}$",
)
---|
| 510 | |
---|
# Kernel source for log gamma built from a six coefficient series.
# NOTE(review): the variable name suggests this is the Lanczos approximation;
# the origin of the coefficients is not stated here -- confirm.
lanczos_gamma = """\
    const double coeff[] = {
            76.18009172947146, -86.50532032941677,
            24.01409824083091, -1.231739572450155,
            0.1208650973866179e-2,-0.5395239384953e-5
            };
    const double x = q;
    double tmp = x + 5.5;
    tmp -= (x + 0.5)*log(tmp);
    double ser = 1.000000000190015;
    for (int k=0; k < 6; k++) ser += coeff[k]/(x + k+1);
    return -tmp + log(2.5066282746310005*ser/x);
"""
# Compared against the same mpmath and scipy functions as "gammaln(x)", but
# using the kernel above.  Stored under the short name "loggamma".
add_function(
    name="loggamma(x)",
    mp_function=mp.loggamma,
    np_function=scipy.special.gammaln,
    ocl_function=make_ocl(lanczos_gamma, "lgamma"),
)
---|
| 530 | |
---|
# Kernel source for a replacement expm1.  It returns x unchanged for NaN and
# zero, uses exp(x) - 1 when |x| > 0.5, and a rational function in x
# otherwise.  Note that inside the final branch the local constant "q"
# shadows the kernel argument q.
replacement_expm1 = """\
    double x = (double)q;  // go back to float for single precision kernels
    // Adapted from the cephes math library.
    // Copyright 1984 - 1992 by Stephen L. Moshier
    if (x != x || x == 0.0) {
        return x; // NaN and +/- 0
    } else if (x < -0.5 || x > 0.5) {
        return exp(x) - 1.0;
    } else {
        const double xsq = x*x;
        const double p = (((
            +1.2617719307481059087798E-4)*xsq
            +3.0299440770744196129956E-2)*xsq
            +9.9999999999999999991025E-1);
        const double q = ((((
            +3.0019850513866445504159E-6)*xsq
            +2.5244834034968410419224E-3)*xsq
            +2.2726554820815502876593E-1)*xsq
            +2.0000000000000000000897E0);
        double r = x * p;
        r = r / (q - r);
        return r+r;
    }
"""
# Compared against the same references as "expm1(x)" but without the (-5, 5)
# limits.  Stored under the short name "sas_expm1".
add_function(
    name="sas_expm1(x)",
    mp_function=mp.expm1,
    np_function=np.expm1,
    ocl_function=make_ocl(replacement_expm1, "sas_expm1"),
)
---|
| 561 | |
---|
# Alternate versions of 3 j1(x)/x, for posterity
def taylor_3j1x_x(x):
    """
    Evaluate 3 j1(x)/x from a five term polynomial in x^2.

    The coefficients are formed as 3/c in the dtype of *x*, so the series
    is evaluated in the precision of the input array.
    """
    dtype = x.dtype
    # Denominators of the coefficients, highest power of x^2 first.
    inverse_coeffs = np.array([3991680, -45360, 840, -30, 3], dtype)
    coeffs = dtype.type(3)/inverse_coeffs
    order = 5
    return np.polyval(coeffs[-order:], x*x)
---|
# Compare the polynomial version against the mpmath evaluation of the closed
# form.  With no shortname given, the entry is stored as "3j1/x:taylor".
add_function(
    name="3 j1(x)/x: taylor",
    mp_function=lambda x: 3*(mp.sin(x)/x - mp.cos(x))/(x*x),
    np_function=taylor_3j1x_x,
    ocl_function=make_ocl("return sas_3j1x_x(q);", "sas_j1c", ["lib/sas_3j1x_x.c"]),
)
---|
def trig_3j1x_x(x):
    r"""
    Evaluate 3 j1(x)/x with the numerator written as a single sine.

    The numerator $\sin(x) - x\cos(x)$ is rewritten using the identity

    .. math::

        a \sin(x) + b \cos(x) = c \sin(x + \phi)

    with $c = \surd(a^2+b^2)$ and $\phi = \tan^{-1}(b/a)$, taking $a = 1$
    and $b = -x$.  Constants are created in the dtype of *x*.
    """
    scalar = x.dtype.type
    unity = scalar(1)
    amplitude = np.sqrt(unity + x*x)
    phase = np.arctan2(-x, unity)
    numerator = amplitude*np.sin(x+phase)
    return scalar(3)*numerator/(x*x*x)
---|
# Compare the single-sine version against the mpmath evaluation of the closed
# form.  With no shortname given, the entry is stored as "3j1/x:trig".
add_function(
    name="3 j1(x)/x: trig",
    mp_function=lambda x: 3*(mp.sin(x)/x - mp.cos(x))/(x*x),
    np_function=trig_3j1x_x,
    ocl_function=make_ocl("return sas_3j1x_x(q);", "sas_j1c", ["lib/sas_3j1x_x.c"]),
)
---|
def np_2J1x_x(x):
    """
    numpy implementation of 2J1(x)/x using single precision algorithm

    *x* is a single numpy scalar.  All constants are created with the type
    of *x*.  A ratio of polynomials in x^2 is used for |x| < 8, and a
    sine/cosine form with polynomial corrections in 64/x^2 otherwise.
    """
    # pylint: disable=bad-continuation
    num = x.dtype.type
    magnitude = abs(x)

    if magnitude < num(8.0):
        xsq = x*x
        numerator = num(2)*(num(72362614232.0)
            + xsq*(num(-7895059235.0)
            + xsq*(num(242396853.1)
            + xsq*(num(-2972611.439)
            + xsq*(num(15704.48260)
            + xsq*(num(-30.16036606)))))))
        denominator = (num(144725228442.0)
            + xsq*(num(2300535178.0)
            + xsq*(num(18583304.74)
            + xsq*(num(99447.43394)
            + xsq*(num(376.9991397)
            + xsq)))))
        return numerator/denominator

    scaled = num(64.0)/(magnitude*magnitude)
    shifted = magnitude - num(2.356194491)
    cos_poly = (num(1.0)
        + scaled*(num(0.183105e-2)
        + scaled*(num(-0.3516396496e-4)
        + scaled*(num(0.2457520174e-5)
        + scaled*num(-0.240337019e-6)))))
    sin_poly = (num(0.04687499995)
        + scaled*(num(-0.2002690873e-3)
        + scaled*(num(0.8449199096e-5)
        + scaled*(num(-0.88228987e-6)
        + scaled*num(0.105787412e-6)))))
    sine, cosine = np.sin(shifted), np.cos(shifted)
    result = (np.sqrt(num(0.636619772)/magnitude)
              * (cosine*cos_poly - (num(8.0)/magnitude)*sine*sin_poly)
              * num(2)/x)
    # The division by the signed x is undone for negative x.
    if x < num(0.0):
        return -result
    return result
---|
# The alternative 2 J1(x)/x works on one value at a time, so it is applied
# element by element and the results are gathered into an array of the
# input type.
add_function(
    name="2 J1(x)/x:alt",
    mp_function=lambda x: 2*mp.j1(x)/x,
    np_function=lambda x: np.asarray([np_2J1x_x(v) for v in x], x.dtype),
    ocl_function=make_ocl("return sas_2J1x_x(q);", "sas_2J1x_x", ["lib/polevl.c", "lib/sas_J1.c"]),
)

# Names shown in the usage message and selected by "all": every registered
# function except the alternative implementations listed here.
ALL_FUNCTIONS = set(FUNCTIONS).difference([
    "loggamma",  # use cephes-based gammaln instead
    "3j1/x:taylor",
    "3j1/x:trig",
    "2J1/x:alt",
])
---|
| 653 | |
---|
| 654 | # =============== MAIN PROGRAM ================ |
---|
| 655 | |
---|
def usage():
    """
    Print the command line help, including the list of selectable function
    names, then exit with status 1.
    """
    names = ", ".join(sorted(ALL_FUNCTIONS))
    print("""\
usage: precision.py [-f/a/r] [-x<range>] "name" ...
where
    -f indicates that the function value should be plotted,
    -a indicates that the absolute error should be plotted,
    -r indicates that the relative error should be plotted (default),
    -x<range> indicates the steps in x, where <range> is one of the following
        log indicates log stepping in [10^-3, 10^5] (default)
        logq indicates log stepping in [10^-4, 10^1]
        linear indicates linear stepping in [1, 1000]
        zoom indicates linear stepping in [1000, 1010]
        neg indicates linear stepping in [-100.1, 100.1]
        start:stop:n[:stepping] indicates an n-step plot in [start, stop]
        or [10^start, 10^stop] if stepping is "log" (default n=400)
Some functions (notably gammainc/gammaincc) have an additional parameter A
which can be set from the command line as A=value. Default is A=1.

Name is one of:
    """+names+"""
Use "all" as the first name to show every function in turn, optionally
followed by a name to start from (names sorting before it are skipped).""")
    sys.exit(1)
---|
| 678 | |
---|
def main():
    """
    Command line entry point.

    Arguments starting with "-" are options (-f, -a, -r, -x<range>); all
    other arguments are function names.  If the first name is "all", every
    name in ALL_FUNCTIONS is shown in sorted order, starting from the
    optional second name.  An unknown option or name, or no name at all,
    prints the usage message and exits.

    With a single function the plot is shown and the program blocks until
    the window is closed.  With several functions, plotting is interactive
    and the program waits for a line of input between plots.
    """
    # raw_input only exists on Python 2; on Python 3 it is called input.
    try:
        wait_for_enter = raw_input
    except NameError:
        wait_for_enter = input

    args = sys.argv[1:]
    diff = "relative"
    x_range = "log"
    for opt in args:
        if not opt.startswith('-'):
            continue
        if opt == '-f':
            diff = "none"
        elif opt == '-r':
            diff = "relative"
        elif opt == '-a':
            diff = "absolute"
        elif opt.startswith('-x'):
            x_range = opt[2:]
        else:
            usage()

    names = [v for v in args if not v.startswith('-')]
    if not names:
        usage()

    if names[0] == "all":
        cutoff = names[1] if len(names) > 1 else ""
        names = [k for k in sorted(ALL_FUNCTIONS) if k >= cutoff]
    if any(k not in FUNCTIONS for k in names):
        usage()

    multiple = len(names) > 1
    pylab.interactive(multiple)
    for k in names:
        pylab.clf()
        FUNCTIONS[k].run(xrange=x_range, diff=diff)
        if multiple:
            wait_for_enter()
    if not multiple:
        pylab.show()


if __name__ == "__main__":
    main()
---|