Changeset b3796fa in sasmodels
- Timestamp:
- Aug 4, 2016 10:11:51 AM (8 years ago)
- Branches:
- master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children:
- 38ce0ab
- Parents:
- 4fd2c63
- Location:
- sasmodels
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
sasmodels/kernel_header.c
redf06e1 rb3796fa 15 15 # endif 16 16 // OpenCL only has type generic math 17 #define expf exp 17 18 #define erff erf 18 19 #define erfcf erfc 20 // Intel CPU on Mac gives strange values for erf(), so maybe don't use it 21 //#define NEED_ERF 19 22 #else // !USE_OPENCL 20 23 // Use SAS_DOUBLE to force the use of double even for float kernels … … 123 126 # define M_E 2.718281828459045091 124 127 #endif 128 #ifndef M_SQRT1_2 129 # define M_SQRT1_2 0.70710678118654746 130 #endif 125 131 126 132 // Non-standard function library -
sasmodels/models/lib/sas_erf.c
redf06e1 rb3796fa 89 89 90 90 #if FLOAT_SIZE>4 // DOUBLE_PRECISION 91 double erf(double x); 92 double erfc(double a); 91 92 double cephes_erf(double x); 93 double cephes_erfc(double a); 93 94 94 95 constant double PD[] = { … … 152 153 }; 153 154 154 double erfc(double a)155 double cephes_erfc(double a) 155 156 { 156 157 double MAXLOG = 88.72283905206835; … … 158 159 159 160 160 /*if (a < 0.0)161 x = -a;162 else163 x = a;*/164 165 161 x = fabs(a); 166 162 167 168 163 if (x < 1.0) { 169 // The line bellow is a troublemaker for GPU, so sas_erf function170 // is explicit here for the case < 1.0171 //return (1.0 - sas_erf(a));164 // The line below causes problems on the GPU, so inline 165 // the erf function instead and z < 1.0. 166 //return (1.0 - cephes_erf(a)); 172 167 z = x * x; 173 168 y = x * polevl(z, TD, 4) / p1evl(z, UD, 5); … … 211 206 212 207 213 double erf(double x)208 double cephes_erf(double x) 214 209 { 215 210 double y, z; 216 211 217 212 if (fabs(x) > 1.0) 218 return (1.0 - erfc(x));213 return (1.0 - cephes_erfc(x)); 219 214 220 215 z = x * x; 221 #if FLOAT_SIZE>4 222 y = x * polevl(z, TD, 4) / p1evl(z, UD, 5); 223 #else 224 y = x * polevl( z, TF, 6 ); 225 #endif 216 y = x * polevl(z, TD, 4) / p1evl(z, UD, 5); 226 217 227 218 return y; … … 230 221 #else // SINGLE PRECISION 231 222 232 double erff(doublex);233 double erfcf(doublea);223 float cephes_erff(float x); 224 float cephes_erfcf(float a); 234 225 235 226 /* erfc(x) = exp(-x^2) P(1/x), 1 < x < 2 */ 236 constant doublePF[] = {227 constant float PF[] = { 237 228 2.326819970068386E-002, 238 229 -1.387039388740657E-001, … … 247 238 248 239 /* erfc(x) = exp(-x^2) 1/x P(1/x^2), 2 < x < 14 */ 249 constant doubleRF[] = {240 constant float RF[] = { 250 241 -1.047766399936249E+001, 251 242 1.297719955372516E+001, … … 259 250 260 251 /* erf(x) = x P(x^2), 0 < x < 1 */ 261 constant doubleTF[] = {252 constant float TF[] = { 262 253 7.853861353153693E-005, 263 254 -8.010193625184903E-004, … … 270 261 271 262 272 float erfcf(float a)263 float cephes_erfcf(float a) 273 264 { 274 265 float MAXLOG = 88.72283905206835; … … 327 318 328 319 329 float erff(float x)320 float cephes_erff(float x) 330 321 { 331 322 float y, z; … … 333 324 // TODO: tinycc does not support fabsf 334 325 if (fabs(x) > 1.0) 335 return (1.0 - erfcf(x));326 return (1.0 - cephes_erfcf(x)); 336 327 337 328 z = x * x; … … 342 333 343 334 #endif // SINGLE_PRECISION 344 #endif // NEED_ERF345 335 346 336 #if FLOAT_SIZE>4 337 //static double sas_erf(double x) { return erf(x); } 338 //static double sas_erfc(double x) { return erfc(x); } 339 #define sas_erf cephes_erf 340 #define sas_erfc cephes_erfc 341 #else 342 #define sas_erf cephes_erff 343 #define sas_erfc cephes_erfcf 344 #endif 345 346 #else // !NEED_ERF 347 348 #if FLOAT_SIZE>4 349 //static double sas_erf(double x) { return erf(x); } 350 //static double sas_erfc(double x) { return erfc(x); } 347 351 #define sas_erf erf 348 352 #define sas_erfc erfc … … 351 355 #define sas_erfc erfcf 352 356 #endif 357 #endif // !NEED_ERF
Note: See TracChangeset
for help on using the changeset viewer.