source: sasmodels/sasmodels/kernel_header.c @ 0db7dbd

core_shell_microgelsmagnetic_modelticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since 0db7dbd was 0db7dbd, checked in by pkienzle, 6 years ago

cuda support: allow cylinder model to run under CUDA as well as OpenCL

  • Property mode set to 100644
File size: 9.3 KB
Line 
1#ifdef __OPENCL_VERSION__
2# define USE_OPENCL
3#elif defined(__CUDACC__)
4# define USE_CUDA
5#elif defined(_OPENMP)
6# define USE_OPENMP
7#endif
8
9// If opencl is not available, then we are compiling a C function
10// Note: if using a C++ compiler, then define kernel as extern "C"
11#ifdef USE_OPENCL
12
13   #define USE_GPU
14   typedef int int32_t;
15   #define global_par global
16   #define local_par local
17   #define constant_par constant
18   #define global_var global
19   #define local_var local
20   #define constant_var constant
21   #define __device__
22
23   #if defined(USE_SINCOS)
24   #  define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)
25   #else
26   #  define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
27   #endif
28   // Intel CPU on Mac gives strange values for erf(); on the verified
29   // platforms (intel, nvidia, amd), the cephes erf() is significantly
30   // faster than that available in the native OpenCL.
31   #define NEED_ERF
32   // OpenCL only has type generic math
33   #define expf exp
34   #ifndef NEED_ERF
35   #  define erff erf
36   #  define erfcf erfc
37   #endif
38
39#elif defined(USE_CUDA)
40
41   #define USE_GPU
42   #define global_par
43   #define local_par
44   #define constant_par const
45   #define global_var
46   #define local_var __shared__
47   #define constant_var __constant__
48
49   #define kernel extern "C" __global__
50
51   // OpenCL powr(a,b) = C99 pow(a,b), b >= 0
52   // OpenCL pown(a,b) = C99 pow(a,b), b integer
53   #define powr(a,b) pow(a,b)
54   #define pown(a,b) pow(a,b)
55   //typedef int int32_t;
56   #if defined(USE_SINCOS)
57   #  define SINCOS(angle,svar,cvar) sincos(angle,&svar,&cvar)
58   #else
59   #  define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
60   #endif
61
62#else // !USE_OPENCL && !USE_CUDA
63
64   #define global_par
65   #define local_par
66   #define constant_par const
67   #define global_var
68   #define local_var
69   #define constant_var const
70   #define __device__
71
72   #ifdef __cplusplus
73      #include <cstdio>
74      #include <cmath>
75      using namespace std;
76      #if defined(_MSC_VER)
77         #include <limits>
78         #include <float.h>
79         #define kernel extern "C" __declspec( dllexport )
80         inline double trunc(double x) { return x>=0?floor(x):-floor(-x); }
81         inline double fmin(double x, double y) { return x>y ? y : x; }
82         inline double fmax(double x, double y) { return x<y ? y : x; }
83         #define isnan(x) _isnan(x)
84         #define isinf(x) (!_finite(x))
85         #define isfinite(x) _finite(x)
86         #define NAN (std::numeric_limits<double>::quiet_NaN()) // non-signalling NaN
87         #define INFINITY (std::numeric_limits<double>::infinity())
88         #define NEED_ERF
89         #define NEED_EXPM1
90         #define NEED_TGAMMA
91     #else
92         #define kernel extern "C"
93         #include <cstdint>
94     #endif
95     inline void SINCOS(double angle, double &svar, double &cvar) { svar=sin(angle); cvar=cos(angle); }
96   #else // !__cplusplus
97     #include <inttypes.h>  // C99 guarantees that int32_t types is here
98     #include <stdio.h>
99     #if defined(__TINYC__)
100         typedef int int32_t;
101         #include <math.h>
102         // TODO: check isnan is correct
103         inline double _isnan(double x) { return x != x; } // hope this doesn't optimize away!
104         #undef isnan
105         #define isnan(x) _isnan(x)
106         // Defeat the double->float conversion since we don't have tgmath
107         inline SAS_DOUBLE trunc(SAS_DOUBLE x) { return x>=0?floor(x):-floor(-x); }
108         inline SAS_DOUBLE fmin(SAS_DOUBLE x, SAS_DOUBLE y) { return x>y ? y : x; }
109         inline SAS_DOUBLE fmax(SAS_DOUBLE x, SAS_DOUBLE y) { return x<y ? y : x; }
110         #define NEED_ERF
111         #define NEED_EXPM1
112         #define NEED_TGAMMA
113         // expf missing from windows?
114         #define expf exp
115     #else
116         #include <tgmath.h> // C99 type-generic math, so sin(float) => sinf
117     #endif
118     // MSVC doesn't support C99, so no need for dllexport on C99 branch
119     #define kernel
120     #define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
121   #endif  // !__cplusplus
122   // OpenCL powr(a,b) = C99 pow(a,b), b >= 0
123   // OpenCL pown(a,b) = C99 pow(a,b), b integer
124   #define powr(a,b) pow(a,b)
125   #define pown(a,b) pow(a,b)
126
127#endif // !USE_OPENCL
128
129// Use SAS_DOUBLE to force the use of double even for float kernels
130#define SAS_DOUBLE dou ## ble
131
132#if defined(NEED_EXPM1)
133   // TODO: precision is a half digit lower than numpy on mac in [1e-7, 0.5]
134   // Run "explore/precision.py sas_expm1" to see this (may have to fiddle
135   // the xrange for log to see the complete range).
136   static SAS_DOUBLE expm1(SAS_DOUBLE x_in) {
137      double x = (double)x_in;  // go back to float for single precision kernels
138      // Adapted from the cephes math library.
139      // Copyright 1984 - 1992 by Stephen L. Moshier
140      if (x != x || x == 0.0) {
141         return x; // NaN and +/- 0
142      } else if (x < -0.5 || x > 0.5) {
143         return exp(x) - 1.0;
144      } else {
145         const double xsq = x*x;
146         const double p = (((
147            +1.2617719307481059087798E-4)*xsq
148            +3.0299440770744196129956E-2)*xsq
149            +9.9999999999999999991025E-1);
150         const double q = ((((
151            +3.0019850513866445504159E-6)*xsq
152            +2.5244834034968410419224E-3)*xsq
153            +2.2726554820815502876593E-1)*xsq
154            +2.0000000000000000000897E0);
155         double r = x * p;
156         r =  r / (q - r);
157         return r+r;
158       }
159   }
160#endif
161
162// Standard mathematical constants:
163//   M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,
164//   M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), SQRT2, SQRT1_2=sqrt(1/2)
165// OpenCL defines M_constant_F for float constants, and nothing if double
166// is not enabled on the card, which is why these constants may be missing
167#ifndef M_PI
168#  define M_PI 3.141592653589793
169#endif
170#ifndef M_PI_2
171#  define M_PI_2 1.570796326794897
172#endif
173#ifndef M_PI_4
174#  define M_PI_4 0.7853981633974483
175#endif
176#ifndef M_E
177#  define M_E 2.718281828459045091
178#endif
179#ifndef M_SQRT1_2
180#  define M_SQRT1_2 0.70710678118654746
181#endif
182
183// Non-standard function library
184// pi/180, used for converting between degrees and radians
185// 4/3 pi for computing sphere volumes
186// square and cube for computing squares and cubes
187#ifndef M_PI_180
188#  define M_PI_180 0.017453292519943295
189#endif
190#ifndef M_4PI_3
191#  define M_4PI_3 4.18879020478639
192#endif
193__device__
194inline double square(double x) { return x*x; }
195__device__
196inline double cube(double x) { return x*x*x; }
197__device__
198inline double sas_sinx_x(double x) { return x==0 ? 1.0 : sin(x)/x; }
199
200// CRUFT: support old style models with orientation received qx, qy and angles
201
202// To rotate from the canonical position to theta, phi, psi, first rotate by
203// psi about the major axis, oriented along z, which is a rotation in the
204// detector plane xy. Next rotate by theta about the y axis, aligning the major
205// axis in the xz plane. Finally, rotate by phi in the detector plane xy.
206// To compute the scattering, undo these rotations in reverse order:
207//     rotate in xy by -phi, rotate in xz by -theta, rotate in xy by -psi
208// The returned q is the length of the q vector and (xhat, yhat, zhat) is a unit
209// vector in the q direction.
210// To change between counterclockwise and clockwise rotation, change the
211// sign of phi and psi.
212
213#if 1
214//think cos(theta) should be sin(theta) in new coords, RKH 11Jan2017
215#define ORIENT_SYMMETRIC(qx, qy, theta, phi, q, sn, cn) do { \
216    SINCOS(phi*M_PI_180, sn, cn); \
217    q = sqrt(qx*qx + qy*qy); \
218    cn  = (q==0. ? 1.0 : (cn*qx + sn*qy)/q * sin(theta*M_PI_180));  \
219    sn = sqrt(1 - cn*cn); \
220    } while (0)
221#else
222// SasView 3.x definition of orientation
223#define ORIENT_SYMMETRIC(qx, qy, theta, phi, q, sn, cn) do { \
224    SINCOS(theta*M_PI_180, sn, cn); \
225    q = sqrt(qx*qx + qy*qy);\
226    cn = (q==0. ? 1.0 : (cn*cos(phi*M_PI_180)*qx + sn*qy)/q); \
227    sn = sqrt(1 - cn*cn); \
228    } while (0)
229#endif
230
231#if 1
232#define ORIENT_ASYMMETRIC(qx, qy, theta, phi, psi, q, xhat, yhat, zhat) do { \
233    q = sqrt(qx*qx + qy*qy); \
234    const double qxhat = qx/q; \
235    const double qyhat = qy/q; \
236    double sin_theta, cos_theta; \
237    double sin_phi, cos_phi; \
238    double sin_psi, cos_psi; \
239    SINCOS(theta*M_PI_180, sin_theta, cos_theta); \
240    SINCOS(phi*M_PI_180, sin_phi, cos_phi); \
241    SINCOS(psi*M_PI_180, sin_psi, cos_psi); \
242    xhat = qxhat*(-sin_phi*sin_psi + cos_theta*cos_phi*cos_psi) \
243         + qyhat*( cos_phi*sin_psi + cos_theta*sin_phi*cos_psi); \
244    yhat = qxhat*(-sin_phi*cos_psi - cos_theta*cos_phi*sin_psi) \
245         + qyhat*( cos_phi*cos_psi - cos_theta*sin_phi*sin_psi); \
246    zhat = qxhat*(-sin_theta*cos_phi) \
247         + qyhat*(-sin_theta*sin_phi); \
248    } while (0)
249#else
250// SasView 3.x definition of orientation
251#define ORIENT_ASYMMETRIC(qx, qy, theta, phi, psi, q, cos_alpha, cos_mu, cos_nu) do { \
252    q = sqrt(qx*qx + qy*qy); \
253    const double qxhat = qx/q; \
254    const double qyhat = qy/q; \
255    double sin_theta, cos_theta; \
256    double sin_phi, cos_phi; \
257    double sin_psi, cos_psi; \
258    SINCOS(theta*M_PI_180, sin_theta, cos_theta); \
259    SINCOS(phi*M_PI_180, sin_phi, cos_phi); \
260    SINCOS(psi*M_PI_180, sin_psi, cos_psi); \
261    cos_alpha = cos_theta*cos_phi*qxhat + sin_theta*qyhat; \
262    cos_mu = (-sin_theta*cos_psi*cos_phi - sin_psi*sin_phi)*qxhat + cos_theta*cos_psi*qyhat; \
263    cos_nu = (-cos_phi*sin_psi*sin_theta + sin_phi*cos_psi)*qxhat + sin_psi*cos_theta*qyhat; \
264    } while (0)
265#endif
Note: See TracBrowser for help on using the repository browser.