source: sasmodels/sasmodels/kernel_template.c @ 29f27df

core_shell_microgelscostrafo411magnetic_modelrelease_v0.94release_v0.95ticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since 29f27df was 29f27df, checked in by Paul Kienzle <pkienzle@…>, 8 years ago

older MSVC is missing expm1()

  • Property mode set to 100644
File size: 8.5 KB
Line 
1// GENERATED CODE --- DO NOT EDIT ---
2// Code is produced by sasmodels.gen from sasmodels/models/MODEL.c
3
4#ifdef __OPENCL_VERSION__
5# define USE_OPENCL
6#endif
7
8#define USE_KAHAN_SUMMATION 0
9
10// If opencl is not available, then we are compiling a C function
11// Note: if using a C++ compiler, then define kernel as extern "C"
12#ifndef USE_OPENCL
13#  ifdef __cplusplus
14     #include <cstdio>
15     #include <cmath>
16     using namespace std;
17     #if defined(_MSC_VER)
18         #include <limits>
19         #include <float.h>
20         #define kernel extern "C" __declspec( dllexport )
21         inline double trunc(double x) { return x>=0?floor(x):-floor(-x); }
22             inline double fmin(double x, double y) { return x>y ? y : x; }
23             inline double fmax(double x, double y) { return x<y ? y : x; }
24             inline double isnan(double x) { return _isnan(x); }
25             #define NAN (std::numeric_limits<double>::quiet_NaN()) // non-signalling NaN
26             static double cephes_expm1(double x) {
27                 if (x != x || x == 0.0) {
28                     return x; // NaN and +/- 0
29                 } else if (x < -0.5 || x > 0.5) {
30                     return exp(x) - 1.0;
31                 } else {
32                     const double xsq = x*x;
33                     const double p = (((
34                          +1.2617719307481059087798E-4)*xsq
35                      +3.0299440770744196129956E-2)*xsq
36                      +9.9999999999999999991025E-1);
37                 const double q = ((((
38                      +3.0019850513866445504159E-6)*xsq
39                      +2.5244834034968410419224E-3)*xsq
40                      +2.2726554820815502876593E-1)*xsq
41                      +2.0000000000000000000897E0);
42                 double r = x * p;
43                     r =  r / (q - r);
44                     return r+r;
45                 }
46             }
47             #define expm1 cephes_expm1
48     #else
49         #define kernel extern "C"
50     #endif
51     inline void SINCOS(double angle, double &svar, double &cvar) { svar=sin(angle); cvar=cos(angle); }
52#  else
53     #include <stdio.h>
54     #include <tgmath.h> // C99 type-generic math, so sin(float) => sinf
55     // MSVC doesn't support C99, so no need for dllexport on C99 branch
56     #define kernel
57     #define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
58#  endif
59#  define global
60#  define local
61#  define constant const
62// OpenCL powr(a,b) = C99 pow(a,b), b >= 0
63// OpenCL pown(a,b) = C99 pow(a,b), b integer
64#  define powr(a,b) pow(a,b)
65#  define pown(a,b) pow(a,b)
66#else
67#  ifdef USE_SINCOS
68#    define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)
69#  else
70#    define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
71#  endif
72#endif
73
74// Standard mathematical constants:
75//   M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,
76//   M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), SQRT2, SQRT1_2=sqrt(1/2)
77// OpenCL defines M_constant_F for float constants, and nothing if double
78// is not enabled on the card, which is why these constants may be missing
79#ifndef M_PI
80#  define M_PI 3.141592653589793
81#endif
82#ifndef M_PI_2
83#  define M_PI_2 1.570796326794897
84#endif
85#ifndef M_PI_4
86#  define M_PI_4 0.7853981633974483
87#endif
88
89// Non-standard pi/180, used for converting between degrees and radians
90#ifndef M_PI_180
91#  define M_PI_180 0.017453292519943295
92#endif
93
94
95%(DEFINES)s
96
97%(SOURCES)s
98
99/*
100    ##########################################################
101    #                                                        #
102    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
103    #   !!                                              !!   #
104    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
105    #   !!                                              !!   #
106    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
107    #                                                        #
108    ##########################################################
109*/
110
111#ifdef IQ_KERNEL_NAME
112kernel void IQ_KERNEL_NAME(
113    global const double *q,
114    global double *result,
115    const int Nq,
116#ifdef IQ_OPEN_LOOPS
117  #ifdef USE_OPENCL
118    global double *loops_g,
119  #endif
120    local double *loops,
121    const double cutoff,
122    IQ_DISPERSION_LENGTH_DECLARATIONS,
123#endif
124    IQ_FIXED_PARAMETER_DECLARATIONS
125    )
126{
127#ifdef USE_OPENCL
128  #ifdef IQ_OPEN_LOOPS
129  // copy loops info to local memory
130  event_t e = async_work_group_copy(loops, loops_g, (IQ_DISPERSION_LENGTH_SUM)*2, 0);
131  wait_group_events(1, &e);
132  #endif
133
134  int i = get_global_id(0);
135  if (i < Nq)
136#else
137  #pragma omp parallel for
138  for (int i=0; i < Nq; i++)
139#endif
140  {
141    const double qi = q[i];
142#ifdef IQ_OPEN_LOOPS
143    double ret=0.0, norm=0.0;
144  #ifdef VOLUME_PARAMETERS
145    double vol=0.0, norm_vol=0.0;
146  #endif
147    IQ_OPEN_LOOPS
148    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
149    //  const double radius = loops[2*(radius_i)];
150    //  const double radius_w = loops[2*(radius_i)+1];
151
152    const double weight = IQ_WEIGHT_PRODUCT;
153    if (weight > cutoff) {
154      const double scattering = Iq(qi, IQ_PARAMETERS);
155      // allow kernels to exclude invalid regions by returning NaN
156      if (!isnan(scattering)) {
157        ret += weight*scattering;
158        norm += weight;
159      #ifdef VOLUME_PARAMETERS
160        const double vol_weight = VOLUME_WEIGHT_PRODUCT;
161        vol += vol_weight*form_volume(VOLUME_PARAMETERS);
162        norm_vol += vol_weight;
163      #endif
164      }
165    //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
166    }
167    IQ_CLOSE_LOOPS
168  #ifdef VOLUME_PARAMETERS
169    if (vol*norm_vol != 0.0) {
170      ret *= norm_vol/vol;
171    }
172  #endif
173    result[i] = scale*ret/norm+background;
174#else
175    result[i] = scale*Iq(qi, IQ_PARAMETERS) + background;
176#endif
177  }
178}
179#endif
180
181
182#ifdef IQXY_KERNEL_NAME
183kernel void IQXY_KERNEL_NAME(
184    global const double *qx,
185    global const double *qy,
186    global double *result,
187    const int Nq,
188#ifdef IQXY_OPEN_LOOPS
189  #ifdef USE_OPENCL
190    global double *loops_g,
191  #endif
192    local double *loops,
193    const double cutoff,
194    IQXY_DISPERSION_LENGTH_DECLARATIONS,
195#endif
196    IQXY_FIXED_PARAMETER_DECLARATIONS
197    )
198{
199#ifdef USE_OPENCL
200  #ifdef IQXY_OPEN_LOOPS
201  // copy loops info to local memory
202  event_t e = async_work_group_copy(loops, loops_g, (IQXY_DISPERSION_LENGTH_SUM)*2, 0);
203  wait_group_events(1, &e);
204  #endif
205
206  int i = get_global_id(0);
207  if (i < Nq)
208#else
209  #pragma omp parallel for
210  for (int i=0; i < Nq; i++)
211#endif
212  {
213    const double qxi = qx[i];
214    const double qyi = qy[i];
215    #if USE_KAHAN_SUMMATION
216    double accumulated_error = 0.0;
217    #endif
218#ifdef IQXY_OPEN_LOOPS
219    double ret=0.0, norm=0.0;
220    #ifdef VOLUME_PARAMETERS
221    double vol=0.0, norm_vol=0.0;
222    #endif
223    IQXY_OPEN_LOOPS
224    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
225    //  const double radius = loops[2*(radius_i)];
226    //  const double radius_w = loops[2*(radius_i)+1];
227
228    const double weight = IQXY_WEIGHT_PRODUCT;
229    if (weight > cutoff) {
230
231      const double scattering = Iqxy(qxi, qyi, IQXY_PARAMETERS);
232      if (!isnan(scattering)) { // if scattering is bad, exclude it from sum
233      //if (scattering >= 0.0) { // scattering cannot be negative
234        // TODO: use correct angle for spherical correction
235        // Definition of theta and phi are probably reversed relative to the
236        // equation which gave rise to this correction, leading to an
237        // attenuation of the pattern as theta moves through pi/2.  Either
238        // reverse the meanings of phi and theta in the forms, or use phi
239        // rather than theta in this correction.  Current code uses cos(theta)
240        // so that values match those of sasview.
241      #if defined(IQXY_HAS_THETA) // && 0
242        const double spherical_correction
243          = (Ntheta>1 ? fabs(cos(M_PI_180*theta))*M_PI_2:1.0);
244        const double next = spherical_correction * weight * scattering;
245      #else
246        const double next = weight * scattering;
247      #endif
248      #if USE_KAHAN_SUMMATION
249        const double y = next - accumulated_error;
250        const double t = ret + y;
251        accumulated_error = (t - ret) - y;
252        ret = t;
253      #else
254        ret += next;
255      #endif
256        norm += weight;
257      #ifdef VOLUME_PARAMETERS
258        const double vol_weight = VOLUME_WEIGHT_PRODUCT;
259        vol += vol_weight*form_volume(VOLUME_PARAMETERS);
260      #endif
261        norm_vol += vol_weight;
262      }
263      //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
264    }
265    IQXY_CLOSE_LOOPS
266  #ifdef VOLUME_PARAMETERS
267    if (vol*norm_vol != 0.0) {
268      ret *= norm_vol/vol;
269    }
270  #endif
271    result[i] = scale*ret/norm+background;
272#else
273    result[i] = scale*Iqxy(qxi, qyi, IQXY_PARAMETERS) + background;
274#endif
275  }
276}
277#endif
Note: See TracBrowser for help on using the repository browser.