source: sasmodels/sasmodels/kernel_template.c @ 0278e3f

core_shell_microgelscostrafo411magnetic_modelrelease_v0.94release_v0.95ticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since 0278e3f was 0278e3f, checked in by Paul Kienzle <pkienzle@…>, 8 years ago

remove opencl compiler warnings; maybe fix build server breakage

  • Property mode set to 100644
File size: 8.5 KB
Line 
1// GENERATED CODE --- DO NOT EDIT ---
2// Code is produced by sasmodels.gen from sasmodels/models/MODEL.c
3
4#ifdef __OPENCL_VERSION__
5# define USE_OPENCL
6#endif
7
8#define USE_KAHAN_SUMMATION 0
9
10// If opencl is not available, then we are compiling a C function
11// Note: if using a C++ compiler, then define kernel as extern "C"
12#ifndef USE_OPENCL
13#  ifdef __cplusplus
14      #include <cstdio>
15      #include <cmath>
16      using namespace std;
17      #if defined(_MSC_VER)
18         #include <limits>
19         #include <float.h>
20         #define kernel extern "C" __declspec( dllexport )
21         inline double trunc(double x) { return x>=0?floor(x):-floor(-x); }
22         inline double fmin(double x, double y) { return x>y ? y : x; }
23         inline double fmax(double x, double y) { return x<y ? y : x; }
24         inline double isnan(double x) { return _isnan(x); }
25         #define NAN (std::numeric_limits<double>::quiet_NaN()) // non-signalling NaN
26         static double cephes_expm1(double x) {
27            // Adapted from the cephes math library.
28            // Copyright 1984 - 1992 by Stephen L. Moshier
29            if (x != x || x == 0.0) {
30               return x; // NaN and +/- 0
31            } else if (x < -0.5 || x > 0.5) {
32               return exp(x) - 1.0;
33            } else {
34               const double xsq = x*x;
35               const double p = (((
36                  +1.2617719307481059087798E-4)*xsq
37                  +3.0299440770744196129956E-2)*xsq
38                  +9.9999999999999999991025E-1);
39               const double q = ((((
40                  +3.0019850513866445504159E-6)*xsq
41                  +2.5244834034968410419224E-3)*xsq
42                  +2.2726554820815502876593E-1)*xsq
43                  +2.0000000000000000000897E0);
44               double r = x * p;
45               r =  r / (q - r);
46               return r+r;
47             }
48         }
49         #define expm1 cephes_expm1
50     #else
51         #define kernel extern "C"
52     #endif
53     inline void SINCOS(double angle, double &svar, double &cvar) { svar=sin(angle); cvar=cos(angle); }
54#  else
55     #include <stdio.h>
56     #include <tgmath.h> // C99 type-generic math, so sin(float) => sinf
57     // MSVC doesn't support C99, so no need for dllexport on C99 branch
58     #define kernel
59     #define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
60#  endif
61#  define global
62#  define local
63#  define constant const
64// OpenCL powr(a,b) = C99 pow(a,b), b >= 0
65// OpenCL pown(a,b) = C99 pow(a,b), b integer
66#  define powr(a,b) pow(a,b)
67#  define pown(a,b) pow(a,b)
68#else
69#  if defined(USE_SINCOS)
70#    define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)
71#  else
72#    define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
73#  endif
74#endif
75
76// Standard mathematical constants:
77//   M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,
78//   M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), SQRT2, SQRT1_2=sqrt(1/2)
79// OpenCL defines M_constant_F for float constants, and nothing if double
80// is not enabled on the card, which is why these constants may be missing
81#ifndef M_PI
82#  define M_PI 3.141592653589793
83#endif
84#ifndef M_PI_2
85#  define M_PI_2 1.570796326794897
86#endif
87#ifndef M_PI_4
88#  define M_PI_4 0.7853981633974483
89#endif
90#ifndef M_E
91#  define M_E 2.718281828459045091
92#endif
93
94// Non-standard function library
95// pi/180, used for converting between degrees and radians
96// 4/3 pi for computing sphere volumes
97// square and cube for computing squares and cubes
98#ifndef M_PI_180
99#  define M_PI_180 0.017453292519943295
100#endif
101#ifndef M_4PI_3
102#  define M_4PI_3 4.18879020478639
103#endif
104//inline double square(double x) { return pow(x,2.0); }
105//inline double square(double x) { return pown(x,2); }
106inline double square(double x) { return x*x; }
107inline double cube(double x) { return x*x*x; }
108inline double sinc(double x) { return x==0 ? 1.0 : sin(x)/x; }
109
110
111%(DEFINES)s
112
113%(SOURCES)s
114
115/*
116    ##########################################################
117    #                                                        #
118    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
119    #   !!                                              !!   #
120    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
121    #   !!                                              !!   #
122    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
123    #                                                        #
124    ##########################################################
125*/
126
127#ifdef IQ_KERNEL_NAME
128kernel void IQ_KERNEL_NAME(
129    global const double *q,
130    global double *result,
131    const int Nq,
132#ifdef IQ_OPEN_LOOPS
133  #ifdef USE_OPENCL
134    global double *loops_g,
135  #endif
136    local double *loops,
137    const double cutoff,
138    IQ_DISPERSION_LENGTH_DECLARATIONS,
139#endif
140    IQ_FIXED_PARAMETER_DECLARATIONS
141    )
142{
143#ifdef USE_OPENCL
144  #ifdef IQ_OPEN_LOOPS
145  // copy loops info to local memory
146  event_t e = async_work_group_copy(loops, loops_g, (IQ_DISPERSION_LENGTH_SUM)*2, 0);
147  wait_group_events(1, &e);
148  #endif
149
150  int i = get_global_id(0);
151  if (i < Nq)
152#else
153  #pragma omp parallel for
154  for (int i=0; i < Nq; i++)
155#endif
156  {
157    const double qi = q[i];
158#ifdef IQ_OPEN_LOOPS
159    double ret=0.0, norm=0.0;
160    IQ_OPEN_LOOPS
161    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
162    //  const double radius = loops[2*(radius_i)];
163    //  const double radius_w = loops[2*(radius_i)+1];
164
165    const double weight = IQ_WEIGHT_PRODUCT;
166    if (weight > cutoff) {
167      const double scattering = Iq(qi, IQ_PARAMETERS);
168      // allow kernels to exclude invalid regions by returning NaN
169      if (!isnan(scattering)) {
170        ret += weight*scattering;
171      #ifdef VOLUME_PARAMETERS
172        norm += weight * form_volume(VOLUME_PARAMETERS);
173      #else
174        norm += weight;
175      #endif
176      }
177    //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
178    }
179    IQ_CLOSE_LOOPS
180    // norm can only be zero if volume is zero, so no scattering
181    result[i] = (norm > 0. ? scale*ret/norm + background : background);
182#else
183    result[i] = scale*Iq(qi, IQ_PARAMETERS) + background;
184#endif
185  }
186}
187#endif
188
189
190#ifdef IQXY_KERNEL_NAME
191kernel void IQXY_KERNEL_NAME(
192    global const double *qx,
193    global const double *qy,
194    global double *result,
195    const int Nq,
196#ifdef IQXY_OPEN_LOOPS
197  #ifdef USE_OPENCL
198    global double *loops_g,
199  #endif
200    local double *loops,
201    const double cutoff,
202    IQXY_DISPERSION_LENGTH_DECLARATIONS,
203#endif
204    IQXY_FIXED_PARAMETER_DECLARATIONS
205    )
206{
207#ifdef USE_OPENCL
208  #ifdef IQXY_OPEN_LOOPS
209  // copy loops info to local memory
210  event_t e = async_work_group_copy(loops, loops_g, (IQXY_DISPERSION_LENGTH_SUM)*2, 0);
211  wait_group_events(1, &e);
212  #endif
213
214  int i = get_global_id(0);
215  if (i < Nq)
216#else
217  #pragma omp parallel for
218  for (int i=0; i < Nq; i++)
219#endif
220  {
221    const double qxi = qx[i];
222    const double qyi = qy[i];
223    #if USE_KAHAN_SUMMATION
224    double accumulated_error = 0.0;
225    #endif
226#ifdef IQXY_OPEN_LOOPS
227    double ret=0.0, norm=0.0;
228    IQXY_OPEN_LOOPS
229    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
230    //  const double radius = loops[2*(radius_i)];
231    //  const double radius_w = loops[2*(radius_i)+1];
232    double weight = IQXY_WEIGHT_PRODUCT;
233    if (weight > cutoff) {
234
235      const double scattering = Iqxy(qxi, qyi, IQXY_PARAMETERS);
236      if (!isnan(scattering)) { // if scattering is bad, exclude it from sum
237      #if defined(IQXY_HAS_THETA)
238        // Force a nominal value for the spherical correction even when
239        // theta is +90/-90 so that there are no divide by zero problems.
240        // For cos(theta) fixed at 90, we effectively multiply top and bottom
241        // by 1e-6, so the effect cancels.
242        const double spherical_correction = fmax(fabs(cos(M_PI_180*theta)), 1.e-6);
243        weight *= spherical_correction;
244      #endif
245      const double next = weight * scattering;
246      #if USE_KAHAN_SUMMATION
247        const double y = next - accumulated_error;
248        const double t = ret + y;
249        accumulated_error = (t - ret) - y;
250        ret = t;
251      #else
252        ret += next;
253      #endif
254      #ifdef VOLUME_PARAMETERS
255        norm += weight*form_volume(VOLUME_PARAMETERS);
256      #else
257        norm += weight;
258      #endif
259      }
260      //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
261    }
262    IQXY_CLOSE_LOOPS
263    // norm can only be zero if volume is zero, so no scattering
264    result[i] = (norm>0. ? scale*ret/norm + background : background);
265#else
266    result[i] = scale*Iqxy(qxi, qyi, IQXY_PARAMETERS) + background;
267#endif
268  }
269}
270#endif
Note: See TracBrowser for help on using the repository browser.