source: sasmodels/sasmodels/kernel_template.c @ 3832f27

core_shell_microgelscostrafo411magnetic_modelrelease_v0.94release_v0.95ticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since 3832f27 was 3832f27, checked in by Paul Kienzle <pkienzle@…>, 8 years ago

add line directives to generated source so compiler errors are reported correctly

  • Property mode set to 100644
File size: 9.0 KB
Line 
1#line 1 "kernel_template.c"
2// GENERATED CODE --- DO NOT EDIT ---
3// Code is produced by sasmodels.gen from sasmodels/models/MODEL.c
4
5#ifdef __OPENCL_VERSION__
6# define USE_OPENCL
7#endif
8
9#define USE_KAHAN_SUMMATION 0
10
11// If opencl is not available, then we are compiling a C function
12// Note: if using a C++ compiler, then define kernel as extern "C"
13#ifndef USE_OPENCL
14#  ifdef __cplusplus
15      #include <cstdio>
16      #include <cmath>
17      using namespace std;
18      #if defined(_MSC_VER)
19         #include <limits>
20         #include <float.h>
21         #define kernel extern "C" __declspec( dllexport )
22         inline double trunc(double x) { return x>=0?floor(x):-floor(-x); }
23         inline double fmin(double x, double y) { return x>y ? y : x; }
24         inline double fmax(double x, double y) { return x<y ? y : x; }
25         #define isnan(x) _isnan(x)
26         #define isinf(x) (!_finite(x))
27         #define isfinite(x) _finite(x)
28         #define NAN (std::numeric_limits<double>::quiet_NaN()) // non-signalling NaN
29         #define INFINITY (std::numeric_limits<double>::infinity())
30         #define NEED_EXPM1
31         #define NEED_TGAMMA
32     #else
33         #define kernel extern "C"
34     #endif
35     inline void SINCOS(double angle, double &svar, double &cvar) { svar=sin(angle); cvar=cos(angle); }
36#  else
37     #include <stdio.h>
38     #if defined(__TINYC__)
39         #include <math.h>
40         inline double trunc(double x) { return x>=0?floor(x):-floor(-x); }
41         inline double fmin(double x, double y) { return x>y ? y : x; }
42         inline double fmax(double x, double y) { return x<y ? y : x; }
43         // TODO: test isnan
44         inline double _isnan(double x) { return x != x; } // hope this doesn't optimize away!
45         #undef isnan
46         #define isnan(x) _isnan(x)
47         #define NEED_EXPM1
48         #define NEED_TGAMMA
49     #else
50         #include <tgmath.h> // C99 type-generic math, so sin(float) => sinf
51     #endif
52     // MSVC doesn't support C99, so no need for dllexport on C99 branch
53     #define kernel
54     #define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
55#  endif
56#  define global
57#  define local
58#  define constant const
59// OpenCL powr(a,b) = C99 pow(a,b), b >= 0
60// OpenCL pown(a,b) = C99 pow(a,b), b integer
61#  define powr(a,b) pow(a,b)
62#  define pown(a,b) pow(a,b)
63#else
64#  if defined(USE_SINCOS)
65#    define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)
66#  else
67#    define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
68#  endif
69#endif
70
71#if defined(NEED_EXPM1)
72   static double expm1(double x) {
73      // Adapted from the cephes math library.
74      // Copyright 1984 - 1992 by Stephen L. Moshier
75      if (x != x || x == 0.0) {
76         return x; // NaN and +/- 0
77      } else if (x < -0.5 || x > 0.5) {
78         return exp(x) - 1.0;
79      } else {
80         const double xsq = x*x;
81         const double p = (((
82            +1.2617719307481059087798E-4)*xsq
83            +3.0299440770744196129956E-2)*xsq
84            +9.9999999999999999991025E-1);
85         const double q = ((((
86            +3.0019850513866445504159E-6)*xsq
87            +2.5244834034968410419224E-3)*xsq
88            +2.2726554820815502876593E-1)*xsq
89            +2.0000000000000000000897E0);
90         double r = x * p;
91         r =  r / (q - r);
92         return r+r;
93       }
94   }
95#endif
96
97// Standard mathematical constants:
98//   M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,
99//   M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), SQRT2, SQRT1_2=sqrt(1/2)
100// OpenCL defines M_constant_F for float constants, and nothing if double
101// is not enabled on the card, which is why these constants may be missing
102#ifndef M_PI
103#  define M_PI 3.141592653589793
104#endif
105#ifndef M_PI_2
106#  define M_PI_2 1.570796326794897
107#endif
108#ifndef M_PI_4
109#  define M_PI_4 0.7853981633974483
110#endif
111#ifndef M_E
112#  define M_E 2.718281828459045091
113#endif
114
115// Non-standard function library
116// pi/180, used for converting between degrees and radians
117// 4/3 pi for computing sphere volumes
118// square and cube for computing squares and cubes
119#ifndef M_PI_180
120#  define M_PI_180 0.017453292519943295
121#endif
122#ifndef M_4PI_3
123#  define M_4PI_3 4.18879020478639
124#endif
125//inline double square(double x) { return pow(x,2.0); }
126//inline double square(double x) { return pown(x,2); }
127inline double square(double x) { return x*x; }
128inline double cube(double x) { return x*x*x; }
129inline double sinc(double x) { return x==0 ? 1.0 : sin(x)/x; }
130
131
132%(DEFINES)s
133
134%(SOURCES)s
135
136/*
137    ##########################################################
138    #                                                        #
139    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
140    #   !!                                              !!   #
141    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
142    #   !!                                              !!   #
143    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
144    #                                                        #
145    ##########################################################
146*/
147
148#ifdef IQ_KERNEL_NAME
149kernel void IQ_KERNEL_NAME(
150    global const double *q,
151    global double *result,
152    const int Nq,
153#ifdef IQ_OPEN_LOOPS
154  #ifdef USE_OPENCL
155    global double *loops_g,
156  #endif
157    local double *loops,
158    const double cutoff,
159    IQ_DISPERSION_LENGTH_DECLARATIONS,
160#endif
161    IQ_FIXED_PARAMETER_DECLARATIONS
162    )
163{
164#ifdef USE_OPENCL
165  #ifdef IQ_OPEN_LOOPS
166  // copy loops info to local memory
167  event_t e = async_work_group_copy(loops, loops_g, (IQ_DISPERSION_LENGTH_SUM)*2, 0);
168  wait_group_events(1, &e);
169  #endif
170
171  int i = get_global_id(0);
172  if (i < Nq)
173#else
174  #pragma omp parallel for
175  for (int i=0; i < Nq; i++)
176#endif
177  {
178    const double qi = q[i];
179#ifdef IQ_OPEN_LOOPS
180    double ret=0.0, norm=0.0;
181    IQ_OPEN_LOOPS
182    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
183    //  const double radius = loops[2*(radius_i)];
184    //  const double radius_w = loops[2*(radius_i)+1];
185
186    const double weight = IQ_WEIGHT_PRODUCT;
187    if (weight > cutoff) {
188      const double scattering = Iq(qi, IQ_PARAMETERS);
189      // allow kernels to exclude invalid regions by returning NaN
190      if (!isnan(scattering)) {
191        ret += weight*scattering;
192      #ifdef VOLUME_PARAMETERS
193        norm += weight * form_volume(VOLUME_PARAMETERS);
194      #else
195        norm += weight;
196      #endif
197      }
198    //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
199    }
200    IQ_CLOSE_LOOPS
201    // norm can only be zero if volume is zero, so no scattering
202    result[i] = (norm > 0. ? scale*ret/norm + background : background);
203#else
204    result[i] = scale*Iq(qi, IQ_PARAMETERS) + background;
205#endif
206  }
207}
208#endif
209
210
211#ifdef IQXY_KERNEL_NAME
212kernel void IQXY_KERNEL_NAME(
213    global const double *qx,
214    global const double *qy,
215    global double *result,
216    const int Nq,
217#ifdef IQXY_OPEN_LOOPS
218  #ifdef USE_OPENCL
219    global double *loops_g,
220  #endif
221    local double *loops,
222    const double cutoff,
223    IQXY_DISPERSION_LENGTH_DECLARATIONS,
224#endif
225    IQXY_FIXED_PARAMETER_DECLARATIONS
226    )
227{
228#ifdef USE_OPENCL
229  #ifdef IQXY_OPEN_LOOPS
230  // copy loops info to local memory
231  event_t e = async_work_group_copy(loops, loops_g, (IQXY_DISPERSION_LENGTH_SUM)*2, 0);
232  wait_group_events(1, &e);
233  #endif
234
235  int i = get_global_id(0);
236  if (i < Nq)
237#else
238  #pragma omp parallel for
239  for (int i=0; i < Nq; i++)
240#endif
241  {
242    const double qxi = qx[i];
243    const double qyi = qy[i];
244    #if USE_KAHAN_SUMMATION
245    double accumulated_error = 0.0;
246    #endif
247#ifdef IQXY_OPEN_LOOPS
248    double ret=0.0, norm=0.0;
249    IQXY_OPEN_LOOPS
250    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
251    //  const double radius = loops[2*(radius_i)];
252    //  const double radius_w = loops[2*(radius_i)+1];
253    double weight = IQXY_WEIGHT_PRODUCT;
254    if (weight > cutoff) {
255
256      const double scattering = Iqxy(qxi, qyi, IQXY_PARAMETERS);
257      if (!isnan(scattering)) { // if scattering is bad, exclude it from sum
258      #if defined(IQXY_HAS_THETA)
259        // Force a nominal value for the spherical correction even when
260        // theta is +90/-90 so that there are no divide by zero problems.
261        // For cos(theta) fixed at 90, we effectively multiply top and bottom
262        // by 1e-6, so the effect cancels.
263        const double spherical_correction = fmax(fabs(cos(M_PI_180*theta)), 1.e-6);
264        weight *= spherical_correction;
265      #endif
266      const double next = weight * scattering;
267      #if USE_KAHAN_SUMMATION
268        const double y = next - accumulated_error;
269        const double t = ret + y;
270        accumulated_error = (t - ret) - y;
271        ret = t;
272      #else
273        ret += next;
274      #endif
275      #ifdef VOLUME_PARAMETERS
276        norm += weight*form_volume(VOLUME_PARAMETERS);
277      #else
278        norm += weight;
279      #endif
280      }
281      //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
282    }
283    IQXY_CLOSE_LOOPS
284    // norm can only be zero if volume is zero, so no scattering
285    result[i] = (norm>0. ? scale*ret/norm + background : background);
286#else
287    result[i] = scale*Iqxy(qxi, qyi, IQXY_PARAMETERS) + background;
288#endif
289  }
290}
291#endif
Note: See TracBrowser for help on using the repository browser.