source: sasmodels/sasmodels/kernel_template.c @ 98cb4d7

core_shell_microgelscostrafo411magnetic_modelrelease_v0.94release_v0.95ticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since 98cb4d7 was 98cb4d7, checked in by Paul Kienzle <pkienzle@…>, 8 years ago

only define tgamma when compiling for MSVC

  • Property mode set to 100644
File size: 8.4 KB
Line 
1// GENERATED CODE --- DO NOT EDIT ---
2// Code is produced by sasmodels.gen from sasmodels/models/MODEL.c
3
4#ifdef __OPENCL_VERSION__
5# define USE_OPENCL
6#endif
7
8#define USE_KAHAN_SUMMATION 0
9
10// If opencl is not available, then we are compiling a C function
11// Note: if using a C++ compiler, then define kernel as extern "C"
12#ifndef USE_OPENCL
13#  ifdef __cplusplus
14      #include <cstdio>
15      #include <cmath>
16      using namespace std;
17      #if defined(_MSC_VER)
18         #include <limits>
19         #include <float.h>
20         #define kernel extern "C" __declspec( dllexport )
21         inline double trunc(double x) { return x>=0?floor(x):-floor(-x); }
22         inline double fmin(double x, double y) { return x>y ? y : x; }
23         inline double fmax(double x, double y) { return x<y ? y : x; }
24         inline double isnan(double x) { return _isnan(x); }
25         #define NAN (std::numeric_limits<double>::quiet_NaN()) // non-signalling NaN
26         #define NEED_EXPM1
27         #define NEED_TGAMMA
28     #else
29         #define kernel extern "C"
30     #endif
31     inline void SINCOS(double angle, double &svar, double &cvar) { svar=sin(angle); cvar=cos(angle); }
32#  else
33     #include <stdio.h>
34     #include <tgmath.h> // C99 type-generic math, so sin(float) => sinf
35     // MSVC doesn't support C99, so no need for dllexport on C99 branch
36     #define kernel
37     #define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
38#  endif
39#  define global
40#  define local
41#  define constant const
42// OpenCL powr(a,b) = C99 pow(a,b), b >= 0
43// OpenCL pown(a,b) = C99 pow(a,b), b integer
44#  define powr(a,b) pow(a,b)
45#  define pown(a,b) pow(a,b)
46#else
47#  if defined(USE_SINCOS)
48#    define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)
49#  else
50#    define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
51#  endif
52#endif
53
54#if defined(NEED_EXPM1)
55   static double expm1(double x) {
56      // Adapted from the cephes math library.
57      // Copyright 1984 - 1992 by Stephen L. Moshier
58      if (x != x || x == 0.0) {
59         return x; // NaN and +/- 0
60      } else if (x < -0.5 || x > 0.5) {
61         return exp(x) - 1.0;
62      } else {
63         const double xsq = x*x;
64         const double p = (((
65            +1.2617719307481059087798E-4)*xsq
66            +3.0299440770744196129956E-2)*xsq
67            +9.9999999999999999991025E-1);
68         const double q = ((((
69            +3.0019850513866445504159E-6)*xsq
70            +2.5244834034968410419224E-3)*xsq
71            +2.2726554820815502876593E-1)*xsq
72            +2.0000000000000000000897E0);
73         double r = x * p;
74         r =  r / (q - r);
75         return r+r;
76       }
77   }
78#endif
79
80// Standard mathematical constants:
81//   M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,
82//   M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), SQRT2, SQRT1_2=sqrt(1/2)
83// OpenCL defines M_constant_F for float constants, and nothing if double
84// is not enabled on the card, which is why these constants may be missing
85#ifndef M_PI
86#  define M_PI 3.141592653589793
87#endif
88#ifndef M_PI_2
89#  define M_PI_2 1.570796326794897
90#endif
91#ifndef M_PI_4
92#  define M_PI_4 0.7853981633974483
93#endif
94#ifndef M_E
95#  define M_E 2.718281828459045091
96#endif
97
98// Non-standard function library
99// pi/180, used for converting between degrees and radians
100// 4/3 pi for computing sphere volumes
101// square and cube for computing squares and cubes
102#ifndef M_PI_180
103#  define M_PI_180 0.017453292519943295
104#endif
105#ifndef M_4PI_3
106#  define M_4PI_3 4.18879020478639
107#endif
108//inline double square(double x) { return pow(x,2.0); }
109//inline double square(double x) { return pown(x,2); }
110inline double square(double x) { return x*x; }
111inline double cube(double x) { return x*x*x; }
112inline double sinc(double x) { return x==0 ? 1.0 : sin(x)/x; }
113
114
115%(DEFINES)s
116
117%(SOURCES)s
118
119/*
120    ##########################################################
121    #                                                        #
122    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
123    #   !!                                              !!   #
124    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
125    #   !!                                              !!   #
126    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
127    #                                                        #
128    ##########################################################
129*/
130
131#ifdef IQ_KERNEL_NAME
132kernel void IQ_KERNEL_NAME(
133    global const double *q,
134    global double *result,
135    const int Nq,
136#ifdef IQ_OPEN_LOOPS
137  #ifdef USE_OPENCL
138    global double *loops_g,
139  #endif
140    local double *loops,
141    const double cutoff,
142    IQ_DISPERSION_LENGTH_DECLARATIONS,
143#endif
144    IQ_FIXED_PARAMETER_DECLARATIONS
145    )
146{
147#ifdef USE_OPENCL
148  #ifdef IQ_OPEN_LOOPS
149  // copy loops info to local memory
150  event_t e = async_work_group_copy(loops, loops_g, (IQ_DISPERSION_LENGTH_SUM)*2, 0);
151  wait_group_events(1, &e);
152  #endif
153
154  int i = get_global_id(0);
155  if (i < Nq)
156#else
157  #pragma omp parallel for
158  for (int i=0; i < Nq; i++)
159#endif
160  {
161    const double qi = q[i];
162#ifdef IQ_OPEN_LOOPS
163    double ret=0.0, norm=0.0;
164    IQ_OPEN_LOOPS
165    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
166    //  const double radius = loops[2*(radius_i)];
167    //  const double radius_w = loops[2*(radius_i)+1];
168
169    const double weight = IQ_WEIGHT_PRODUCT;
170    if (weight > cutoff) {
171      const double scattering = Iq(qi, IQ_PARAMETERS);
172      // allow kernels to exclude invalid regions by returning NaN
173      if (!isnan(scattering)) {
174        ret += weight*scattering;
175      #ifdef VOLUME_PARAMETERS
176        norm += weight * form_volume(VOLUME_PARAMETERS);
177      #else
178        norm += weight;
179      #endif
180      }
181    //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
182    }
183    IQ_CLOSE_LOOPS
184    // norm can only be zero if volume is zero, so no scattering
185    result[i] = (norm > 0. ? scale*ret/norm + background : background);
186#else
187    result[i] = scale*Iq(qi, IQ_PARAMETERS) + background;
188#endif
189  }
190}
191#endif
192
193
194#ifdef IQXY_KERNEL_NAME
195kernel void IQXY_KERNEL_NAME(
196    global const double *qx,
197    global const double *qy,
198    global double *result,
199    const int Nq,
200#ifdef IQXY_OPEN_LOOPS
201  #ifdef USE_OPENCL
202    global double *loops_g,
203  #endif
204    local double *loops,
205    const double cutoff,
206    IQXY_DISPERSION_LENGTH_DECLARATIONS,
207#endif
208    IQXY_FIXED_PARAMETER_DECLARATIONS
209    )
210{
211#ifdef USE_OPENCL
212  #ifdef IQXY_OPEN_LOOPS
213  // copy loops info to local memory
214  event_t e = async_work_group_copy(loops, loops_g, (IQXY_DISPERSION_LENGTH_SUM)*2, 0);
215  wait_group_events(1, &e);
216  #endif
217
218  int i = get_global_id(0);
219  if (i < Nq)
220#else
221  #pragma omp parallel for
222  for (int i=0; i < Nq; i++)
223#endif
224  {
225    const double qxi = qx[i];
226    const double qyi = qy[i];
227    #if USE_KAHAN_SUMMATION
228    double accumulated_error = 0.0;
229    #endif
230#ifdef IQXY_OPEN_LOOPS
231    double ret=0.0, norm=0.0;
232    IQXY_OPEN_LOOPS
233    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
234    //  const double radius = loops[2*(radius_i)];
235    //  const double radius_w = loops[2*(radius_i)+1];
236    double weight = IQXY_WEIGHT_PRODUCT;
237    if (weight > cutoff) {
238
239      const double scattering = Iqxy(qxi, qyi, IQXY_PARAMETERS);
240      if (!isnan(scattering)) { // if scattering is bad, exclude it from sum
241      #if defined(IQXY_HAS_THETA)
242        // Force a nominal value for the spherical correction even when
243        // theta is +90/-90 so that there are no divide by zero problems.
244        // For cos(theta) fixed at 90, we effectively multiply top and bottom
245        // by 1e-6, so the effect cancels.
246        const double spherical_correction = fmax(fabs(cos(M_PI_180*theta)), 1.e-6);
247        weight *= spherical_correction;
248      #endif
249      const double next = weight * scattering;
250      #if USE_KAHAN_SUMMATION
251        const double y = next - accumulated_error;
252        const double t = ret + y;
253        accumulated_error = (t - ret) - y;
254        ret = t;
255      #else
256        ret += next;
257      #endif
258      #ifdef VOLUME_PARAMETERS
259        norm += weight*form_volume(VOLUME_PARAMETERS);
260      #else
261        norm += weight;
262      #endif
263      }
264      //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
265    }
266    IQXY_CLOSE_LOOPS
267    // norm can only be zero if volume is zero, so no scattering
268    result[i] = (norm>0. ? scale*ret/norm + background : background);
269#else
270    result[i] = scale*Iqxy(qxi, qyi, IQXY_PARAMETERS) + background;
271#endif
272  }
273}
274#endif
Note: See TracBrowser for help on using the repository browser.