source: sasmodels/sasmodels/kernel_template.c @ 2a55a6f

core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
Last change on this file since 2a55a6f was 2a55a6f, checked in by Paul Kienzle <pkienzle@…>, 8 years ago

add support for tinycc

  • Property mode set to 100644
File size: 9.0 KB
Line 
1// GENERATED CODE --- DO NOT EDIT ---
2// Code is produced by sasmodels.gen from sasmodels/models/MODEL.c
3
4#ifdef __OPENCL_VERSION__
5# define USE_OPENCL
6#endif
7
8#define USE_KAHAN_SUMMATION 0
9
10// If opencl is not available, then we are compiling a C function
11// Note: if using a C++ compiler, then define kernel as extern "C"
12#ifndef USE_OPENCL
13#  ifdef __cplusplus
14      #include <cstdio>
15      #include <cmath>
16      using namespace std;
17      #if defined(_MSC_VER)
18         #include <limits>
19         #include <float.h>
20         #define kernel extern "C" __declspec( dllexport )
21         inline double trunc(double x) { return x>=0?floor(x):-floor(-x); }
22         inline double fmin(double x, double y) { return x>y ? y : x; }
23         inline double fmax(double x, double y) { return x<y ? y : x; }
24         #define isnan(x) _isnan(x)
25         #define isinf(x) (!_finite(x))
26         #define isfinite(x) _finite(x)
27         #define NAN (std::numeric_limits<double>::quiet_NaN()) // non-signalling NaN
28         #define INFINITY (std::numeric_limits<double>::infinity())
29         #define NEED_EXPM1
30         #define NEED_TGAMMA
31     #else
32         #define kernel extern "C"
33     #endif
34     inline void SINCOS(double angle, double &svar, double &cvar) { svar=sin(angle); cvar=cos(angle); }
35#  else
36     #include <stdio.h>
37     #if defined(__TINYC__)
38         #include <math.h>
39         inline double trunc(double x) { return x>=0?floor(x):-floor(-x); }
40         inline double fmin(double x, double y) { return x>y ? y : x; }
41         inline double fmax(double x, double y) { return x<y ? y : x; }
42         // TODO: test isnan
43         inline double _isnan(double x) { return x != x; } // hope this doesn't optimize away!
44         #undef isnan
45         #define isnan(x) _isnan(x)
46         #define NEED_EXPM1
47         #define NEED_TGAMMA
48     #else
49         #include <tgmath.h> // C99 type-generic math, so sin(float) => sinf
50     #endif
51     // MSVC doesn't support C99, so no need for dllexport on C99 branch
52     #define kernel
53     #define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
54#  endif
55#  define global
56#  define local
57#  define constant const
58// OpenCL powr(a,b) = C99 pow(a,b), b >= 0
59// OpenCL pown(a,b) = C99 pow(a,b), b integer
60#  define powr(a,b) pow(a,b)
61#  define pown(a,b) pow(a,b)
62#else
63#  if defined(USE_SINCOS)
64#    define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)
65#  else
66#    define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
67#  endif
68#endif
69
70#if defined(NEED_EXPM1)
71   static double expm1(double x) {
72      // Adapted from the cephes math library.
73      // Copyright 1984 - 1992 by Stephen L. Moshier
74      if (x != x || x == 0.0) {
75         return x; // NaN and +/- 0
76      } else if (x < -0.5 || x > 0.5) {
77         return exp(x) - 1.0;
78      } else {
79         const double xsq = x*x;
80         const double p = (((
81            +1.2617719307481059087798E-4)*xsq
82            +3.0299440770744196129956E-2)*xsq
83            +9.9999999999999999991025E-1);
84         const double q = ((((
85            +3.0019850513866445504159E-6)*xsq
86            +2.5244834034968410419224E-3)*xsq
87            +2.2726554820815502876593E-1)*xsq
88            +2.0000000000000000000897E0);
89         double r = x * p;
90         r =  r / (q - r);
91         return r+r;
92       }
93   }
94#endif
95
// Standard mathematical constants:
//   M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,
//   M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), M_SQRT2,
//   M_SQRT1_2=sqrt(1/2)
// OpenCL defines M_constant_F for float constants, and nothing if double
// is not enabled on the card, which is why these constants may be missing.
// Fallbacks are supplied here for the ones defined below.  Each literal
// carries enough digits to round to the double nearest the true value, so
// that, for example, M_PI_2 == M_PI/2 holds exactly.
#ifndef M_PI
#  define M_PI 3.141592653589793
#endif
#ifndef M_PI_2
#  define M_PI_2 1.5707963267948966
#endif
#ifndef M_PI_4
#  define M_PI_4 0.7853981633974483
#endif
#ifndef M_E
#  define M_E 2.718281828459045091
#endif

// Non-standard function library
// pi/180, used for converting between degrees and radians
// 4/3 pi for computing sphere volumes
// square and cube for computing squares and cubes
#ifndef M_PI_180
#  define M_PI_180 0.017453292519943295
#endif
#ifndef M_4PI_3
#  define M_4PI_3 4.188790204786391
#endif
124//inline double square(double x) { return pow(x,2.0); }
125//inline double square(double x) { return pown(x,2); }
// Linkage for the small helpers below.  Under C99 rules a plain "inline"
// definition does not provide an external definition, so any call the
// compiler chooses not to inline (for example in an unoptimized build) is
// left unresolved at link time.  Host builds therefore use "static inline".
// OpenCL keeps plain "inline" because versions of OpenCL C before 1.2 do not
// accept the static storage class.
#ifndef SAS_INLINE
#  ifdef USE_OPENCL
#    define SAS_INLINE inline
#  else
#    define SAS_INLINE static inline
#  endif
#endif

// x raised to the second power.
SAS_INLINE double square(double x) { return x*x; }
// x raised to the third power.
SAS_INLINE double cube(double x) { return x*x*x; }
// sin(x)/x, with the value 1 at x == 0 where the quotient is undefined.
SAS_INLINE double sinc(double x) { return x==0 ? 1.0 : sin(x)/x; }
129
130
131%(DEFINES)s
132
133%(SOURCES)s
134
135/*
136    ##########################################################
137    #                                                        #
138    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
139    #   !!                                              !!   #
140    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
141    #   !!                                              !!   #
142    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
143    #                                                        #
144    ##########################################################
145*/
146
#ifdef IQ_KERNEL_NAME
// 1D kernel: writes one intensity into result[i] for every q[i], i < Nq.
//
// When IQ_OPEN_LOOPS is defined the value is a weighted average of Iq over
// the loops that macro opens; otherwise Iq is evaluated once per q.
// scale and background are not declared in this function; presumably they
// arrive through IQ_FIXED_PARAMETER_DECLARATIONS (confirm in the generator).
//
// NOTE: local names are left as they are because the generated macros
// expand inside this body, and this code must stay consistent with
// kernelpy.py.
kernel void IQ_KERNEL_NAME(
    global const double *q,
    global double *result,
    const int Nq,
#ifdef IQ_OPEN_LOOPS
  #ifdef USE_OPENCL
    global double *loops_g,
  #endif
    local double *loops,
    const double cutoff,
    IQ_DISPERSION_LENGTH_DECLARATIONS,
#endif
    IQ_FIXED_PARAMETER_DECLARATIONS
    )
{
#if defined(USE_OPENCL) && defined(IQ_OPEN_LOOPS)
  // copy loops info to local memory
  event_t e = async_work_group_copy(loops, loops_g, (IQ_DISPERSION_LENGTH_SUM)*2, 0);
  wait_group_events(1, &e);
#endif

#ifdef USE_OPENCL
  // Each work item handles the q value selected by its global id.
  int i = get_global_id(0);
  if (i < Nq)
#else
  // On the host the q values are walked in an OpenMP parallel loop.
  #pragma omp parallel for
  for (int i=0; i < Nq; i++)
#endif
  {
    const double qi = q[i];
#ifndef IQ_OPEN_LOOPS
    result[i] = scale*Iq(qi, IQ_PARAMETERS) + background;
#else
    double ret=0.0;
    double norm=0.0;
    // IQ_OPEN_LOOPS opens the loops, for example:
    //   for (int radius_i=0; radius_i < Nradius; radius_i++) {
    //     const double radius = loops[2*(radius_i)];
    //     const double radius_w = loops[2*(radius_i)+1];
    IQ_OPEN_LOOPS

    const double weight = IQ_WEIGHT_PRODUCT;
    if (weight > cutoff) {
      const double scattering = Iq(qi, IQ_PARAMETERS);
      // allow kernels to exclude invalid regions by returning NaN
      if (!isnan(scattering)) {
        ret += weight*scattering;
      #ifndef VOLUME_PARAMETERS
        norm += weight;
      #else
        norm += weight * form_volume(VOLUME_PARAMETERS);
      #endif
      }
    }
    IQ_CLOSE_LOOPS

    // norm can only be zero if volume is zero, so no scattering
    if (norm > 0.) {
      result[i] = scale*ret/norm + background;
    } else {
      result[i] = background;
    }
#endif
  }
}
#endif
208
209
#ifdef IQXY_KERNEL_NAME
// 2D kernel: writes one intensity into result[i] for every (qx[i], qy[i]),
// i < Nq.
//
// When IQXY_OPEN_LOOPS is defined the value is a weighted average of Iqxy
// over the loops that macro opens; otherwise Iqxy is evaluated once per
// point.  scale and background are not declared in this function;
// presumably they arrive through IQXY_FIXED_PARAMETER_DECLARATIONS (confirm
// in the generator).
//
// NOTE: local names are left as they are because the generated macros
// expand inside this body, and this code must stay consistent with
// kernelpy.py.
kernel void IQXY_KERNEL_NAME(
    global const double *qx,
    global const double *qy,
    global double *result,
    const int Nq,
#ifdef IQXY_OPEN_LOOPS
  #ifdef USE_OPENCL
    global double *loops_g,
  #endif
    local double *loops,
    const double cutoff,
    IQXY_DISPERSION_LENGTH_DECLARATIONS,
#endif
    IQXY_FIXED_PARAMETER_DECLARATIONS
    )
{
#if defined(USE_OPENCL) && defined(IQXY_OPEN_LOOPS)
  // copy loops info to local memory
  event_t e = async_work_group_copy(loops, loops_g, (IQXY_DISPERSION_LENGTH_SUM)*2, 0);
  wait_group_events(1, &e);
#endif

#ifdef USE_OPENCL
  // Each work item handles the point selected by its global id.
  int i = get_global_id(0);
  if (i < Nq)
#else
  // On the host the points are walked in an OpenMP parallel loop.
  #pragma omp parallel for
  for (int i=0; i < Nq; i++)
#endif
  {
    const double qxi = qx[i];
    const double qyi = qy[i];
    #if USE_KAHAN_SUMMATION
    // Running compensation term for the Kahan sum of ret.
    double accumulated_error = 0.0;
    #endif
#ifndef IQXY_OPEN_LOOPS
    result[i] = scale*Iqxy(qxi, qyi, IQXY_PARAMETERS) + background;
#else
    double ret=0.0;
    double norm=0.0;
    // IQXY_OPEN_LOOPS opens the loops, for example:
    //   for (int radius_i=0; radius_i < Nradius; radius_i++) {
    //     const double radius = loops[2*(radius_i)];
    //     const double radius_w = loops[2*(radius_i)+1];
    IQXY_OPEN_LOOPS

    double weight = IQXY_WEIGHT_PRODUCT;
    if (weight > cutoff) {
      const double scattering = Iqxy(qxi, qyi, IQXY_PARAMETERS);
      // if scattering is bad, exclude it from sum
      if (!isnan(scattering)) {
      #ifdef IQXY_HAS_THETA
        // Force a nominal value for the spherical correction even when
        // theta is +90/-90 so that there are no divide by zero problems.
        // With the correction pinned at 1e-6, numerator and denominator are
        // both scaled by it, so the effect cancels.
        const double spherical_correction = fmax(fabs(cos(M_PI_180*theta)), 1.e-6);
        weight *= spherical_correction;
      #endif
        const double next = weight * scattering;
      #if USE_KAHAN_SUMMATION
        const double y = next - accumulated_error;
        const double t = ret + y;
        accumulated_error = (t - ret) - y;
        ret = t;
      #else
        ret += next;
      #endif
      #ifndef VOLUME_PARAMETERS
        norm += weight;
      #else
        norm += weight*form_volume(VOLUME_PARAMETERS);
      #endif
      }
    }
    IQXY_CLOSE_LOOPS

    // norm can only be zero if volume is zero, so no scattering
    if (norm>0.) {
      result[i] = scale*ret/norm + background;
    } else {
      result[i] = background;
    }
#endif
  }
}
#endif
Note: See TracBrowser for help on using the repository browser.