source: sasmodels/sasmodels/kernel_template.c @ cf85329

core_shell_microgelscostrafo411magnetic_modelrelease_v0.94release_v0.95ticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since cf85329 was cf85329, checked in by Paul Kienzle <pkienzle@…>, 8 years ago

add sinc() function to kernel template

  • Property mode set to 100644
File size: 9.1 KB
Line 
1// GENERATED CODE --- DO NOT EDIT ---
2// Code is produced by sasmodels.gen from sasmodels/models/MODEL.c
3
4#ifdef __OPENCL_VERSION__
5# define USE_OPENCL
6#endif
7
8#define USE_KAHAN_SUMMATION 0
9
10// If opencl is not available, then we are compiling a C function
11// Note: if using a C++ compiler, then define kernel as extern "C"
12#ifndef USE_OPENCL
13#  ifdef __cplusplus
14      #include <cstdio>
15      #include <cmath>
16      using namespace std;
17      #if defined(_MSC_VER)
18         #include <limits>
19         #include <float.h>
20         #define kernel extern "C" __declspec( dllexport )
21         inline double trunc(double x) { return x>=0?floor(x):-floor(-x); }
22         inline double fmin(double x, double y) { return x>y ? y : x; }
23         inline double fmax(double x, double y) { return x<y ? y : x; }
24         inline double isnan(double x) { return _isnan(x); }
25         #define NAN (std::numeric_limits<double>::quiet_NaN()) // non-signalling NaN
26         static double cephes_expm1(double x) {
27            // Adapted from the cephes math library.
28            // Copyright 1984 - 1992 by Stephen L. Moshier
29            if (x != x || x == 0.0) {
30               return x; // NaN and +/- 0
31            } else if (x < -0.5 || x > 0.5) {
32               return exp(x) - 1.0;
33            } else {
34               const double xsq = x*x;
35               const double p = (((
36                  +1.2617719307481059087798E-4)*xsq
37                  +3.0299440770744196129956E-2)*xsq
38                  +9.9999999999999999991025E-1);
39               const double q = ((((
40                  +3.0019850513866445504159E-6)*xsq
41                  +2.5244834034968410419224E-3)*xsq
42                  +2.2726554820815502876593E-1)*xsq
43                  +2.0000000000000000000897E0);
44               double r = x * p;
45               r =  r / (q - r);
46               return r+r;
47             }
48         }
49         #define expm1 cephes_expm1
50     #else
51         #define kernel extern "C"
52     #endif
53     inline void SINCOS(double angle, double &svar, double &cvar) { svar=sin(angle); cvar=cos(angle); }
54#  else
55     #include <stdio.h>
56     #include <tgmath.h> // C99 type-generic math, so sin(float) => sinf
57     // MSVC doesn't support C99, so no need for dllexport on C99 branch
58     #define kernel
59     #define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
60#  endif
61#  define global
62#  define local
63#  define constant const
64// OpenCL powr(a,b) = C99 pow(a,b), b >= 0
65// OpenCL pown(a,b) = C99 pow(a,b), b integer
66#  define powr(a,b) pow(a,b)
67#  define pown(a,b) pow(a,b)
68#else
69#  if defined(USE_SINCOS)
70#    define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)
71#  else
72#    define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
73#  endif
74#endif
75
76// Standard mathematical constants:
77//   M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,
78//   M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), SQRT2, SQRT1_2=sqrt(1/2)
79// OpenCL defines M_constant_F for float constants, and nothing if double
80// is not enabled on the card, which is why these constants may be missing
81#ifndef M_PI
82#  define M_PI 3.141592653589793
83#endif
84#ifndef M_PI_2
85#  define M_PI_2 1.570796326794897
86#endif
87#ifndef M_PI_4
88#  define M_PI_4 0.7853981633974483
89#endif
90#ifndef M_E
91#  define M_E 2.718281828459045091
92#endif
93
94// Non-standard function library
95// pi/180, used for converting between degrees and radians
96// 4/3 pi for computing sphere volumes
97// square and cube for computing squares and cubes
98#ifndef M_PI_180
99#  define M_PI_180 0.017453292519943295
100#endif
101#ifndef M_4PI_3
102#  define M_4PI_3 4.18879020478639
103#endif
104//inline double square(double x) { return pow(x,2.0); }
105//inline double square(double x) { return pown(x,2); }
106inline double square(double x) { return x*x; }
107inline double cube(double x) { return x*x*x; }
108inline double sinc(double x) { return x==0 ? 1.0 : sin(x)/x; }
109
110
111%(DEFINES)s
112
113%(SOURCES)s
114
115/*
116    ##########################################################
117    #                                                        #
118    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
119    #   !!                                              !!   #
120    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
121    #   !!                                              !!   #
122    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
123    #                                                        #
124    ##########################################################
125*/
126
127#ifdef IQ_KERNEL_NAME
128kernel void IQ_KERNEL_NAME(
129    global const double *q,
130    global double *result,
131    const int Nq,
132#ifdef IQ_OPEN_LOOPS
133  #ifdef USE_OPENCL
134    global double *loops_g,
135  #endif
136    local double *loops,
137    const double cutoff,
138    IQ_DISPERSION_LENGTH_DECLARATIONS,
139#endif
140    IQ_FIXED_PARAMETER_DECLARATIONS
141    )
142{
143#ifdef USE_OPENCL
144  #ifdef IQ_OPEN_LOOPS
145  // copy loops info to local memory
146  event_t e = async_work_group_copy(loops, loops_g, (IQ_DISPERSION_LENGTH_SUM)*2, 0);
147  wait_group_events(1, &e);
148  #endif
149
150  int i = get_global_id(0);
151  if (i < Nq)
152#else
153  #pragma omp parallel for
154  for (int i=0; i < Nq; i++)
155#endif
156  {
157    const double qi = q[i];
158#ifdef IQ_OPEN_LOOPS
159    double ret=0.0, norm=0.0;
160  #ifdef VOLUME_PARAMETERS
161    double vol=0.0, norm_vol=0.0;
162  #endif
163    IQ_OPEN_LOOPS
164    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
165    //  const double radius = loops[2*(radius_i)];
166    //  const double radius_w = loops[2*(radius_i)+1];
167
168    const double weight = IQ_WEIGHT_PRODUCT;
169    if (weight > cutoff) {
170      const double scattering = Iq(qi, IQ_PARAMETERS);
171      // allow kernels to exclude invalid regions by returning NaN
172      if (!isnan(scattering)) {
173        ret += weight*scattering;
174        norm += weight;
175      #ifdef VOLUME_PARAMETERS
176        const double vol_weight = VOLUME_WEIGHT_PRODUCT;
177        vol += vol_weight*form_volume(VOLUME_PARAMETERS);
178        norm_vol += vol_weight;
179      #endif
180      }
181    //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
182    }
183    IQ_CLOSE_LOOPS
184  #ifdef VOLUME_PARAMETERS
185    if (vol*norm_vol != 0.0) {
186      ret *= norm_vol/vol;
187    }
188  #endif
189    result[i] = scale*ret/norm+background;
190#else
191    result[i] = scale*Iq(qi, IQ_PARAMETERS) + background;
192#endif
193  }
194}
195#endif
196
197
198#ifdef IQXY_KERNEL_NAME
199kernel void IQXY_KERNEL_NAME(
200    global const double *qx,
201    global const double *qy,
202    global double *result,
203    const int Nq,
204#ifdef IQXY_OPEN_LOOPS
205  #ifdef USE_OPENCL
206    global double *loops_g,
207  #endif
208    local double *loops,
209    const double cutoff,
210    IQXY_DISPERSION_LENGTH_DECLARATIONS,
211#endif
212    IQXY_FIXED_PARAMETER_DECLARATIONS
213    )
214{
215#ifdef USE_OPENCL
216  #ifdef IQXY_OPEN_LOOPS
217  // copy loops info to local memory
218  event_t e = async_work_group_copy(loops, loops_g, (IQXY_DISPERSION_LENGTH_SUM)*2, 0);
219  wait_group_events(1, &e);
220  #endif
221
222  int i = get_global_id(0);
223  if (i < Nq)
224#else
225  #pragma omp parallel for
226  for (int i=0; i < Nq; i++)
227#endif
228  {
229    const double qxi = qx[i];
230    const double qyi = qy[i];
231    #if USE_KAHAN_SUMMATION
232    double accumulated_error = 0.0;
233    #endif
234#ifdef IQXY_OPEN_LOOPS
235    double ret=0.0, norm=0.0;
236    #ifdef VOLUME_PARAMETERS
237    double vol=0.0, norm_vol=0.0;
238    #endif
239    IQXY_OPEN_LOOPS
240    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
241    //  const double radius = loops[2*(radius_i)];
242    //  const double radius_w = loops[2*(radius_i)+1];
243
244    const double weight = IQXY_WEIGHT_PRODUCT;
245    if (weight > cutoff) {
246
247      const double scattering = Iqxy(qxi, qyi, IQXY_PARAMETERS);
248      if (!isnan(scattering)) { // if scattering is bad, exclude it from sum
249      //if (scattering >= 0.0) { // scattering cannot be negative
250        // TODO: use correct angle for spherical correction
251        // Definition of theta and phi are probably reversed relative to the
252        // equation which gave rise to this correction, leading to an
253        // attenuation of the pattern as theta moves through pi/2.  Either
254        // reverse the meanings of phi and theta in the forms, or use phi
255        // rather than theta in this correction.  Current code uses cos(theta)
256        // so that values match those of sasview.
257      #if defined(IQXY_HAS_THETA) // && 0
258        const double spherical_correction
259          = (Ntheta>1 ? fabs(cos(M_PI_180*theta))*M_PI_2:1.0);
260        const double next = spherical_correction * weight * scattering;
261      #else
262        const double next = weight * scattering;
263      #endif
264      #if USE_KAHAN_SUMMATION
265        const double y = next - accumulated_error;
266        const double t = ret + y;
267        accumulated_error = (t - ret) - y;
268        ret = t;
269      #else
270        ret += next;
271      #endif
272        norm += weight;
273      #ifdef VOLUME_PARAMETERS
274        const double vol_weight = VOLUME_WEIGHT_PRODUCT;
275        vol += vol_weight*form_volume(VOLUME_PARAMETERS);
276        norm_vol += vol_weight;
277      #endif
278      }
279      //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
280    }
281    IQXY_CLOSE_LOOPS
282  #ifdef VOLUME_PARAMETERS
283    if (vol*norm_vol != 0.0) {
284      ret *= norm_vol/vol;
285    }
286  #endif
287    result[i] = scale*ret/norm+background;
288#else
289    result[i] = scale*Iqxy(qxi, qyi, IQXY_PARAMETERS) + background;
290#endif
291  }
292}
293#endif
Note: See TracBrowser for help on using the repository browser.