source: sasmodels/sasmodels/kernel_template.c @ caf768d

core_shell_microgelscostrafo411magnetic_modelrelease_v0.94release_v0.95ticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since caf768d was caf768d, checked in by Paul Kienzle <pkienzle@…>, 8 years ago

MSVC is missing isnan()

  • Property mode set to 100644
File size: 7.6 KB
Line 
1// GENERATED CODE --- DO NOT EDIT ---
2// Code is produced by sasmodels.gen from sasmodels/models/MODEL.c
3
4#ifdef __OPENCL_VERSION__
5# define USE_OPENCL
6#endif
7
8#define USE_KAHAN_SUMMATION 0
9
10// If opencl is not available, then we are compiling a C function
11// Note: if using a C++ compiler, then define kernel as extern "C"
12#ifndef USE_OPENCL
13#  ifdef __cplusplus
14     #include <cstdio>
15     #include <cmath>
16     using namespace std;
17     #if defined(_MSC_VER)
18         #include <float.h>
19         #define kernel extern "C" __declspec( dllexport )
20         inline double trunc(double x) { return x>=0?floor(x):-floor(-x); }
21             inline double fmin(double x, double y) { return x>y ? y : x; }
22             inline double fmax(double x, double y) { return x<y ? y : x; }
23             inline double isnan(double x) { return _isnan(x); }
24     #else
25         #define kernel extern "C"
26     #endif
27     inline void SINCOS(double angle, double &svar, double &cvar) { svar=sin(angle); cvar=cos(angle); }
28#  else
29     #include <stdio.h>
30     #include <tgmath.h> // C99 type-generic math, so sin(float) => sinf
31     // MSVC doesn't support C99, so no need for dllexport on C99 branch
32     #define kernel
33     #define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
34#  endif
35#  define global
36#  define local
37#  define constant const
38// OpenCL powr(a,b) = C99 pow(a,b), b >= 0
39// OpenCL pown(a,b) = C99 pow(a,b), b integer
40#  define powr(a,b) pow(a,b)
41#  define pown(a,b) pow(a,b)
42#else
43#  ifdef USE_SINCOS
44#    define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)
45#  else
46#    define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
47#  endif
48#endif
49
50// Standard mathematical constants:
51//   M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,
52//   M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), SQRT2, SQRT1_2=sqrt(1/2)
53// OpenCL defines M_constant_F for float constants, and nothing if double
54// is not enabled on the card, which is why these constants may be missing
55#ifndef M_PI
56#  define M_PI 3.141592653589793
57#endif
58#ifndef M_PI_2
59#  define M_PI_2 1.570796326794897
60#endif
61#ifndef M_PI_4
62#  define M_PI_4 0.7853981633974483
63#endif
64
65// Non-standard pi/180, used for converting between degrees and radians
66#ifndef M_PI_180
67#  define M_PI_180 0.017453292519943295
68#endif
69
70
71%(DEFINES)s
72
73%(SOURCES)s
74
75/*
76    ##########################################################
77    #                                                        #
78    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
79    #   !!                                              !!   #
80    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
81    #   !!                                              !!   #
82    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
83    #                                                        #
84    ##########################################################
85*/
86
87#ifdef IQ_KERNEL_NAME
88kernel void IQ_KERNEL_NAME(
89    global const double *q,
90    global double *result,
91    const int Nq,
92#ifdef IQ_OPEN_LOOPS
93  #ifdef USE_OPENCL
94    global double *loops_g,
95  #endif
96    local double *loops,
97    const double cutoff,
98    IQ_DISPERSION_LENGTH_DECLARATIONS,
99#endif
100    IQ_FIXED_PARAMETER_DECLARATIONS
101    )
102{
103#ifdef USE_OPENCL
104  #ifdef IQ_OPEN_LOOPS
105  // copy loops info to local memory
106  event_t e = async_work_group_copy(loops, loops_g, (IQ_DISPERSION_LENGTH_SUM)*2, 0);
107  wait_group_events(1, &e);
108  #endif
109
110  int i = get_global_id(0);
111  if (i < Nq)
112#else
113  #pragma omp parallel for
114  for (int i=0; i < Nq; i++)
115#endif
116  {
117    const double qi = q[i];
118#ifdef IQ_OPEN_LOOPS
119    double ret=0.0, norm=0.0;
120  #ifdef VOLUME_PARAMETERS
121    double vol=0.0, norm_vol=0.0;
122  #endif
123    IQ_OPEN_LOOPS
124    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
125    //  const double radius = loops[2*(radius_i)];
126    //  const double radius_w = loops[2*(radius_i)+1];
127
128    const double weight = IQ_WEIGHT_PRODUCT;
129    if (weight > cutoff) {
130      const double scattering = Iq(qi, IQ_PARAMETERS);
131      // allow kernels to exclude invalid regions by returning NaN
132      if (!isnan(scattering)) {
133        ret += weight*scattering;
134        norm += weight;
135      #ifdef VOLUME_PARAMETERS
136        const double vol_weight = VOLUME_WEIGHT_PRODUCT;
137        vol += vol_weight*form_volume(VOLUME_PARAMETERS);
138        norm_vol += vol_weight;
139      #endif
140      }
141    //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
142    }
143    IQ_CLOSE_LOOPS
144  #ifdef VOLUME_PARAMETERS
145    if (vol*norm_vol != 0.0) {
146      ret *= norm_vol/vol;
147    }
148  #endif
149    result[i] = scale*ret/norm+background;
150#else
151    result[i] = scale*Iq(qi, IQ_PARAMETERS) + background;
152#endif
153  }
154}
155#endif
156
157
158#ifdef IQXY_KERNEL_NAME
159kernel void IQXY_KERNEL_NAME(
160    global const double *qx,
161    global const double *qy,
162    global double *result,
163    const int Nq,
164#ifdef IQXY_OPEN_LOOPS
165  #ifdef USE_OPENCL
166    global double *loops_g,
167  #endif
168    local double *loops,
169    const double cutoff,
170    IQXY_DISPERSION_LENGTH_DECLARATIONS,
171#endif
172    IQXY_FIXED_PARAMETER_DECLARATIONS
173    )
174{
175#ifdef USE_OPENCL
176  #ifdef IQXY_OPEN_LOOPS
177  // copy loops info to local memory
178  event_t e = async_work_group_copy(loops, loops_g, (IQXY_DISPERSION_LENGTH_SUM)*2, 0);
179  wait_group_events(1, &e);
180  #endif
181
182  int i = get_global_id(0);
183  if (i < Nq)
184#else
185  #pragma omp parallel for
186  for (int i=0; i < Nq; i++)
187#endif
188  {
189    const double qxi = qx[i];
190    const double qyi = qy[i];
191    #if USE_KAHAN_SUMMATION
192    double accumulated_error = 0.0;
193    #endif
194#ifdef IQXY_OPEN_LOOPS
195    double ret=0.0, norm=0.0;
196    #ifdef VOLUME_PARAMETERS
197    double vol=0.0, norm_vol=0.0;
198    #endif
199    IQXY_OPEN_LOOPS
200    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
201    //  const double radius = loops[2*(radius_i)];
202    //  const double radius_w = loops[2*(radius_i)+1];
203
204    const double weight = IQXY_WEIGHT_PRODUCT;
205    if (weight > cutoff) {
206
207      const double scattering = Iqxy(qxi, qyi, IQXY_PARAMETERS);
208      if (!isnan(scattering)) { // if scattering is bad, exclude it from sum
209      //if (scattering >= 0.0) { // scattering cannot be negative
210        // TODO: use correct angle for spherical correction
211        // Definition of theta and phi are probably reversed relative to the
212        // equation which gave rise to this correction, leading to an
213        // attenuation of the pattern as theta moves through pi/2.  Either
214        // reverse the meanings of phi and theta in the forms, or use phi
215        // rather than theta in this correction.  Current code uses cos(theta)
216        // so that values match those of sasview.
217      #if defined(IQXY_HAS_THETA) // && 0
218        const double spherical_correction
219          = (Ntheta>1 ? fabs(cos(M_PI_180*theta))*M_PI_2:1.0);
220        const double next = spherical_correction * weight * scattering;
221      #else
222        const double next = weight * scattering;
223      #endif
224      #if USE_KAHAN_SUMMATION
225        const double y = next - accumulated_error;
226        const double t = ret + y;
227        accumulated_error = (t - ret) - y;
228        ret = t;
229      #else
230        ret += next;
231      #endif
232        norm += weight;
233      #ifdef VOLUME_PARAMETERS
234        const double vol_weight = VOLUME_WEIGHT_PRODUCT;
235        vol += vol_weight*form_volume(VOLUME_PARAMETERS);
236      #endif
237        norm_vol += vol_weight;
238      }
239      //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
240    }
241    IQXY_CLOSE_LOOPS
242  #ifdef VOLUME_PARAMETERS
243    if (vol*norm_vol != 0.0) {
244      ret *= norm_vol/vol;
245    }
246  #endif
247    result[i] = scale*ret/norm+background;
248#else
249    result[i] = scale*Iqxy(qxi, qyi, IQXY_PARAMETERS) + background;
250#endif
251  }
252}
253#endif
Note: See TracBrowser for help on using the repository browser.