source: sasmodels/sasmodels/kernel_template.c @ 960cd80

core_shell_microgelscostrafo411magnetic_modelrelease_v0.94release_v0.95ticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since 960cd80 was 960cd80, checked in by Paul Kienzle <pkienzle@…>, 8 years ago

code reformatting

  • Property mode set to 100644
File size: 8.6 KB
Line 
1// GENERATED CODE --- DO NOT EDIT ---
2// Code is produced by sasmodels.gen from sasmodels/models/MODEL.c
3
4#ifdef __OPENCL_VERSION__
5# define USE_OPENCL
6#endif
7
8#define USE_KAHAN_SUMMATION 0
9
10// If opencl is not available, then we are compiling a C function
11// Note: if using a C++ compiler, then define kernel as extern "C"
12#ifndef USE_OPENCL
13#  ifdef __cplusplus
14      #include <cstdio>
15      #include <cmath>
16      using namespace std;
17      #if defined(_MSC_VER)
18         #include <limits>
19         #include <float.h>
20         #define kernel extern "C" __declspec( dllexport )
21         inline double trunc(double x) { return x>=0?floor(x):-floor(-x); }
22         inline double fmin(double x, double y) { return x>y ? y : x; }
23         inline double fmax(double x, double y) { return x<y ? y : x; }
24         inline double isnan(double x) { return _isnan(x); }
25         #define NAN (std::numeric_limits<double>::quiet_NaN()) // non-signalling NaN
26         static double cephes_expm1(double x) {
27            // Adapted from the cephes math library.
28            // Copyright 1984 - 1992 by Stephen L. Moshier
29            if (x != x || x == 0.0) {
30               return x; // NaN and +/- 0
31            } else if (x < -0.5 || x > 0.5) {
32               return exp(x) - 1.0;
33            } else {
34               const double xsq = x*x;
35               const double p = (((
36                  +1.2617719307481059087798E-4)*xsq
37                  +3.0299440770744196129956E-2)*xsq
38                  +9.9999999999999999991025E-1);
39               const double q = ((((
40                  +3.0019850513866445504159E-6)*xsq
41                  +2.5244834034968410419224E-3)*xsq
42                  +2.2726554820815502876593E-1)*xsq
43                  +2.0000000000000000000897E0);
44               double r = x * p;
45               r =  r / (q - r);
46               return r+r;
47             }
48         }
49         #define expm1 cephes_expm1
50     #else
51         #define kernel extern "C"
52     #endif
53     inline void SINCOS(double angle, double &svar, double &cvar) { svar=sin(angle); cvar=cos(angle); }
54#  else
55     #include <stdio.h>
56     #include <tgmath.h> // C99 type-generic math, so sin(float) => sinf
57     // MSVC doesn't support C99, so no need for dllexport on C99 branch
58     #define kernel
59     #define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
60#  endif
61#  define global
62#  define local
63#  define constant const
64// OpenCL powr(a,b) = C99 pow(a,b), b >= 0
65// OpenCL pown(a,b) = C99 pow(a,b), b integer
66#  define powr(a,b) pow(a,b)
67#  define pown(a,b) pow(a,b)
68#else
69#  ifdef USE_SINCOS
70#    define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)
71#  else
72#    define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
73#  endif
74#endif
75
76// Standard mathematical constants:
77//   M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,
78//   M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), SQRT2, SQRT1_2=sqrt(1/2)
79// OpenCL defines M_constant_F for float constants, and nothing if double
80// is not enabled on the card, which is why these constants may be missing
81#ifndef M_PI
82#  define M_PI 3.141592653589793
83#endif
84#ifndef M_PI_2
85#  define M_PI_2 1.570796326794897
86#endif
87#ifndef M_PI_4
88#  define M_PI_4 0.7853981633974483
89#endif
90
91// Non-standard pi/180, used for converting between degrees and radians
92#ifndef M_PI_180
93#  define M_PI_180 0.017453292519943295
94#endif
95
96
97%(DEFINES)s
98
99%(SOURCES)s
100
101/*
102    ##########################################################
103    #                                                        #
104    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
105    #   !!                                              !!   #
106    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
107    #   !!                                              !!   #
108    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
109    #                                                        #
110    ##########################################################
111*/
112
113#ifdef IQ_KERNEL_NAME
114kernel void IQ_KERNEL_NAME(
115    global const double *q,
116    global double *result,
117    const int Nq,
118#ifdef IQ_OPEN_LOOPS
119  #ifdef USE_OPENCL
120    global double *loops_g,
121  #endif
122    local double *loops,
123    const double cutoff,
124    IQ_DISPERSION_LENGTH_DECLARATIONS,
125#endif
126    IQ_FIXED_PARAMETER_DECLARATIONS
127    )
128{
129#ifdef USE_OPENCL
130  #ifdef IQ_OPEN_LOOPS
131  // copy loops info to local memory
132  event_t e = async_work_group_copy(loops, loops_g, (IQ_DISPERSION_LENGTH_SUM)*2, 0);
133  wait_group_events(1, &e);
134  #endif
135
136  int i = get_global_id(0);
137  if (i < Nq)
138#else
139  #pragma omp parallel for
140  for (int i=0; i < Nq; i++)
141#endif
142  {
143    const double qi = q[i];
144#ifdef IQ_OPEN_LOOPS
145    double ret=0.0, norm=0.0;
146  #ifdef VOLUME_PARAMETERS
147    double vol=0.0, norm_vol=0.0;
148  #endif
149    IQ_OPEN_LOOPS
150    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
151    //  const double radius = loops[2*(radius_i)];
152    //  const double radius_w = loops[2*(radius_i)+1];
153
154    const double weight = IQ_WEIGHT_PRODUCT;
155    if (weight > cutoff) {
156      const double scattering = Iq(qi, IQ_PARAMETERS);
157      // allow kernels to exclude invalid regions by returning NaN
158      if (!isnan(scattering)) {
159        ret += weight*scattering;
160        norm += weight;
161      #ifdef VOLUME_PARAMETERS
162        const double vol_weight = VOLUME_WEIGHT_PRODUCT;
163        vol += vol_weight*form_volume(VOLUME_PARAMETERS);
164        norm_vol += vol_weight;
165      #endif
166      }
167    //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
168    }
169    IQ_CLOSE_LOOPS
170  #ifdef VOLUME_PARAMETERS
171    if (vol*norm_vol != 0.0) {
172      ret *= norm_vol/vol;
173    }
174  #endif
175    result[i] = scale*ret/norm+background;
176#else
177    result[i] = scale*Iq(qi, IQ_PARAMETERS) + background;
178#endif
179  }
180}
181#endif
182
183
184#ifdef IQXY_KERNEL_NAME
185kernel void IQXY_KERNEL_NAME(
186    global const double *qx,
187    global const double *qy,
188    global double *result,
189    const int Nq,
190#ifdef IQXY_OPEN_LOOPS
191  #ifdef USE_OPENCL
192    global double *loops_g,
193  #endif
194    local double *loops,
195    const double cutoff,
196    IQXY_DISPERSION_LENGTH_DECLARATIONS,
197#endif
198    IQXY_FIXED_PARAMETER_DECLARATIONS
199    )
200{
201#ifdef USE_OPENCL
202  #ifdef IQXY_OPEN_LOOPS
203  // copy loops info to local memory
204  event_t e = async_work_group_copy(loops, loops_g, (IQXY_DISPERSION_LENGTH_SUM)*2, 0);
205  wait_group_events(1, &e);
206  #endif
207
208  int i = get_global_id(0);
209  if (i < Nq)
210#else
211  #pragma omp parallel for
212  for (int i=0; i < Nq; i++)
213#endif
214  {
215    const double qxi = qx[i];
216    const double qyi = qy[i];
217    #if USE_KAHAN_SUMMATION
218    double accumulated_error = 0.0;
219    #endif
220#ifdef IQXY_OPEN_LOOPS
221    double ret=0.0, norm=0.0;
222    #ifdef VOLUME_PARAMETERS
223    double vol=0.0, norm_vol=0.0;
224    #endif
225    IQXY_OPEN_LOOPS
226    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
227    //  const double radius = loops[2*(radius_i)];
228    //  const double radius_w = loops[2*(radius_i)+1];
229
230    const double weight = IQXY_WEIGHT_PRODUCT;
231    if (weight > cutoff) {
232
233      const double scattering = Iqxy(qxi, qyi, IQXY_PARAMETERS);
234      if (!isnan(scattering)) { // if scattering is bad, exclude it from sum
235      //if (scattering >= 0.0) { // scattering cannot be negative
236        // TODO: use correct angle for spherical correction
237        // Definition of theta and phi are probably reversed relative to the
238        // equation which gave rise to this correction, leading to an
239        // attenuation of the pattern as theta moves through pi/2.  Either
240        // reverse the meanings of phi and theta in the forms, or use phi
241        // rather than theta in this correction.  Current code uses cos(theta)
242        // so that values match those of sasview.
243      #if defined(IQXY_HAS_THETA) // && 0
244        const double spherical_correction
245          = (Ntheta>1 ? fabs(cos(M_PI_180*theta))*M_PI_2:1.0);
246        const double next = spherical_correction * weight * scattering;
247      #else
248        const double next = weight * scattering;
249      #endif
250      #if USE_KAHAN_SUMMATION
251        const double y = next - accumulated_error;
252        const double t = ret + y;
253        accumulated_error = (t - ret) - y;
254        ret = t;
255      #else
256        ret += next;
257      #endif
258        norm += weight;
259      #ifdef VOLUME_PARAMETERS
260        const double vol_weight = VOLUME_WEIGHT_PRODUCT;
261        vol += vol_weight*form_volume(VOLUME_PARAMETERS);
262        norm_vol += vol_weight;
263      #endif
264      }
265      //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
266    }
267    IQXY_CLOSE_LOOPS
268  #ifdef VOLUME_PARAMETERS
269    if (vol*norm_vol != 0.0) {
270      ret *= norm_vol/vol;
271    }
272  #endif
273    result[i] = scale*ret/norm+background;
274#else
275    result[i] = scale*Iqxy(qxi, qyi, IQXY_PARAMETERS) + background;
276#endif
277  }
278}
279#endif
Note: See TracBrowser for help on using the repository browser.