source: sasmodels/sasmodels/kernel_template.c @ deac08c

core_shell_microgelscostrafo411magnetic_modelrelease_v0.94release_v0.95ticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since deac08c was deac08c, checked in by Paul Kienzle <pkienzle@…>, 8 years ago

add square(), cube() and M_4PI_3

  • Property mode set to 100644
File size: 9.0 KB
Line 
1// GENERATED CODE --- DO NOT EDIT ---
2// Code is produced by sasmodels.gen from sasmodels/models/MODEL.c
3
4#ifdef __OPENCL_VERSION__
5# define USE_OPENCL
6#endif
7
8#define USE_KAHAN_SUMMATION 0
9
10// If opencl is not available, then we are compiling a C function
11// Note: if using a C++ compiler, then define kernel as extern "C"
12#ifndef USE_OPENCL
13#  ifdef __cplusplus
14      #include <cstdio>
15      #include <cmath>
16      using namespace std;
17      #if defined(_MSC_VER)
18         #include <limits>
19         #include <float.h>
20         #define kernel extern "C" __declspec( dllexport )
21         inline double trunc(double x) { return x>=0?floor(x):-floor(-x); }
22         inline double fmin(double x, double y) { return x>y ? y : x; }
23         inline double fmax(double x, double y) { return x<y ? y : x; }
24         inline double isnan(double x) { return _isnan(x); }
25         #define NAN (std::numeric_limits<double>::quiet_NaN()) // non-signalling NaN
26         static double cephes_expm1(double x) {
27            // Adapted from the cephes math library.
28            // Copyright 1984 - 1992 by Stephen L. Moshier
29            if (x != x || x == 0.0) {
30               return x; // NaN and +/- 0
31            } else if (x < -0.5 || x > 0.5) {
32               return exp(x) - 1.0;
33            } else {
34               const double xsq = x*x;
35               const double p = (((
36                  +1.2617719307481059087798E-4)*xsq
37                  +3.0299440770744196129956E-2)*xsq
38                  +9.9999999999999999991025E-1);
39               const double q = ((((
40                  +3.0019850513866445504159E-6)*xsq
41                  +2.5244834034968410419224E-3)*xsq
42                  +2.2726554820815502876593E-1)*xsq
43                  +2.0000000000000000000897E0);
44               double r = x * p;
45               r =  r / (q - r);
46               return r+r;
47             }
48         }
49         #define expm1 cephes_expm1
50     #else
51         #define kernel extern "C"
52     #endif
53     inline void SINCOS(double angle, double &svar, double &cvar) { svar=sin(angle); cvar=cos(angle); }
54#  else
55     #include <stdio.h>
56     #include <tgmath.h> // C99 type-generic math, so sin(float) => sinf
57     // MSVC doesn't support C99, so no need for dllexport on C99 branch
58     #define kernel
59     #define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
60#  endif
61#  define global
62#  define local
63#  define constant const
64// OpenCL powr(a,b) = C99 pow(a,b), b >= 0
65// OpenCL pown(a,b) = C99 pow(a,b), b integer
66#  define powr(a,b) pow(a,b)
67#  define pown(a,b) pow(a,b)
68#else
69#  if defined(USE_SINCOS)
70#    define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)
71#  else
72#    define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
73#  endif
74#endif
75
76// Standard mathematical constants:
77//   M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,
78//   M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), SQRT2, SQRT1_2=sqrt(1/2)
79// OpenCL defines M_constant_F for float constants, and nothing if double
80// is not enabled on the card, which is why these constants may be missing
81#ifndef M_PI
82#  define M_PI 3.141592653589793
83#endif
84#ifndef M_PI_2
85#  define M_PI_2 1.570796326794897
86#endif
87#ifndef M_PI_4
88#  define M_PI_4 0.7853981633974483
89#endif
90
91// Non-standard function library
92// pi/180, used for converting between degrees and radians
93// 4/3 pi for computing sphere volumes
94// square and cube for computing squares and cubes
95#ifndef M_PI_180
96#  define M_PI_180 0.017453292519943295
97#endif
98#ifndef M_4PI_3
99#  define M_4PI_3 4.18879020478639
100#endif
101inline double square(double x) { return pow(x,2.0); }
102//inline double square(double x) { return pown(x,2); }
103//inline double square(x) { return x*x; }
104inline double cube(double x) { return x*x*x; }
105
106
107%(DEFINES)s
108
109%(SOURCES)s
110
111/*
112    ##########################################################
113    #                                                        #
114    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
115    #   !!                                              !!   #
116    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
117    #   !!                                              !!   #
118    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
119    #                                                        #
120    ##########################################################
121*/
122
123#ifdef IQ_KERNEL_NAME
124kernel void IQ_KERNEL_NAME(
125    global const double *q,
126    global double *result,
127    const int Nq,
128#ifdef IQ_OPEN_LOOPS
129  #ifdef USE_OPENCL
130    global double *loops_g,
131  #endif
132    local double *loops,
133    const double cutoff,
134    IQ_DISPERSION_LENGTH_DECLARATIONS,
135#endif
136    IQ_FIXED_PARAMETER_DECLARATIONS
137    )
138{
139#ifdef USE_OPENCL
140  #ifdef IQ_OPEN_LOOPS
141  // copy loops info to local memory
142  event_t e = async_work_group_copy(loops, loops_g, (IQ_DISPERSION_LENGTH_SUM)*2, 0);
143  wait_group_events(1, &e);
144  #endif
145
146  int i = get_global_id(0);
147  if (i < Nq)
148#else
149  #pragma omp parallel for
150  for (int i=0; i < Nq; i++)
151#endif
152  {
153    const double qi = q[i];
154#ifdef IQ_OPEN_LOOPS
155    double ret=0.0, norm=0.0;
156  #ifdef VOLUME_PARAMETERS
157    double vol=0.0, norm_vol=0.0;
158  #endif
159    IQ_OPEN_LOOPS
160    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
161    //  const double radius = loops[2*(radius_i)];
162    //  const double radius_w = loops[2*(radius_i)+1];
163
164    const double weight = IQ_WEIGHT_PRODUCT;
165    if (weight > cutoff) {
166      const double scattering = Iq(qi, IQ_PARAMETERS);
167      // allow kernels to exclude invalid regions by returning NaN
168      if (!isnan(scattering)) {
169        ret += weight*scattering;
170        norm += weight;
171      #ifdef VOLUME_PARAMETERS
172        const double vol_weight = VOLUME_WEIGHT_PRODUCT;
173        vol += vol_weight*form_volume(VOLUME_PARAMETERS);
174        norm_vol += vol_weight;
175      #endif
176      }
177    //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
178    }
179    IQ_CLOSE_LOOPS
180  #ifdef VOLUME_PARAMETERS
181    if (vol*norm_vol != 0.0) {
182      ret *= norm_vol/vol;
183    }
184  #endif
185    result[i] = scale*ret/norm+background;
186#else
187    result[i] = scale*Iq(qi, IQ_PARAMETERS) + background;
188#endif
189  }
190}
191#endif
192
193
194#ifdef IQXY_KERNEL_NAME
195kernel void IQXY_KERNEL_NAME(
196    global const double *qx,
197    global const double *qy,
198    global double *result,
199    const int Nq,
200#ifdef IQXY_OPEN_LOOPS
201  #ifdef USE_OPENCL
202    global double *loops_g,
203  #endif
204    local double *loops,
205    const double cutoff,
206    IQXY_DISPERSION_LENGTH_DECLARATIONS,
207#endif
208    IQXY_FIXED_PARAMETER_DECLARATIONS
209    )
210{
211#ifdef USE_OPENCL
212  #ifdef IQXY_OPEN_LOOPS
213  // copy loops info to local memory
214  event_t e = async_work_group_copy(loops, loops_g, (IQXY_DISPERSION_LENGTH_SUM)*2, 0);
215  wait_group_events(1, &e);
216  #endif
217
218  int i = get_global_id(0);
219  if (i < Nq)
220#else
221  #pragma omp parallel for
222  for (int i=0; i < Nq; i++)
223#endif
224  {
225    const double qxi = qx[i];
226    const double qyi = qy[i];
227    #if USE_KAHAN_SUMMATION
228    double accumulated_error = 0.0;
229    #endif
230#ifdef IQXY_OPEN_LOOPS
231    double ret=0.0, norm=0.0;
232    #ifdef VOLUME_PARAMETERS
233    double vol=0.0, norm_vol=0.0;
234    #endif
235    IQXY_OPEN_LOOPS
236    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
237    //  const double radius = loops[2*(radius_i)];
238    //  const double radius_w = loops[2*(radius_i)+1];
239
240    const double weight = IQXY_WEIGHT_PRODUCT;
241    if (weight > cutoff) {
242
243      const double scattering = Iqxy(qxi, qyi, IQXY_PARAMETERS);
244      if (!isnan(scattering)) { // if scattering is bad, exclude it from sum
245      //if (scattering >= 0.0) { // scattering cannot be negative
246        // TODO: use correct angle for spherical correction
247        // Definition of theta and phi are probably reversed relative to the
248        // equation which gave rise to this correction, leading to an
249        // attenuation of the pattern as theta moves through pi/2.  Either
250        // reverse the meanings of phi and theta in the forms, or use phi
251        // rather than theta in this correction.  Current code uses cos(theta)
252        // so that values match those of sasview.
253      #if defined(IQXY_HAS_THETA) // && 0
254        const double spherical_correction
255          = (Ntheta>1 ? fabs(cos(M_PI_180*theta))*M_PI_2:1.0);
256        const double next = spherical_correction * weight * scattering;
257      #else
258        const double next = weight * scattering;
259      #endif
260      #if USE_KAHAN_SUMMATION
261        const double y = next - accumulated_error;
262        const double t = ret + y;
263        accumulated_error = (t - ret) - y;
264        ret = t;
265      #else
266        ret += next;
267      #endif
268        norm += weight;
269      #ifdef VOLUME_PARAMETERS
270        const double vol_weight = VOLUME_WEIGHT_PRODUCT;
271        vol += vol_weight*form_volume(VOLUME_PARAMETERS);
272        norm_vol += vol_weight;
273      #endif
274      }
275      //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
276    }
277    IQXY_CLOSE_LOOPS
278  #ifdef VOLUME_PARAMETERS
279    if (vol*norm_vol != 0.0) {
280      ret *= norm_vol/vol;
281    }
282  #endif
283    result[i] = scale*ret/norm+background;
284#else
285    result[i] = scale*Iqxy(qxi, qyi, IQXY_PARAMETERS) + background;
286#endif
287  }
288}
289#endif
Note: See TracBrowser for help on using the repository browser.