source: sasmodels/sasmodels/kernel_template.c @ 73860b6

core_shell_microgelscostrafo411magnetic_modelrelease_v0.94release_v0.95ticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since 73860b6 was 73860b6, checked in by Paul Kienzle <pkienzle@…>, 8 years ago

use fast version of square()

  • Property mode set to 100644
File size: 9.1 KB
Line 
1// GENERATED CODE --- DO NOT EDIT ---
2// Code is produced by sasmodels.gen from sasmodels/models/MODEL.c
3
4#ifdef __OPENCL_VERSION__
5# define USE_OPENCL
6#endif
7
8#define USE_KAHAN_SUMMATION 0
9
10// If opencl is not available, then we are compiling a C function
11// Note: if using a C++ compiler, then define kernel as extern "C"
12#ifndef USE_OPENCL
13#  ifdef __cplusplus
14      #include <cstdio>
15      #include <cmath>
16      using namespace std;
17      #if defined(_MSC_VER)
18         #include <limits>
19         #include <float.h>
20         #define kernel extern "C" __declspec( dllexport )
21         inline double trunc(double x) { return x>=0?floor(x):-floor(-x); }
22         inline double fmin(double x, double y) { return x>y ? y : x; }
23         inline double fmax(double x, double y) { return x<y ? y : x; }
24         inline double isnan(double x) { return _isnan(x); }
25         #define NAN (std::numeric_limits<double>::quiet_NaN()) // non-signalling NaN
26         static double cephes_expm1(double x) {
27            // Adapted from the cephes math library.
28            // Copyright 1984 - 1992 by Stephen L. Moshier
29            if (x != x || x == 0.0) {
30               return x; // NaN and +/- 0
31            } else if (x < -0.5 || x > 0.5) {
32               return exp(x) - 1.0;
33            } else {
34               const double xsq = x*x;
35               const double p = (((
36                  +1.2617719307481059087798E-4)*xsq
37                  +3.0299440770744196129956E-2)*xsq
38                  +9.9999999999999999991025E-1);
39               const double q = ((((
40                  +3.0019850513866445504159E-6)*xsq
41                  +2.5244834034968410419224E-3)*xsq
42                  +2.2726554820815502876593E-1)*xsq
43                  +2.0000000000000000000897E0);
44               double r = x * p;
45               r =  r / (q - r);
46               return r+r;
47             }
48         }
49         #define expm1 cephes_expm1
50     #else
51         #define kernel extern "C"
52     #endif
53     inline void SINCOS(double angle, double &svar, double &cvar) { svar=sin(angle); cvar=cos(angle); }
54#  else
55     #include <stdio.h>
56     #include <tgmath.h> // C99 type-generic math, so sin(float) => sinf
57     // MSVC doesn't support C99, so no need for dllexport on C99 branch
58     #define kernel
59     #define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
60#  endif
61#  define global
62#  define local
63#  define constant const
64// OpenCL powr(a,b) = C99 pow(a,b), b >= 0
65// OpenCL pown(a,b) = C99 pow(a,b), b integer
66#  define powr(a,b) pow(a,b)
67#  define pown(a,b) pow(a,b)
68#else
69#  if defined(USE_SINCOS)
70#    define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)
71#  else
72#    define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
73#  endif
74#endif
75
76// Standard mathematical constants:
77//   M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,
78//   M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), SQRT2, SQRT1_2=sqrt(1/2)
79// OpenCL defines M_constant_F for float constants, and nothing if double
80// is not enabled on the card, which is why these constants may be missing
81#ifndef M_PI
82#  define M_PI 3.141592653589793
83#endif
84#ifndef M_PI_2
85#  define M_PI_2 1.570796326794897
86#endif
87#ifndef M_PI_4
88#  define M_PI_4 0.7853981633974483
89#endif
90#ifndef M_E
91#  define M_E 2.718281828459045091
92#endif
93
94// Non-standard function library
95// pi/180, used for converting between degrees and radians
96// 4/3 pi for computing sphere volumes
97// square and cube for computing squares and cubes
98#ifndef M_PI_180
99#  define M_PI_180 0.017453292519943295
100#endif
101#ifndef M_4PI_3
102#  define M_4PI_3 4.18879020478639
103#endif
104//inline double square(double x) { return pow(x,2.0); }
105//inline double square(double x) { return pown(x,2); }
106inline double square(x) { return x*x; }
107inline double cube(double x) { return x*x*x; }
108
109
110%(DEFINES)s
111
112%(SOURCES)s
113
114/*
115    ##########################################################
116    #                                                        #
117    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
118    #   !!                                              !!   #
119    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
120    #   !!                                              !!   #
121    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
122    #                                                        #
123    ##########################################################
124*/
125
126#ifdef IQ_KERNEL_NAME
127kernel void IQ_KERNEL_NAME(
128    global const double *q,
129    global double *result,
130    const int Nq,
131#ifdef IQ_OPEN_LOOPS
132  #ifdef USE_OPENCL
133    global double *loops_g,
134  #endif
135    local double *loops,
136    const double cutoff,
137    IQ_DISPERSION_LENGTH_DECLARATIONS,
138#endif
139    IQ_FIXED_PARAMETER_DECLARATIONS
140    )
141{
142#ifdef USE_OPENCL
143  #ifdef IQ_OPEN_LOOPS
144  // copy loops info to local memory
145  event_t e = async_work_group_copy(loops, loops_g, (IQ_DISPERSION_LENGTH_SUM)*2, 0);
146  wait_group_events(1, &e);
147  #endif
148
149  int i = get_global_id(0);
150  if (i < Nq)
151#else
152  #pragma omp parallel for
153  for (int i=0; i < Nq; i++)
154#endif
155  {
156    const double qi = q[i];
157#ifdef IQ_OPEN_LOOPS
158    double ret=0.0, norm=0.0;
159  #ifdef VOLUME_PARAMETERS
160    double vol=0.0, norm_vol=0.0;
161  #endif
162    IQ_OPEN_LOOPS
163    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
164    //  const double radius = loops[2*(radius_i)];
165    //  const double radius_w = loops[2*(radius_i)+1];
166
167    const double weight = IQ_WEIGHT_PRODUCT;
168    if (weight > cutoff) {
169      const double scattering = Iq(qi, IQ_PARAMETERS);
170      // allow kernels to exclude invalid regions by returning NaN
171      if (!isnan(scattering)) {
172        ret += weight*scattering;
173        norm += weight;
174      #ifdef VOLUME_PARAMETERS
175        const double vol_weight = VOLUME_WEIGHT_PRODUCT;
176        vol += vol_weight*form_volume(VOLUME_PARAMETERS);
177        norm_vol += vol_weight;
178      #endif
179      }
180    //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
181    }
182    IQ_CLOSE_LOOPS
183  #ifdef VOLUME_PARAMETERS
184    if (vol*norm_vol != 0.0) {
185      ret *= norm_vol/vol;
186    }
187  #endif
188    result[i] = scale*ret/norm+background;
189#else
190    result[i] = scale*Iq(qi, IQ_PARAMETERS) + background;
191#endif
192  }
193}
194#endif
195
196
197#ifdef IQXY_KERNEL_NAME
198kernel void IQXY_KERNEL_NAME(
199    global const double *qx,
200    global const double *qy,
201    global double *result,
202    const int Nq,
203#ifdef IQXY_OPEN_LOOPS
204  #ifdef USE_OPENCL
205    global double *loops_g,
206  #endif
207    local double *loops,
208    const double cutoff,
209    IQXY_DISPERSION_LENGTH_DECLARATIONS,
210#endif
211    IQXY_FIXED_PARAMETER_DECLARATIONS
212    )
213{
214#ifdef USE_OPENCL
215  #ifdef IQXY_OPEN_LOOPS
216  // copy loops info to local memory
217  event_t e = async_work_group_copy(loops, loops_g, (IQXY_DISPERSION_LENGTH_SUM)*2, 0);
218  wait_group_events(1, &e);
219  #endif
220
221  int i = get_global_id(0);
222  if (i < Nq)
223#else
224  #pragma omp parallel for
225  for (int i=0; i < Nq; i++)
226#endif
227  {
228    const double qxi = qx[i];
229    const double qyi = qy[i];
230    #if USE_KAHAN_SUMMATION
231    double accumulated_error = 0.0;
232    #endif
233#ifdef IQXY_OPEN_LOOPS
234    double ret=0.0, norm=0.0;
235    #ifdef VOLUME_PARAMETERS
236    double vol=0.0, norm_vol=0.0;
237    #endif
238    IQXY_OPEN_LOOPS
239    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
240    //  const double radius = loops[2*(radius_i)];
241    //  const double radius_w = loops[2*(radius_i)+1];
242
243    const double weight = IQXY_WEIGHT_PRODUCT;
244    if (weight > cutoff) {
245
246      const double scattering = Iqxy(qxi, qyi, IQXY_PARAMETERS);
247      if (!isnan(scattering)) { // if scattering is bad, exclude it from sum
248      //if (scattering >= 0.0) { // scattering cannot be negative
249        // TODO: use correct angle for spherical correction
250        // Definition of theta and phi are probably reversed relative to the
251        // equation which gave rise to this correction, leading to an
252        // attenuation of the pattern as theta moves through pi/2.  Either
253        // reverse the meanings of phi and theta in the forms, or use phi
254        // rather than theta in this correction.  Current code uses cos(theta)
255        // so that values match those of sasview.
256      #if defined(IQXY_HAS_THETA) // && 0
257        const double spherical_correction
258          = (Ntheta>1 ? fabs(cos(M_PI_180*theta))*M_PI_2:1.0);
259        const double next = spherical_correction * weight * scattering;
260      #else
261        const double next = weight * scattering;
262      #endif
263      #if USE_KAHAN_SUMMATION
264        const double y = next - accumulated_error;
265        const double t = ret + y;
266        accumulated_error = (t - ret) - y;
267        ret = t;
268      #else
269        ret += next;
270      #endif
271        norm += weight;
272      #ifdef VOLUME_PARAMETERS
273        const double vol_weight = VOLUME_WEIGHT_PRODUCT;
274        vol += vol_weight*form_volume(VOLUME_PARAMETERS);
275        norm_vol += vol_weight;
276      #endif
277      }
278      //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
279    }
280    IQXY_CLOSE_LOOPS
281  #ifdef VOLUME_PARAMETERS
282    if (vol*norm_vol != 0.0) {
283      ret *= norm_vol/vol;
284    }
285  #endif
286    result[i] = scale*ret/norm+background;
287#else
288    result[i] = scale*Iqxy(qxi, qyi, IQXY_PARAMETERS) + background;
289#endif
290  }
291}
292#endif
Note: See TracBrowser for help on using the repository browser.