source: sasmodels/sasmodels/kernel_template.c

Last change on this file was 1e7b0db0, checked in by wojciech, 8 years ago

sinc tranfered to sas_sinx_x

  • Property mode set to 100644
File size: 9.4 KB
Line 
1#line 1 "kernel_template.c"
2// GENERATED CODE --- DO NOT EDIT ---
3// Code is produced by sasmodels.gen from sasmodels/models/MODEL.c
4
5#ifdef __OPENCL_VERSION__
6# define USE_OPENCL
7#endif
8
9#define USE_KAHAN_SUMMATION 0
10
11// If opencl is not available, then we are compiling a C function
12// Note: if using a C++ compiler, then define kernel as extern "C"
13#ifndef USE_OPENCL
14// Use SAS_DOUBLE to force the use of double even for float kernels
15#  define SAS_DOUBLE dou ## ble
16#  ifdef __cplusplus
17      #include <cstdio>
18      #include <cmath>
19      using namespace std;
20      #if defined(_MSC_VER)
21         #include <limits>
22         #include <float.h>
23         #define kernel extern "C" __declspec( dllexport )
24         inline double trunc(double x) { return x>=0?floor(x):-floor(-x); }
25         inline double fmin(double x, double y) { return x>y ? y : x; }
26         inline double fmax(double x, double y) { return x<y ? y : x; }
27         #define isnan(x) _isnan(x)
28         #define isinf(x) (!_finite(x))
29         #define isfinite(x) _finite(x)
30         #define NAN (std::numeric_limits<double>::quiet_NaN()) // non-signalling NaN
31         #define INFINITY (std::numeric_limits<double>::infinity())
32         #define NEED_EXPM1
33         #define NEED_TGAMMA
34         #define NEED_ERF
35     #else
36         #define kernel extern "C"
37     #endif
38     inline void SINCOS(double angle, double &svar, double &cvar) { svar=sin(angle); cvar=cos(angle); }
39#  else
40     #include <stdio.h>
41     #if defined(__TINYC__)
42         #include <math.h>
43         // TODO: test isnan
44         inline double _isnan(double x) { return x != x; } // hope this doesn't optimize away!
45         #undef isnan
46         #define isnan(x) _isnan(x)
47         // Defeat the double->float conversion since we don't have tgmath
48         inline SAS_DOUBLE trunc(SAS_DOUBLE x) { return x>=0?floor(x):-floor(-x); }
49         inline SAS_DOUBLE fmin(SAS_DOUBLE x, SAS_DOUBLE y) { return x>y ? y : x; }
50         inline SAS_DOUBLE fmax(SAS_DOUBLE x, SAS_DOUBLE y) { return x<y ? y : x; }
51         #define NEED_EXPM1
52         #define NEED_TGAMMA
53         #define NEED_ERF
54     #else
55         #include <tgmath.h> // C99 type-generic math, so sin(float) => sinf
56     #endif
57     // MSVC doesn't support C99, so no need for dllexport on C99 branch
58     #define kernel
59     #define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
60#  endif
61#  define global
62#  define local
63#  define constant const
64// OpenCL powr(a,b) = C99 pow(a,b), b >= 0
65// OpenCL pown(a,b) = C99 pow(a,b), b integer
66#  define powr(a,b) pow(a,b)
67#  define pown(a,b) pow(a,b)
68#else
69#  if defined(USE_SINCOS)
70#    define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)
71#  else
72#    define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
73#  endif
74#endif
75
76#if defined(NEED_EXPM1)
77   static SAS_DOUBLE expm1(SAS_DOUBLE x_in) {
78      double x = (double)x_in;  // go back to float for single precision kernels
79      // Adapted from the cephes math library.
80      // Copyright 1984 - 1992 by Stephen L. Moshier
81      if (x != x || x == 0.0) {
82         return x; // NaN and +/- 0
83      } else if (x < -0.5 || x > 0.5) {
84         return exp(x) - 1.0;
85      } else {
86         const double xsq = x*x;
87         const double p = (((
88            +1.2617719307481059087798E-4)*xsq
89            +3.0299440770744196129956E-2)*xsq
90            +9.9999999999999999991025E-1);
91         const double q = ((((
92            +3.0019850513866445504159E-6)*xsq
93            +2.5244834034968410419224E-3)*xsq
94            +2.2726554820815502876593E-1)*xsq
95            +2.0000000000000000000897E0);
96         double r = x * p;
97         r =  r / (q - r);
98         return r+r;
99       }
100   }
101#endif
102
103// Standard mathematical constants:
104//   M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,
105//   M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), SQRT2, SQRT1_2=sqrt(1/2)
106// OpenCL defines M_constant_F for float constants, and nothing if double
107// is not enabled on the card, which is why these constants may be missing
108#ifndef M_PI
109#  define M_PI 3.141592653589793
110#endif
111#ifndef M_PI_2
112#  define M_PI_2 1.570796326794897
113#endif
114#ifndef M_PI_4
115#  define M_PI_4 0.7853981633974483
116#endif
117#ifndef M_E
118#  define M_E 2.718281828459045091
119#endif
120
121// Non-standard function library
122// pi/180, used for converting between degrees and radians
123// 4/3 pi for computing sphere volumes
124// square and cube for computing squares and cubes
125#ifndef M_PI_180
126#  define M_PI_180 0.017453292519943295
127#endif
128#ifndef M_4PI_3
129#  define M_4PI_3 4.18879020478639
130#endif
131//inline double square(double x) { return pow(x,2.0); }
132//inline double square(double x) { return pown(x,2); }
133inline double square(double x) { return x*x; }
134inline double cube(double x) { return x*x*x; }
135inline double sas_sinx_x(double x) { return x==0 ? 1.0 : sin(x)/x; }
136
137
138%(DEFINES)s
139
140%(SOURCES)s
141
142/*
143    ##########################################################
144    #                                                        #
145    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
146    #   !!                                              !!   #
147    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
148    #   !!                                              !!   #
149    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
150    #                                                        #
151    ##########################################################
152*/
153
154#ifdef IQ_KERNEL_NAME
155kernel void IQ_KERNEL_NAME(
156    global const double *q,
157    global double *result,
158    const int Nq,
159#ifdef IQ_OPEN_LOOPS
160  #ifdef USE_OPENCL
161    global double *loops_g,
162  #endif
163    local double *loops,
164    const double cutoff,
165    IQ_DISPERSION_LENGTH_DECLARATIONS,
166#endif
167    IQ_FIXED_PARAMETER_DECLARATIONS
168    )
169{
170#ifdef USE_OPENCL
171  #ifdef IQ_OPEN_LOOPS
172  // copy loops info to local memory
173  event_t e = async_work_group_copy(loops, loops_g, (IQ_DISPERSION_LENGTH_SUM)*2, 0);
174  wait_group_events(1, &e);
175  #endif
176
177  int i = get_global_id(0);
178  if (i < Nq)
179#else
180  #pragma omp parallel for
181  for (int i=0; i < Nq; i++)
182#endif
183  {
184    const double qi = q[i];
185#ifdef IQ_OPEN_LOOPS
186    double ret=0.0, norm=0.0;
187    IQ_OPEN_LOOPS
188    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
189    //  const double radius = loops[2*(radius_i)];
190    //  const double radius_w = loops[2*(radius_i)+1];
191
192    const double weight = IQ_WEIGHT_PRODUCT;
193    if (weight > cutoff) {
194      const double scattering = Iq(qi, IQ_PARAMETERS);
195      // allow kernels to exclude invalid regions by returning NaN
196      if (!isnan(scattering)) {
197        ret += weight*scattering;
198      #ifdef VOLUME_PARAMETERS
199        norm += weight * form_volume(VOLUME_PARAMETERS);
200      #else
201        norm += weight;
202      #endif
203      }
204    //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
205    }
206    IQ_CLOSE_LOOPS
207    // norm can only be zero if volume is zero, so no scattering
208    result[i] = (norm > 0. ? scale*ret/norm + background : background);
209#else
210    result[i] = scale*Iq(qi, IQ_PARAMETERS) + background;
211#endif
212  }
213}
214#endif
215
216
217#ifdef IQXY_KERNEL_NAME
218kernel void IQXY_KERNEL_NAME(
219    global const double *qx,
220    global const double *qy,
221    global double *result,
222    const int Nq,
223#ifdef IQXY_OPEN_LOOPS
224  #ifdef USE_OPENCL
225    global double *loops_g,
226  #endif
227    local double *loops,
228    const double cutoff,
229    IQXY_DISPERSION_LENGTH_DECLARATIONS,
230#endif
231    IQXY_FIXED_PARAMETER_DECLARATIONS
232    )
233{
234#ifdef USE_OPENCL
235  #ifdef IQXY_OPEN_LOOPS
236  // copy loops info to local memory
237  event_t e = async_work_group_copy(loops, loops_g, (IQXY_DISPERSION_LENGTH_SUM)*2, 0);
238  wait_group_events(1, &e);
239  #endif
240
241  int i = get_global_id(0);
242  if (i < Nq)
243#else
244  #pragma omp parallel for
245  for (int i=0; i < Nq; i++)
246#endif
247  {
248    const double qxi = qx[i];
249    const double qyi = qy[i];
250    #if USE_KAHAN_SUMMATION
251    double accumulated_error = 0.0;
252    #endif
253#ifdef IQXY_OPEN_LOOPS
254    double ret=0.0, norm=0.0;
255    IQXY_OPEN_LOOPS
256    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
257    //  const double radius = loops[2*(radius_i)];
258    //  const double radius_w = loops[2*(radius_i)+1];
259    double weight = IQXY_WEIGHT_PRODUCT;
260    if (weight > cutoff) {
261
262      const double scattering = Iqxy(qxi, qyi, IQXY_PARAMETERS);
263      if (!isnan(scattering)) { // if scattering is bad, exclude it from sum
264      #if defined(IQXY_HAS_THETA)
265        // Force a nominal value for the spherical correction even when
266        // theta is +0/180 so that there are no divide by zero problems.
267        // For sin(theta) fixed at 0 and 180, we effectively multiply top and bottom
268        // by 1e-6, so the effect cancels.
269        const double spherical_correction = fmax(fabs(cos(M_PI_180*theta)), 1.e-6);
270        weight *= spherical_correction;
271      #endif
272      const double next = weight * scattering;
273      #if USE_KAHAN_SUMMATION
274        const double y = next - accumulated_error;
275        const double t = ret + y;
276        accumulated_error = (t - ret) - y;
277        ret = t;
278      #else
279        ret += next;
280      #endif
281      #ifdef VOLUME_PARAMETERS
282        norm += weight*form_volume(VOLUME_PARAMETERS);
283      #else
284        norm += weight;
285      #endif
286      }
287      //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
288    }
289    IQXY_CLOSE_LOOPS
290    // norm can only be zero if volume is zero, so no scattering
291    result[i] = (norm>0. ? scale*ret/norm + background : background);
292#else
293    result[i] = scale*Iqxy(qxi, qyi, IQXY_PARAMETERS) + background;
294#endif
295  }
296}
297#endif
Note: See TracBrowser for help on using the repository browser.