source: sasmodels/sasmodels/kernel_template.c @ e3a9733

core_shell_microgelscostrafo411magnetic_modelrelease_v0.94release_v0.95ticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since e3a9733 was e3a9733, checked in by Paul Kienzle <pkienzle@…>, 8 years ago

support kahan summation to avoid roundoff error, but don't turn it on

  • Property mode set to 100644
File size: 7.4 KB
Line 
1// GENERATED CODE --- DO NOT EDIT ---
2// Code is produced by sasmodels.gen from sasmodels/models/MODEL.c
3
4#ifdef __OPENCL_VERSION__
5# define USE_OPENCL
6#endif
7
8#define USE_KAHAN_SUMMATION 0
9
10// If opencl is not available, then we are compiling a C function
11// Note: if using a C++ compiler, then define kernel as extern "C"
12#ifndef USE_OPENCL
13#  ifdef __cplusplus
14     #include <cstdio>
15     #include <cmath>
16     using namespace std;
17     #if defined(_MSC_VER)
18     #   define kernel extern "C" __declspec( dllexport )
19         inline double trunc(double x) { return x>=0?floor(x):-floor(-x); }
20         inline double fmin(double x, double y) { return x>y ? y : x; }
21         inline double fmax(double x, double y) { return x<y ? y : x; }
22     #else
23     #   define kernel extern "C"
24     #endif
25     inline void SINCOS(double angle, double &svar, double &cvar) { svar=sin(angle); cvar=cos(angle); }
26#  else
27     #include <stdio.h>
28     #include <tgmath.h> // C99 type-generic math, so sin(float) => sinf
29     // MSVC doesn't support C99, so no need for dllexport on C99 branch
30     #define kernel
31     #define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
32#  endif
33#  define global
34#  define local
35#  define constant const
36// OpenCL powr(a,b) = C99 pow(a,b), b >= 0
37// OpenCL pown(a,b) = C99 pow(a,b), b integer
38#  define powr(a,b) pow(a,b)
39#  define pown(a,b) pow(a,b)
40#else
41#  ifdef USE_SINCOS
42#    define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)
43#  else
44#    define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
45#  endif
46#endif
47
48// Standard mathematical constants:
49//   M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,
50//   M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), SQRT2, SQRT1_2=sqrt(1/2)
51// OpenCL defines M_constant_F for float constants, and nothing if double
52// is not enabled on the card, which is why these constants may be missing
53#ifndef M_PI
54#  define M_PI 3.141592653589793
55#endif
56#ifndef M_PI_2
57#  define M_PI_2 1.570796326794897
58#endif
59#ifndef M_PI_4
60#  define M_PI_4 0.7853981633974483
61#endif
62
63// Non-standard pi/180, used for converting between degrees and radians
64#ifndef M_PI_180
65#  define M_PI_180 0.017453292519943295
66#endif
67
68
69%(DEFINES)s
70
71%(SOURCES)s
72
73/*
74    ##########################################################
75    #                                                        #
76    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
77    #   !!                                              !!   #
78    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
79    #   !!                                              !!   #
80    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
81    #                                                        #
82    ##########################################################
83*/
84
85#ifdef IQ_KERNEL_NAME
86kernel void IQ_KERNEL_NAME(
87    global const double *q,
88    global double *result,
89    const int Nq,
90#ifdef IQ_OPEN_LOOPS
91  #ifdef USE_OPENCL
92    global double *loops_g,
93  #endif
94    local double *loops,
95    const double cutoff,
96    IQ_DISPERSION_LENGTH_DECLARATIONS,
97#endif
98    IQ_FIXED_PARAMETER_DECLARATIONS
99    )
100{
101#ifdef USE_OPENCL
102  #ifdef IQ_OPEN_LOOPS
103  // copy loops info to local memory
104  event_t e = async_work_group_copy(loops, loops_g, (IQ_DISPERSION_LENGTH_SUM)*2, 0);
105  wait_group_events(1, &e);
106  #endif
107
108  int i = get_global_id(0);
109  if (i < Nq)
110#else
111  #pragma omp parallel for
112  for (int i=0; i < Nq; i++)
113#endif
114  {
115    const double qi = q[i];
116#ifdef IQ_OPEN_LOOPS
117    double ret=0.0, norm=0.0;
118  #ifdef VOLUME_PARAMETERS
119    double vol=0.0, norm_vol=0.0;
120  #endif
121    IQ_OPEN_LOOPS
122    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
123    //  const double radius = loops[2*(radius_i)];
124    //  const double radius_w = loops[2*(radius_i)+1];
125
126    const double weight = IQ_WEIGHT_PRODUCT;
127    if (weight > cutoff) {
128      const double scattering = Iq(qi, IQ_PARAMETERS);
129      // allow kernels to exclude invalid regions by returning NaN
130      if (!isnan(scattering)) {
131        ret += weight*scattering;
132        norm += weight;
133      #ifdef VOLUME_PARAMETERS
134        const double vol_weight = VOLUME_WEIGHT_PRODUCT;
135        vol += vol_weight*form_volume(VOLUME_PARAMETERS);
136        norm_vol += vol_weight;
137      #endif
138      }
139    //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
140    }
141    IQ_CLOSE_LOOPS
142  #ifdef VOLUME_PARAMETERS
143    if (vol*norm_vol != 0.0) {
144      ret *= norm_vol/vol;
145    }
146  #endif
147    result[i] = scale*ret/norm+background;
148#else
149    result[i] = scale*Iq(qi, IQ_PARAMETERS) + background;
150#endif
151  }
152}
153#endif
154
155
156#ifdef IQXY_KERNEL_NAME
157kernel void IQXY_KERNEL_NAME(
158    global const double *qx,
159    global const double *qy,
160    global double *result,
161    const int Nq,
162#ifdef IQXY_OPEN_LOOPS
163  #ifdef USE_OPENCL
164    global double *loops_g,
165  #endif
166    local double *loops,
167    const double cutoff,
168    IQXY_DISPERSION_LENGTH_DECLARATIONS,
169#endif
170    IQXY_FIXED_PARAMETER_DECLARATIONS
171    )
172{
173#ifdef USE_OPENCL
174  #ifdef IQXY_OPEN_LOOPS
175  // copy loops info to local memory
176  event_t e = async_work_group_copy(loops, loops_g, (IQXY_DISPERSION_LENGTH_SUM)*2, 0);
177  wait_group_events(1, &e);
178  #endif
179
180  int i = get_global_id(0);
181  if (i < Nq)
182#else
183  #pragma omp parallel for
184  for (int i=0; i < Nq; i++)
185#endif
186  {
187    const double qxi = qx[i];
188    const double qyi = qy[i];
189    #if USE_KAHAN_SUMMATION
190    double accumulated_error = 0.0;
191    #endif
192#ifdef IQXY_OPEN_LOOPS
193    double ret=0.0, norm=0.0;
194    #ifdef VOLUME_PARAMETERS
195    double vol=0.0, norm_vol=0.0;
196    #endif
197    IQXY_OPEN_LOOPS
198    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
199    //  const double radius = loops[2*(radius_i)];
200    //  const double radius_w = loops[2*(radius_i)+1];
201
202    const double weight = IQXY_WEIGHT_PRODUCT;
203    if (weight > cutoff) {
204
205      const double scattering = Iqxy(qxi, qyi, IQXY_PARAMETERS);
206      //if (scattering >= 0.0) { // scattering cannot be negative
207        // TODO: use correct angle for spherical correction
208        // Definition of theta and phi are probably reversed relative to the
209        // equation which gave rise to this correction, leading to an
210        // attenuation of the pattern as theta moves through pi/2.  Either
211        // reverse the meanings of phi and theta in the forms, or use phi
212        // rather than theta in this correction.  Current code uses cos(theta)
213        // so that values match those of sasview.
214      #if defined(IQXY_HAS_THETA) // && 0
215        const double spherical_correction
216          = (Ntheta>1 ? fabs(cos(M_PI_180*phi))*M_PI_2:1.0);
217        const double next = spherical_correction * weight * scattering;
218      #else
219        const double next = weight * scattering;
220      #endif
221      #if USE_KAHAN_SUMMATION
222        const double y = next - accumulated_error;
223        const double t = ret + y;
224        accumulated_error = (t - ret) - y;
225        ret = t;
226      #else
227        ret += next;
228      #endif
229        norm += weight;
230      #ifdef VOLUME_PARAMETERS
231        const double vol_weight = VOLUME_WEIGHT_PRODUCT;
232        vol += vol_weight*form_volume(VOLUME_PARAMETERS);
233      #endif
234        norm_vol += vol_weight;
235      //}
236      //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
237    }
238    IQXY_CLOSE_LOOPS
239  #ifdef VOLUME_PARAMETERS
240    if (vol*norm_vol != 0.0) {
241      ret *= norm_vol/vol;
242    }
243  #endif
244    result[i] = scale*ret/norm+background;
245#else
246    result[i] = scale*Iqxy(qxi, qyi, IQXY_PARAMETERS) + background;
247#endif
248  }
249}
250#endif
Note: See TracBrowser for help on using the repository browser.