source: sasmodels/sasmodels/kernel_template.c @ 750ffa5

core_shell_microgelscostrafo411magnetic_modelrelease_v0.94release_v0.95ticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since 750ffa5 was 750ffa5, checked in by Paul Kienzle <pkienzle@…>, 9 years ago

allow test of dll using single precision

  • Property mode set to 100644
File size: 6.9 KB
Line 
1// GENERATED CODE --- DO NOT EDIT ---
2// Code is produced by sasmodels.gen from sasmodels/models/MODEL.c
3
4#ifdef __OPENCL_VERSION__
5# define USE_OPENCL
6#endif
7
8// If opencl is not available, then we are compiling a C function
9// Note: if using a C++ compiler, then define kernel as extern "C"
10#ifndef USE_OPENCL
11#  ifdef __cplusplus
12     #include <cstdio>
13     #include <cmath>
14     using namespace std;
15     #if defined(_MSC_VER)
16     #   define kernel extern "C" __declspec( dllexport )
17         inline double trunc(double x) { return x>=0?floor(x):-floor(-x); }
18     #else
19     #   define kernel extern "C"
20     #endif
21     inline void SINCOS(double angle, double &svar, double &cvar) { svar=sin(angle); cvar=cos(angle); }
22#  else
23     #include <stdio.h>
24     #include <tgmath.h> // C99 type-generic math, so sin(float) => sinf
25     // MSVC doesn't support C99, so no need for dllexport on C99 branch
26     #define kernel
27     #define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
28#  endif
29#  define global
30#  define local
31#  define constant const
32// OpenCL powr(a,b) = C99 pow(a,b), b >= 0
33// OpenCL pown(a,b) = C99 pow(a,b), b integer
34#  define powr(a,b) pow(a,b)
35#  define pown(a,b) pow(a,b)
36#else
37#  ifdef USE_SINCOS
38#    define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)
39#  else
40#    define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
41#  endif
42#endif
43
44// Standard mathematical constants:
45//   M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,
46//   M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), SQRT2, SQRT1_2=sqrt(1/2)
47// OpenCL defines M_constant_F for float constants, and nothing if double
48// is not enabled on the card, which is why these constants may be missing
49#ifndef M_PI
50#  define M_PI 3.141592653589793
51#endif
52#ifndef M_PI_2
53#  define M_PI_2 1.570796326794897
54#endif
55#ifndef M_PI_4
56#  define M_PI_4 0.7853981633974483
57#endif
58
59// Non-standard pi/180, used for converting between degrees and radians
60#ifndef M_PI_180
61#  define M_PI_180 0.017453292519943295
62#endif
63
64
65%(DEFINES)s
66
67%(SOURCES)s
68
69/*
70    ##########################################################
71    #                                                        #
72    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
73    #   !!                                              !!   #
74    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
75    #   !!                                              !!   #
76    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
77    #                                                        #
78    ##########################################################
79*/
80
81#ifdef IQ_KERNEL_NAME
82kernel void IQ_KERNEL_NAME(
83    global const double *q,
84    global double *result,
85    const int Nq,
86#ifdef IQ_OPEN_LOOPS
87  #ifdef USE_OPENCL
88    global double *loops_g,
89  #endif
90    local double *loops,
91    const double cutoff,
92    IQ_DISPERSION_LENGTH_DECLARATIONS,
93#endif
94    IQ_FIXED_PARAMETER_DECLARATIONS
95    )
96{
97#ifdef USE_OPENCL
98  #ifdef IQ_OPEN_LOOPS
99  // copy loops info to local memory
100  event_t e = async_work_group_copy(loops, loops_g, (IQ_DISPERSION_LENGTH_SUM)*2, 0);
101  wait_group_events(1, &e);
102  #endif
103
104  int i = get_global_id(0);
105  if (i < Nq)
106#else
107  #pragma omp parallel for
108  for (int i=0; i < Nq; i++)
109#endif
110  {
111    const double qi = q[i];
112#ifdef IQ_OPEN_LOOPS
113    double ret=0.0, norm=0.0;
114  #ifdef VOLUME_PARAMETERS
115    double vol=0.0, norm_vol=0.0;
116  #endif
117    IQ_OPEN_LOOPS
118    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
119    //  const double radius = loops[2*(radius_i)];
120    //  const double radius_w = loops[2*(radius_i)+1];
121
122    const double weight = IQ_WEIGHT_PRODUCT;
123    if (weight > cutoff) {
124      const double scattering = Iq(qi, IQ_PARAMETERS);
125      if (scattering >= 0.0) { // scattering cannot be negative
126        ret += weight*scattering;
127        norm += weight;
128      #ifdef VOLUME_PARAMETERS
129        const double vol_weight = VOLUME_WEIGHT_PRODUCT;
130        vol += vol_weight*form_volume(VOLUME_PARAMETERS);
131        norm_vol += vol_weight;
132      #endif
133      }
134    //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
135    }
136    IQ_CLOSE_LOOPS
137  #ifdef VOLUME_PARAMETERS
138    if (vol*norm_vol != 0.0) {
139      ret *= norm_vol/vol;
140    }
141  #endif
142    result[i] = scale*ret/norm+background;
143#else
144    result[i] = scale*Iq(qi, IQ_PARAMETERS) + background;
145#endif
146  }
147}
148#endif
149
150
151#ifdef IQXY_KERNEL_NAME
152kernel void IQXY_KERNEL_NAME(
153    global const double *qx,
154    global const double *qy,
155    global double *result,
156    const int Nq,
157#ifdef IQXY_OPEN_LOOPS
158  #ifdef USE_OPENCL
159    global double *loops_g,
160  #endif
161    local double *loops,
162    const double cutoff,
163    IQXY_DISPERSION_LENGTH_DECLARATIONS,
164#endif
165    IQXY_FIXED_PARAMETER_DECLARATIONS
166    )
167{
168#ifdef USE_OPENCL
169  #ifdef IQXY_OPEN_LOOPS
170  // copy loops info to local memory
171  event_t e = async_work_group_copy(loops, loops_g, (IQXY_DISPERSION_LENGTH_SUM)*2, 0);
172  wait_group_events(1, &e);
173  #endif
174
175  int i = get_global_id(0);
176  if (i < Nq)
177#else
178  #pragma omp parallel for
179  for (int i=0; i < Nq; i++)
180#endif
181  {
182    const double qxi = qx[i];
183    const double qyi = qy[i];
184#ifdef IQXY_OPEN_LOOPS
185    double ret=0.0, norm=0.0;
186    #ifdef VOLUME_PARAMETERS
187    double vol=0.0, norm_vol=0.0;
188    #endif
189    IQXY_OPEN_LOOPS
190    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
191    //  const double radius = loops[2*(radius_i)];
192    //  const double radius_w = loops[2*(radius_i)+1];
193
194    const double weight = IQXY_WEIGHT_PRODUCT;
195    if (weight > cutoff) {
196
197      const double scattering = Iqxy(qxi, qyi, IQXY_PARAMETERS);
198      if (scattering >= 0.0) { // scattering cannot be negative
199        // TODO: use correct angle for spherical correction
200        // Definition of theta and phi are probably reversed relative to the
201        // equation which gave rise to this correction, leading to an
202        // attenuation of the pattern as theta moves through pi/2.  Either
203        // reverse the meanings of phi and theta in the forms, or use phi
204        // rather than theta in this correction.  Current code uses cos(theta)
205        // so that values match those of sasview.
206      #ifdef IQXY_HAS_THETA
207        const double spherical_correction
208          = (Ntheta>1 ? fabs(cos(M_PI_180*theta))*M_PI_2:1.0);
209        ret += spherical_correction * weight * scattering;
210      #else
211        ret += weight * scattering;
212      #endif
213        norm += weight;
214      #ifdef VOLUME_PARAMETERS
215        const double vol_weight = VOLUME_WEIGHT_PRODUCT;
216        vol += vol_weight*form_volume(VOLUME_PARAMETERS);
217      #endif
218        norm_vol += vol_weight;
219      }
220      //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
221    }
222    IQXY_CLOSE_LOOPS
223  #ifdef VOLUME_PARAMETERS
224    if (vol*norm_vol != 0.0) {
225      ret *= norm_vol/vol;
226    }
227  #endif
228    result[i] = scale*ret/norm+background;
229#else
230    result[i] = scale*Iqxy(qxi, qyi, IQXY_PARAMETERS) + background;
231#endif
232  }
233}
234#endif
Note: See TracBrowser for help on using the repository browser.