source: sasmodels/sasmodels/kernel_template.c @ 6ee9d39

core_shell_microgelscostrafo411magnetic_modelrelease_v0.94release_v0.95ticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since 6ee9d39 was 6ee9d39, checked in by Paul Kienzle <pkienzle@…>, 10 years ago

windows compiler doesn't have fmin/fmax

  • Property mode set to 100644
File size: 7.0 KB
Line 
1// GENERATED CODE --- DO NOT EDIT ---
2// Code is produced by sasmodels.gen from sasmodels/models/MODEL.c
3
4#ifdef __OPENCL_VERSION__
5# define USE_OPENCL
6#endif
7
8// If opencl is not available, then we are compiling a C function
9// Note: if using a C++ compiler, then define kernel as extern "C"
10#ifndef USE_OPENCL
11#  ifdef __cplusplus
12     #include <cstdio>
13     #include <cmath>
14     using namespace std;
15     #if defined(_MSC_VER)
16     #   define kernel extern "C" __declspec( dllexport )
17         inline double trunc(double x) { return x>=0?floor(x):-floor(-x); }
18         inline double fmin(double x, double y) { return x>y ? y : x; }
19         inline double fmax(double x, double y) { return x<y ? y : x; }
20     #else
21     #   define kernel extern "C"
22     #endif
23     inline void SINCOS(double angle, double &svar, double &cvar) { svar=sin(angle); cvar=cos(angle); }
24#  else
25     #include <stdio.h>
26     #include <tgmath.h> // C99 type-generic math, so sin(float) => sinf
27     // MSVC doesn't support C99, so no need for dllexport on C99 branch
28     #define kernel
29     #define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
30#  endif
31#  define global
32#  define local
33#  define constant const
34// OpenCL powr(a,b) = C99 pow(a,b), b >= 0
35// OpenCL pown(a,b) = C99 pow(a,b), b integer
36#  define powr(a,b) pow(a,b)
37#  define pown(a,b) pow(a,b)
38#else
39#  ifdef USE_SINCOS
40#    define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)
41#  else
42#    define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
43#  endif
44#endif
45
46// Standard mathematical constants:
47//   M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,
48//   M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), SQRT2, SQRT1_2=sqrt(1/2)
49// OpenCL defines M_constant_F for float constants, and nothing if double
50// is not enabled on the card, which is why these constants may be missing
51#ifndef M_PI
52#  define M_PI 3.141592653589793
53#endif
54#ifndef M_PI_2
55#  define M_PI_2 1.570796326794897
56#endif
57#ifndef M_PI_4
58#  define M_PI_4 0.7853981633974483
59#endif
60
61// Non-standard pi/180, used for converting between degrees and radians
62#ifndef M_PI_180
63#  define M_PI_180 0.017453292519943295
64#endif
65
66
67%(DEFINES)s
68
69%(SOURCES)s
70
71/*
72    ##########################################################
73    #                                                        #
74    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
75    #   !!                                              !!   #
76    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
77    #   !!                                              !!   #
78    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
79    #                                                        #
80    ##########################################################
81*/
82
83#ifdef IQ_KERNEL_NAME
84kernel void IQ_KERNEL_NAME(
85    global const double *q,
86    global double *result,
87    const int Nq,
88#ifdef IQ_OPEN_LOOPS
89  #ifdef USE_OPENCL
90    global double *loops_g,
91  #endif
92    local double *loops,
93    const double cutoff,
94    IQ_DISPERSION_LENGTH_DECLARATIONS,
95#endif
96    IQ_FIXED_PARAMETER_DECLARATIONS
97    )
98{
99#ifdef USE_OPENCL
100  #ifdef IQ_OPEN_LOOPS
101  // copy loops info to local memory
102  event_t e = async_work_group_copy(loops, loops_g, (IQ_DISPERSION_LENGTH_SUM)*2, 0);
103  wait_group_events(1, &e);
104  #endif
105
106  int i = get_global_id(0);
107  if (i < Nq)
108#else
109  #pragma omp parallel for
110  for (int i=0; i < Nq; i++)
111#endif
112  {
113    const double qi = q[i];
114#ifdef IQ_OPEN_LOOPS
115    double ret=0.0, norm=0.0;
116  #ifdef VOLUME_PARAMETERS
117    double vol=0.0, norm_vol=0.0;
118  #endif
119    IQ_OPEN_LOOPS
120    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
121    //  const double radius = loops[2*(radius_i)];
122    //  const double radius_w = loops[2*(radius_i)+1];
123
124    const double weight = IQ_WEIGHT_PRODUCT;
125    if (weight > cutoff) {
126      const double scattering = Iq(qi, IQ_PARAMETERS);
127      if (scattering >= 0.0) { // scattering cannot be negative
128        ret += weight*scattering;
129        norm += weight;
130      #ifdef VOLUME_PARAMETERS
131        const double vol_weight = VOLUME_WEIGHT_PRODUCT;
132        vol += vol_weight*form_volume(VOLUME_PARAMETERS);
133        norm_vol += vol_weight;
134      #endif
135      }
136    //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
137    }
138    IQ_CLOSE_LOOPS
139  #ifdef VOLUME_PARAMETERS
140    if (vol*norm_vol != 0.0) {
141      ret *= norm_vol/vol;
142    }
143  #endif
144    result[i] = scale*ret/norm+background;
145#else
146    result[i] = scale*Iq(qi, IQ_PARAMETERS) + background;
147#endif
148  }
149}
150#endif
151
152
153#ifdef IQXY_KERNEL_NAME
154kernel void IQXY_KERNEL_NAME(
155    global const double *qx,
156    global const double *qy,
157    global double *result,
158    const int Nq,
159#ifdef IQXY_OPEN_LOOPS
160  #ifdef USE_OPENCL
161    global double *loops_g,
162  #endif
163    local double *loops,
164    const double cutoff,
165    IQXY_DISPERSION_LENGTH_DECLARATIONS,
166#endif
167    IQXY_FIXED_PARAMETER_DECLARATIONS
168    )
169{
170#ifdef USE_OPENCL
171  #ifdef IQXY_OPEN_LOOPS
172  // copy loops info to local memory
173  event_t e = async_work_group_copy(loops, loops_g, (IQXY_DISPERSION_LENGTH_SUM)*2, 0);
174  wait_group_events(1, &e);
175  #endif
176
177  int i = get_global_id(0);
178  if (i < Nq)
179#else
180  #pragma omp parallel for
181  for (int i=0; i < Nq; i++)
182#endif
183  {
184    const double qxi = qx[i];
185    const double qyi = qy[i];
186#ifdef IQXY_OPEN_LOOPS
187    double ret=0.0, norm=0.0;
188    #ifdef VOLUME_PARAMETERS
189    double vol=0.0, norm_vol=0.0;
190    #endif
191    IQXY_OPEN_LOOPS
192    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
193    //  const double radius = loops[2*(radius_i)];
194    //  const double radius_w = loops[2*(radius_i)+1];
195
196    const double weight = IQXY_WEIGHT_PRODUCT;
197    if (weight > cutoff) {
198
199      const double scattering = Iqxy(qxi, qyi, IQXY_PARAMETERS);
200      if (scattering >= 0.0) { // scattering cannot be negative
201        // TODO: use correct angle for spherical correction
202        // Definition of theta and phi are probably reversed relative to the
203        // equation which gave rise to this correction, leading to an
204        // attenuation of the pattern as theta moves through pi/2.  Either
205        // reverse the meanings of phi and theta in the forms, or use phi
206        // rather than theta in this correction.  Current code uses cos(theta)
207        // so that values match those of sasview.
208      #ifdef IQXY_HAS_THETA
209        const double spherical_correction
210          = (Ntheta>1 ? fabs(cos(M_PI_180*theta))*M_PI_2:1.0);
211        ret += spherical_correction * weight * scattering;
212      #else
213        ret += weight * scattering;
214      #endif
215        norm += weight;
216      #ifdef VOLUME_PARAMETERS
217        const double vol_weight = VOLUME_WEIGHT_PRODUCT;
218        vol += vol_weight*form_volume(VOLUME_PARAMETERS);
219      #endif
220        norm_vol += vol_weight;
221      }
222      //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
223    }
224    IQXY_CLOSE_LOOPS
225  #ifdef VOLUME_PARAMETERS
226    if (vol*norm_vol != 0.0) {
227      ret *= norm_vol/vol;
228    }
229  #endif
230    result[i] = scale*ret/norm+background;
231#else
232    result[i] = scale*Iqxy(qxi, qyi, IQXY_PARAMETERS) + background;
233#endif
234  }
235}
236#endif
Note: See TracBrowser for help on using the repository browser.