source: sasmodels/sasmodels/kernel_template.c @ b89f519

core_shell_microgelscostrafo411magnetic_modelrelease_v0.94release_v0.95ticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since b89f519 was 95e861b, checked in by Paul Kienzle <pkienzle@…>, 9 years ago

allow printf in C kernels for debugging

  • Property mode set to 100644
File size: 6.7 KB
Line 
1// GENERATED CODE --- DO NOT EDIT ---
2// Code is produced by sasmodels.gen from sasmodels/models/MODEL.c
3
4#ifdef __OPENCL_VERSION__
5# define USE_OPENCL
6#endif
7
8// If opencl is not available, then we are compiling a C function
9// Note: if using a C++ compiler, then define kernel as extern "C"
10#ifndef USE_OPENCL
11#  ifdef __cplusplus
12     #include <cstdio>
13     #include <cmath>
14     #if defined(_MSC_VER)
15     #define kernel extern "C" __declspec( dllexport )
16     #else
17     #define kernel extern "C"
18     #endif
19     using namespace std;
20     inline void SINCOS(double angle, double &svar, double &cvar)
21       { svar=sin(angle); cvar=cos(angle); }
22#  else
23     #include <stdio.h>
24     #include <math.h>
25     #if defined(_MSC_VER)
26     #define kernel __declspec( dllexport )
27     #else
28     #define kernel
29     #endif
30     #define SINCOS(angle,svar,cvar) do {svar=sin(angle);cvar=cos(angle);} while (0)
31#  endif
32     #if defined(_MSC_VER)
33     inline double trunc(double x) { return x>=0?floor(x):-floor(-x); }
34     #endif
35#  define global
36#  define local
37#  define constant const
38#  define powr(a,b) pow(a,b)
39#  define pown(a,b) pow(a,b)
40#else
41#  ifdef USE_SINCOS
42#    define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)
43#  else
44#    define SINCOS(angle,svar,cvar) do {svar=sin(angle);cvar=cos(angle);} while (0)
45#  endif
46#endif
47
48// Standard mathematical constants:
49//   M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,
50//   M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), SQRT2, SQRT1_2=sqrt(1/2)
51// OpenCL defines M_constant_F for float constants, and nothing if double
52// is not enabled on the card, which is why these constants may be missing
53#ifndef M_PI
54#  define M_PI 3.141592653589793
55#endif
56#ifndef M_PI_2
57#  define M_PI_2 1.570796326794897
58#endif
59#ifndef M_PI_4
60#  define M_PI_4 0.7853981633974483
61#endif
62
63// Non-standard pi/180, used for converting between degrees and radians
64#ifndef M_PI_180
65#  define M_PI_180 0.017453292519943295
66#endif
67
68
69%(DEFINES)s
70
71%(SOURCES)s
72
73/*
74    ##########################################################
75    #                                                        #
76    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
77    #   !!                                              !!   #
78    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
79    #   !!                                              !!   #
80    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
81    #                                                        #
82    ##########################################################
83*/
84
85#ifdef IQ_KERNEL_NAME
86kernel void IQ_KERNEL_NAME(
87    global const double *q,
88    global double *result,
89    const int Nq,
90#ifdef IQ_OPEN_LOOPS
91  #ifdef USE_OPENCL
92    global double *loops_g,
93  #endif
94    local double *loops,
95    const double cutoff,
96    IQ_DISPERSION_LENGTH_DECLARATIONS,
97#endif
98    IQ_FIXED_PARAMETER_DECLARATIONS
99    )
100{
101#ifdef USE_OPENCL
102  #ifdef IQ_OPEN_LOOPS
103  // copy loops info to local memory
104  event_t e = async_work_group_copy(loops, loops_g, (IQ_DISPERSION_LENGTH_SUM)*2, 0);
105  wait_group_events(1, &e);
106  #endif
107
108  int i = get_global_id(0);
109  if (i < Nq)
110#else
111  #pragma omp parallel for
112  for (int i=0; i < Nq; i++)
113#endif
114  {
115    const double qi = q[i];
116#ifdef IQ_OPEN_LOOPS
117    double ret=0.0, norm=0.0;
118  #ifdef VOLUME_PARAMETERS
119    double vol=0.0, norm_vol=0.0;
120  #endif
121    IQ_OPEN_LOOPS
122    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
123    //  const double radius = loops[2*(radius_i)];
124    //  const double radius_w = loops[2*(radius_i)+1];
125
126    const double weight = IQ_WEIGHT_PRODUCT;
127    if (weight > cutoff) {
128      const double I = Iq(qi, IQ_PARAMETERS);
129      if (I>=0.0) { // scattering cannot be negative
130        ret += weight*I;
131        norm += weight;
132      #ifdef VOLUME_PARAMETERS
133        const double vol_weight = VOLUME_WEIGHT_PRODUCT;
134        vol += vol_weight*form_volume(VOLUME_PARAMETERS);
135        norm_vol += vol_weight;
136      #endif
137      }
138    //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,I); }
139    }
140    IQ_CLOSE_LOOPS
141  #ifdef VOLUME_PARAMETERS
142    if (vol*norm_vol != 0.0) {
143      ret *= norm_vol/vol;
144    }
145  #endif
146    result[i] = scale*ret/norm+background;
147#else
148    result[i] = scale*Iq(qi, IQ_PARAMETERS) + background;
149#endif
150  }
151}
152#endif
153
154
155#ifdef IQXY_KERNEL_NAME
156kernel void IQXY_KERNEL_NAME(
157    global const double *qx,
158    global const double *qy,
159    global double *result,
160    const int Nq,
161#ifdef IQXY_OPEN_LOOPS
162  #ifdef USE_OPENCL
163    global double *loops_g,
164  #endif
165    local double *loops,
166    const double cutoff,
167    IQXY_DISPERSION_LENGTH_DECLARATIONS,
168#endif
169    IQXY_FIXED_PARAMETER_DECLARATIONS
170    )
171{
172#ifdef USE_OPENCL
173  #ifdef IQXY_OPEN_LOOPS
174  // copy loops info to local memory
175  event_t e = async_work_group_copy(loops, loops_g, (IQXY_DISPERSION_LENGTH_SUM)*2, 0);
176  wait_group_events(1, &e);
177  #endif
178
179  int i = get_global_id(0);
180  if (i < Nq)
181#else
182  #pragma omp parallel for
183  for (int i=0; i < Nq; i++)
184#endif
185  {
186    const double qxi = qx[i];
187    const double qyi = qy[i];
188#ifdef IQXY_OPEN_LOOPS
189    double ret=0.0, norm=0.0;
190    #ifdef VOLUME_PARAMETERS
191    double vol=0.0, norm_vol=0.0;
192    #endif
193    IQXY_OPEN_LOOPS
194    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
195    //  const double radius = loops[2*(radius_i)];
196    //  const double radius_w = loops[2*(radius_i)+1];
197
198    const double weight = IQXY_WEIGHT_PRODUCT;
199    if (weight > cutoff) {
200
201      const double I = Iqxy(qxi, qyi, IQXY_PARAMETERS);
202      if (I>=0.0) { // scattering cannot be negative
203        // TODO: use correct angle for spherical correction
204        // Definition of theta and phi are probably reversed relative to the
205        // equation which gave rise to this correction, leading to an
206        // attenuation of the pattern as theta moves through pi/2.  Either
207        // reverse the meanings of phi and theta in the forms, or use phi
208        // rather than theta in this correction.  Current code uses cos(theta)
209        // so that values match those of sasview.
210      #ifdef IQXY_HAS_THETA
211        const double spherical_correction
212          = (Ntheta>1 ? fabs(cos(M_PI_180*theta))*M_PI_2:1.0);
213        ret += spherical_correction * weight * I;
214      #else
215        ret += weight * I;
216      #endif
217        norm += weight;
218      #ifdef VOLUME_PARAMETERS
219        const double vol_weight = VOLUME_WEIGHT_PRODUCT;
220        vol += vol_weight*form_volume(VOLUME_PARAMETERS);
221      #endif
222        norm_vol += vol_weight;
223      }
224      //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,I); }
225    }
226    IQXY_CLOSE_LOOPS
227  #ifdef VOLUME_PARAMETERS
228    if (vol*norm_vol != 0.0) {
229      ret *= norm_vol/vol;
230    }
231  #endif
232    result[i] = scale*ret/norm+background;
233#else
234    result[i] = scale*Iqxy(qxi, qyi, IQXY_PARAMETERS) + background;
235#endif
236  }
237}
238#endif
Note: See TracBrowser for help on using the repository browser.