source: sasmodels/sasmodels/kernel_template.c @ ce176ca

core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
Last change on this file since ce176ca was 5efe850, checked in by Paul Kienzle <pkienzle@…>, 8 years ago

Use tinycc if available; support float32 models in tinycc

  • Property mode set to 100644
File size: 9.3 KB
RevLine 
#line 1 "kernel_template.c"
// GENERATED CODE --- DO NOT EDIT ---
// Code is produced by sasmodels.gen from sasmodels/models/MODEL.c

// Platform preamble.  The same kernel text is compiled as OpenCL, as C++
// (including MSVC) or as C99 (including TinyCC).  This section maps the
// OpenCL spellings used by the kernels (kernel, global, local, constant,
// powr, pown, SINCOS) onto whatever the host compiler provides, and flags
// missing math functions with NEED_EXPM1 / NEED_TGAMMA.

#ifdef __OPENCL_VERSION__
# define USE_OPENCL
#endif

// Set to 1 to use compensated (Kahan) summation where a kernel supports it.
#define USE_KAHAN_SUMMATION 0

// If opencl is not available, then we are compiling a C function
// Note: if using a C++ compiler, then define kernel as extern "C"
#ifndef USE_OPENCL
// Use SAS_DOUBLE to force the use of double even for float kernels.
// The type name is assembled by token pasting so that it does not appear
// literally here (presumably so the double->float conversion mentioned
// below leaves it alone -- confirm against sasmodels.gen).
#  define SAS_DOUBLE dou ## ble
#  ifdef __cplusplus
     #include <cstdio>
     #include <cmath>
     using namespace std;
     #if defined(_MSC_VER)
         #include <limits>
         #include <float.h>
         #define kernel extern "C" __declspec( dllexport )
         // Replacements for C99 math functions, for MSVC versions without them.
         inline double trunc(double x) { return x>=0?floor(x):-floor(-x); }
         inline double fmin(double x, double y) { return x>y ? y : x; }
         inline double fmax(double x, double y) { return x<y ? y : x; }
         #define isnan(x) _isnan(x)
         // _finite() is false for NaN as well as for +/-inf, so NaN must be
         // excluded explicitly or isinf(NaN) would report true.
         #define isinf(x) (!_finite(x) && !_isnan(x))
         #define isfinite(x) _finite(x)
         #define NAN (std::numeric_limits<double>::quiet_NaN()) // non-signalling NaN
         #define INFINITY (std::numeric_limits<double>::infinity())
         #define NEED_EXPM1
         #define NEED_TGAMMA
     #else
         #define kernel extern "C"
     #endif
     // C++ version of SINCOS: sine and cosine of angle returned through
     // the reference arguments.
     inline void SINCOS(double angle, double &svar, double &cvar) { svar=sin(angle); cvar=cos(angle); }
#  else
     #include <stdio.h>
     #if defined(__TINYC__)
         #include <math.h>
         // TODO: test isnan
         inline double _isnan(double x) { return x != x; } // hope this doesn't optimize away!
         #undef isnan
         #define isnan(x) _isnan(x)
         // Defeat the double->float conversion since we don't have tgmath
         inline SAS_DOUBLE trunc(SAS_DOUBLE x) { return x>=0?floor(x):-floor(-x); }
         inline SAS_DOUBLE fmin(SAS_DOUBLE x, SAS_DOUBLE y) { return x>y ? y : x; }
         inline SAS_DOUBLE fmax(SAS_DOUBLE x, SAS_DOUBLE y) { return x<y ? y : x; }
         #define NEED_EXPM1
         #define NEED_TGAMMA
     #else
         #include <tgmath.h> // C99 type-generic math, so sin(float) => sinf
     #endif
     // MSVC doesn't support C99, so no need for dllexport on C99 branch
     #define kernel
     // The angle is copied into _t_ so that it is evaluated only once.
     #define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
#  endif
// OpenCL address space qualifiers have no meaning for host compilers.
#  define global
#  define local
#  define constant const
// OpenCL powr(a,b) = C99 pow(a,b), a >= 0
// OpenCL pown(a,b) = C99 pow(a,b), b integer
#  define powr(a,b) pow(a,b)
#  define pown(a,b) pow(a,b)
#else
#  if defined(USE_SINCOS)
#    define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)
#  else
#    define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
#  endif
#endif
73
#if defined(NEED_EXPM1)
   // Fallback expm1(x) = exp(x) - 1 for toolchains that define NEED_EXPM1.
   //
   // x_in: argument, passed as SAS_DOUBLE.
   // Returns x itself for NaN and for +/-0, exp(x) - 1.0 when |x| > 0.5, and
   // otherwise the rational approximation below.
   //
   // Adapted from the cephes math library.
   // Copyright 1984 - 1992 by Stephen L. Moshier
   static SAS_DOUBLE expm1(SAS_DOUBLE x_in) {
      double x = (double)x_in;  // go back to float for single precision kernels
      if (x != x || x == 0.0) {
         return x; // NaN and +/- 0
      } else if (x < -0.5 || x > 0.5) {
         return exp(x) - 1.0;
      } else {
         // With r = x*p(x^2), the result is 2*r/(q(x^2) - r).
         const double xsq = x*x;
         // numerator polynomial in x^2, Horner form
         const double p = (((
            +1.2617719307481059087798E-4)*xsq
            +3.0299440770744196129956E-2)*xsq
            +9.9999999999999999991025E-1);
         // denominator polynomial in x^2, Horner form
         const double q = ((((
            +3.0019850513866445504159E-6)*xsq
            +2.5244834034968410419224E-3)*xsq
            +2.2726554820815502876593E-1)*xsq
            +2.0000000000000000000897E0);
         double r = x * p;
         r =  r / (q - r);
         return r+r;
      }
   }
#endif
100
// Standard mathematical constants:
//   M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,
//   M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), M_SQRT2, M_SQRT1_2=sqrt(1/2)
// OpenCL defines M_constant_F for float constants, and nothing if double
// is not enabled on the card, which is why these constants may be missing.
// Only M_PI, M_PI_2, M_PI_4 and M_E get a fallback definition here.
#ifndef M_PI
#  define M_PI 3.141592653589793
#endif
#ifndef M_PI_2
// Enough digits that the literal rounds to exactly M_PI/2; the shorter
// 1.570796326794897 lands on a neighbouring value instead.
#  define M_PI_2 1.57079632679489661923
#endif
#ifndef M_PI_4
#  define M_PI_4 0.7853981633974483
#endif
#ifndef M_E
#  define M_E 2.718281828459045091
#endif

// Non-standard function library
// pi/180, used for converting between degrees and radians
// 4/3 pi for computing sphere volumes
// square and cube for computing squares and cubes
#ifndef M_PI_180
#  define M_PI_180 0.017453292519943295
#endif
#ifndef M_4PI_3
// Written with more digits than the type can hold so that the compiler
// rounds to the nearest representable value of 4*pi/3.
#  define M_4PI_3 4.18879020478639098461685784437
#endif
// square(x) returns x*x.  Plain multiplication is used here; earlier
// variants based on pow(x,2.0) and pown(x,2) were dropped.
inline double square(double x) { return x*x; }
// cube(x) returns x*x*x.
inline double cube(double x) { return x*x*x; }
// sinc(x) returns sin(x)/x, with the value 1.0 substituted at x == 0
// (either sign of zero) so that the division is never 0/0.
inline double sinc(double x) { return x==0 ? 1.0 : sin(x)/x; }
[f734e7d]134
135
136%(DEFINES)s
137
138%(SOURCES)s
139
140/*
141    ##########################################################
142    #                                                        #
143    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
144    #   !!                                              !!   #
145    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
146    #   !!                                              !!   #
147    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
148    #                                                        #
149    ##########################################################
150*/
151
#ifdef IQ_KERNEL_NAME
// 1D kernel: writes one value into result[i] for each q[i], i in [0, Nq).
//
//   q       input q values, Nq entries are read
//   result  output, Nq entries are written
//   Nq      number of q points
// Only when IQ_OPEN_LOOPS is defined (generated loop nest):
//   loops_g (OpenCL only) global copy of the loop table
//   loops   loop table; the sample expansion below reads it as interleaved
//           (value, weight) pairs
//   cutoff  terms whose weight product is not above cutoff are skipped
// IQ_DISPERSION_LENGTH_DECLARATIONS and IQ_FIXED_PARAMETER_DECLARATIONS are
// generated parameter lists; scale and background used below are presumably
// declared by the latter -- confirm against sasmodels.gen.
//
// Without loops the result is scale*Iq + background.  With loops it is
// scale*sum(weight*Iq)/norm + background, where norm is the sum of
// weight*form_volume (or of weight alone when VOLUME_PARAMETERS is not
// defined), taken over the terms that were kept.
kernel void IQ_KERNEL_NAME(
    global const double *q,
    global double *result,
    const int Nq,
#ifdef IQ_OPEN_LOOPS
  #ifdef USE_OPENCL
    global double *loops_g,
  #endif
    local double *loops,
    const double cutoff,
    IQ_DISPERSION_LENGTH_DECLARATIONS,
#endif
    IQ_FIXED_PARAMETER_DECLARATIONS
    )
{
#ifdef USE_OPENCL
  #ifdef IQ_OPEN_LOOPS
  // copy loops info to local memory; this stays ahead of the i < Nq test
  // so that every work item in the group reaches the copy
  event_t e = async_work_group_copy(loops, loops_g, (IQ_DISPERSION_LENGTH_SUM)*2, 0);
  wait_group_events(1, &e);
  #endif

  // one work item per q point
  int i = get_global_id(0);
  if (i < Nq)
#else
  #pragma omp parallel for
  for (int i=0; i < Nq; i++)
#endif
  {
    const double qi = q[i];
#ifdef IQ_OPEN_LOOPS
    double ret=0.0, norm=0.0;
    IQ_OPEN_LOOPS
    // IQ_OPEN_LOOPS expands to one loop per dispersed parameter, e.g.
    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
    //  const double radius = loops[2*(radius_i)];
    //  const double radius_w = loops[2*(radius_i)+1];

    const double weight = IQ_WEIGHT_PRODUCT;
    if (weight > cutoff) {
      const double scattering = Iq(qi, IQ_PARAMETERS);
      // allow kernels to exclude invalid regions by returning NaN
      if (!isnan(scattering)) {
        ret += weight*scattering;
      #ifdef VOLUME_PARAMETERS
        norm += weight * form_volume(VOLUME_PARAMETERS);
      #else
        norm += weight;
      #endif
      }
      //else { printf("exclude q,I:%%g,%%g\n",qi,scattering); }
    }
    IQ_CLOSE_LOOPS
    // norm stays at zero when every term was skipped (weight not above
    // cutoff, or NaN scattering) or when the accumulated volume is zero;
    // in that case there is no scattering, only background
    result[i] = (norm > 0. ? scale*ret/norm + background : background);
#else
    result[i] = scale*Iq(qi, IQ_PARAMETERS) + background;
#endif
  }
}
#endif
213
214
#ifdef IQXY_KERNEL_NAME
// 2D kernel: writes one value into result[i] for each (qx[i], qy[i]),
// i in [0, Nq).
//
//   qx, qy  input q components, Nq entries of each are read
//   result  output, Nq entries are written
//   Nq      number of q points
// Only when IQXY_OPEN_LOOPS is defined (generated loop nest):
//   loops_g (OpenCL only) global copy of the loop table
//   loops   loop table; the sample expansion below reads it as interleaved
//           (value, weight) pairs
//   cutoff  terms whose weight product is not above cutoff are skipped
// IQXY_DISPERSION_LENGTH_DECLARATIONS and IQXY_FIXED_PARAMETER_DECLARATIONS
// are generated parameter lists; scale and background used below are
// presumably declared by the latter -- confirm against sasmodels.gen.
//
// Without loops the result is scale*Iqxy + background.  With loops it is
// scale*sum(weight*Iqxy)/norm + background, where norm is the sum of
// weight*form_volume (or of weight alone when VOLUME_PARAMETERS is not
// defined), taken over the terms that were kept.  When IQXY_HAS_THETA is
// defined the weight of each kept term is first scaled by |cos(theta)|.
kernel void IQXY_KERNEL_NAME(
    global const double *qx,
    global const double *qy,
    global double *result,
    const int Nq,
#ifdef IQXY_OPEN_LOOPS
  #ifdef USE_OPENCL
    global double *loops_g,
  #endif
    local double *loops,
    const double cutoff,
    IQXY_DISPERSION_LENGTH_DECLARATIONS,
#endif
    IQXY_FIXED_PARAMETER_DECLARATIONS
    )
{
#ifdef USE_OPENCL
  #ifdef IQXY_OPEN_LOOPS
  // copy loops info to local memory; this stays ahead of the i < Nq test
  // so that every work item in the group reaches the copy
  event_t e = async_work_group_copy(loops, loops_g, (IQXY_DISPERSION_LENGTH_SUM)*2, 0);
  wait_group_events(1, &e);
  #endif

  // one work item per q point
  int i = get_global_id(0);
  if (i < Nq)
#else
  #pragma omp parallel for
  for (int i=0; i < Nq; i++)
#endif
  {
    const double qxi = qx[i];
    const double qyi = qy[i];
#ifdef IQXY_OPEN_LOOPS
    double ret=0.0, norm=0.0;
    #if USE_KAHAN_SUMMATION
    // running compensation term; only the loop sum below uses it
    double accumulated_error = 0.0;
    #endif
    IQXY_OPEN_LOOPS
    // IQXY_OPEN_LOOPS expands to one loop per dispersed parameter, e.g.
    //for (int radius_i=0; radius_i < Nradius; radius_i++) {
    //  const double radius = loops[2*(radius_i)];
    //  const double radius_w = loops[2*(radius_i)+1];
    double weight = IQXY_WEIGHT_PRODUCT;
    if (weight > cutoff) {

      const double scattering = Iqxy(qxi, qyi, IQXY_PARAMETERS);
      if (!isnan(scattering)) { // if scattering is bad, exclude it from sum
      #if defined(IQXY_HAS_THETA)
        // Force a nominal value for the spherical correction even when
        // theta is +90/-90 so that there are no divide by zero problems.
        // For cos(theta) fixed at 90, we effectively multiply top and bottom
        // by 1e-6, so the effect cancels.
        const double spherical_correction = fmax(fabs(cos(M_PI_180*theta)), 1.e-6);
        weight *= spherical_correction;
      #endif
        const double next = weight * scattering;
      #if USE_KAHAN_SUMMATION
        // compensated summation: carry the rounding error of each add
        const double y = next - accumulated_error;
        const double t = ret + y;
        accumulated_error = (t - ret) - y;
        ret = t;
      #else
        ret += next;
      #endif
      #ifdef VOLUME_PARAMETERS
        norm += weight*form_volume(VOLUME_PARAMETERS);
      #else
        norm += weight;
      #endif
      }
      //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qxi,qyi,scattering); }
    }
    IQXY_CLOSE_LOOPS
    // norm stays at zero when every term was skipped (weight not above
    // cutoff, or NaN scattering) or when the accumulated volume is zero;
    // in that case there is no scattering, only background
    result[i] = (norm>0. ? scale*ret/norm + background : background);
#else
    result[i] = scale*Iqxy(qxi, qyi, IQXY_PARAMETERS) + background;
#endif
  }
}
#endif
Note: See TracBrowser for help on using the repository browser.