#line 1 "kernel_template.c" // GENERATED CODE --- DO NOT EDIT --- // Code is produced by sasmodels.gen from sasmodels/models/MODEL.c #ifdef __OPENCL_VERSION__ # define USE_OPENCL #endif #define USE_KAHAN_SUMMATION 0 // If opencl is not available, then we are compiling a C function // Note: if using a C++ compiler, then define kernel as extern "C" #ifndef USE_OPENCL // Use SAS_DOUBLE to force the use of double even for float kernels # define SAS_DOUBLE dou ## ble # ifdef __cplusplus #include #include using namespace std; #if defined(_MSC_VER) #include #include #define kernel extern "C" __declspec( dllexport ) inline double trunc(double x) { return x>=0?floor(x):-floor(-x); } inline double fmin(double x, double y) { return x>y ? y : x; } inline double fmax(double x, double y) { return x::quiet_NaN()) // non-signalling NaN #define INFINITY (std::numeric_limits::infinity()) #define NEED_EXPM1 #define NEED_TGAMMA #define NEED_ERF #else #define kernel extern "C" #endif inline void SINCOS(double angle, double &svar, double &cvar) { svar=sin(angle); cvar=cos(angle); } # else #include #if defined(__TINYC__) #include // TODO: test isnan inline double _isnan(double x) { return x != x; } // hope this doesn't optimize away! #undef isnan #define isnan(x) _isnan(x) // Defeat the double->float conversion since we don't have tgmath inline SAS_DOUBLE trunc(SAS_DOUBLE x) { return x>=0?floor(x):-floor(-x); } inline SAS_DOUBLE fmin(SAS_DOUBLE x, SAS_DOUBLE y) { return x>y ? y : x; } inline SAS_DOUBLE fmax(SAS_DOUBLE x, SAS_DOUBLE y) { return x // C99 type-generic math, so sin(float) => sinf #endif // MSVC doesn't support C99, so no need for dllexport on C99 branch #define kernel #define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0) # endif # define global # define local # define constant const // OpenCL powr(a,b) = C99 pow(a,b), b >= 0 // OpenCL pown(a,b) = C99 pow(a,b), b integer # define powr(a,b) pow(a,b) # define pown(a,b) pow(a,b) #else # if defined(USE_SINCOS) # define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar) # else # define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0) # endif #endif #if defined(NEED_EXPM1) static SAS_DOUBLE expm1(SAS_DOUBLE x_in) { double x = (double)x_in; // go back to float for single precision kernels // Adapted from the cephes math library. // Copyright 1984 - 1992 by Stephen L. Moshier if (x != x || x == 0.0) { return x; // NaN and +/- 0 } else if (x < -0.5 || x > 0.5) { return exp(x) - 1.0; } else { const double xsq = x*x; const double p = ((( +1.2617719307481059087798E-4)*xsq +3.0299440770744196129956E-2)*xsq +9.9999999999999999991025E-1); const double q = (((( +3.0019850513866445504159E-6)*xsq +2.5244834034968410419224E-3)*xsq +2.2726554820815502876593E-1)*xsq +2.0000000000000000000897E0); double r = x * p; r = r / (q - r); return r+r; } } #endif // Standard mathematical constants: // M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4, // M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), SQRT2, SQRT1_2=sqrt(1/2) // OpenCL defines M_constant_F for float constants, and nothing if double // is not enabled on the card, which is why these constants may be missing #ifndef M_PI # define M_PI 3.141592653589793 #endif #ifndef M_PI_2 # define M_PI_2 1.570796326794897 #endif #ifndef M_PI_4 # define M_PI_4 0.7853981633974483 #endif #ifndef M_E # define M_E 2.718281828459045091 #endif // Non-standard function library // pi/180, used for converting between degrees and radians // 4/3 pi for computing sphere volumes // square and cube for computing squares and cubes #ifndef M_PI_180 # define M_PI_180 0.017453292519943295 #endif #ifndef M_4PI_3 # define M_4PI_3 4.18879020478639 #endif //inline double square(double x) { return pow(x,2.0); } //inline double square(double x) { return pown(x,2); } inline double square(double x) { return x*x; } inline double cube(double x) { return x*x*x; } inline double sinc(double x) { return x==0 ? 1.0 : sin(x)/x; } %(DEFINES)s %(SOURCES)s /* ########################################################## # # # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # # !! !! # # !! KEEP THIS CODE CONSISTENT WITH KERNELPY.PY !! # # !! !! # # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # # # ########################################################## */ #ifdef IQ_KERNEL_NAME kernel void IQ_KERNEL_NAME( global const double *q, global double *result, const int Nq, #ifdef IQ_OPEN_LOOPS #ifdef USE_OPENCL global double *loops_g, #endif local double *loops, const double cutoff, IQ_DISPERSION_LENGTH_DECLARATIONS, #endif IQ_FIXED_PARAMETER_DECLARATIONS ) { #ifdef USE_OPENCL #ifdef IQ_OPEN_LOOPS // copy loops info to local memory event_t e = async_work_group_copy(loops, loops_g, (IQ_DISPERSION_LENGTH_SUM)*2, 0); wait_group_events(1, &e); #endif int i = get_global_id(0); if (i < Nq) #else #pragma omp parallel for for (int i=0; i < Nq; i++) #endif { const double qi = q[i]; #ifdef IQ_OPEN_LOOPS double ret=0.0, norm=0.0; IQ_OPEN_LOOPS //for (int radius_i=0; radius_i < Nradius; radius_i++) { // const double radius = loops[2*(radius_i)]; // const double radius_w = loops[2*(radius_i)+1]; const double weight = IQ_WEIGHT_PRODUCT; if (weight > cutoff) { const double scattering = Iq(qi, IQ_PARAMETERS); // allow kernels to exclude invalid regions by returning NaN if (!isnan(scattering)) { ret += weight*scattering; #ifdef VOLUME_PARAMETERS norm += weight * form_volume(VOLUME_PARAMETERS); #else norm += weight; #endif } //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); } } IQ_CLOSE_LOOPS // norm can only be zero if volume is zero, so no scattering result[i] = (norm > 0. ? scale*ret/norm + background : background); #else result[i] = scale*Iq(qi, IQ_PARAMETERS) + background; #endif } } #endif #ifdef IQXY_KERNEL_NAME kernel void IQXY_KERNEL_NAME( global const double *qx, global const double *qy, global double *result, const int Nq, #ifdef IQXY_OPEN_LOOPS #ifdef USE_OPENCL global double *loops_g, #endif local double *loops, const double cutoff, IQXY_DISPERSION_LENGTH_DECLARATIONS, #endif IQXY_FIXED_PARAMETER_DECLARATIONS ) { #ifdef USE_OPENCL #ifdef IQXY_OPEN_LOOPS // copy loops info to local memory event_t e = async_work_group_copy(loops, loops_g, (IQXY_DISPERSION_LENGTH_SUM)*2, 0); wait_group_events(1, &e); #endif int i = get_global_id(0); if (i < Nq) #else #pragma omp parallel for for (int i=0; i < Nq; i++) #endif { const double qxi = qx[i]; const double qyi = qy[i]; #if USE_KAHAN_SUMMATION double accumulated_error = 0.0; #endif #ifdef IQXY_OPEN_LOOPS double ret=0.0, norm=0.0; IQXY_OPEN_LOOPS //for (int radius_i=0; radius_i < Nradius; radius_i++) { // const double radius = loops[2*(radius_i)]; // const double radius_w = loops[2*(radius_i)+1]; double weight = IQXY_WEIGHT_PRODUCT; if (weight > cutoff) { const double scattering = Iqxy(qxi, qyi, IQXY_PARAMETERS); if (!isnan(scattering)) { // if scattering is bad, exclude it from sum #if defined(IQXY_HAS_THETA) // Force a nominal value for the spherical correction even when // theta is +90/-90 so that there are no divide by zero problems. // For cos(theta) fixed at 90, we effectively multiply top and bottom // by 1e-6, so the effect cancels. const double spherical_correction = fmax(fabs(cos(M_PI_180*theta)), 1.e-6); weight *= spherical_correction; #endif const double next = weight * scattering; #if USE_KAHAN_SUMMATION const double y = next - accumulated_error; const double t = ret + y; accumulated_error = (t - ret) - y; ret = t; #else ret += next; #endif #ifdef VOLUME_PARAMETERS norm += weight*form_volume(VOLUME_PARAMETERS); #else norm += weight; #endif } //else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); } } IQXY_CLOSE_LOOPS // norm can only be zero if volume is zero, so no scattering result[i] = (norm>0. ? scale*ret/norm + background : background); #else result[i] = scale*Iqxy(qxi, qyi, IQXY_PARAMETERS) + background; #endif } } #endif