kernel_template.c @ ce176ca

core_shell_microgelscostrafo411magnetic_modelrelease_v0.94release_v0.95ticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests

Last change on this file since ce176ca was 5efe850, checked in by Paul Kienzle <pkienzle@…>, 8 years ago
Use tinycc if available; support float32 models in tinycc
Property mode set to `100644`
File size: 9.3 KB

Rev	Line
#line 1 "kernel_template.c"
// GENERATED CODE --- DO NOT EDIT ---
// Code is produced by sasmodels.gen from sasmodels/models/MODEL.c

// Platform preamble: selects between the OpenCL build and the host C/C++
// build, and supplies the keywords and math helpers that the selected
// compiler is missing so the rest of the template can be written once.

#ifdef __OPENCL_VERSION__
# define USE_OPENCL
#endif

#define USE_KAHAN_SUMMATION 0

// If opencl is not available, then we are compiling a C function
// Note: if using a C++ compiler, then define kernel as extern "C"
#ifndef USE_OPENCL
// Use SAS_DOUBLE to force the use of double even for float kernels
// (the name is assembled by token pasting so it is never spelled out here).
#  define SAS_DOUBLE dou ## ble
#  ifdef __cplusplus
#    include <cstdio>
#    include <cmath>
     using namespace std;
#    if defined(_MSC_VER)
#      include <limits>
#      include <float.h>
#      define kernel extern "C" __declspec( dllexport )
       // C99 math functions supplied by hand on this branch
       inline double trunc(double x) { return x>=0?floor(x):-floor(-x); }
       inline double fmin(double x, double y) { return x>y ? y : x; }
       inline double fmax(double x, double y) { return x<y ? y : x; }
#      define isnan(x) _isnan(x)
       // _finite() is false for NaN as well as for +/-Inf, so NaN has to be
       // excluded explicitly or isinf(NaN) would report true.
       // Note: the argument is evaluated twice.
#      define isinf(x) (!_finite(x) && !_isnan(x))
#      define isfinite(x) _finite(x)
#      define NAN (std::numeric_limits<double>::quiet_NaN()) // non-signalling NaN
#      define INFINITY (std::numeric_limits<double>::infinity())
#      define NEED_EXPM1
#      define NEED_TGAMMA
#    else
#      define kernel extern "C"
#    endif
     // C++ version of SINCOS writes the results through references
     inline void SINCOS(double angle, double &svar, double &cvar) { svar=sin(angle); cvar=cos(angle); }
#  else
#    include <stdio.h>
#    if defined(__TINYC__)
#      include <math.h>
       // TODO: test isnan
       inline double _isnan(double x) { return x != x; } // hope this doesn't optimize away!
#      undef isnan
#      define isnan(x) _isnan(x)
       // Defeat the double->float conversion since we don't have tgmath
       inline SAS_DOUBLE trunc(SAS_DOUBLE x) { return x>=0?floor(x):-floor(-x); }
       inline SAS_DOUBLE fmin(SAS_DOUBLE x, SAS_DOUBLE y) { return x>y ? y : x; }
       inline SAS_DOUBLE fmax(SAS_DOUBLE x, SAS_DOUBLE y) { return x<y ? y : x; }
#      define NEED_EXPM1
#      define NEED_TGAMMA
#    else
#      include <tgmath.h> // C99 type-generic math, so sin(float) => sinf
#    endif
     // MSVC doesn't support C99, so no need for dllexport on C99 branch
#    define kernel
     // Evaluate angle once, then store its sine and cosine in svar and cvar
#    define SINCOS(angle,svar,cvar) do {const double _t_=(angle); svar=sin(_t_);cvar=cos(_t_);} while (0)
#  endif
   // OpenCL address space qualifiers have no meaning in host code
#  define global
#  define local
#  define constant const
   // OpenCL powr(a,b) = C99 pow(a,b), b >= 0
   // OpenCL pown(a,b) = C99 pow(a,b), b integer
#  define powr(a,b) pow(a,b)
#  define pown(a,b) pow(a,b)
#else
#  if defined(USE_SINCOS)
#    define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)
#  else
#    define SINCOS(angle,svar,cvar) do {const double _t_=(angle); svar=sin(_t_);cvar=cos(_t_);} while (0)
#  endif
#endif
	73
[98cb4d7]	74	#if defined(NEED_EXPM1)
[5efe850]	75	static SAS_DOUBLE expm1(SAS_DOUBLE x_in) {
	76	double x = (double)x_in; // go back to float for single precision kernels
[98cb4d7]	77	// Adapted from the cephes math library.
	78	// Copyright 1984 - 1992 by Stephen L. Moshier
	79	if (x != x \|\| x == 0.0) {
	80	return x; // NaN and +/- 0
	81	} else if (x < -0.5 \|\| x > 0.5) {
	82	return exp(x) - 1.0;
	83	} else {
	84	const double xsq = x*x;
	85	const double p = (((
	86	+1.2617719307481059087798E-4)*xsq
	87	+3.0299440770744196129956E-2)*xsq
	88	+9.9999999999999999991025E-1);
	89	const double q = ((((
	90	+3.0019850513866445504159E-6)*xsq
	91	+2.5244834034968410419224E-3)*xsq
	92	+2.2726554820815502876593E-1)*xsq
	93	+2.0000000000000000000897E0);
	94	double r = x * p;
	95	r = r / (q - r);
	96	return r+r;
	97	}
	98	}
	99	#endif
	100
// Standard mathematical constants:
//   M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,
//   M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), M_SQRT2, M_SQRT1_2=sqrt(1/2)
// OpenCL defines M_constant_F for float constants, and nothing if double
// is not enabled on the card, which is why these constants may be missing.
// Only the constants below are given fallback definitions here.  Each literal
// carries enough digits to round to the nearest double, so that for example
// M_PI_2 == M_PI/2 holds exactly.
#ifndef M_PI
#  define M_PI 3.141592653589793
#endif
#ifndef M_PI_2
#  define M_PI_2 1.5707963267948966
#endif
#ifndef M_PI_4
#  define M_PI_4 0.7853981633974483
#endif
#ifndef M_E
#  define M_E 2.718281828459045091
#endif
[f734e7d]	118
// Non-standard constants
//   M_PI_180 = pi/180, used for converting between degrees and radians
//   M_4PI_3  = 4/3 pi, used for computing sphere volumes
// The literals carry enough digits to round to the nearest double.
#ifndef M_PI_180
#  define M_PI_180 0.017453292519943295
#endif
#ifndef M_4PI_3
#  define M_4PI_3 4.1887902047863909846
#endif
// Small helper functions: square and cube for computing squares and cubes,
// and sinc for sin(x)/x.
//
// In a C99/C11 host build a plain `inline` definition does not provide an
// external definition, so a call that the compiler chooses not to inline
// fails to link.  Host builds therefore give the helpers internal linkage.
// USE_OPENCL is set near the top of this template for OpenCL builds, which
// keep plain `inline`.
// NOTE(review): plain inline is kept for OpenCL on the assumption that
// older OpenCL C compilers may reject `static` -- confirm.
#ifdef USE_OPENCL
#  define SAS_INLINE inline
#else
#  define SAS_INLINE static inline
#endif

// x^2, by plain multiplication rather than a call to pow/pown
SAS_INLINE double square(double x) { return x*x; }
// x^3, by plain multiplication
SAS_INLINE double cube(double x) { return x*x*x; }
// sin(x)/x, with the value 1 substituted at x == 0
SAS_INLINE double sinc(double x) { return x==0 ? 1.0 : sin(x)/x; }
[f734e7d]	134
	135
	136	%(DEFINES)s
	137
	138	%(SOURCES)s
	139
	140	/*
	141	##########################################################
	142	# #
	143	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
	144	# !! !! #
	145	# !! KEEP THIS CODE CONSISTENT WITH KERNELPY.PY !! #
	146	# !! !! #
	147	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
	148	# #
	149	##########################################################
	150	*/
	151
	152	#ifdef IQ_KERNEL_NAME
	153	kernel void IQ_KERNEL_NAME(
	154	global const double *q,
	155	global double *result,
	156	const int Nq,
	157	#ifdef IQ_OPEN_LOOPS
	158	#ifdef USE_OPENCL
	159	global double *loops_g,
	160	#endif
	161	local double *loops,
	162	const double cutoff,
	163	IQ_DISPERSION_LENGTH_DECLARATIONS,
	164	#endif
	165	IQ_FIXED_PARAMETER_DECLARATIONS
	166	)
	167	{
	168	#ifdef USE_OPENCL
	169	#ifdef IQ_OPEN_LOOPS
	170	// copy loops info to local memory
	171	event_t e = async_work_group_copy(loops, loops_g, (IQ_DISPERSION_LENGTH_SUM)*2, 0);
	172	wait_group_events(1, &e);
	173	#endif
	174
	175	int i = get_global_id(0);
	176	if (i < Nq)
	177	#else
	178	#pragma omp parallel for
	179	for (int i=0; i < Nq; i++)
	180	#endif
	181	{
	182	const double qi = q[i];
	183	#ifdef IQ_OPEN_LOOPS
	184	double ret=0.0, norm=0.0;
	185	IQ_OPEN_LOOPS
	186	//for (int radius_i=0; radius_i < Nradius; radius_i++) {
	187	// const double radius = loops[2*(radius_i)];
	188	// const double radius_w = loops[2*(radius_i)+1];
	189
	190	const double weight = IQ_WEIGHT_PRODUCT;
	191	if (weight > cutoff) {
[750ffa5]	192	const double scattering = Iq(qi, IQ_PARAMETERS);
[c138211]	193	// allow kernels to exclude invalid regions by returning NaN
	194	if (!isnan(scattering)) {
[750ffa5]	195	ret += weight*scattering;
[f734e7d]	196	#ifdef VOLUME_PARAMETERS
[c4e7a5f]	197	norm += weight * form_volume(VOLUME_PARAMETERS);
	198	#else
	199	norm += weight;
[f734e7d]	200	#endif
[c138211]	201	}
[750ffa5]	202	//else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
[f734e7d]	203	}
	204	IQ_CLOSE_LOOPS
[c4e7a5f]	205	// norm can only be zero if volume is zero, so no scattering
	206	result[i] = (norm > 0. ? scale*ret/norm + background : background);
[f734e7d]	207	#else
	208	result[i] = scale*Iq(qi, IQ_PARAMETERS) + background;
	209	#endif
	210	}
	211	}
	212	#endif
	213
	214
	215	#ifdef IQXY_KERNEL_NAME
	216	kernel void IQXY_KERNEL_NAME(
	217	global const double *qx,
	218	global const double *qy,
	219	global double *result,
	220	const int Nq,
	221	#ifdef IQXY_OPEN_LOOPS
	222	#ifdef USE_OPENCL
	223	global double *loops_g,
	224	#endif
	225	local double *loops,
	226	const double cutoff,
	227	IQXY_DISPERSION_LENGTH_DECLARATIONS,
	228	#endif
	229	IQXY_FIXED_PARAMETER_DECLARATIONS
	230	)
	231	{
	232	#ifdef USE_OPENCL
	233	#ifdef IQXY_OPEN_LOOPS
	234	// copy loops info to local memory
	235	event_t e = async_work_group_copy(loops, loops_g, (IQXY_DISPERSION_LENGTH_SUM)*2, 0);
	236	wait_group_events(1, &e);
	237	#endif
	238
	239	int i = get_global_id(0);
	240	if (i < Nq)
	241	#else
	242	#pragma omp parallel for
	243	for (int i=0; i < Nq; i++)
	244	#endif
	245	{
	246	const double qxi = qx[i];
	247	const double qyi = qy[i];
[e3a9733]	248	#if USE_KAHAN_SUMMATION
	249	double accumulated_error = 0.0;
	250	#endif
[f734e7d]	251	#ifdef IQXY_OPEN_LOOPS
	252	double ret=0.0, norm=0.0;
	253	IQXY_OPEN_LOOPS
	254	//for (int radius_i=0; radius_i < Nradius; radius_i++) {
	255	// const double radius = loops[2*(radius_i)];
	256	// const double radius_w = loops[2*(radius_i)+1];
[c4e7a5f]	257	double weight = IQXY_WEIGHT_PRODUCT;
[f734e7d]	258	if (weight > cutoff) {
	259
[750ffa5]	260	const double scattering = Iqxy(qxi, qyi, IQXY_PARAMETERS);
[9c79c32]	261	if (!isnan(scattering)) { // if scattering is bad, exclude it from sum
[c4e7a5f]	262	#if defined(IQXY_HAS_THETA)
	263	// Force a nominal value for the spherical correction even when
	264	// theta is +90/-90 so that there are no divide by zero problems.
	265	// For cos(theta) fixed at 90, we effectively multiply top and bottom
	266	// by 1e-6, so the effect cancels.
[0278e3f]	267	const double spherical_correction = fmax(fabs(cos(M_PI_180*theta)), 1.e-6);
[c4e7a5f]	268	weight *= spherical_correction;
[e3a9733]	269	#endif
[c4e7a5f]	270	const double next = weight * scattering;
[e3a9733]	271	#if USE_KAHAN_SUMMATION
	272	const double y = next - accumulated_error;
	273	const double t = ret + y;
	274	accumulated_error = (t - ret) - y;
	275	ret = t;
[f734e7d]	276	#else
[e3a9733]	277	ret += next;
[f734e7d]	278	#endif
	279	#ifdef VOLUME_PARAMETERS
[c4e7a5f]	280	norm += weight*form_volume(VOLUME_PARAMETERS);
	281	#else
	282	norm += weight;
[718514b]	283	#endif
[9c79c32]	284	}
[750ffa5]	285	//else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
[f734e7d]	286	}
	287	IQXY_CLOSE_LOOPS
[c4e7a5f]	288	// norm can only be zero if volume is zero, so no scattering
	289	result[i] = (norm>0. ? scale*ret/norm + background : background);
[f734e7d]	290	#else
	291	result[i] = scale*Iqxy(qxi, qyi, IQXY_PARAMETERS) + background;
	292	#endif
	293	}
	294	}
	295	#endif

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasmodels/sasmodels/kernel_template.c @ ce176ca

Download in other formats: