kernel_template.c @ 5efe850

core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests

Last change on this file since 5efe850 was 5efe850, checked in by Paul Kienzle <pkienzle@…>, 8 years ago
Use tinycc if available; support float32 models in tinycc
Property mode set to `100644`
File size: 9.3 KB

Line
1	#line 1 "kernel_template.c"
2	// GENERATED CODE --- DO NOT EDIT ---
3	// Code is produced by sasmodels.gen from sasmodels/models/MODEL.c
4
5	#ifdef __OPENCL_VERSION__ // predefined by OpenCL C compilers
6	# define USE_OPENCL
7	#endif // __OPENCL_VERSION__
8
9	#define USE_KAHAN_SUMMATION 0 // nonzero switches the Iqxy kernel to compensated (Kahan) summation
10
11	// If opencl is not available, then we are compiling a C function
12	// Note: if using a C++ compiler, then define kernel as extern "C"
13	#ifndef USE_OPENCL // host build: C or C++ compiler rather than OpenCL
14	// Use SAS_DOUBLE to force the use of double even for float kernels
15	# define SAS_DOUBLE dou ## ble
16	# ifdef __cplusplus
17	#include <cstdio>
18	#include <cmath>
19	using namespace std;
20	#if defined(_MSC_VER)
21	#include <limits>
22	#include <float.h>
23	#define kernel extern "C" __declspec( dllexport )
24	inline double trunc(double x) { return x>=0?floor(x):-floor(-x); }
25	inline double fmin(double x, double y) { return x>y ? y : x; }
26	inline double fmax(double x, double y) { return x<y ? y : x; }
27	#define isnan(x) _isnan(x)
28	#define isinf(x) (!_finite(x))
29	#define isfinite(x) _finite(x)
30	#define NAN (std::numeric_limits<double>::quiet_NaN()) // non-signalling NaN
31	#define INFINITY (std::numeric_limits<double>::infinity())
32	#define NEED_EXPM1
33	#define NEED_TGAMMA
34	#else // C++ compiler other than MSVC
35	#define kernel extern "C"
36	#endif // _MSC_VER
37	inline void SINCOS(double angle, double &svar, double &cvar) { svar=sin(angle); cvar=cos(angle); }
38	# else // plain C compiler
39	#include <stdio.h>
40	#if defined(__TINYC__)
41	#include <math.h>
42	// TODO: test isnan
43	inline double _isnan(double x) { return x != x; } // hope this doesn't optimize away!
44	#undef isnan
45	#define isnan(x) _isnan(x)
46	// Defeat the double->float conversion since we don't have tgmath
47	inline SAS_DOUBLE trunc(SAS_DOUBLE x) { return x>=0?floor(x):-floor(-x); }
48	inline SAS_DOUBLE fmin(SAS_DOUBLE x, SAS_DOUBLE y) { return x>y ? y : x; }
49	inline SAS_DOUBLE fmax(SAS_DOUBLE x, SAS_DOUBLE y) { return x<y ? y : x; }
50	#define NEED_EXPM1
51	#define NEED_TGAMMA
52	#else // C compiler other than tinycc
53	#include <tgmath.h> // C99 type-generic math, so sin(float) => sinf
54	#endif // __TINYC__
55	// MSVC doesn't support C99, so no need for dllexport on C99 branch
56	#define kernel
57	#define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
58	# endif // __cplusplus
59	# define global // OpenCL address-space qualifier; expands to nothing on the host
60	# define local // OpenCL address-space qualifier; expands to nothing on the host
61	# define constant const
62	// OpenCL powr(a,b) = C99 pow(a,b), b >= 0
63	// OpenCL pown(a,b) = C99 pow(a,b), b integer
64	# define powr(a,b) pow(a,b)
65	# define pown(a,b) pow(a,b)
66	#else // USE_OPENCL
67	# if defined(USE_SINCOS)
68	# define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)
69	# else // no USE_SINCOS: evaluate sin and cos separately
70	# define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
71	# endif // USE_SINCOS
72	#endif // USE_OPENCL
73
74	#if defined(NEED_EXPM1) // fallback for the builds that define NEED_EXPM1 above (MSVC C++, tinycc)
75	static SAS_DOUBLE expm1(SAS_DOUBLE x_in) { // returns exp(x_in) - 1
76	double x = (double)x_in; // go back to float for single precision kernels
77	// Adapted from the cephes math library.
78	// Copyright 1984 - 1992 by Stephen L. Moshier
79	if (x != x || x == 0.0) {
80	return x; // NaN and +/- 0
81	} else if (x < -0.5 || x > 0.5) {
82	return exp(x) - 1.0; // outside [-0.5, 0.5] the direct formula is used
83	} else {
84	const double xsq = x*x;
85	const double p = (((
86	+1.2617719307481059087798E-4)*xsq
87	+3.0299440770744196129956E-2)*xsq
88	+9.9999999999999999991025E-1);
89	const double q = ((((
90	+3.0019850513866445504159E-6)*xsq
91	+2.5244834034968410419224E-3)*xsq
92	+2.2726554820815502876593E-1)*xsq
93	+2.0000000000000000000897E0);
94	double r = x * p; // r = x*P(x^2)
95	r = r / (q - r);
96	return r+r; // 2*x*P/(Q - x*P): rational approximation used on [-0.5, 0.5]
97	}
98	}
99	#endif // NEED_EXPM1
100
101	// Standard mathematical constants:
102	// M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,
103	// M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), M_SQRT2=sqrt(2), M_SQRT1_2=sqrt(1/2)
104	// OpenCL defines M_constant_F for float constants, and nothing if double
105	// is not enabled on the card, which is why these constants may be missing
106	#ifndef M_PI
107	# define M_PI 3.141592653589793 // pi
108	#endif
109	#ifndef M_PI_2
110	# define M_PI_2 1.570796326794897 // pi/2
111	#endif
112	#ifndef M_PI_4
113	# define M_PI_4 0.7853981633974483 // pi/4
114	#endif
115	#ifndef M_E
116	# define M_E 2.718281828459045091 // e, the base of the natural logarithm
117	#endif
118
119	// Non-standard function library
120	// pi/180, used for converting between degrees and radians
121	// 4/3 pi for computing sphere volumes
122	// square and cube for computing squares and cubes
123	#ifndef M_PI_180
124	# define M_PI_180 0.017453292519943295 // pi/180: multiply degrees by this to get radians
125	#endif
126	#ifndef M_4PI_3
127	# define M_4PI_3 4.18879020478639 // 4/3 pi: sphere volume is M_4PI_3 * radius^3
128	#endif
129	//inline double square(double x) { return pow(x,2.0); }
130	//inline double square(double x) { return pown(x,2); }
131	inline double square(double x) { return x*x; } // x squared
132	inline double cube(double x) { return x*x*x; } // x cubed
133	inline double sinc(double x) { return x==0 ? 1.0 : sin(x)/x; } // sin(x)/x, using the limit value 1 at x == 0
134
135
136	%(DEFINES)s
137
138	%(SOURCES)s
139
140	/*
141	##########################################################
142	# #
143	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
144	# !! !! #
145	# !! KEEP THIS CODE CONSISTENT WITH KERNELPY.PY !! #
146	# !! !! #
147	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
148	# #
149	##########################################################
150	*/
151
152	#ifdef IQ_KERNEL_NAME
153	kernel void IQ_KERNEL_NAME( // 1D kernel: writes result[i] from q[i] for every i below Nq
154	global const double *q,
155	global double *result,
156	const int Nq, // number of q points to evaluate
157	#ifdef IQ_OPEN_LOOPS
158	#ifdef USE_OPENCL
159	global double *loops_g, // OpenCL only: source of the copy into loops
160	#endif
161	local double *loops, // table read by IQ_OPEN_LOOPS; the example below reads (value, weight) pairs
162	const double cutoff, // points whose weight does not exceed this are skipped
163	IQ_DISPERSION_LENGTH_DECLARATIONS,
164	#endif // IQ_OPEN_LOOPS
165	IQ_FIXED_PARAMETER_DECLARATIONS
166	)
167	{
168	#ifdef USE_OPENCL
169	#ifdef IQ_OPEN_LOOPS
170	// copy loops info to local memory
171	event_t e = async_work_group_copy(loops, loops_g, (IQ_DISPERSION_LENGTH_SUM)*2, 0);
172	wait_group_events(1, &e);
173	#endif
174
175	int i = get_global_id(0); // each work item handles one q index
176	if (i < Nq) // work items with an index past the end do nothing
177	#else
178	#pragma omp parallel for
179	for (int i=0; i < Nq; i++)
180	#endif
181	{
182	const double qi = q[i];
183	#ifdef IQ_OPEN_LOOPS
184	double ret=0.0, norm=0.0; // weighted sum of Iq and its normalization
185	IQ_OPEN_LOOPS
186	//for (int radius_i=0; radius_i < Nradius; radius_i++) {
187	// const double radius = loops[2*(radius_i)];
188	// const double radius_w = loops[2*(radius_i)+1];
189
190	const double weight = IQ_WEIGHT_PRODUCT;
191	if (weight > cutoff) {
192	const double scattering = Iq(qi, IQ_PARAMETERS);
193	// allow kernels to exclude invalid regions by returning NaN
194	if (!isnan(scattering)) {
195	ret += weight*scattering;
196	#ifdef VOLUME_PARAMETERS
197	norm += weight * form_volume(VOLUME_PARAMETERS); // volume-weighted normalization
198	#else
199	norm += weight;
200	#endif
201	}
202	//else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
203	}
204	IQ_CLOSE_LOOPS
205	// norm can only be zero if volume is zero, so no scattering
206	result[i] = (norm > 0. ? scale*ret/norm + background : background);
207	#else // no IQ_OPEN_LOOPS: a single evaluation of Iq per q point
208	result[i] = scale*Iq(qi, IQ_PARAMETERS) + background;
209	#endif // IQ_OPEN_LOOPS
210	}
211	}
212	#endif // IQ_KERNEL_NAME
213
214
215	#ifdef IQXY_KERNEL_NAME
216	kernel void IQXY_KERNEL_NAME( // 2D kernel: writes result[i] from (qx[i], qy[i]) for every i below Nq
217	global const double *qx,
218	global const double *qy,
219	global double *result,
220	const int Nq, // number of (qx, qy) points to evaluate
221	#ifdef IQXY_OPEN_LOOPS
222	#ifdef USE_OPENCL
223	global double *loops_g, // OpenCL only: source of the copy into loops
224	#endif
225	local double *loops, // table read by IQXY_OPEN_LOOPS; the example below reads (value, weight) pairs
226	const double cutoff, // points whose weight does not exceed this are skipped
227	IQXY_DISPERSION_LENGTH_DECLARATIONS,
228	#endif // IQXY_OPEN_LOOPS
229	IQXY_FIXED_PARAMETER_DECLARATIONS
230	)
231	{
232	#ifdef USE_OPENCL
233	#ifdef IQXY_OPEN_LOOPS
234	// copy loops info to local memory
235	event_t e = async_work_group_copy(loops, loops_g, (IQXY_DISPERSION_LENGTH_SUM)*2, 0);
236	wait_group_events(1, &e);
237	#endif
238
239	int i = get_global_id(0); // each work item handles one q index
240	if (i < Nq) // work items with an index past the end do nothing
241	#else
242	#pragma omp parallel for
243	for (int i=0; i < Nq; i++)
244	#endif
245	{
246	const double qxi = qx[i];
247	const double qyi = qy[i];
248	#if USE_KAHAN_SUMMATION
249	double accumulated_error = 0.0; // running compensation term for the sum in ret
250	#endif
251	#ifdef IQXY_OPEN_LOOPS
252	double ret=0.0, norm=0.0; // weighted sum of Iqxy and its normalization
253	IQXY_OPEN_LOOPS
254	//for (int radius_i=0; radius_i < Nradius; radius_i++) {
255	// const double radius = loops[2*(radius_i)];
256	// const double radius_w = loops[2*(radius_i)+1];
257	double weight = IQXY_WEIGHT_PRODUCT; // not const: scaled by the spherical correction below
258	if (weight > cutoff) {
259
260	const double scattering = Iqxy(qxi, qyi, IQXY_PARAMETERS);
261	if (!isnan(scattering)) { // if scattering is bad, exclude it from sum
262	#if defined(IQXY_HAS_THETA)
263	// Force a nominal value for the spherical correction even when
264	// theta is +90/-90 so that there are no divide by zero problems.
265	// For theta fixed at +90/-90, we effectively multiply top and bottom
266	// by 1e-6, so the effect cancels.
267	const double spherical_correction = fmax(fabs(cos(M_PI_180*theta)), 1.e-6);
268	weight *= spherical_correction;
269	#endif
270	const double next = weight * scattering;
271	#if USE_KAHAN_SUMMATION
272	const double y = next - accumulated_error; // apply the correction carried from earlier terms
273	const double t = ret + y;
274	accumulated_error = (t - ret) - y; // rounding error introduced by this addition
275	ret = t;
276	#else
277	ret += next;
278	#endif
279	#ifdef VOLUME_PARAMETERS
280	norm += weight*form_volume(VOLUME_PARAMETERS); // volume-weighted normalization
281	#else
282	norm += weight;
283	#endif
284	}
285	//else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
286	}
287	IQXY_CLOSE_LOOPS
288	// norm can only be zero if volume is zero, so no scattering
289	result[i] = (norm>0. ? scale*ret/norm + background : background);
290	#else // no IQXY_OPEN_LOOPS: a single evaluation of Iqxy per point
291	result[i] = scale*Iqxy(qxi, qyi, IQXY_PARAMETERS) + background;
292	#endif // IQXY_OPEN_LOOPS
293	}
294	}
295	#endif // IQXY_KERNEL_NAME

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasmodels/sasmodels/kernel_template.c @ 5efe850

Download in other formats: