kernel_template.c

Last change on this file was 1e7b0db0, checked in by wojciech, 7 years ago
sinc tranfered to sas_sinx_x
Property mode set to `100644`
File size: 9.4 KB

Line
1	#line 1 "kernel_template.c"
2	// GENERATED CODE --- DO NOT EDIT ---
3	// Code is produced by sasmodels.gen from sasmodels/models/MODEL.c
4
5	#ifdef __OPENCL_VERSION__
6	# define USE_OPENCL
7	#endif
8
9	#define USE_KAHAN_SUMMATION 0
10
11	// If opencl is not available, then we are compiling a C function
12	// Note: if using a C++ compiler, then define kernel as extern "C"
13	#ifndef USE_OPENCL
14	// Use SAS_DOUBLE to force the use of double even for float kernels
15	# define SAS_DOUBLE dou ## ble
16	# ifdef __cplusplus
17	#include <cstdio>
18	#include <cmath>
19	using namespace std;
20	#if defined(_MSC_VER)
21	#include <limits>
22	#include <float.h>
23	#define kernel extern "C" __declspec( dllexport )
24	inline double trunc(double x) { return x>=0?floor(x):-floor(-x); }
25	inline double fmin(double x, double y) { return x>y ? y : x; }
26	inline double fmax(double x, double y) { return x<y ? y : x; }
27	#define isnan(x) _isnan(x)
28	#define isinf(x) (!_finite(x))
29	#define isfinite(x) _finite(x)
30	#define NAN (std::numeric_limits<double>::quiet_NaN()) // non-signalling NaN
31	#define INFINITY (std::numeric_limits<double>::infinity())
32	#define NEED_EXPM1
33	#define NEED_TGAMMA
34	#define NEED_ERF
35	#else
36	#define kernel extern "C"
37	#endif
38	inline void SINCOS(double angle, double &svar, double &cvar) { svar=sin(angle); cvar=cos(angle); }
39	# else
40	#include <stdio.h>
41	#if defined(__TINYC__)
42	#include <math.h>
43	// TODO: test isnan
44	inline double _isnan(double x) { return x != x; } // hope this doesn't optimize away!
45	#undef isnan
46	#define isnan(x) _isnan(x)
47	// Defeat the double->float conversion since we don't have tgmath
48	inline SAS_DOUBLE trunc(SAS_DOUBLE x) { return x>=0?floor(x):-floor(-x); }
49	inline SAS_DOUBLE fmin(SAS_DOUBLE x, SAS_DOUBLE y) { return x>y ? y : x; }
50	inline SAS_DOUBLE fmax(SAS_DOUBLE x, SAS_DOUBLE y) { return x<y ? y : x; }
51	#define NEED_EXPM1
52	#define NEED_TGAMMA
53	#define NEED_ERF
54	#else
55	#include <tgmath.h> // C99 type-generic math, so sin(float) => sinf
56	#endif
57	// MSVC doesn't support C99, so no need for dllexport on C99 branch
58	#define kernel
59	#define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
60	# endif
61	# define global
62	# define local
63	# define constant const
64	// OpenCL powr(a,b) = C99 pow(a,b), b >= 0
65	// OpenCL pown(a,b) = C99 pow(a,b), b integer
66	# define powr(a,b) pow(a,b)
67	# define pown(a,b) pow(a,b)
68	#else
69	# if defined(USE_SINCOS)
70	# define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)
71	# else
72	# define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
73	# endif
74	#endif
75
76	#if defined(NEED_EXPM1)
77	static SAS_DOUBLE expm1(SAS_DOUBLE x_in) {
78	double x = (double)x_in; // go back to float for single precision kernels
79	// Adapted from the cephes math library.
80	// Copyright 1984 - 1992 by Stephen L. Moshier
81	if (x != x \|\| x == 0.0) {
82	return x; // NaN and +/- 0
83	} else if (x < -0.5 \|\| x > 0.5) {
84	return exp(x) - 1.0;
85	} else {
86	const double xsq = x*x;
87	const double p = (((
88	+1.2617719307481059087798E-4)*xsq
89	+3.0299440770744196129956E-2)*xsq
90	+9.9999999999999999991025E-1);
91	const double q = ((((
92	+3.0019850513866445504159E-6)*xsq
93	+2.5244834034968410419224E-3)*xsq
94	+2.2726554820815502876593E-1)*xsq
95	+2.0000000000000000000897E0);
96	double r = x * p;
97	r = r / (q - r);
98	return r+r;
99	}
100	}
101	#endif
102
103	// Standard mathematical constants:
104	// M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,
105	// M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), SQRT2, SQRT1_2=sqrt(1/2)
106	// OpenCL defines M_constant_F for float constants, and nothing if double
107	// is not enabled on the card, which is why these constants may be missing
108	#ifndef M_PI
109	# define M_PI 3.141592653589793
110	#endif
111	#ifndef M_PI_2
112	# define M_PI_2 1.570796326794897
113	#endif
114	#ifndef M_PI_4
115	# define M_PI_4 0.7853981633974483
116	#endif
117	#ifndef M_E
118	# define M_E 2.718281828459045091
119	#endif
120
121	// Non-standard function library
122	// pi/180, used for converting between degrees and radians
123	// 4/3 pi for computing sphere volumes
124	// square and cube for computing squares and cubes
125	#ifndef M_PI_180
126	# define M_PI_180 0.017453292519943295
127	#endif
128	#ifndef M_4PI_3
129	# define M_4PI_3 4.18879020478639
130	#endif
131	//inline double square(double x) { return pow(x,2.0); }
132	//inline double square(double x) { return pown(x,2); }
133	inline double square(double x) { return x*x; }
134	inline double cube(double x) { return xxx; }
135	inline double sas_sinx_x(double x) { return x==0 ? 1.0 : sin(x)/x; }
136
137
138	%(DEFINES)s
139
140	%(SOURCES)s
141
142	/*
143	##########################################################
144	# #
145	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
146	# !! !! #
147	# !! KEEP THIS CODE CONSISTENT WITH KERNELPY.PY !! #
148	# !! !! #
149	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
150	# #
151	##########################################################
152	*/
153
154	#ifdef IQ_KERNEL_NAME
155	kernel void IQ_KERNEL_NAME(
156	global const double *q,
157	global double *result,
158	const int Nq,
159	#ifdef IQ_OPEN_LOOPS
160	#ifdef USE_OPENCL
161	global double *loops_g,
162	#endif
163	local double *loops,
164	const double cutoff,
165	IQ_DISPERSION_LENGTH_DECLARATIONS,
166	#endif
167	IQ_FIXED_PARAMETER_DECLARATIONS
168	)
169	{
170	#ifdef USE_OPENCL
171	#ifdef IQ_OPEN_LOOPS
172	// copy loops info to local memory
173	event_t e = async_work_group_copy(loops, loops_g, (IQ_DISPERSION_LENGTH_SUM)*2, 0);
174	wait_group_events(1, &e);
175	#endif
176
177	int i = get_global_id(0);
178	if (i < Nq)
179	#else
180	#pragma omp parallel for
181	for (int i=0; i < Nq; i++)
182	#endif
183	{
184	const double qi = q[i];
185	#ifdef IQ_OPEN_LOOPS
186	double ret=0.0, norm=0.0;
187	IQ_OPEN_LOOPS
188	//for (int radius_i=0; radius_i < Nradius; radius_i++) {
189	// const double radius = loops[2*(radius_i)];
190	// const double radius_w = loops[2*(radius_i)+1];
191
192	const double weight = IQ_WEIGHT_PRODUCT;
193	if (weight > cutoff) {
194	const double scattering = Iq(qi, IQ_PARAMETERS);
195	// allow kernels to exclude invalid regions by returning NaN
196	if (!isnan(scattering)) {
197	ret += weight*scattering;
198	#ifdef VOLUME_PARAMETERS
199	norm += weight * form_volume(VOLUME_PARAMETERS);
200	#else
201	norm += weight;
202	#endif
203	}
204	//else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
205	}
206	IQ_CLOSE_LOOPS
207	// norm can only be zero if volume is zero, so no scattering
208	result[i] = (norm > 0. ? scale*ret/norm + background : background);
209	#else
210	result[i] = scale*Iq(qi, IQ_PARAMETERS) + background;
211	#endif
212	}
213	}
214	#endif
215
216
217	#ifdef IQXY_KERNEL_NAME
218	kernel void IQXY_KERNEL_NAME(
219	global const double *qx,
220	global const double *qy,
221	global double *result,
222	const int Nq,
223	#ifdef IQXY_OPEN_LOOPS
224	#ifdef USE_OPENCL
225	global double *loops_g,
226	#endif
227	local double *loops,
228	const double cutoff,
229	IQXY_DISPERSION_LENGTH_DECLARATIONS,
230	#endif
231	IQXY_FIXED_PARAMETER_DECLARATIONS
232	)
233	{
234	#ifdef USE_OPENCL
235	#ifdef IQXY_OPEN_LOOPS
236	// copy loops info to local memory
237	event_t e = async_work_group_copy(loops, loops_g, (IQXY_DISPERSION_LENGTH_SUM)*2, 0);
238	wait_group_events(1, &e);
239	#endif
240
241	int i = get_global_id(0);
242	if (i < Nq)
243	#else
244	#pragma omp parallel for
245	for (int i=0; i < Nq; i++)
246	#endif
247	{
248	const double qxi = qx[i];
249	const double qyi = qy[i];
250	#if USE_KAHAN_SUMMATION
251	double accumulated_error = 0.0;
252	#endif
253	#ifdef IQXY_OPEN_LOOPS
254	double ret=0.0, norm=0.0;
255	IQXY_OPEN_LOOPS
256	//for (int radius_i=0; radius_i < Nradius; radius_i++) {
257	// const double radius = loops[2*(radius_i)];
258	// const double radius_w = loops[2*(radius_i)+1];
259	double weight = IQXY_WEIGHT_PRODUCT;
260	if (weight > cutoff) {
261
262	const double scattering = Iqxy(qxi, qyi, IQXY_PARAMETERS);
263	if (!isnan(scattering)) { // if scattering is bad, exclude it from sum
264	#if defined(IQXY_HAS_THETA)
265	// Force a nominal value for the spherical correction even when
266	// theta is +0/180 so that there are no divide by zero problems.
267	// For sin(theta) fixed at 0 and 180, we effectively multiply top and bottom
268	// by 1e-6, so the effect cancels.
269	const double spherical_correction = fmax(fabs(cos(M_PI_180*theta)), 1.e-6);
270	weight *= spherical_correction;
271	#endif
272	const double next = weight * scattering;
273	#if USE_KAHAN_SUMMATION
274	const double y = next - accumulated_error;
275	const double t = ret + y;
276	accumulated_error = (t - ret) - y;
277	ret = t;
278	#else
279	ret += next;
280	#endif
281	#ifdef VOLUME_PARAMETERS
282	norm += weight*form_volume(VOLUME_PARAMETERS);
283	#else
284	norm += weight;
285	#endif
286	}
287	//else { printf("exclude qx,qy,I:%%g,%%g,%%g\n",qi,scattering); }
288	}
289	IQXY_CLOSE_LOOPS
290	// norm can only be zero if volume is zero, so no scattering
291	result[i] = (norm>0. ? scale*ret/norm + background : background);
292	#else
293	result[i] = scale*Iqxy(qxi, qyi, IQXY_PARAMETERS) + background;
294	#endif
295	}
296	}
297	#endif

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasmodels/sasmodels/kernel_template.c

Download in other formats: