kernel_iq.c @ 9ee2756

core_shell_microgelsmagnetic_modelticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests

Last change on this file since 9ee2756 was 9ee2756, checked in by Paul Kienzle <pkienzle@…>, 7 years ago
simplify kernel wrapper code and combine OpenCL with DLL in one file
Property mode set to `100644`
File size: 21.8 KB

Line
1	/*
2	##########################################################
3	# #
4	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
5	# !! !! #
6	# !! KEEP THIS CODE CONSISTENT WITH KERNELPY.PY !! #
7	# !! !! #
8	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
9	# #
10	##########################################################
11	*/
12
13	// NOTE: the following macros are defined in generate.py:
14	//
15	// MAX_PD : the maximum number of dispersity loops allowed
16	// NUM_PARS : the number of parameters in the parameter table
17	// NUM_VALUES : the number of values to skip at the start of the
18	// values array before you get to the dispersity values.
19	// NUM_MAGNETIC : the number of magnetic parameters
20	// PARAMETER_TABLE : list of parameter declarations used to create the
21	// ParameterTable type.
22	// KERNEL_NAME : model_Iq, model_Iqxy or model_Imagnetic. This code is
23	// included three times, once for each kernel type.
24	// MAGNETIC_PARS : a comma-separated list of indices to the sld
25	// parameters in the parameter table.
26	// MAGNETIC_PAR1, ... : the first few indices of slds in the parameter table.
27	// MAGNETIC : defined when the magnetic kernel is being instantiated
28	// CALL_VOLUME(table) : call the form volume function
29	// CALL_IQ(q, table) : call the Iq function for 1D calcs.
30	// CALL_IQ_A(q, table) : call the Iq function with \|q\| for 2D data.
31	// CALL_IQ_AC(qa, qc, table) : call the Iqxy function for symmetric shapes
32	// CALL_IQ_ABC(qa, qc, table) : call the Iqxy function for asymmetric shapes
33	// INVALID(table) : test if the current point is feesible to calculate. This
34	// will be defined in the kernel definition file.
35
36	#ifndef _PAR_BLOCK_ // protected block so we can include this code twice.
37	#define _PAR_BLOCK_
38
39	typedef struct {
40	#if MAX_PD > 0
41	int32_t pd_par[MAX_PD]; // id of the nth polydispersity variable
42	int32_t pd_length[MAX_PD]; // length of the nth polydispersity weight vector
43	int32_t pd_offset[MAX_PD]; // offset of pd weights in the value & weight vector
44	int32_t pd_stride[MAX_PD]; // stride to move to the next index at this level
45	#endif // MAX_PD > 0
46	int32_t num_eval; // total number of voxels in hypercube
47	int32_t num_weights; // total length of the weights vector
48	int32_t num_active; // number of non-trivial pd loops
49	int32_t theta_par; // id of first orientation variable
50	} ProblemDetails;
51
52	// Intel HD 4000 needs private arrays to be a multiple of 4 long
53	typedef struct {
54	PARAMETER_TABLE
55	} ParameterTable;
56	typedef union {
57	ParameterTable table;
58	double vector[4*((NUM_PARS+3)/4)];
59	} ParameterBlock;
60	#endif // _PAR_BLOCK_
61
62	#if defined(MAGNETIC) && NUM_MAGNETIC > 0
63	// ===== Helper functions for magnetism =====
64
65	// Return value restricted between low and high
66	static double clip(double value, double low, double high)
67	{
68	return (value < low ? low : (value > high ? high : value));
69	}
70
71	// Compute spin cross sections given in_spin and out_spin
72	// To convert spin cross sections to sld b:
73	// uu * (sld - m_sigma_x);
74	// dd * (sld + m_sigma_x);
75	// ud * (m_sigma_y + 1j*m_sigma_z);
76	// du * (m_sigma_y - 1j*m_sigma_z);
77	static void set_spins(double in_spin, double out_spin, double spins[4])
78	{
79	in_spin = clip(in_spin, 0.0, 1.0);
80	out_spin = clip(out_spin, 0.0, 1.0);
81	spins[0] = sqrt(sqrt((1.0-in_spin) * (1.0-out_spin))); // dd
82	spins[1] = sqrt(sqrt((1.0-in_spin) * out_spin)); // du
83	spins[2] = sqrt(sqrt(in_spin * (1.0-out_spin))); // ud
84	spins[3] = sqrt(sqrt(in_spin * out_spin)); // uu
85	}
86
87	// Compute the magnetic sld
88	static double mag_sld(double qx, double qy, double p,
89	double mx, double my, double sld)
90	{
91	const double perp = qymx - qxmy;
92	return sld + perp*p;
93	}
94
95	#endif
96
97	// ===== Helper functions for orientation and jitter =====
98
99	// To change the definition of the angles, run explore/angles.py, which
100	// uses sympy to generate the equations.
101
102	#if !defined(_QAC_SECTION) && defined(CALL_IQ_AC)
103	#define _QAC_SECTION
104
105	typedef struct {
106	double R31, R32;
107	} QACRotation;
108
109	// Fill in the rotation matrix R from the view angles (theta, phi) and the
110	// jitter angles (dtheta, dphi). This matrix can be applied to all of the
111	// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qc]'
112	static void
113	qac_rotation(
114	QACRotation *rotation,
115	double theta, double phi,
116	double dtheta, double dphi)
117	{
118	double sin_theta, cos_theta;
119	double sin_phi, cos_phi;
120
121	// reverse view matrix
122	SINCOS(theta*M_PI_180, sin_theta, cos_theta);
123	SINCOS(phi*M_PI_180, sin_phi, cos_phi);
124	const double V11 = cos_phi*cos_theta;
125	const double V12 = sin_phi*cos_theta;
126	const double V21 = -sin_phi;
127	const double V22 = cos_phi;
128	const double V31 = sin_theta*cos_phi;
129	const double V32 = sin_phi*sin_theta;
130
131	// reverse jitter matrix
132	SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
133	SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
134	const double J31 = sin_theta;
135	const double J32 = -sin_phi*cos_theta;
136	const double J33 = cos_phi*cos_theta;
137
138	// reverse matrix
139	rotation->R31 = J31V11 + J32V21 + J33*V31;
140	rotation->R32 = J31V12 + J32V22 + J33*V32;
141	}
142
143	// Apply the rotation matrix returned from qac_rotation to the point (qx,qy),
144	// returning R*[qx,qy]' = [qa,qc]'
145	static double
146	qac_apply(
147	QACRotation rotation,
148	double qx, double qy,
149	double qa_out, double qc_out)
150	{
151	const double dqc = rotation.R31qx + rotation.R32qy;
152	// Indirect calculation of qab, from qab^2 = \|q\|^2 - qc^2
153	const double dqa = sqrt(-dqcdqc + qxqx + qy*qy);
154
155	*qa_out = dqa;
156	*qc_out = dqc;
157	}
158	#endif // _QAC_SECTION
159
160	#if !defined(_QABC_SECTION) && defined(CALL_IQ_ABC)
161	#define _QABC_SECTION
162
163	typedef struct {
164	double R11, R12;
165	double R21, R22;
166	double R31, R32;
167	} QABCRotation;
168
169	// Fill in the rotation matrix R from the view angles (theta, phi, psi) and the
170	// jitter angles (dtheta, dphi, dpsi). This matrix can be applied to all of the
171	// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qb,qc]'
172	static void
173	qabc_rotation(
174	QABCRotation *rotation,
175	double theta, double phi, double psi,
176	double dtheta, double dphi, double dpsi)
177	{
178	double sin_theta, cos_theta;
179	double sin_phi, cos_phi;
180	double sin_psi, cos_psi;
181
182	// reverse view matrix
183	SINCOS(theta*M_PI_180, sin_theta, cos_theta);
184	SINCOS(phi*M_PI_180, sin_phi, cos_phi);
185	SINCOS(psi*M_PI_180, sin_psi, cos_psi);
186	const double V11 = -sin_phisin_psi + cos_phicos_psi*cos_theta;
187	const double V12 = sin_phicos_psicos_theta + sin_psi*cos_phi;
188	const double V21 = -sin_phicos_psi - sin_psicos_phi*cos_theta;
189	const double V22 = -sin_phisin_psicos_theta + cos_phi*cos_psi;
190	const double V31 = sin_theta*cos_phi;
191	const double V32 = sin_phi*sin_theta;
192
193	// reverse jitter matrix
194	SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
195	SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
196	SINCOS(dpsi*M_PI_180, sin_psi, cos_psi);
197	const double J11 = cos_psi*cos_theta;
198	const double J12 = sin_phisin_thetacos_psi + sin_psi*cos_phi;
199	const double J13 = sin_phisin_psi - sin_thetacos_phi*cos_psi;
200	const double J21 = -sin_psi*cos_theta;
201	const double J22 = -sin_phisin_psisin_theta + cos_phi*cos_psi;
202	const double J23 = sin_phicos_psi + sin_psisin_theta*cos_phi;
203	const double J31 = sin_theta;
204	const double J32 = -sin_phi*cos_theta;
205	const double J33 = cos_phi*cos_theta;
206
207	// reverse matrix
208	rotation->R11 = J11V11 + J12V21 + J13*V31;
209	rotation->R12 = J11V12 + J12V22 + J13*V32;
210	rotation->R21 = J21V11 + J22V21 + J23*V31;
211	rotation->R22 = J21V12 + J22V22 + J23*V32;
212	rotation->R31 = J31V11 + J32V21 + J33*V31;
213	rotation->R32 = J31V12 + J32V22 + J33*V32;
214	}
215
216	// Apply the rotation matrix returned from qabc_rotation to the point (qx,qy),
217	// returning R*[qx,qy]' = [qa,qb,qc]'
218	static double
219	qabc_apply(
220	QABCRotation rotation,
221	double qx, double qy,
222	double qa_out, double qb_out, double *qc_out)
223	{
224	qa_out = rotation.R11qx + rotation.R12*qy;
225	qb_out = rotation.R21qx + rotation.R22*qy;
226	qc_out = rotation.R31qx + rotation.R32*qy;
227	}
228
229	#endif // _QABC_SECTION
230
231
232	// ==================== KERNEL CODE ========================
233
234	kernel
235	void KERNEL_NAME(
236	int32_t nq, // number of q values
237	const int32_t pd_start, // where we are in the polydispersity loop
238	const int32_t pd_stop, // where we are stopping in the polydispersity loop
239	global const ProblemDetails *details,
240	global const double *values,
241	global const double *q, // nq q values, with padding to boundary
242	global double *result, // nq+1 return values, again with padding
243	const double cutoff // cutoff in the polydispersity weight product
244	)
245	{
246	#ifdef USE_OPENCL
247	// who we are and what element we are working with
248	const int q_index = get_global_id(0);
249	if (q_index >= nq) return;
250	#else
251	// Define q_index here so that debugging statements can be written to work
252	// for both OpenCL and DLL using:
253	// if (q_index == 0) {printf(...);}
254	int q_index = 0;
255	#endif
256
257	// Storage for the current parameter values. These will be updated as we
258	// walk the polydispersity cube.
259	ParameterBlock local_values;
260
261	#if defined(MAGNETIC) && NUM_MAGNETIC>0
262	// Location of the sld parameters in the parameter vector.
263	// These parameters are updated with the effective sld due to magnetism.
264	#if NUM_MAGNETIC > 3
265	const int32_t slds[] = { MAGNETIC_PARS };
266	#endif
267
268	// Interpret polarization cross section.
269	// up_frac_i = values[NUM_PARS+2];
270	// up_frac_f = values[NUM_PARS+3];
271	// up_angle = values[NUM_PARS+4];
272	// TODO: could precompute more magnetism parameters before calling the kernel.
273	double spins[4];
274	double cos_mspin, sin_mspin;
275	set_spins(values[NUM_PARS+2], values[NUM_PARS+3], spins);
276	SINCOS(-values[NUM_PARS+4]*M_PI_180, sin_mspin, cos_mspin);
277	#endif // MAGNETIC
278
279	// Fill in the initial variables
280	// values[0] is scale
281	// values[1] is background
282	#ifdef USE_OPENMP
283	#pragma omp parallel for
284	#endif
285	for (int i=0; i < NUM_PARS; i++) {
286	local_values.vector[i] = values[2+i];
287	//if (q_index==0) printf("p%d = %g\n",i, local_values.vector[i]);
288	}
289	//if (q_index==0) printf("NUM_VALUES:%d NUM_PARS:%d MAX_PD:%d\n", NUM_VALUES, NUM_PARS, MAX_PD);
290	//if (q_index==0) printf("start:%d stop:%d\n", pd_start, pd_stop);
291
292	// If pd_start is zero that means that we are starting a new calculation,
293	// and must initialize the result to zero. Otherwise, we are restarting
294	// the calculation from somewhere in the middle of the dispersity mesh,
295	// and we update the value rather than reset it. Similarly for the
296	// normalization factor, which is stored as the final value in the
297	// results vector (one past the number of q values).
298	//
299	// The code differs slightly between opencl and dll since opencl is only
300	// seeing one q value (stored in the variable "this_result") while the dll
301	// version must loop over all q.
302	#ifdef USE_OPENCL
303	double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
304	double this_result = (pd_start == 0 ? 0.0 : result[q_index]);
305	#else // !USE_OPENCL
306	double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
307	if (pd_start == 0) {
308	#ifdef USE_OPENMP
309	#pragma omp parallel for
310	#endif
311	for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0;
312	}
313	//if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]);
314	#endif // !USE_OPENCL
315
316
317	// ====== macros to set up the parts of the loop =======
318	/*
319	Based on the level of the loop, uses C preprocessor magic to construct
320	level-specific looping variables, including these from loop level 3:
321
322	int n3 : length of loop for mesh level 3
323	int i3 : current position in the loop for level 3, which is calculated
324	from a combination of pd_start, pd_stride[3] and pd_length[3].
325	int p3 : is the index into the parameter table for mesh level 3
326	double v3[] : pointer into dispersity array to values for loop 3
327	double w3[] : pointer into dispersity array to weights for loop 3
328	double weight3 : the product of weights from levels 3 and up, computed
329	as weight5weight4w3[i3]. Note that we need an outermost
330	value weight5 set to 1.0 for this to work properly.
331
332	After expansion, the loop struction will look like the following:
333
334	// --- PD_INIT(4) ---
335	const int n4 = pd_length[4];
336	const int p4 = pd_par[4];
337	global const double *v4 = pd_value + pd_offset[4];
338	global const double *w4 = pd_weight + pd_offset[4];
339	int i4 = (pd_start/pd_stride[4])%n4; // position in level 4 at pd_start
340
341	// --- PD_INIT(3) ---
342	const int n3 = pd_length[3];
343	...
344	int i3 = (pd_start/pd_stride[3])%n3; // position in level 3 at pd_start
345
346	PD_INIT(2)
347	PD_INIT(1)
348	PD_INIT(0)
349
350	// --- PD_OUTERMOST_WEIGHT(5) ---
351	const double weight5 = 1.0;
352
353	// --- PD_OPEN(4,5) ---
354	while (i4 < n4) {
355	parameter[p4] = v4[i4]; // set the value for pd parameter 4 at this mesh point
356	const double weight4 = w4[i4] * weight5;
357
358	// from PD_OPEN(3,4)
359	while (i3 < n3) {
360	parameter[p3] = v3[i3]; // set the value for pd parameter 3 at this mesh point
361	const double weight3 = w3[i3] * weight4;
362
363	PD_OPEN(3,2)
364	PD_OPEN(2,1)
365	PD_OPEN(0,1)
366
367	... main loop body ...
368
369	++step; // increment counter representing position in dispersity mesh
370
371	PD_CLOSE(0)
372	PD_CLOSE(1)
373	PD_CLOSE(2)
374
375	// --- PD_CLOSE(3) ---
376	if (step >= pd_stop) break;
377	++i3;
378	}
379	i3 = 0; // reset loop counter for next round through the loop
380
381	// --- PD_CLOSE(4) ---
382	if (step >= pd_stop) break;
383	++i4;
384	}
385	i4 = 0; // reset loop counter even though no more rounds through the loop
386
387	*/
388
389	// Define looping variables
390	#define PD_INIT(_LOOP) \
391	const int n##_LOOP = details->pd_length[_LOOP]; \
392	const int p##_LOOP = details->pd_par[_LOOP]; \
393	global const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \
394	global const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \
395	int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP;
396
397	// Jump into the middle of the polydispersity loop
398	#define PD_OPEN(_LOOP,_OUTER) \
399	while (i##_LOOP < n##_LOOP) { \
400	local_values.vector[p##_LOOP] = v##_LOOP[i##_LOOP]; \
401	const double weight##_LOOP = w##_LOOP[i##_LOOP] * weight##_OUTER;
402
403	// create the variable "weight#=1.0" where # is the outermost level+1 (=MAX_PD).
404	#define _PD_OUTERMOST_WEIGHT(_n) const double weight##_n = 1.0;
405	#define PD_OUTERMOST_WEIGHT(_n) _PD_OUTERMOST_WEIGHT(_n)
406
407	// Close out the loop
408	#define PD_CLOSE(_LOOP) \
409	if (step >= pd_stop) break; \
410	++i##_LOOP; \
411	} \
412	i##_LOOP = 0;
413
414	// The main loop body is essentially:
415	//
416	// BUILD_ROTATION // construct the rotation matrix qxy => qabc
417	// for each q
418	// FETCH_Q // set qx,qy from the q input vector
419	// APPLY_ROTATION // convert qx,qy to qa,qb,qc
420	// CALL_KERNEL // scattering = Iqxy(qa, qb, qc, p1, p2, ...)
421	//
422	// Depending on the shape type (radial, axial, triaxial), the variables
423	// and calling parameters will be slightly different. These macros
424	// capture the differences in one spot so the rest of the code is easier
425	// to read.
426	#if defined(CALL_IQ)
427	// unoriented 1D
428	double qk;
429	#define FETCH_Q do { qk = q[q_index]; } while (0)
430	#define BUILD_ROTATION do {} while(0)
431	#define APPLY_ROTATION do {} while(0)
432	#define CALL_KERNEL CALL_IQ(qk, local_values.table)
433
434	#elif defined(CALL_IQ_A)
435	// unoriented 2D
436	double qx, qy;
437	#define FETCH_Q do { qx = q[2q_index]; qy = q[2q_index+1]; } while (0)
438	#define BUILD_ROTATION do {} while(0)
439	#define APPLY_ROTATION do {} while(0)
440	#define CALL_KERNEL CALL_IQ_A(sqrt(qxqx+qyqy), local_values.table)
441
442	#elif defined(CALL_IQ_AC)
443	// oriented symmetric 2D
444	double qx, qy;
445	#define FETCH_Q do { qx = q[2q_index]; qy = q[2q_index+1]; } while (0)
446	double qa, qc;
447	QACRotation rotation;
448	// Grab the "view" angles (theta, phi) from the initial parameter table.
449	// These are separate from the "jitter" angles (dtheta, dphi) which are
450	// stored with the dispersity values and copied to the local parameter
451	// table as we go through the mesh.
452	const double theta = values[details->theta_par+2];
453	const double phi = values[details->theta_par+3];
454	#define BUILD_ROTATION qac_rotation(&rotation, \
455	theta, phi, local_values.table.theta, local_values.table.phi)
456	#define APPLY_ROTATION qac_apply(rotation, qx, qy, &qa, &qc)
457	#define CALL_KERNEL CALL_IQ_AC(qa, qc, local_values.table)
458
459	#elif defined(CALL_IQ_ABC)
460	// oriented asymmetric 2D
461	double qx, qy;
462	#define FETCH_Q do { qx = q[2q_index]; qy = q[2q_index+1]; } while (0)
463	double qa, qb, qc;
464	QABCRotation rotation;
465	// Grab the "view" angles (theta, phi, psi) from the initial parameter table.
466	// These are separate from the "jitter" angles (dtheta, dphi, dpsi) which are
467	// stored with the dispersity values and copied to the local parameter
468	// table as we go through the mesh.
469	const double theta = values[details->theta_par+2];
470	const double phi = values[details->theta_par+3];
471	const double psi = values[details->theta_par+4];
472	#define BUILD_ROTATION qabc_rotation(&rotation, \
473	theta, phi, psi, local_values.table.theta, \
474	local_values.table.phi, local_values.table.psi)
475	#define APPLY_ROTATION qabc_apply(rotation, qx, qy, &qa, &qb, &qc)
476	#define CALL_KERNEL CALL_IQ_ABC(qa, qb, qc, local_values.table)
477
478	#endif
479
480	// ====== construct the loops =======
481
482	// Pointers to the start of the dispersity and weight vectors, if needed.
483	#if MAX_PD>0
484	global const double *pd_value = values + NUM_VALUES;
485	global const double *pd_weight = pd_value + details->num_weights;
486	#endif
487
488	// The variable "step" is the current position in the dispersity loop.
489	// It will be incremented each time a new point in the mesh is accumulated,
490	// and used to test whether we have reached pd_stop.
491	int step = pd_start;
492
493	// define looping variables
494	#if MAX_PD>4
495	PD_INIT(4)
496	#endif
497	#if MAX_PD>3
498	PD_INIT(3)
499	#endif
500	#if MAX_PD>2
501	PD_INIT(2)
502	#endif
503	#if MAX_PD>1
504	PD_INIT(1)
505	#endif
506	#if MAX_PD>0
507	PD_INIT(0)
508	#endif
509
510	// open nested loops
511	PD_OUTERMOST_WEIGHT(MAX_PD)
512	#if MAX_PD>4
513	PD_OPEN(4,5)
514	#endif
515	#if MAX_PD>3
516	PD_OPEN(3,4)
517	#endif
518	#if MAX_PD>2
519	PD_OPEN(2,3)
520	#endif
521	#if MAX_PD>1
522	PD_OPEN(1,2)
523	#endif
524	#if MAX_PD>0
525	PD_OPEN(0,1)
526	#endif
527
528
529	//if (q_index==0) {printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n");}
530
531	// ====== loop body =======
532	#ifdef INVALID
533	if (!INVALID(local_values.table))
534	#endif
535	{
536	// Accumulate I(q)
537	// Note: weight==0 must always be excluded
538	if (weight0 > cutoff) {
539	pd_norm += weight0 * CALL_VOLUME(local_values.table);
540	BUILD_ROTATION;
541
542	#ifndef USE_OPENCL
543	// DLL needs to explicitly loop over the q values.
544	#ifdef USE_OPENMP
545	#pragma omp parallel for
546	#endif
547	for (q_index=0; q_index<nq; q_index++)
548	#endif // !USE_OPENCL
549	{
550
551	FETCH_Q;
552	APPLY_ROTATION;
553
554	// ======= COMPUTE SCATTERING ==========
555	#if defined(MAGNETIC) && NUM_MAGNETIC > 0
556	// Constant across orientation, polydispersity for given qx, qy
557	double scattering = 0.0;
558	// TODO: what is the magnetic scattering at q=0
559	const double qsq = qxqx + qyqy;
560	if (qsq > 1.e-16) {
561	double p[4]; // dd, du, ud, uu
562	p[0] = (qycos_mspin + qxsin_mspin)/qsq;
563	p[3] = -p[0];
564	p[1] = p[2] = (qysin_mspin - qxcos_mspin)/qsq;
565
566	for (int index=0; index<4; index++) {
567	const double xs = spins[index];
568	if (xs > 1.e-8) {
569	const int spin_flip = (index==1) \|\| (index==2);
570	const double pk = p[index];
571	for (int axis=0; axis<=spin_flip; axis++) {
572	#define SLD(_M_offset, _sld_offset) \
573	local_values.vector[_sld_offset] = xs * (axis \
574	? (index==1 ? -values[_M_offset+2] : values[_M_offset+2]) \
575	: mag_sld(qx, qy, pk, values[_M_offset], values[_M_offset+1], \
576	(spin_flip ? 0.0 : values[_sld_offset+2])))
577	#define M1 NUM_PARS+5
578	#if NUM_MAGNETIC==1
579	SLD(M1, MAGNETIC_PAR1);
580	#elif NUM_MAGNETIC==2
581	SLD(M1, MAGNETIC_PAR1);
582	SLD(M1+3, MAGNETIC_PAR2);
583	#elif NUM_MAGNETIC==3
584	SLD(M1, MAGNETIC_PAR1);
585	SLD(M1+3, MAGNETIC_PAR2);
586	SLD(M1+6, MAGNETIC_PAR3);
587	#else
588	for (int sk=0; sk<NUM_MAGNETIC; sk++) {
589	SLD(M1+3*sk, slds[sk]);
590	}
591	#endif
592	#undef SLD
593	scattering += CALL_KERNEL;
594	}
595	}
596	}
597	}
598	#else // !MAGNETIC
599	const double scattering = CALL_KERNEL;
600	#endif // !MAGNETIC
601	//printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight, weight0);
602
603	#ifdef USE_OPENCL
604	this_result += weight0 * scattering;
605	#else // !USE_OPENCL
606	result[q_index] += weight0 * scattering;
607	#endif // !USE_OPENCL
608	}
609	}
610	}
611
612	// close nested loops
613	++step;
614	#if MAX_PD>0
615	PD_CLOSE(0)
616	#endif
617	#if MAX_PD>1
618	PD_CLOSE(1)
619	#endif
620	#if MAX_PD>2
621	PD_CLOSE(2)
622	#endif
623	#if MAX_PD>3
624	PD_CLOSE(3)
625	#endif
626	#if MAX_PD>4
627	PD_CLOSE(4)
628	#endif
629
630	// clear the macros in preparation for the next kernel
631	#undef PD_INIT
632	#undef PD_OPEN
633	#undef PD_CLOSE
634	#undef FETCH_Q
635	#undef BUILD_ROTATION
636	#undef APPLY_ROTATION
637	#undef CALL_KERNEL
638
639	// Remember the current result and the updated norm.
640	#ifdef USE_OPENCL
641	result[q_index] = this_result;
642	if (q_index == 0) result[nq] = pd_norm;
643	//if (q_index == 0) printf("res: %g/%g\n", result[0], pd_norm);
644	#else // !USE_OPENCL
645	result[nq] = pd_norm;
646	//printf("res: %g/%g\n", result[0], pd_norm);
647	#endif // !USE_OPENCL
648	}

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasmodels/sasmodels/kernel_iq.c @ 9ee2756

Download in other formats: