kernel_iq.c @ 9acade6

core_shell_microgelscostrafo411magnetic_modelrelease_v0.94release_v0.95ticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests

Last change on this file since 9acade6 was bde38b5, checked in by Paul Kienzle <pkienzle@…>, 8 years ago
simplify kernel calling
Property mode set to `100644`
File size: 11.7 KB

Line
1
2	/*
3	##########################################################
4	# #
5	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
6	# !! !! #
7	# !! KEEP THIS CODE CONSISTENT WITH KERNELPY.PY !! #
8	# !! !! #
9	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
10	# #
11	##########################################################
12	*/
13
14	#ifndef _PAR_BLOCK_ // protected block so we can include this code twice.
15	#define _PAR_BLOCK_
16
/*
 * Layout of the polydispersity problem handed to the kernel.
 *
 * The per-level arrays exist only when MAX_PD > 0; entry n describes the
 * n-th polydispersity loop level.  Field order is part of the interface
 * because the kernel reads this struct through a global pointer, so it
 * must not be rearranged.
 */
typedef struct {
#if MAX_PD > 0
    int32_t pd_par[MAX_PD];     /* id of the nth polydispersity variable */
    int32_t pd_length[MAX_PD];  /* length of the nth polydispersity weight vector */
    int32_t pd_offset[MAX_PD];  /* offset of pd weights in the value & weight vector */
    int32_t pd_stride[MAX_PD];  /* stride to move to the next index at this level */
#endif /* MAX_PD > 0 */
    int32_t num_eval;           /* total number of voxels in hypercube */
    int32_t num_weights;        /* total length of the weights vector */
    int32_t num_active;         /* number of non-trivial pd loops */
    int32_t theta_par;          /* id of spherical correction variable */
} ProblemDetails;
29
30	// Intel HD 4000 needs private arrays to be a multiple of 4 long
31	typedef struct {
32	PARAMETER_TABLE
33	} ParameterTable;
34	typedef union {
35	ParameterTable table;
36	double vector[4*((NUM_PARS+3)/4)];
37	} ParameterBlock;
38	#endif // _PAR_BLOCK_
39
40
41	#if defined(MAGNETIC) && NUM_MAGNETIC>0
42
43	// Return value restricted between low and high
// Clamp value to the closed interval [low, high].
// Values below low yield low, values above high yield high, and anything
// else (including a value that compares neither below nor above) is
// returned unchanged.
static double clip(double value, double low, double high)
{
    if (value < low) {
        return low;
    }
    if (value > high) {
        return high;
    }
    return value;
}
48
49	// Compute spin cross sections given in_spin and out_spin
50	// To convert spin cross sections to sld b:
51	// uu * (sld - m_sigma_x);
52	// dd * (sld + m_sigma_x);
53	// ud * (m_sigma_y + 1j*m_sigma_z);
54	// du * (m_sigma_y - 1j*m_sigma_z);
// Fill spins[4] with the weights of the four spin cross sections, in the
// order dd, du, ud, uu.
//
// in_spin and out_spin are first clipped to [0, 1].  Each weight is the
// fourth root (sqrt of sqrt) of the product of the matching incoming and
// outgoing fractions, where the "down" fraction is one minus the "up"
// fraction.
static void set_spins(double in_spin, double out_spin, double spins[4])
{
    const double up_in = clip(in_spin, 0.0, 1.0);
    const double up_out = clip(out_spin, 0.0, 1.0);
    const double down_in = 1.0 - up_in;
    const double down_out = 1.0 - up_out;

    spins[0] = sqrt(sqrt(down_in * down_out)); // dd
    spins[1] = sqrt(sqrt(down_in * up_out));   // du
    spins[2] = sqrt(sqrt(up_in * down_out));   // ud
    spins[3] = sqrt(sqrt(up_in * up_out));     // uu
}
64
// Effective scattering length density for one spin cross section.
//
//   qx, qy : components of the scattering vector
//   p      : multiplier applied to the perpendicular magnetic term
//   mx, my : in-plane components of the magnetic sld
//   sld    : nuclear sld that the magnetic term is added to
//
// Returns sld + (qy*mx - qx*my)*p.  The bracketed term is the z component
// of the 2-D cross product of (mx, my) with (qx, qy).
static double mag_sld(double qx, double qy, double p,
                      double mx, double my, double sld)
{
    const double perp = qy*mx - qx*my;
    return sld + perp*p;
}
71
72	#endif // MAGNETIC
73
74	kernel
75	void KERNEL_NAME(
76	int32_t nq, // number of q values
77	const int32_t pd_start, // where we are in the polydispersity loop
78	const int32_t pd_stop, // where we are stopping in the polydispersity loop
79	global const ProblemDetails *details,
80	global const double *values,
81	global const double *q, // nq q values, with padding to boundary
82	global double *result, // nq+1 return values, again with padding
83	const double cutoff // cutoff in the polydispersity weight product
84	)
85	{
86	// Storage for the current parameter values. These will be updated as we
87	// walk the polydispersity cube.
88	ParameterBlock local_values;
89
90	#if defined(MAGNETIC) && NUM_MAGNETIC>0
91	// Location of the sld parameters in the parameter vector.
92	// These parameters are updated with the effective sld due to magnetism.
93	#if NUM_MAGNETIC > 3
94	const int32_t slds[] = { MAGNETIC_PARS };
95	#endif
96
97	// TODO: could precompute these outside of the kernel.
98	// Interpret polarization cross section.
99	// up_frac_i = values[NUM_PARS+2];
100	// up_frac_f = values[NUM_PARS+3];
101	// up_angle = values[NUM_PARS+4];
102	double spins[4];
103	double cos_mspin, sin_mspin;
104	set_spins(values[NUM_PARS+2], values[NUM_PARS+3], spins);
105	SINCOS(-values[NUM_PARS+4]*M_PI_180, sin_mspin, cos_mspin);
106	#endif // MAGNETIC
107
108	// Fill in the initial variables
109	// values[0] is scale
110	// values[1] is background
111	#ifdef USE_OPENMP
112	#pragma omp parallel for
113	#endif
114	for (int i=0; i < NUM_PARS; i++) {
115	local_values.vector[i] = values[2+i];
116	//printf("p%d = %g\n",i, local_values.vector[i]);
117	}
118	//printf("NUM_VALUES:%d NUM_PARS:%d MAX_PD:%d\n", NUM_VALUES, NUM_PARS, MAX_PD);
119	//printf("start:%d stop:%d\n", pd_start, pd_stop);
120
121	double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
122	if (pd_start == 0) {
123	#ifdef USE_OPENMP
124	#pragma omp parallel for
125	#endif
126	for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0;
127	}
128	//printf("start %d %g %g\n", pd_start, pd_norm, result[0]);
129
130	#if MAX_PD>0
131	global const double *pd_value = values + NUM_VALUES;
132	global const double *pd_weight = pd_value + details->num_weights;
133	#endif
134
135	// Jump into the middle of the polydispersity loop
136	#if MAX_PD>4
137	int n4=details->pd_length[4];
138	int i4=(pd_start/details->pd_stride[4])%n4;
139	const int p4=details->pd_par[4];
140	global const double *v4 = pd_value + details->pd_offset[4];
141	global const double *w4 = pd_weight + details->pd_offset[4];
142	#endif
143	#if MAX_PD>3
144	int n3=details->pd_length[3];
145	int i3=(pd_start/details->pd_stride[3])%n3;
146	const int p3=details->pd_par[3];
147	global const double *v3 = pd_value + details->pd_offset[3];
148	global const double *w3 = pd_weight + details->pd_offset[3];
149	//printf("offset %d: %d %d\n", 3, details->pd_offset[3], NUM_VALUES);
150	#endif
151	#if MAX_PD>2
152	int n2=details->pd_length[2];
153	int i2=(pd_start/details->pd_stride[2])%n2;
154	const int p2=details->pd_par[2];
155	global const double *v2 = pd_value + details->pd_offset[2];
156	global const double *w2 = pd_weight + details->pd_offset[2];
157	#endif
158	#if MAX_PD>1
159	int n1=details->pd_length[1];
160	int i1=(pd_start/details->pd_stride[1])%n1;
161	const int p1=details->pd_par[1];
162	global const double *v1 = pd_value + details->pd_offset[1];
163	global const double *w1 = pd_weight + details->pd_offset[1];
164	#endif
165	#if MAX_PD>0
166	int n0=details->pd_length[0];
167	int i0=(pd_start/details->pd_stride[0])%n0;
168	const int p0=details->pd_par[0];
169	global const double *v0 = pd_value + details->pd_offset[0];
170	global const double *w0 = pd_weight + details->pd_offset[0];
171	//printf("w0:%p, values:%p, diff:%ld, %d\n",w0,values,(w0-values), NUM_VALUES);
172	#endif
173
174
175	#if MAX_PD>0
176	const int theta_par = details->theta_par;
177	const int fast_theta = (theta_par == p0);
178	const int slow_theta = (theta_par >= 0 && !fast_theta);
179	double spherical_correction = 1.0;
180	#else
181	// Note: if not polydisperse the weights cancel and we don't need the
182	// spherical correction.
183	const double spherical_correction = 1.0;
184	#endif
185
186	int step = pd_start;
187
188	#if MAX_PD>4
189	const double weight5 = 1.0;
190	while (i4 < n4) {
191	local_values.vector[p4] = v4[i4];
192	double weight4 = w4[i4] * weight5;
193	//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 4, p4, i4, n4, local_values.vector[p4], weight4);
194	#elif MAX_PD>3
195	const double weight4 = 1.0;
196	#endif
197	#if MAX_PD>3
198	while (i3 < n3) {
199	local_values.vector[p3] = v3[i3];
200	double weight3 = w3[i3] * weight4;
201	//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 3, p3, i3, n3, local_values.vector[p3], weight3);
202	#elif MAX_PD>2
203	const double weight3 = 1.0;
204	#endif
205	#if MAX_PD>2
206	while (i2 < n2) {
207	local_values.vector[p2] = v2[i2];
208	double weight2 = w2[i2] * weight3;
209	//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 2, p2, i2, n2, local_values.vector[p2], weight2);
210	#elif MAX_PD>1
211	const double weight2 = 1.0;
212	#endif
213	#if MAX_PD>1
214	while (i1 < n1) {
215	local_values.vector[p1] = v1[i1];
216	double weight1 = w1[i1] * weight2;
217	//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 1, p1, i1, n1, local_values.vector[p1], weight1);
218	#elif MAX_PD>0
219	const double weight1 = 1.0;
220	#endif
221	#if MAX_PD>0
222	if (slow_theta) { // Theta is not in inner loop
223	spherical_correction = fmax(fabs(cos(M_PI_180*local_values.vector[theta_par])), 1.e-6);
224	}
225	while(i0 < n0) {
226	local_values.vector[p0] = v0[i0];
227	double weight0 = w0[i0] * weight1;
228	//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 0, p0, i0, n0, local_values.vector[p0], weight0);
229	if (fast_theta) { // Theta is in inner loop
230	spherical_correction = fmax(fabs(cos(M_PI_180*local_values.vector[p0])), 1.e-6);
231	}
232	#else
233	const double weight0 = 1.0;
234	#endif
235
236	//printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n");
237	//printf("sphcor: %g\n", spherical_correction);
238
239	#ifdef INVALID
240	if (!INVALID(local_values.table))
241	#endif
242	{
243	// Accumulate I(q)
244	// Note: weight==0 must always be excluded
245	if (weight0 > cutoff) {
246	// spherical correction is set at a minimum of 1e-6, otherwise there
247	// would be problems looking at models with theta=90.
248	const double weight = weight0 * spherical_correction;
249	pd_norm += weight * CALL_VOLUME(local_values.table);
250
251	#ifdef USE_OPENMP
252	#pragma omp parallel for
253	#endif
254	for (int q_index=0; q_index<nq; q_index++) {
255	#if defined(MAGNETIC) && NUM_MAGNETIC > 0
256	const double qx = q[2*q_index];
257	const double qy = q[2*q_index+1];
258	const double qsq = qxqx + qyqy;
259
260	// Constant across orientation, polydispersity for given qx, qy
261	double scattering = 0.0;
262	// TODO: what is the magnetic scattering at q=0
263	if (qsq > 1.e-16) {
264	double p[4]; // dd, du, ud, uu
265	p[0] = (qycos_mspin + qxsin_mspin)/qsq;
266	p[3] = -p[0];
267	p[1] = p[2] = (qysin_mspin - qxcos_mspin)/qsq;
268
269	for (int index=0; index<4; index++) {
270	const double xs = spins[index];
271	if (xs > 1.e-8) {
272	const int spin_flip = (index==1) \|\| (index==2);
273	const double pk = p[index];
274	for (int axis=0; axis<=spin_flip; axis++) {
275	#define M1 NUM_PARS+5
276	#define M2 NUM_PARS+8
277	#define M3 NUM_PARS+13
278	#define SLD(_M_offset, _sld_offset) \
279	local_values.vector[_sld_offset] = xs * (axis \
280	? (index==1 ? -values[_M_offset+2] : values[_M_offset+2]) \
281	: mag_sld(qx, qy, pk, values[_M_offset], values[_M_offset+1], \
282	(spin_flip ? 0.0 : values[_sld_offset+2])))
283	#if NUM_MAGNETIC==1
284	SLD(M1, MAGNETIC_PAR1);
285	#elif NUM_MAGNETIC==2
286	SLD(M1, MAGNETIC_PAR1);
287	SLD(M2, MAGNETIC_PAR2);
288	#elif NUM_MAGNETIC==3
289	SLD(M1, MAGNETIC_PAR1);
290	SLD(M2, MAGNETIC_PAR2);
291	SLD(M3, MAGNETIC_PAR3);
292	#else
293	for (int sk=0; sk<NUM_MAGNETIC; sk++) {
294	SLD(M1+3*sk, slds[sk]);
295	}
296	#endif
297	scattering += CALL_IQ(q, q_index, local_values.table);
298	}
299	}
300	}
301	}
302	#else // !MAGNETIC
303	const double scattering = CALL_IQ(q, q_index, local_values.table);
304	#endif // !MAGNETIC
305	//printf("q_index:%d %g %g %g %g\n",q_index, scattering, weight, spherical_correction, weight0);
306	result[q_index] += weight * scattering;
307	}
308	}
309	}
310	++step;
311	#if MAX_PD>0
312	if (step >= pd_stop) break;
313	++i0;
314	}
315	i0 = 0;
316	#endif
317	#if MAX_PD>1
318	if (step >= pd_stop) break;
319	++i1;
320	}
321	i1 = 0;
322	#endif
323	#if MAX_PD>2
324	if (step >= pd_stop) break;
325	++i2;
326	}
327	i2 = 0;
328	#endif
329	#if MAX_PD>3
330	if (step >= pd_stop) break;
331	++i3;
332	}
333	i3 = 0;
334	#endif
335	#if MAX_PD>4
336	if (step >= pd_stop) break;
337	++i4;
338	}
339	i4 = 0;
340	#endif
341
342	//printf("res: %g/%g\n", result[0], pd_norm);
343	// Remember the updated norm.
344	result[nq] = pd_norm;
345	}

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasmodels/sasmodels/kernel_iq.c @ 9acade6

Download in other formats: