source: sasmodels/sasmodels/kernel_iq.c @ 208f0a4

/*
    ##########################################################
    #                                                        #
    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
    #   !!                                              !!   #
    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
    #   !!                                              !!   #
    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
    #                                                        #
    ##########################################################
*/

/*
The environment needs to provide the following #defines:

   USE_OPENCL is defined if running in opencl
   KERNEL declares a function to be available externally
   KERNEL_NAME is the name of the function being declared
   NPARS is the number of parameters in the kernel
   PARAMETER_DECL is the declaration of the parameters to the kernel.

       Cylinder:

           #define PARAMETER_DECL \
           double length; \
           double radius; \
           double sld; \
           double sld_solvent

       Note: scale and background are not included

       Multi-shell cylinder (10 shell max):

           #define PARAMETER_DECL \
           double num_shells; \
           double length; \
           double radius[10]; \
           double sld[10]; \
           double sld_solvent

   CALL_IQ(q, nq, i, pars) is the declaration of a call to the kernel.

       Cylinder:

           #define CALL_IQ(q, nq, i, var) \
           Iq(q[i], \
           var.length, \
           var.radius, \
           var.sld, \
           var.sld_solvent)

       Multi-shell cylinder:

           #define CALL_IQ(q, nq, i, var) \
           Iq(q[i], \
           var.num_shells, \
           var.length, \
           var.radius, \
           var.sld, \
           var.sld_solvent)

   CALL_VOLUME(var) is similar, but for calling the form volume.
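
       Cylinder (a sketch, assuming the model provides a
       form_volume(radius, length) function):

           #define CALL_VOLUME(var) \
           form_volume(var.radius, var.length)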

   INVALID(var) is a test that evaluates to true when the model parameters
   are outside their valid range.

       Cylinder:

           #define INVALID(var) 0

       BarBell:

           #define INVALID(var) (var.bell_radius > var.radius)

       Model with complicated constraints:

           inline bool constrained(double p1, double p2, double p3) { return expression; }
           #define INVALID(var) constrained(var.p1, var.p2, var.p3)

   IQ_FUNC could be Iq or Iqxy
   IQ_PARS could be q[i] or q[2*i],q[2*i+1]
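
       For example, a 2-D kernel might use (a sketch, not the generated code):

           #define IQ_FUNC Iqxy
           #define IQ_PARS q[2*i],q[2*i+1]

       so that IQ_FUNC(IQ_PARS, ...) expands to Iqxy(q[2*i], q[2*i+1], ...).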

Our design supports a limited number of polydispersity loops, wherein
we need to cycle through the values of the polydispersity, calculate
the I(q, p) for each combination of parameters, and accumulate a
weighted sum that is normalized by the total weight at the end.
Parameters may be passed to the underlying calculation engine as scalars
or vectors, but the polydispersity calculator treats the parameter set
as one long vector.

Let's assume we have 6 parameters in the model, with two of them polydisperse:

    0: scale        {scl = constant}
    1: background   {bkg = constant}
    5: length       {l = vector of 30pts}
    4: radius       {r = vector of 10pts}
    3: sld          {s = constant/(radius**2*length)}
    2: sld_solvent  {s2 = constant}

This generates the following call to the kernel (where x stands for an
arbitrary value that is not used by the kernel evaluator):

    NPARS = 4  // scale and background are in all models
    problem {
        pd_par = {5, 4, x, x}         // parameters *length* (5) and *radius* (4) vary
        pd_length = {30, 10, 0, 0}    // *length* has more, so it is first
        pd_offset = {10, 0, x, x}     // *length* starts at index 10 in weights
        pd_stride = {1, 30, 300, 300} // cumulative product of pd length
        pd_isvol = {1, 1, x, x}       // true if weight is a volume weight
        par_offset = {2, 3, 303, 313}  // parameter offsets
        par_coord = {0, 3, 2, 1} // bitmap of parameter dependencies
        fast_coord_index = {5, 3, x, x}
        fast_coord_count = 2  // two parameters vary with *length* distribution
        theta_var = -1   // no spherical correction
        fast_theta = 0   // spherical correction angle is not pd 1
    }

    weight = { r0, ..., r9, l0, ..., l29 }
    pars = { scl, bkg, s2, s[l0,r0], ..., s[l29,r0], s[l0,r1], ..., s[l29,r9],
             r0, ..., r9, l0, ..., l29 }

    nq = 130
    q = { q0, q1, ..., q129, x, x }  # pad to 8 element boundary
    result = {r0, ..., r129, norm, vol, norm_vol, x, x, x, x, x, x, x}
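
The total number of points in the polydispersity loop is pd_stride[MAX_PD-1]
= 300 (30 length points x 10 radius points); the [pd_start, pd_stop) ranges
passed to the kernel partition that range.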

The polydisperse parameters are stored as an array of parameter indices,
one for each polydisperse parameter, in pd_par[n]. Non-polydisperse
parameters do not appear in this array. Each polydisperse parameter has a
weight vector whose length is stored in pd_length[n].  The weights for all
parameters are stored in one contiguous weights[] vector, with the starting
position for each parameter stored in pd_offset[n].  The values
corresponding to the weights are stored together in the separate pars[]
vector, with the offset for parameter pd_par[n] stored in
par_offset[pd_par[n]].  Polydisperse parameters should be stored in
decreasing order of length for highest efficiency.
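
For example (a sketch using the names above), the ith weight and value for
the nth polydisperse parameter can be looked up as:

    double w = weights[pd_offset[n] + i];        // ith weight
    double v = pars[par_offset[pd_par[n]] + i];  // ith value, when the parameter
                                                 // depends only on its own distribution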

We limit the number of polydisperse dimensions to MAX_PD (currently 4).
This cuts the size of the structure in half compared to allowing a
separate polydispersity for each parameter.  This will help a little
bit for models with large numbers of parameters, such as the onion model.

Parameters may be coordinated.  That is, we may have the value of one
parameter depend on a set of other parameters, some of which may be
polydisperse.  For example, if sld is inversely proportional to the
volume of a cylinder, and the length and radius are independently
polydisperse, then for each combination of length and radius we need a
separate value for the sld.  The caller must provide a coordination table
for each parameter containing the value of that parameter for each
combination of the polydisperse parameters v1, v2, etc.  The tables for
each parameter are arranged contiguously in a vector, with offset[k] giving
the starting location of parameter k in the vector.  Each parameter k
defines coord[k] as a bit mask indicating which polydispersity parameters
the parameter depends upon. Usually this is zero, indicating that the
parameter is independent, but for the cylinder example given, the bits for
the radius and length polydispersity parameters would both be set, the
result being a (#radius x #length) table, or maybe a (#length x #radius)
table if length comes first in the polydispersity table.
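
Sketch of the coordinated-value lookup for parameter k (this mirrors the
bit loop in the kernel below):

    int coord = par_coord[k];
    int offset = par_offset[k];
    int block_size = 1;
    for (int bit=0; bit < MAX_PD && coord != 0; bit++) {
        if (coord & 1) {
            offset += block_size * pd_index[bit];
            block_size *= pd_length[bit];
        }
        coord /= 2;
    }
    value = pars[offset];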

NB: If we can guarantee that a compiler and OpenCL driver are available,
we could instead create the coordination function on the fly for each
parameter, saving memory and transfer time, but requiring a C compiler
as part of the environment.

By ordering the polydisperse parameters by decreasing length we can
iterate over the longest dispersion weight vector first.  All parameters
coordinated with this weight vector (the 'fast' parameters) can be
updated with a simple increment to the next position in the parameter
value table.  The indices of these parameters are stored in
fast_coord_index[], with fast_coord_count being the number of fast
parameters.  A total of NPARS slots is allocated to allow for the case
that all parameters are coordinated with the fast index, though this will
likely be mostly empty.  When the fast increment count reaches the end of
the weight vector, the index of the second polydisperse parameter must be
incremented, and all of its coordinated parameters updated.  Because this
operation is not in the inner loop, a slower algorithm can be used.

If there is no polydispersity we pretend that it is polydispersity with one
parameter, pd_start=0 and pd_stop=1.  We may or may not short-circuit the
calculation in this case, depending on how much time it saves.

The problem details structure can be allocated and sent in as an integer
array using the read-only flag.  This allows us to copy it once per fit
along with the weights vector, since features such as the number of
polydispersity elements per pd parameter or the coordination won't change
between function evaluations.  A new parameter vector is sent for
each I(q) evaluation.

To protect against expensive evaluations taking all the GPU resource
on large fits, the entire polydispersity loop will not be computed at once.
Instead, a start and stop location will be sent, indicating where in the
polydispersity loop the calculation should start and where it should
stop.  We can do this for arbitrary start/stop points since we have
unwound the nested loop into a single index, using the same technique as
array index translation: div and mod recover the i,j,k,... indices in the
virtual nested loop.
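
For example, with pd_length = {30, 10} and pd_stride = {1, 30} as above,
loop_index 137 corresponds to

    pd_index[0] = (137/1)  % 30 = 17    // length point
    pd_index[1] = (137/30) % 10 = 4     // radius point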

The results array will be initialized to zero for polydispersity loop
entry zero, and preserved between calls over successive [start, stop)
ranges so that the results accumulate by the time the loop has completed.
Background and scale will be applied when the loop reaches the end.  This
does require that the results array be allocated read-write, which is less
efficient for the GPU, but it makes the calling sequence much more
manageable.
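
For example, the 300-point loop above might be evaluated in three calls
(a sketch of the calling sequence, not the actual host code):

    kernel(..., pd_start=0,   pd_stop=100)
    kernel(..., pd_start=100, pd_stop=200)
    kernel(..., pd_start=200, pd_stop=300)   // final call applies scale/background

with result[] kept on the device between calls.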

Scale and background cannot be coordinated with other polydisperse
parameters.

Oriented objects in 2-D need a spherical correction on the angular variation
in order to preserve the 'surface area' of the weight distribution.

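In this kernel the correction for the parameter selected by theta_var is
fabs(cos(M_PI_180*theta)), i.e. the magnitude of the cosine of the latitude
angle, with theta given in degrees.
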
TODO: cutoff
*/

#define MAX_PD 4  // MAX_PD is the max number of polydisperse parameters
#define PD_2N 16  // PD_2N is the size of the coordination step table

typedef struct {
    int pd_par[MAX_PD];     // index of the nth polydispersity variable
    int pd_length[MAX_PD];  // length of the nth polydispersity weight vector
    int pd_offset[MAX_PD];  // offset of the nth weight vector in weights[]
    int pd_stride[MAX_PD];  // stride to move to the next index at this level
    int pd_isvol[MAX_PD];   // true if parameter is a volume weighting parameter
    int par_offset[NPARS];  // offset of parameter values in pars[]
    int par_coord[NPARS];   // polydispersity coordination bitvector
    int fast_coord_index[NPARS]; // index of the fast coordination parameters
    int fast_coord_count;   // number of parameters coordinated with the fast pd dimension
    int theta_var;          // id of the spherical correction variable (-1 if none)
    int fast_theta;         // true if the spherical correction angle is the fast pd parameter
} ProblemDetails;

typedef struct {
    PARAMETER_DECL;
} ParameterBlock;

// Two-level paste so that KERNEL_NAME and IQ_FUNC are macro-expanded before
// they are concatenated into the full kernel name.
#define KERNEL_PASTE2(a,b) a##b
#define KERNEL_PASTE(a,b) KERNEL_PASTE2(a,b)
#define FULL_KERNEL_NAME KERNEL_PASTE(KERNEL_PASTE(KERNEL_NAME,_),IQ_FUNC)
KERNEL
void FULL_KERNEL_NAME(
    int nq,                 // number of q values
    global const ProblemDetails *problem,
    global const double *weights,
    global const double *pars,
    global const double *q, // nq q values, with padding to boundary
    global double *result,  // nq return values, again with padding
    const double cutoff,    // cutoff in the polydispersity weight product
    const int pd_start,     // where we are in the polydispersity loop
    const int pd_stop       // where we are stopping in the polydispersity loop
    )
{

  // Storage for the current parameter values.  These will be updated as we
  // walk the polydispersity cube.
  local ParameterBlock local_pars;  // current parameter values
  double *parvec = (double *)&local_pars;  // alias the named parameters as a vector

  local int offset[NPARS];  // coordinated-value offsets, one per kernel parameter

#if defined(USE_SHORTCUT_OPTIMIZATION)
  if (problem->pd_length[0] == 1) {
    // Shouldn't need to copy!!
    for (int k=0; k < NPARS; k++) {
      parvec[k] = pars[k+2];  // skip scale and background
    }

    #ifdef USE_OPENMP
    #pragma omp parallel for
    #endif
    for (int i=0; i < nq; i++) {
      const double scattering = IQ_FUNC(IQ_PARS, IQ_PARAMETERS);
      result[i] += pars[0]*scattering + pars[1];
    }
    return;
  }
#endif

  // Since we are no longer looping over the entire polydispersity hypercube
  // for each q, the normalization totals must be tracked between kernel
  // calls in the tail of the result vector.
  double norm;     // sum of the weights
  double vol;      // sum of the weighted form volumes
  double norm_vol; // sum of the volume weights

  // Initialize the results to zero
  if (pd_start == 0) {
    norm_vol = 0.0;
    norm = 0.0;
    vol = 0.0;

    #ifdef USE_OPENMP
    #pragma omp parallel for
    #endif
    for (int i=0; i < nq; i++) {
      result[i] = 0.0;
    }
  } else {
    // Pull the values from the previous segment
    norm = result[nq];
    vol = result[nq+1];
    norm_vol = result[nq+2];
  }

  // Location in the polydispersity hypercube, one index per dimension.
  local int pd_index[MAX_PD];

  // Trigger the reset behaviour that happens at the end of the fast loop
  // by setting the initial index >= weight vector length.
  pd_index[0] = problem->pd_length[0];

  // need product of weights at every Iq calc, so keep product of
  // weights from the outer loops so that weight = partial_weight * fast_weight
  double partial_weight = NAN; // product of weight w4*w3*w2 but not w1
  double partial_volweight = NAN;
  double weight = 1.0;        // set to 1 in case there are no weights
  double vol_weight = 1.0;    // set to 1 in case there are no vol weights
  double spherical_correction = 1.0;  // correction for latitude variation

  // Loop over the weights then loop over q, accumulating values
  for (int loop_index=pd_start; loop_index < pd_stop; loop_index++) {
    // check if indices need to be updated
    if (pd_index[0] >= problem->pd_length[0]) {

      // RESET INDICES
      pd_index[0] = loop_index%problem->pd_length[0];
      partial_weight = 1.0;
      partial_volweight = 1.0;
      for (int k=1; k < MAX_PD; k++) {
        pd_index[k] = (loop_index/problem->pd_stride[k])%problem->pd_length[k];
        const double wi = weights[problem->pd_offset[k]+pd_index[k]];
        partial_weight *= wi;
        if (problem->pd_isvol[k]) partial_volweight *= wi;
      }
      for (int k=0; k < NPARS; k++) {
        int coord = problem->par_coord[k];
        int this_offset = problem->par_offset[k];
        int block_size = 1;
        for (int bit=0; bit < MAX_PD && coord != 0; bit++) {
          if (coord&1) {
              this_offset += block_size * pd_index[bit];
              block_size *= problem->pd_length[bit];
          }
          coord /= 2;
        }
        offset[k] = this_offset;
        parvec[k] = pars[this_offset];
      }
      const double w0 = weights[problem->pd_offset[0]+pd_index[0]];
      weight = partial_weight * w0;
      vol_weight = problem->pd_isvol[0] ? partial_volweight*w0 : partial_volweight;
      if (problem->theta_var >= 0) {
        spherical_correction = fabs(cos(M_PI_180*parvec[problem->theta_var]));
      }
      weight *= spherical_correction;

    } else {

      // INCREMENT INDICES
      pd_index[0] += 1;
      const double wi = weights[problem->pd_offset[0]+pd_index[0]];
      weight = partial_weight*wi;
      vol_weight = problem->pd_isvol[0] ? partial_volweight*wi : partial_volweight;
      for (int k=0; k < problem->fast_coord_count; k++) {
        parvec[problem->fast_coord_index[k]]
            = pars[offset[problem->fast_coord_index[k]] + pd_index[0]];
      }
      if (problem->fast_theta) {
        spherical_correction = fabs(cos(M_PI_180*parvec[problem->theta_var]));
      }
      weight *= spherical_correction;

    }

    #ifdef INVALID
    if (INVALID(local_pars)) continue;
    #endif

    if (weight > cutoff) {
      norm += weight;
      vol += vol_weight * CALL_VOLUME(local_pars);
      norm_vol += vol_weight;

      #ifdef USE_OPENMP
      #pragma omp parallel for
      #endif
      for (int i=0; i < nq; i++) {
        const double scattering = CALL_IQ(q, nq, i, local_pars);
        //const double scattering = Iq(q[i], IQ_PARAMETERS);
        result[i] += weight*scattering;
      }
    }
  }

  // Make the normalization available for the next round
  result[nq] = norm;
  result[nq+1] = vol;
  result[nq+2] = norm_vol;

  // At the end of the PD loop we can normalize
  if (pd_stop == problem->pd_stride[MAX_PD-1]) {
    #ifdef USE_OPENMP
    #pragma omp parallel for
    #endif
    for (int i=0; i < nq; i++) {
      if (vol*norm_vol != 0.0) {
        result[i] *= norm_vol/vol;
      }
      result[i] = pars[0]*result[i]/norm + pars[1];
    }
  }
}