source: sasmodels/sasmodels/kernel_iq.c @ f35f1dd

Last change on this file was f35f1dd, checked in by Paul Kienzle <pkienzle@…>, 8 years ago

do volume weighting in new kernel


/*
    ##########################################################
    #                                                        #
    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
    #   !!                                              !!   #
    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
    #   !!                                              !!   #
    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
    #                                                        #
    ##########################################################
*/

/*
The environment needs to provide the following #defines:

   USE_OPENCL is defined if running in OpenCL
   KERNEL declares a function to be available externally
   KERNEL_NAME is the name of the function being declared
   NPARS is the number of parameters in the kernel
   PARAMETER_DECL is the declaration of the parameters to the kernel.

       Cylinder:

           #define PARAMETER_DECL \
           double length; \
           double radius; \
           double sld; \
           double sld_solvent

       Note: scale and background are not included

       Multi-shell cylinder (10 shell max):

           #define PARAMETER_DECL \
           double num_shells; \
           double length; \
           double radius[10]; \
           double sld[10]; \
           double sld_solvent

   PARAMETER_CALL(var) is the declaration of a call to the kernel.

       Cylinder:

           #define PARAMETER_CALL(var) \
           var.length, \
           var.radius, \
           var.sld, \
           var.sld_solvent

       Multi-shell cylinder:
           #define PARAMETER_CALL(var) \
           var.num_shells, \
           var.length, \
           var.radius, \
           var.sld, \
           var.sld_solvent

   INVALID is a test for model parameters in the correct range

       Cylinder:

           #define INVALID(var) 0

       BarBell:

           #define INVALID(var) (var.bell_radius > var.radius)

       Model with complicated constraints:

           inline bool constrained(p1, p2, p3) { return expression; }
           #define INVALID(var) constrained(var.p1, var.p2, var.p3)

   IQ_FUNC could be Iq or Iqxy
   IQ_PARS could be q[i] or q[2*i],q[2*i+1]
   IQ_PARAMETERS is the list of model parameter values passed to the Iq/Iqxy call
   VOLUME_PARAMETERS is the list of parameter values passed to form_volume

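   For illustration only, a 1D cylinder-like model might tie these together
   with definitions such as (hypothetical expansions, not part of this file):

       #define IQ_FUNC Iq
       #define IQ_PARS q[i]
       #define IQ_PARAMETERS local_pars.length, local_pars.radius, \
           local_pars.sld, local_pars.sld_solvent

   so that the scattering call IQ_FUNC(IQ_PARS, IQ_PARAMETERS) in the kernel
   below expands to Iq(q[i], length, radius, sld, sld_solvent) evaluated at
   the current values stored in local_pars.
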
Our design supports a limited number of polydispersity loops, wherein
we need to cycle through the values of the polydispersity, calculate
the I(q, p) for each combination of parameters, and perform a normalized
weighted sum across all the weights.  Parameters may be passed to the
underlying calculation engine as scalars or vectors, but the polydispersity
calculator treats the parameter set as one long vector.

Let's assume we have 6 parameters in the model, with two polydisperse::

    0: scale        {scl = constant}
    1: background   {bkg = constant}
    5: length       {l = vector of 30pts}
    4: radius       {r = vector of 10pts}
    3: sld          {s = constant/(radius**2*length)}
    2: sld_solvent  {s2 = constant}

This generates the following call to the kernel (where x stands for an
arbitrary value that is not used by the kernel evaluator):

    NPARS = 4  // scale and background are in all models
    problem {
        pd_par = {5, 4, x, x}         // parameters *length* and *radius* vary
        pd_length = {30, 10, 0, 0}    // *length* has more points, so it is first
        pd_offset = {10, 0, x, x}     // *length* starts at index 10 in weights
        pd_stride = {1, 30, 300, 300} // cumulative product of pd length
        pd_isvol = {1, 1, x, x}       // true if weight is a volume weight
        par_offset = {2, 3, 303, 313} // parameter offsets into the pars vector
        par_coord = {0, 3, 2, 1}      // bitmap of parameter dependencies
        fast_coord_count = 2  // two parameters vary with *length* distribution
        fast_coord_index = {5, 3, x, x}
    }

    weight = { r0, ..., r9, l0, ..., l29 }
    pars = { scl, bkg, s2,
             s[l0,r0], s[l1,r0], ..., s[l29,r0], s[l0,r1], ..., s[l29,r9],
             r0, ..., r9, l0, ..., l29 }

    nq = 130
    q = { q0, q1, ..., q129, x, x }  # pad to 8 element boundary
    result = {r0, ..., r129, norm, vol, norm_vol, x, x, x, x, x, x, x}

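The pd_stride values are the cumulative product of the pd_length values,
with unused dimensions counting as length 1, which is how the example above
arrives at {1, 30, 300, 300}.  A host-side helper might compute it as
follows (hypothetical sketch, not part of this file):

    void set_pd_stride(int stride[], const int length[], int max_pd)
    {
        stride[0] = 1;
        for (int k=1; k < max_pd; k++) {
            // treat empty dimensions (length 0) as a single point
            const int n = (length[k-1] > 1 ? length[k-1] : 1);
            stride[k] = stride[k-1]*n;
        }
    }
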

The polydisperse parameters are stored as an array of parameter
indices, one for each polydisperse parameter, stored in pd_par[n].
Non-polydisperse parameters do not appear in this array.  Each polydisperse
parameter has a weight vector whose length is stored in pd_length[n].
The weights are stored in a contiguous vector of weights for all
parameters, with the starting position for each parameter stored
in pd_offset[n].  The parameter values corresponding to the weights are
stored in the pars vector, with the offset of each parameter's values
stored in par_offset[pd_par[n]].  Polydisperse parameters should be stored
in decreasing order of weight vector length for the highest efficiency.

We limit the number of polydisperse dimensions to MAX_PD (currently 4).
This cuts the size of the structure in half compared to allowing a
separate polydispersity for each parameter.  This will help a little
bit for models with large numbers of parameters, such as the onion model.

Parameters may be coordinated.  That is, we may have the value of one
parameter depend on a set of other parameters, some of which may be
polydisperse.  For example, if sld is inversely proportional to the
volume of a cylinder, and the length and radius are independently
polydisperse, then for each combination of length and radius we need a
separate value for the sld.  The caller must provide a coordination table
for each such parameter containing its value for each combination of the
polydisperse parameters v1, v2, etc.  The tables for each
parameter are arranged contiguously in a vector, with offset[k] giving the
starting location of parameter k in the vector.  Each parameter defines
coord[k] as a bit mask indicating which polydispersity parameters the
parameter depends upon.  Usually this is zero, indicating that the parameter
is independent, but for the cylinder example given, the bits for the
radius and length polydispersity parameters would both be set, the result
being a (#radius x #length) table, or maybe a (#length x #radius) table
if length comes first in the polydispersity table.

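For the example above, sld has coord = 3 (binary 11), so it depends on both
the length dimension (bit 0) and the radius dimension (bit 1), and its value
for the current point in the hypercube sits at (illustrative index
arithmetic, mirroring the offset loop in the kernel below):

    value index = par_offset[sld] + 1*pd_index[0] + pd_length[0]*pd_index[1]
                = 3 + i_length + 30*i_radius
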
NB: If we can guarantee that a compiler and OpenCL driver are available,
we could instead create the coordination function on the fly for each
parameter, saving memory and transfer time, but requiring a C compiler
as part of the environment.

By ordering the polydisperse parameters by decreasing length we can
iterate over the longest dispersion weight vector first.  All parameters
coordinated with this weight vector (the 'fast' parameters) can be
updated with a simple increment to the next position in the parameter
value table.  The indices of these parameters are stored in fast_coord_index[],
with fast_coord_count being the number of fast parameters.  A total
of NPARS slots is allocated to allow for the case that all parameters
are coordinated with the fast index, though this will likely be mostly
empty.  When the fast increment count reaches the end of the weight
vector, the index of the second polydisperse parameter must be
incremented, and all of its coordinated parameters updated.  Because this
operation is not in the inner loop, a slower algorithm can be used.

If there is no polydispersity we pretend that there is a single polydisperse
parameter with pd_start=0 and pd_stop=1.  We may or may not short circuit the
calculation in this case, depending on how much time it saves.

The problem details structure can be allocated and sent in as an integer
array using the read-only flag.  This allows us to copy it once per fit
along with the weights vector, since features such as the number of
polydispersity elements per pd parameter or the coordination information
won't change between function evaluations.  A new parameter vector is sent
for each I(q) evaluation.

To protect against expensive evaluations taking all the GPU resource
on large fits, the entire polydispersity will not be computed at once.
Instead, a start and stop location will be sent, indicating where in the
polydispersity loop the calculation should start and where it should
stop.  We can do this for arbitrary start/stop points since we have
unwound the nested loop, using the same technique as array index
translation: div and mod recover the i,j,k,... indices in the virtual
nested loop.

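For the two-dimensional example above, the flat loop_index runs from 0 to 299
and the indices in the virtual nested loop are recovered as (illustrative,
using pd_length = {30, 10} and pd_stride = {1, 30}):

    i_length = (loop_index/pd_stride[0]) % pd_length[0]   // = loop_index % 30
    i_radius = (loop_index/pd_stride[1]) % pd_length[1]   // = (loop_index/30) % 10
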
The results array will be initialized to zero for polydispersity loop
entry zero, and preserved between calls to [start, stop] so that the
results accumulate by the time the loop has completed.  Background and
scale will be applied when the loop reaches the end.  This does require
that the results array be allocated read-write, which is less efficient
for the GPU, but it makes the calling sequence much more manageable.

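For instance, the 300-point loop of the example above could be evaluated in
three pieces (an illustrative host-side sketch; a real host would launch the
kernel through OpenCL or call the compiled C function directly):

    // FULL_KERNEL_NAME stands for the generated kernel defined below
    FULL_KERNEL_NAME(nq, problem, weights, pars, q, result, cutoff,   0, 100);
    FULL_KERNEL_NAME(nq, problem, weights, pars, q, result, cutoff, 100, 200);
    FULL_KERNEL_NAME(nq, problem, weights, pars, q, result, cutoff, 200, 300);
    // result[0..nq-1] now holds scale*I(q)/norm + background
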
Scale and background cannot be coordinated with other polydisperse parameters.

TODO: cutoff
*/

#define MAX_PD 4  // MAX_PD is the max number of polydisperse parameters
#define PD_2N 16  // PD_2N is the size of the coordination step table

typedef struct {
    int pd_par[MAX_PD];     // index of the nth polydispersity variable
    int pd_length[MAX_PD];  // length of the nth polydispersity weight vector
    int pd_offset[MAX_PD];  // offset of pd weights in the par & weight vector
    int pd_stride[MAX_PD];  // stride to move to the next index at this level
    int pd_isvol[MAX_PD];   // True if parameter is a volume weighting parameter
    int par_offset[NPARS];  // offset of par values in the par & weight vector
    int par_coord[NPARS];   // polydispersity coordination bitvector
    int fast_coord_count;   // number of parameters coordinated with pd 1
    int fast_coord_index[NPARS]; // index of the fast coordination parameters
} ProblemDetails;
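
/* Example (illustrative only): for the 6-parameter model described in the
   comment above, the host would fill this in roughly as
       pd_par     = {5, 4, 0, 0}        pd_length = {30, 10, 0, 0}
       pd_offset  = {10, 0, 0, 0}       pd_stride = {1, 30, 300, 300}
       pd_isvol   = {1, 1, 0, 0}
       par_offset = {2, 3, 303, 313}    par_coord = {0, 3, 2, 1}
       fast_coord_count = 2             fast_coord_index = {5, 3, 0, 0}
   with the unused slots (shown as x in the comment above) set to 0 here. */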

typedef struct {
    PARAMETER_DECL;
} ParameterBlock;

#define FULL_KERNEL_NAME KERNEL_NAME ## _ ## IQ_FUNC
KERNEL
void FULL_KERNEL_NAME(
    int nq,                 // number of q values
    global const ProblemDetails *problem,
    global const double *weights,
    global const double *pars,
    global const double *q, // nq q values, with padding to boundary
    global double *result,  // nq return values, again with padding
    const double cutoff,    // cutoff in the polydispersity weight product
    const int pd_start,     // where we are in the polydispersity loop
    const int pd_stop       // where we are stopping in the polydispersity loop
    )
{

  // Storage for the current parameter values.  These will be updated as we
  // walk the polydispersity cube.
  local ParameterBlock local_pars;  // current parameter values
  local double *parvec = (local double *)&local_pars;  // alias the named parameters as a vector

  local int offset[NPARS];  // current offset of each parameter value in pars

#if defined(USE_SHORTCUT_OPTIMIZATION)
  if (problem->pd_length[0] == 1) {
    // Shouldn't need to copy!!
    for (int k=0; k < NPARS; k++) {
      parvec[k] = pars[k+2];  // skip scale and background
    }

    #ifdef USE_OPENMP
    #pragma omp parallel for
    #endif
    for (int i=0; i < nq; i++) {
      const double scattering = IQ_FUNC(IQ_PARS, IQ_PARAMETERS);
      result[i] = pars[0]*scattering + pars[1];
    }
    return;
  }
#endif


  // Since we are no longer looping over the entire polydispersity hypercube
  // for each q, we need to track the normalization values for each q in a
  // separate work vector.
  double norm;     // sum over the polydispersity weights
  double vol;      // sum over the volume weights times the form volume
  double norm_vol; // sum over the volume weights

  // Initialize the results to zero
  if (pd_start == 0) {
    norm_vol = 0.0;
    norm = 0.0;
    vol = 0.0;

    #ifdef USE_OPENMP
    #pragma omp parallel for
    #endif
    for (int i=0; i < nq; i++) {
      result[i] = 0.0;
    }
  } else {
    // Pull values from the previous segment
    norm = result[nq];
    vol = result[nq+1];
    norm_vol = result[nq+2];
  }

  // Location in the polydispersity hypercube, one index per dimension.
  local int pd_index[MAX_PD];

  // Trigger the reset behaviour that happens at the end of the fast loop
  // by setting the initial index >= weight vector length.
  pd_index[0] = problem->pd_length[0];

  // need product of weights at every Iq calc, so keep product of
  // weights from the outer loops so that weight = partial_weight * fast_weight
  double partial_weight = NAN;    // product of weights w4*w3*w2 but not w1
  double partial_volweight = NAN; // product of the outer volume weights
  double weight = 1.0;        // set to 1 in case there are no weights
  double vol_weight = 1.0;    // set to 1 in case there are no vol weights

  // Loop over the weights then loop over q, accumulating values
  for (int loop_index=pd_start; loop_index < pd_stop; loop_index++) {
    // check if indices need to be updated
    if (pd_index[0] >= problem->pd_length[0]) {
      // Reset to the hypercube position for loop_index, recovering the index
      // in each dimension with div/mod on the strides.
      pd_index[0] = loop_index%problem->pd_length[0];
      partial_weight = 1.0;
      partial_volweight = 1.0;
      for (int k=1; k < MAX_PD; k++) {
        if (problem->pd_length[k] == 0) break;  // remaining dimensions unused
        pd_index[k] = (loop_index/problem->pd_stride[k])%problem->pd_length[k];
        const double wi = weights[problem->pd_offset[k]+pd_index[k]];
        partial_weight *= wi;
        if (problem->pd_isvol[k]) partial_volweight *= wi;
      }
      const double w0 = weights[problem->pd_offset[0]+pd_index[0]];
      weight = partial_weight * w0;
      vol_weight = problem->pd_isvol[0] ? partial_volweight*w0 : partial_volweight;
      // Recompute the offset of every parameter value for the current indices,
      // walking its coordination bitmap to see which dimensions it spans.
      for (int k=0; k < NPARS; k++) {
        int coord = problem->par_coord[k];
        int this_offset = problem->par_offset[k];
        int block_size = 1;
        for (int bit=0; bit < MAX_PD && coord != 0; bit++) {
          if (coord&1) {
              this_offset += block_size * pd_index[bit];
              block_size *= problem->pd_length[bit];
          }
          coord /= 2;
        }
        offset[k] = this_offset;
        parvec[k] = pars[this_offset];
      }
    } else {
      // Step the fast index and refresh only the fast-coordinated parameters.
      pd_index[0] += 1;
      const double wi = weights[problem->pd_offset[0]+pd_index[0]];
      weight = partial_weight*wi;
      if (problem->pd_isvol[0]) vol_weight = partial_volweight*wi;
      for (int k=0; k < problem->fast_coord_count; k++) {
        parvec[problem->fast_coord_index[k]]
            = pars[offset[problem->fast_coord_index[k]] + pd_index[0]];
      }
    }
    #ifdef INVALID
    if (INVALID(local_pars)) continue;
    #endif

    if (weight > cutoff) {
      norm += weight;
      vol += vol_weight * form_volume(VOLUME_PARAMETERS);
      norm_vol += vol_weight;

      #ifdef USE_OPENMP
      #pragma omp parallel for
      #endif
      for (int i=0; i < nq; i++) {
        const double scattering = IQ_FUNC(IQ_PARS, IQ_PARAMETERS);
        //const double scattering = Iq(q[i], IQ_PARAMETERS);
        result[i] += weight*scattering;
      }
    }
  }

  // Make the normalization values available for the next round
  result[nq] = norm;
  result[nq+1] = vol;
  result[nq+2] = norm_vol;

  // At the end of the PD loop we can normalize
  if (pd_stop == problem->pd_stride[MAX_PD-1]) {
    #ifdef USE_OPENMP
    #pragma omp parallel for
    #endif
    for (int i=0; i < nq; i++) {
      if (vol*norm_vol != 0.0) {
        result[i] *= norm_vol/vol;
      }
      result[i] = pars[0]*result[i]/norm + pars[1];
    }
  }
}