1 | |
---|
2 | /* |
---|
3 | ########################################################## |
---|
4 | # # |
---|
5 | # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # |
---|
6 | # !! !! # |
---|
7 | # !! KEEP THIS CODE CONSISTENT WITH KERNELPY.PY !! # |
---|
8 | # !! !! # |
---|
9 | # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # |
---|
10 | # # |
---|
11 | ########################################################## |
---|
12 | */ |
---|
13 | |
---|
14 | /* |
---|
15 | The environment needs to provide the following #defines |
---|
16 | USE_OPENCL is defined if running in opencl |
---|
17 | KERNEL declares a function to be available externally |
---|
18 | KERNEL_NAME is the name of the function being declared |
---|
19 | NPARS is the number of parameters in the kernel |
---|
20 | PARAMETER_DECL is the declaration of the parameters to the kernel. |
---|
21 | |
---|
22 | Cylinder: |
---|
23 | |
---|
24 | #define PARAMETER_DECL \ |
---|
25 | double length; \ |
---|
26 | double radius; \ |
---|
27 | double sld; \ |
---|
28 | double sld_solvent |
---|
29 | |
---|
30 | Multi-shell cylinder (10 shell max): |
---|
31 | |
---|
32 | #define PARAMETER_DECL \ |
---|
33 | double num_shells; \ |
---|
34 | double length; \ |
---|
35 | double radius[10]; \ |
---|
36 | double sld[10]; \ |
---|
37 | double sld_solvent |
---|
38 | |
---|
39 | PARAMETER_CALL(var) is the declaration of a call to the kernel. |
---|
40 | |
---|
41 | Cylinder: |
---|
42 | |
---|
43 | #define PARAMETER_CALL(var) \ |
---|
44 | var.length, \ |
---|
45 | var.radius, \ |
---|
46 | var.sld, \ |
---|
47 | var.sld_solvent |
---|
48 | |
---|
49 | Multi-shell cylinder: |
---|
50 | #define PARAMETER_CALL(var) \ |
---|
51 | var.num_shells, \ |
---|
52 | var.length, \ |
---|
53 | var.radius, \ |
---|
54 | var.sld, \ |
---|
55 | var.sld_solvent |
---|
56 | |
---|
57 | INVALID is a test for model parameters in the correct range |
---|
58 | |
---|
59 | Cylinder: |
---|
60 | |
---|
61 | #define INVALID(var) 0 |
---|
62 | |
---|
63 | BarBell: |
---|
64 | |
---|
65 | #define INVALID(var) (var.bell_radius > var.radius) |
---|
66 | |
---|
67 | Our design supports a limited number of polydispersity loops, wherein |
---|
68 | we need to cycle through the values of the polydispersity, calculate |
---|
69 | the I(q, p) for each combination of parameters, and perform a normalized |
---|
70 | weighted sum across all the weights. Parameters may be passed to the |
---|
71 | underlying calculation engine as scalars or vectors, but the polydispersity |
---|
72 | calculator treats the parameter set as one long vector. |
---|
73 | |
---|
74 | Let's assume we have 6 polydisperse parameters |
---|
75 | 0 : sld_solvent {s1 = constant} |
---|
76 | 1: sld_material {s2 = constant} |
---|
77 | 2: radius {r = vector of 10pts} |
---|
78 | 3: length {l = vector of 30pts} |
---|
79 | 4: background {b = constant} |
---|
80 | 5: scale {s = constant} |
---|
81 | |
---|
82 | The polydisperse parameters are stored in as an array of parameter |
---|
83 | indices, one for each polydisperse parameter, stored in pd_par[n]. |
---|
84 | For the above mentioned example that will give pd_par = {3, 2, x, x}, |
---|
85 | where x stands for an arbitrary number and 3 corresponds to the longest parameter (length). |
---|
86 | Non-polydisperse parameters do not appear in this array. Each polydisperse |
---|
87 | parameter has a weight vector whose length is stored in pd_length[n], |
---|
88 | In our case pd_length = {30,10,0,0}. The weights are stored in |
---|
89 | a contiguous vector of weights for all parameters, with the starting position |
---|
90 | for each parameter stored in pd_offset[n] (pd_offset = {10,0,x,x}). |
---|
91 | The values corresponding to the weights are stored together in a |
---|
92 | separate weights[] vector, with offset stored in par_offset[pd_par[n]]. |
---|
93 | In the above mentioned example weight = {r0, .., r9, l0, .., l29}. |
---|
94 | Polydisperse parameters should be stored in decreasing order of length |
---|
95 | for highest efficiency. |
---|
96 | |
---|
97 | We limit the number of polydisperse dimensions to MAX_PD (currently 4). |
---|
98 | This cuts the size of the structure in half compared to allowing a |
---|
99 | separate polydispersity for each parameter. This will help a little |
---|
100 | bit for models with large numbers of parameters, such as the onion model. |
---|
101 | |
---|
102 | Parameters may be coordinated. That is, we may have the value of one |
---|
103 | parameter depend on a set of other parameters, some of which may be |
---|
104 | polydisperse. For example, if sld is inversely proportional to the |
---|
105 | volume of a cylinder, and the length and radius are independently |
---|
106 | polydisperse, then for each combination of length and radius we need a |
---|
107 | separate value for the sld. The caller must provide a coordination table |
---|
108 | for each parameter containing the value for each parameter given the |
---|
109 | value of the polydisperse parameters v1, v2, etc. The tables for each |
---|
110 | parameter are arranged contiguously in a vector, with offset[k] giving the |
---|
111 | starting location of parameter k in the vector. Each parameter defines |
---|
112 | coord[k] as a bit mask indicating which polydispersity parameters the |
---|
113 | parameter depends upon. Usually this is zero, indicating that the parameter |
---|
114 | is independent, but for the cylinder example given, the bits for the |
---|
115 | radius and length polydispersity parameters would both be set, the result |
---|
116 | being a (#radius x #length) table, or maybe a (#length x #radius) table |
---|
117 | if length comes first in the polydispersity table. |
---|
118 | |
---|
119 | NB: If we can guarantee that a compiler and OpenCL driver are available, |
---|
120 | we could instead create the coordination function on the fly for each |
---|
121 | parameter, saving memory and transfer time, but requiring a C compiler |
---|
122 | as part of the environment. |
---|
123 | |
---|
124 | In ordering the polydisperse parameters by decreasing length we can |
---|
125 | iterate over the longest dispersion weight vector first. All parameters |
---|
126 | coordinated with this weight vector (the 'fast' parameters), can be |
---|
127 | updated with a simple increment to the next position in the parameter |
---|
128 | value table. The indices of these parameters is stored in fast_coord_index[], |
---|
129 | with fast_coord_count being the number of fast parameters. A total |
---|
130 | of NPARS slots is allocated to allow for the case that all parameters |
---|
131 | are coordinated with the fast index, though this will likely be mostly |
---|
132 | empty. When the fast increment count reaches the end of the weight |
---|
133 | vector, then the index of the second polydisperse parameter must be |
---|
134 | incremented, and all of its coordinated parameters updated. Because this |
---|
135 | operation is not in the inner loop, a slower algorithm can be used. |
---|
136 | |
---|
137 | If there is no polydispersity we pretend that it is polydispersity with one |
---|
138 | parameter. |
---|
139 | |
---|
140 | The problem details structure can be allocated and sent in as an integer |
---|
141 | array using the read-only flag. This allows us to copy it once per fit |
---|
142 | along with the weights vector, since features such as the number of |
---|
143 | polydispersity elements per pd parameter or the coordination won't change |
---|
144 | between function evaluations. A new parameter vector is sent for |
---|
145 | each I(q) evaluation. |
---|
146 | |
---|
147 | To protect against expensive evaluations taking all the GPU resource |
---|
148 | on large fits, the entire polydispersity will not be computed at once. |
---|
149 | Instead, a start and stop location will be sent, indicating where in the |
---|
150 | polydispersity loop the calculation should start and where it should |
---|
151 | stop. We can do this for arbitrary start/stop points since we have |
---|
152 | unwound the nested loop. Instead, we use the same technique as array |
---|
153 | index translation, using div and mod to figure out the i,j,k,... |
---|
154 | indices in the virtual nested loop. |
---|
155 | |
---|
156 | The results array will be initialized to zero for polydispersity loop |
---|
157 | entry zero, and preserved between calls to [start, stop] so that the |
---|
158 | results accumulate by the time the loop has completed. Background and |
---|
159 | scale will be applied when the loop reaches the end. This does require |
---|
160 | that the results array be allocated read-write, which is less efficient |
---|
161 | for the GPU, but it makes the calling sequence much more manageable. |
---|
162 | |
---|
163 | Scale and background cannot be coordinated with other polydisperse parameters |
---|
164 | |
---|
165 | TODO: cutoff |
---|
166 | */ |
---|
167 | |
---|
168 | #define MAX_PD 4 // MAX_PD is the max number of polydisperse parameters |
---|
169 | #define PD_2N 16 // PD_2N is the size of the coordination step table |
---|
170 | |
---|
// Read-only description of the polydispersity problem.  Sent to the device
// once per fit (along with the weights vector) and reused across I(q)
// evaluations; only the parameter values change per evaluation.
typedef struct {
    int pd_par[MAX_PD];          // index of the nth polydispersity variable
    int pd_length[MAX_PD];       // length of the nth polydispersity weight vector
    int pd_offset[MAX_PD];       // offset of pd weights in the par & weight vector
    int pd_stride[MAX_PD];       // stride to move to the next index at this level
    int par_offset[NPARS];       // offset of par values in the par & weight vector
    int par_coord[NPARS];        // polydispersity coordination bitvector
                                 // (bit k set => parameter varies with pd dim k)
    int fast_coord_count;        // number of parameters coordinated with pd 1
    int fast_coord_index[NPARS]; // index of the fast coordination parameters
                                 // (NPARS slots so all pars may be fast-coordinated)
} ProblemDetails;
---|
181 | |
---|
// Named parameter values for one point in the polydispersity hypercube.
// PARAMETER_DECL is a model-specific #define supplied by the environment
// (see the header comment above for cylinder / multi-shell examples).
typedef struct {
    PARAMETER_DECL;
} ParameterBlock;
---|
185 | |
---|
186 | |
---|
187 | KERNEL |
---|
188 | void KERNEL_NAME( |
---|
189 | int nq, // number of q values |
---|
190 | global const ProblemDetails *problem, |
---|
191 | global const double *weights, |
---|
192 | global const double *pars, |
---|
193 | global const double *q, // nq q values, with padding to boundary |
---|
194 | global double *result, // nq return values, again with padding |
---|
195 | global int *offset, // nq+padding space for current parameter index |
---|
196 | const double cutoff, // cutoff in the polydispersity weight product |
---|
197 | const int pd_start, // where we are in the polydispersity loop |
---|
198 | const int pd_stop, // where we are stopping in the polydispersity loop |
---|
199 | ) |
---|
200 | { |
---|
201 | |
---|
202 | // Storage for the current parameter values. These will be updated as we |
---|
203 | // walk the polydispersity cube. |
---|
204 | local ParameterBlock local_pars; |
---|
205 | const double *parvec = &local_pars; // Alias named parameters with a vector |
---|
206 | |
---|
207 | #if defined(USE_SHORTCUT_OPTIMIZATION) |
---|
208 | if (pd_length[0] == 1) { |
---|
209 | // Shouldn't need to copy!! |
---|
210 | for (int k=0; k < NPARS; k++) { |
---|
211 | parvec[k] = pars[k]; |
---|
212 | } |
---|
213 | |
---|
214 | #ifdef USE_OPENMP |
---|
215 | #pragma omp parallel for |
---|
216 | #endif |
---|
217 | for (int i=0; i < nq; i++) { |
---|
218 | { |
---|
219 | const double scattering = IQ_FUNC(IQ_PARS, IQ_PARAMETERS); |
---|
220 | result[i] += local_pars.scale*scattering + local_pars.background; |
---|
221 | } |
---|
222 | return; |
---|
223 | } |
---|
224 | #endif |
---|
225 | |
---|
226 | |
---|
227 | // Since we are no longer looping over the entire polydispersity hypercube |
---|
228 | // for each q, we need to track the normalization values for each q in a |
---|
229 | // separate work vector. |
---|
230 | double norm; // contains sum over weights |
---|
231 | double vol; // contains sum over volume |
---|
232 | double norm_vol; // contains weights over volume |
---|
233 | |
---|
234 | // Initialize the results to zero |
---|
235 | if (pd_start == 0) { |
---|
236 | norm_vol = 0.0; |
---|
237 | norm = 0.0; |
---|
238 | vol = 0.0; |
---|
239 | |
---|
240 | #ifdef USE_OPENMP |
---|
241 | #pragma omp parallel for |
---|
242 | #endif |
---|
243 | for (int i=0; i < nq; i++) { |
---|
244 | result[i] = 0.0; |
---|
245 | } |
---|
246 | } else { |
---|
247 | //Pulling values from previous segment |
---|
248 | norm = result[nq]; |
---|
249 | vol = result[nq+1]; |
---|
250 | norm_vol = results[nq+2]; |
---|
251 | } |
---|
252 | |
---|
253 | // Location in the polydispersity hypercube, one index per dimension. |
---|
254 | local int pd_index[PD_MAX]; |
---|
255 | |
---|
256 | // Set the initial index greater than its vector length in order to |
---|
257 | // trigger the reset behaviour that happens at the end the fast loop. |
---|
258 | pd_index[0] = pd_length[0]; |
---|
259 | |
---|
260 | // need product of weights at every Iq calc, so keep product of |
---|
261 | // weights from the outer loops so that weight = partial_weight * fast_weight |
---|
262 | double partial_weight = NAN; // product of weight w4*w3*w2 but not w1 |
---|
263 | |
---|
264 | // Loop over the weights then loop over q, accumulating values |
---|
265 | for (int loop_index=pd_start; loop_index < pd_stop; loop_index++) { |
---|
266 | // check if indices need to be updated |
---|
267 | if (pd_index[0] >= pd_length[0]) { |
---|
268 | pd_index[0] = loop_index%pd_length[0]; |
---|
269 | partial_weight = 1.0; |
---|
270 | for (int k=1; k < MAX_PD; k++) { |
---|
271 | pd_index[k] = (loop_index%pd_length[k])/pd_stride[k]; |
---|
272 | partial_weight *= weights[pd_offset[k]+pd_index[k]]; |
---|
273 | } |
---|
274 | weight = partial_weight * weights[pd_offset[0]+pd_index[0]] |
---|
275 | for (int k=0; k < NPARS; k++) { |
---|
276 | int coord = par_coord[k]; |
---|
277 | int this_offset = par_offset[k]; |
---|
278 | int block_size = 1; |
---|
279 | for (int bit=0; bit < MAX_PD && coord != 0; bit++) { |
---|
280 | if (coord&1) { |
---|
281 | this_offset += block_size * pd_index[bit]; |
---|
282 | block_size *= pd_length[bit]; |
---|
283 | } |
---|
284 | coord /= 2; |
---|
285 | } |
---|
286 | offset[k] = this_offset; |
---|
287 | parvec[k] = pars[this_offset]; |
---|
288 | } |
---|
289 | } else { |
---|
290 | pd_index[0] += 1; |
---|
291 | weight = partial_weight*weights[pd_offset[0]+pd_index[0]]; |
---|
292 | for (int k=0; k < problem->fast_coord_count; k++) { |
---|
293 | parvec[ fast_coord_index[k]] |
---|
294 | = pars[offset[fast_coord_index[k]] + pd_index[0]]; |
---|
295 | } |
---|
296 | } |
---|
297 | #ifdef INVALID |
---|
298 | if (INVALID(local_pars)) continue; |
---|
299 | #endif |
---|
300 | if (weight > cutoff) { |
---|
301 | const double vol_weight = VOLUME_WEIGHT_PRODUCT; |
---|
302 | const double weighted_vol = vol_weight*form_volume(VOLUME_PARAMETERS); |
---|
303 | norm += weight; |
---|
304 | vol += weighted_vol; |
---|
305 | norm_vol += vol_weight; |
---|
306 | |
---|
307 | #ifdef USE_OPENMP |
---|
308 | #pragma omp parallel for |
---|
309 | #endif |
---|
310 | for (int i=0; i < nq; i++) { |
---|
311 | { |
---|
312 | const double scattering = IQ_FUNC(IQ_PARS, IQ_PARAMETERS); |
---|
313 | //const double scattering = Iq(q[i], IQ_PARAMETERS); |
---|
314 | result[i] += weight*scattering; |
---|
315 | } |
---|
316 | } |
---|
317 | //Makes a normalization avialable for the next round |
---|
318 | result[nq] = norm; |
---|
319 | result[nq+1] = vol; |
---|
320 | results[nq+2] = norm_vol; |
---|
321 | |
---|
322 | //End of the PD loop we can normalize |
---|
323 | if (pd_stop == pd_stride[MAX_PD-1]) {} |
---|
324 | #ifdef USE_OPENMP |
---|
325 | #pragma omp parallel for |
---|
326 | #endif |
---|
327 | for (int i=0; i < nq; i++) { |
---|
328 | if (vol*norm_vol != 0.0) { |
---|
329 | result[i] *= norm_vol/vol; |
---|
330 | } |
---|
331 | result[i] = local_pars.scale*result[i]/norm+local_pars.background; |
---|
332 | } |
---|
333 | } |
---|
334 | } |
---|
335 | } |
---|