kernel_iq.c @ 44f39a2

core_shell_microgelscostrafo411magnetic_modelrelease_v0.94release_v0.95ticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests

Last change on this file since 44f39a2 was 44f39a2, checked in by wojciech, 8 years ago
A few comments added
Property mode set to `100644`
File size: 11.8 KB

Line
1
2	/*
3	##########################################################
4	# #
5	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
6	# !! !! #
7	# !! KEEP THIS CODE CONSISTENT WITH KERNELPY.PY !! #
8	# !! !! #
9	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
10	# #
11	##########################################################
12	*/
13
14	/*
15	The environment needs to provide the following #defines
16	USE_OPENCL is defined if running in opencl
17	KERNEL declares a function to be available externally
18	KERNEL_NAME is the name of the function being declared
19	NPARS is the number of parameters in the kernel
20	PARAMETER_TABLE is the declaration of the parameters to the kernel.
21
22	Cylinder:
23
24	#define PARAMETER_TABLE \
25	double length; \
26	double radius; \
27	double sld; \
28	double sld_solvent
29
30	Multi-shell cylinder (10 shell max):
31
32	#define PARAMETER_DECL \
33	double num_shells; \
34	double length; \
35	double radius[10]; \
36	double sld[10]; \
37	double sld_solvent
38
39	PARAMETER_CALL(var) is the declaration of a call to the kernel.
40
41	Cylinder:
42
43	#define PARAMETER_CALL(var) \
44	var.length, \
45	var.radius, \
46	var.sld, \
47	var.sld_solvent
48
49	Multi-shell cylinder:
50	#define PARAMETER_CALL(var) \
51	var.num_shells, \
52	var.length, \
53	var.radius, \
54	var.sld, \
55	var.sld_solvent
56
57	Our design supports a limited number of polydispersity loops, wherein
58	we need to cycle through the values of the polydispersity, calculate
59	the I(q, p) for each combination of parameters, and perform a normalized
60	weighted sum across all the weights. Parameters may be passed to the
61	underlying calculation engine as scalars or vectors, but the polydispersity
62	calculator treats the parameter set as one long vector.
63
64	Let's assume we have 6 polydisperse parameters
65	0 : sld_solvent {s1 = constant}
66	1: sld_material {s2 = constant}
67	2: radius {r = vector of 10pts}
68	3: lenghth {l = vector of 30pts}
69	4: background {b = constant}
70	5: scale {s = constant}
71
72	The polydisperse parameters are stored in as an array of parameter
73	indices, one for each polydisperse parameter, stored in pd_par[n].
74	For the above mentioned expample that will give pd_par = {3, 2, x, x},
75	where x stands for abitrary number and 3 corresponds to longest parameter (lenght).
76	Non-polydisperse parameters do not appear in this array. Each polydisperse
77	parameter has a weight vector whose length is stored in pd_length[n],
78	In our case pd_length = {30,10,0,0}. The weights are stored in
79	a contiguous vector of weights for all parameters, with the starting position
80	for the each parameter stored in pd_offset[n] (pd_offset = {10,0,x,x}.
81	The values corresponding to the weights are stored together in a
82	separate weights[] vector, with offset stored in par_offset[pd_par[n]].
83	In the above mentioned example weight = {r0, .., r9, l0, .., l29}.
84	Polydisperse parameters should be stored in decreasing order of length
85	for highest efficiency.
86
87	We limit the number of polydisperse dimensions to MAX_PD (currently 4).
88	This cuts the size of the structure in half compared to allowing a
89	separate polydispersity for each parameter. This will help a little
90	bit for models with large numbers of parameters, such as the onion model.
91
92	Parameters may be coordinated. That is, we may have the value of one
93	parameter depend on a set of other parameters, some of which may be
94	polydisperse. For example, if sld is inversely proportional to the
95	volume of a cylinder, and the length and radius are independently
96	polydisperse, then for each combination of length and radius we need a
97	separate value for the sld. The caller must provide a coordination table
98	for each parameter containing the value for each parameter given the
99	value of the polydisperse parameters v1, v2, etc. The tables for each
100	parameter are arranged contiguously in a vector, with offset[k] giving the
101	starting location of parameter k in the vector. Each parameter defines
102	coord[k] as a bit mask indicating which polydispersity parameters the
103	parameter depends upon. Usually this is zero, indicating that the parameter
104	is independent, but for the cylinder example given, the bits for the
105	radius and length polydispersity parameters would both be set, the result
106	being a (#radius x #length) table, or maybe a (#length x #radius) table
107	if length comes first in the polydispersity table.
108
109	NB: If we can guarantee that a compiler and OpenCL driver are available,
110	we could instead create the coordination function on the fly for each
111	parameter, saving memory and transfer time, but requiring a C compiler
112	as part of the environment.
113
114	In ordering the polydisperse parameters by decreasing length we can
115	iterate over the longest dispersion weight vector first. All parameters
116	coordinated with this weight vector (the 'fast' parameters), can be
117	updated with a simple increment to the next position in the parameter
118	value table. The indices of these parameters is stored in fast_coord_index[],
119	with fast_coord_count being the number of fast parameters. A total
120	of NPARS slots is allocated to allow for the case that all parameters
121	are coordinated with the fast index, though this will likely be mostly
122	empty. When the fast increment count reaches the end of the weight
123	vector, then the index of the second polydisperse parameter must be
124	incremented, and all of its coordinated parameters updated. Because this
125	operation is not in the inner loop, a slower algorithm can be used.
126
127	The problem details structure can be allocated and sent in as an integer
128	array using the read-only flag. This allows us to copy it once per fit
129	along with the weights vector, since features such as the number of
130	polydisperity elements per pd parameter or the coordinated won't change
131	between function evaluations. A new parameter vector is sent for
132	each I(q) evaluation.
133
134	To protect against expensive evaluations taking all the GPU resource
135	on large fits, the entire polydispersity will not be computed at once.
136	Instead, a start and stop location will be sent, indicating where in the
137	polydispersity loop the calculation should start and where it should
138	stop. We can do this for arbitrary start/stop points since we have
139	unwound the nested loop. Instead, we use the same technique as array
140	index translation, using div and mod to figure out the i,j,k,...
141	indices in the virtual nested loop.
142
143	The results array will be initialized to zero for polydispersity loop
144	entry zero, and preserved between calls to [start, stop] so that the
145	results accumulate by the time the loop has completed. Background and
146	scale will be applied when the loop reaches the end. This does require
147	that the results array be allocated read-write, which is less efficient
148	for the GPU, but it makes the calling sequence much more manageable.
149	*/
150
151	#define MAX_PD 4 // MAX_PD is the max number of polydisperse parameters
152	#define PD_2N 16 // PD_2N is the size of the coordination step table
153
154	typedef struct {
155	int pd_par[MAX_PD]; // index of the nth polydispersity variable
156	int pd_length[MAX_PD]; // length of the nth polydispersity weight vector
157	int pd_offset[MAX_PD]; // offset of pd weights in the par & weight vector
158	int pd_stride[MAX_PD]; // stride to move to the next index at this level
159	int par_offset[NPARS]; // offset of par values in the par & weight vector
160	int par_coord[NPARS]; // polydispersity coordination bitvector
161	int fast_coord_count; // number of parameters coordinated with pd 1
162	int fast_coord_index[NPARS]; // index of the fast coordination parameters
163	} ProblemDetails;
164
165	typedef struct {
166	PARAMETER_DECL;
167	} ParameterBlock;
168
169
170	KERNEL
171	void KERNEL_NAME(
172	int nq, // number of q values
173	global const ProblemDetails *problem,
174	global const double *weights,
175	global const double *pars,
176	global const double *q, // nq q values, with padding to boundary
177	global double *result, // nq return values, again with padding
178	global double work, // 3(nq+padding) space for normalization
179	global int *int_work, // nq+padding space for current position
180	const double cutoff, // cutoff in the polydispersity weight product
181	const int pd_start, // where we are in the polydispersity loop
182	const int pd_stop, // where we are stopping in the polydispersity loop
183	)
184	{
185	// Storage for the current parameter values. These will be updated as we
186	// walk the polydispersity cube.
187	local ParameterBlock local_pars;
188	const double *parvec = &local_pars; // Alias named parameters with a vector
189
190	// Since we are no longer looping over the entire polydispersity hypercube
191	// for each q, we need to track the normalization values for each q in a
192	// separate work vector.
193	double *norm = work; // contains sum over weights
194	double *vol = norm + (nq + padding); // contains sum over volume
195	double *norm_vol = vol + (nq + padding);
196
197	// Initialize the results to zero
198	if (pd_start == 0) {
199	#ifdef USE_OPENMP
200	#pragma omp parallel for
201	#endif
202	for (int i=0; i < Nq; i++) {
203	norm_vol[i] = 0.0;
204	norm[i] = 0.0;
205	vol[i] = 0.0;
206	result[i] = 0.0;
207	}
208	}
209
210	// Location in the polydispersity cube, one index per dimension.
211	local int pd_index[PD_MAX];
212
213	// Set the initial index greater than its vector length in order to
214	// trigger the reset behaviour that happens at the end the fast loop.
215	pd_index[0] = pd_length[0];
216
217	// Loop over the weights then loop over q, accumulating values
218	// par
219	double partial_weight = NaN;
220	for (int loop_index=pd_start; loop_index < pd_stop; loop_index++) {
221	// check if indices need to be updated
222	if (pd_index[0] >= pd_length[0]) {
223	pd_index[0] = loop_index%pd_length[0];
224	partial_weight = 1.0;
225	for (int k=0; k < MAX_PD; k++) {
226	pd_index[k] = (loop_index%pd_length[k])/pd_stride[k];
227	partial_weight *= weights[pd_offset[k]+pd_index[k]];
228	}
229	weight = partial_weight * weights[pd_offset[0]+pd_index[0]]
230	for (int k=0; k < NPARS; k++) {
231	int coord = par_coord[k];
232	int this_offset = 0;
233	int block_size = 1;
234	for (int bit=0; bit < MAX_PD && coord != 0; bit++) {
235	if (coord&1) {
236	this_offset += block_size * pd_index[bit];
237	block_size *= pd_length[bit];
238	}
239	coord /= 2;
240	}
241	offset[k] = this_offset;
242	}
243	} else {
244	pd_index[0] += 1;
245	weight = partial_weight*weights[pd_offset[0]+pd_index[0]];
246	for (int k=0; k < problem->fast_coord_count; k++) {
247	parvec[ fast_coord_index[k]]
248	= pars[offset[fast_coord_index[k]] + pd_index[0]];
249	}
250	}
251	if (weight > cutoff) {
252	const double vol_weight = VOLUME_WEIGHT_PRODUCT;
253	const double weighted_vol = vol_weight*form_volume(VOLUME_PARAMTERS);
254	#ifdef USE_OPENMP
255	#pragma omp parallel for
256	#endif
257	for (int i=0; i < Nq; i++) {
258	{
259	const double scattering = Iq(qi, IQ_PARAMETERS);
260	// allow kernels to exclude invalid regions by returning NaN
261	if (!isnan(scattering)) {
262	result[i] += weight*scattering;
263	// can almost get away with only having one constant rather than
264	// one constant per q. Maybe want a "is_valid" test?
265	norm[i] += weight;
266	vol[i] += weighted_vol;
267	norm_vol[i] += vol_weight;
268	}
269	}
270	}
271
272	if (pd_stop == pd_stride[MAX_PD-1]) {}
273	#ifdef USE_OPENMP
274	#pragma omp parallel for
275	#endif
276	for (int i=0; i < Nq; i++) {
277	if (vol[i]*norm_vol[i] != 0.0) {
278	result[i] *= norm_vol[i]/vol[i];
279	}
280	result[i] = scale*result[i]/norm[i]+background;
281	}
282	}
283	}
284	}

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasmodels/sasmodels/kernel_iq.c @ 44f39a2

Download in other formats: