kernel_iq.c @ 3044216

core_shell_microgelscostrafo411magnetic_modelrelease_v0.94release_v0.95ticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests

Last change on this file since 3044216 was 3044216, checked in by wojciech, 8 years ago
Polydispersity loop proototype
Property mode set to `100644`
File size: 12.8 KB

Rev	Line
[2e44ac7]	1
	2	/*
	3	##########################################################
	4	# #
	5	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
	6	# !! !! #
	7	# !! KEEP THIS CODE CONSISTENT WITH KERNELPY.PY !! #
	8	# !! !! #
	9	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
	10	# #
	11	##########################################################
	12	*/
	13
	14	/*
	15	The environment needs to provide the following #defines
	16	USE_OPENCL is defined if running in opencl
	17	KERNEL declares a function to be available externally
	18	KERNEL_NAME is the name of the function being declared
	19	NPARS is the number of parameters in the kernel
	20	PARAMETER_DECL is the declaration of the parameters to the kernel.
	21
	22	Cylinder:
	23
	24	#define PARAMETER_DECL \
	25	double length; \
	26	double radius; \
	27	double sld; \
	28	double sld_solvent
	29
	30	Multi-shell cylinder (10 shell max):
	31
	32	#define PARAMETER_DECL \
	33	double num_shells; \
	34	double length; \
	35	double radius[10]; \
	36	double sld[10]; \
	37	double sld_solvent
	38
	39	PARAMETER_CALL(var) is the declaration of a call to the kernel.
	40
	41	Cylinder:
	42
	43	#define PARAMETER_CALL(var) \
	44	var.length, \
	45	var.radius, \
	46	var.sld, \
	47	var.sld_solvent
	48
	49	Multi-shell cylinder:
	50	#define PARAMETER_CALL(var) \
	51	var.num_shells, \
	52	var.length, \
	53	var.radius, \
	54	var.sld, \
	55	var.sld_solvent
	56
[3044216]	57	INVALID is a test for model parameters in the correct range
	58
	59	Cylinder:
	60
	61	#define INVALID(var) 0
	62
	63	BarBell:
	64
	65	#define INVALID(var) (var.bell_radius > var.radius)
	66
[2e44ac7]	67	Our design supports a limited number of polydispersity loops, wherein
	68	we need to cycle through the values of the polydispersity, calculate
	69	the I(q, p) for each combination of parameters, and perform a normalized
	70	weighted sum across all the weights. Parameters may be passed to the
	71	underlying calculation engine as scalars or vectors, but the polydispersity
	72	calculator treats the parameter set as one long vector.
	73
[44f39a2]	74	Let's assume we have 6 polydisperse parameters
	75	0 : sld_solvent {s1 = constant}
	76	1: sld_material {s2 = constant}
	77	2: radius {r = vector of 10pts}
	78	3: length {l = vector of 30pts}
	79	4: background {b = constant}
	80	5: scale {s = constant}
	81
[2e44ac7]	82	The polydisperse parameters are stored as an array of parameter
	83	indices, one for each polydisperse parameter, stored in pd_par[n].
[44f39a2]	84	For the above-mentioned example that will give pd_par = {3, 2, x, x},
	85	where x stands for an arbitrary number and 3 corresponds to the longest parameter (length).
[2e44ac7]	86	Non-polydisperse parameters do not appear in this array. Each polydisperse
[44f39a2]	87	parameter has a weight vector whose length is stored in pd_length[n].
	88	In our case pd_length = {30,10,0,0}. The weights are stored in
	89	a contiguous vector of weights for all parameters, with the starting position
	90	for each parameter stored in pd_offset[n] (pd_offset = {10,0,x,x}).
	91	The values corresponding to the weights are stored together in a
	92	separate weights[] vector, with offset stored in par_offset[pd_par[n]].
	93	In the above mentioned example weight = {r0, .., r9, l0, .., l29}.
	94	Polydisperse parameters should be stored in decreasing order of length
	95	for highest efficiency.
[2e44ac7]	96
	97	We limit the number of polydisperse dimensions to MAX_PD (currently 4).
	98	This cuts the size of the structure in half compared to allowing a
	99	separate polydispersity for each parameter. This will help a little
	100	bit for models with large numbers of parameters, such as the onion model.
	101
	102	Parameters may be coordinated. That is, we may have the value of one
	103	parameter depend on a set of other parameters, some of which may be
	104	polydisperse. For example, if sld is inversely proportional to the
	105	volume of a cylinder, and the length and radius are independently
	106	polydisperse, then for each combination of length and radius we need a
	107	separate value for the sld. The caller must provide a coordination table
	108	for each parameter containing the value for each parameter given the
[44f39a2]	109	value of the polydisperse parameters v1, v2, etc. The tables for each
[2e44ac7]	110	parameter are arranged contiguously in a vector, with offset[k] giving the
	111	starting location of parameter k in the vector. Each parameter defines
	112	coord[k] as a bit mask indicating which polydispersity parameters the
[a10da8b]	113	parameter depends upon. Usually this is zero, indicating that the parameter
[2e44ac7]	114	is independent, but for the cylinder example given, the bits for the
	115	radius and length polydispersity parameters would both be set, the result
	116	being a (#radius x #length) table, or maybe a (#length x #radius) table
	117	if length comes first in the polydispersity table.
	118
	119	NB: If we can guarantee that a compiler and OpenCL driver are available,
	120	we could instead create the coordination function on the fly for each
	121	parameter, saving memory and transfer time, but requiring a C compiler
	122	as part of the environment.
	123
	124	In ordering the polydisperse parameters by decreasing length we can
	125	iterate over the longest dispersion weight vector first. All parameters
	126	coordinated with this weight vector (the 'fast' parameters), can be
	127	updated with a simple increment to the next position in the parameter
	128	value table. The indices of these parameters are stored in fast_coord_index[],
	129	with fast_coord_count being the number of fast parameters. A total
	130	of NPARS slots is allocated to allow for the case that all parameters
	131	are coordinated with the fast index, though this will likely be mostly
	132	empty. When the fast increment count reaches the end of the weight
	133	vector, then the index of the second polydisperse parameter must be
	134	incremented, and all of its coordinated parameters updated. Because this
	135	operation is not in the inner loop, a slower algorithm can be used.
	136
[3044216]	137	If there is no polydispersity we pretend that it is polydispersity with one
	138	parameter.
	139
[2e44ac7]	140	The problem details structure can be allocated and sent in as an integer
	141	array using the read-only flag. This allows us to copy it once per fit
	142	along with the weights vector, since features such as the number of
	143	polydispersity elements per pd parameter or the coordination won't change
	144	between function evaluations. A new parameter vector is sent for
	145	each I(q) evaluation.
	146
	147	To protect against expensive evaluations taking all the GPU resource
	148	on large fits, the entire polydispersity will not be computed at once.
	149	Instead, a start and stop location will be sent, indicating where in the
	150	polydispersity loop the calculation should start and where it should
	151	stop. We can do this for arbitrary start/stop points since we have
	152	unwound the nested loop: instead of nesting, we use the same technique as
	153	array index translation, using div and mod to figure out the i,j,k,...
	154	indices in the virtual nested loop.
	155
	156	The results array will be initialized to zero for polydispersity loop
	157	entry zero, and preserved between calls to [start, stop] so that the
	158	results accumulate by the time the loop has completed. Background and
	159	scale will be applied when the loop reaches the end. This does require
	160	that the results array be allocated read-write, which is less efficient
	161	for the GPU, but it makes the calling sequence much more manageable.
[3044216]	162
	163	TODO: cutoff
[2e44ac7]	164	*/
	165
		// Compile-time limits shared by the ProblemDetails layout and the kernel loops.
	166	#define MAX_PD 4 // MAX_PD is the max number of polydisperse parameters
	167	#define PD_2N 16 // PD_2N is the size of the coordination step table
		// NOTE(review): PD_2N is not referenced by any code visible in this file;
		// 16 looks like 2^MAX_PD, so the two presumably must change together -- confirm.
	168
		// Description of the polydispersity problem, read by the kernel through its
		// `problem` argument. Every member is an int so that the structure can be
		// passed in as a plain integer array; do not reorder or retype members
		// without updating whatever fills that array.
		// NPARS must be #defined by the environment before this point.
	169	typedef struct {
	170	int pd_par[MAX_PD]; // index of the nth polydispersity variable
	171	int pd_length[MAX_PD]; // length of the nth polydispersity weight vector
	172	int pd_offset[MAX_PD]; // offset of pd weights in the par & weight vector
	173	int pd_stride[MAX_PD]; // stride to move to the next index at this level
	174	int par_offset[NPARS]; // offset of par values in the par & weight vector
	175	int par_coord[NPARS]; // polydispersity coordination bitvector (bit n set => depends on pd index n)
	176	int fast_coord_count; // number of parameters coordinated with pd 1 (number of used entries in fast_coord_index)
	177	int fast_coord_index[NPARS]; // index of the fast coordination parameters
	178	} ProblemDetails;
	179
		// Named view of the current parameter values for one point of the
		// polydispersity loop. The member list comes from PARAMETER_DECL, which the
		// environment must #define (see the examples in the header comment).
		// The kernel also addresses this struct through a double pointer, indexed
		// by parameter number, so every member is expected to be a double (or an
		// array of doubles). The kernel reads the members `scale` and `background`
		// by name, so PARAMETER_DECL has to declare both.
	180	typedef struct {
	181	PARAMETER_DECL;
	182	} ParameterBlock;
	183
	184
	185	KERNEL
	186	void KERNEL_NAME(
	187	int nq, // number of q values
	188	global const ProblemDetails *problem,
	189	global const double *weights,
	190	global const double *pars,
	191	global const double *q, // nq q values, with padding to boundary
	192	global double *result, // nq return values, again with padding
[3044216]	193	global int *offset, // nq+padding space for current parameter index
[2e44ac7]	194	const double cutoff, // cutoff in the polydispersity weight product
	195	const int pd_start, // where we are in the polydispersity loop
	196	const int pd_stop, // where we are stopping in the polydispersity loop
	197	)
	198	{
[3044216]	199
[10ddb64]	200	// Storage for the current parameter values. These will be updated as we
	201	// walk the polydispersity cube.
[2e44ac7]	202	local ParameterBlock local_pars;
	203	const double *parvec = &local_pars; // Alias named parameters with a vector
	204
[3044216]	205	#if defined(USE_SHORTCUT_OPTIMIZATION)
	206	if (pd_length[0] == 1) {
	207	// Shouldn't need to copy!!
	208	for (int k=0; k < NPARS; k++) {
	209	parvec[k] = pars[k];
	210	}
	211
	212	#ifdef USE_OPENMP
	213	#pragma omp parallel for
	214	#endif
	215	for (int i=0; i < nq; i++) {
	216	{
	217	const double scattering = IQ_FUNC(IQ_PARS, IQ_PARAMETERS);
	218	result[i] += local_pars.scale*scattering + local_pars.background;
	219	}
	220	return;
	221	}
	222	#endif
	223
	224
[10ddb64]	225	// Since we are no longer looping over the entire polydispersity hypercube
	226	// for each q, we need to track the normalization values for each q in a
	227	// separate work vector.
[3044216]	228	double norm; // contains sum over weights
	229	double vol; // contains sum over volume
	230	double norm_vol; // contains weights over volume
[10ddb64]	231
[2e44ac7]	232	// Initialize the results to zero
	233	if (pd_start == 0) {
[3044216]	234	norm_vol = 0.0;
	235	norm = 0.0;
	236	vol = 0.0;
	237
[2e44ac7]	238	#ifdef USE_OPENMP
	239	#pragma omp parallel for
	240	#endif
[3044216]	241	for (int i=0; i < nq; i++) {
[2e44ac7]	242	result[i] = 0.0;
	243	}
[3044216]	244	} else {
	245	//Pulling values from previous segment
	246	norm = result[nq];
	247	vol = result[nq+1];
	248	norm_vol = results[nq+2];
[2e44ac7]	249	}
	250
[3044216]	251	// Location in the polydispersity hypercube, one index per dimension.
[a10da8b]	252	local int pd_index[PD_MAX];
	253
[10ddb64]	254	// Set the initial index greater than its vector length in order to
	255	// trigger the reset behaviour that happens at the end the fast loop.
[a10da8b]	256	pd_index[0] = pd_length[0];
[2e44ac7]	257
[3044216]	258	// need product of weights at every Iq calc, so keep product of
	259	// weights from the outer loops so that weight = partial_weight * fast_weight
	260	double partial_weight = NAN; // product of weight w4w3w2 but not w1
	261
[2e44ac7]	262	// Loop over the weights then loop over q, accumulating values
	263	for (int loop_index=pd_start; loop_index < pd_stop; loop_index++) {
	264	// check if indices need to be updated
	265	if (pd_index[0] >= pd_length[0]) {
	266	pd_index[0] = loop_index%pd_length[0];
	267	partial_weight = 1.0;
[3044216]	268	for (int k=1; k < MAX_PD; k++) {
[2e44ac7]	269	pd_index[k] = (loop_index%pd_length[k])/pd_stride[k];
	270	partial_weight *= weights[pd_offset[k]+pd_index[k]];
	271	}
	272	weight = partial_weight * weights[pd_offset[0]+pd_index[0]]
	273	for (int k=0; k < NPARS; k++) {
	274	int coord = par_coord[k];
[3044216]	275	int this_offset = par_offset[k];
[2e44ac7]	276	int block_size = 1;
	277	for (int bit=0; bit < MAX_PD && coord != 0; bit++) {
	278	if (coord&1) {
	279	this_offset += block_size * pd_index[bit];
	280	block_size *= pd_length[bit];
	281	}
[a10da8b]	282	coord /= 2;
[2e44ac7]	283	}
	284	offset[k] = this_offset;
[3044216]	285	parvec[k] = pars[this_offset];
[2e44ac7]	286	}
	287	} else {
	288	pd_index[0] += 1;
	289	weight = partial_weight*weights[pd_offset[0]+pd_index[0]];
	290	for (int k=0; k < problem->fast_coord_count; k++) {
	291	parvec[ fast_coord_index[k]]
	292	= pars[offset[fast_coord_index[k]] + pd_index[0]];
	293	}
	294	}
[3044216]	295	#ifdef INVALID
	296	if (INVALID(local_pars)) continue;
	297	#endif
[10ddb64]	298	if (weight > cutoff) {
	299	const double vol_weight = VOLUME_WEIGHT_PRODUCT;
[3044216]	300	const double weighted_vol = vol_weight*form_volume(VOLUME_PARAMETERS);
	301	norm += weight;
	302	vol += weighted_vol;
	303	norm_vol += vol_weight;
	304
[10ddb64]	305	#ifdef USE_OPENMP
	306	#pragma omp parallel for
	307	#endif
[3044216]	308	for (int i=0; i < nq; i++) {
[10ddb64]	309	{
[3044216]	310	const double scattering = IQ_FUNC(IQ_PARS, IQ_PARAMETERS);
	311	//const double scattering = Iq(q[i], IQ_PARAMETERS);
	312	result[i] += weight*scattering;
	313	}
[2e44ac7]	314	}
[3044216]	315	//Makes a normalization avialable for the next round
	316	result[nq] = norm;
	317	result[nq+1] = vol;
	318	results[nq+2] = norm_vol;
[2e44ac7]	319
[3044216]	320	//End of the PD loop we can normalize
[2e44ac7]	321	if (pd_stop == pd_stride[MAX_PD-1]) {}
	322	#ifdef USE_OPENMP
	323	#pragma omp parallel for
	324	#endif
[3044216]	325	for (int i=0; i < nq; i++) {
	326	if (vol*norm_vol != 0.0) {
	327	result[i] *= norm_vol/vol;
[2e44ac7]	328	}
[3044216]	329	result[i] = local_pars.scale*result[i]/norm+local_pars.background;
[2e44ac7]	330	}
	331	}
	332	}
[a10da8b]	333	}

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasmodels/sasmodels/kernel_iq.c @ 3044216

Download in other formats: