kernel_iq.c @ 9c490bc

core_shell_microgelscostrafo411magnetic_modelrelease_v0.94release_v0.95ticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests

Last change on this file since 9c490bc was 9c490bc, checked in by wojciech, 8 years ago
Some intermediate changes
Property mode set to `100644`
File size: 15.2 KB

Rev	Line
[2e44ac7]	1
	2	/*
	3	##########################################################
	4	# #
	5	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
	6	# !! !! #
	7	# !! KEEP THIS CODE CONSISTENT WITH KERNELPY.PY !! #
	8	# !! !! #
	9	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
	10	# #
	11	##########################################################
	12	*/
	13
	14	/*
[f9245d4]	15	The environment needs to provide the following #defines:
	16
[2e44ac7]	17	USE_OPENCL is defined if running in opencl
	18	KERNEL declares a function to be available externally
	19	KERNEL_NAME is the name of the function being declared
	20	NPARS is the number of parameters in the kernel
[f9245d4]	21	PARAMETER_DECL is the declaration of the parameters to the kernel.
[2e44ac7]	22
	23	Cylinder:
	24
[f9245d4]	25	#define PARAMETER_DECL \
[2e44ac7]	26	double length; \
	27	double radius; \
	28	double sld; \
	29	double sld_solvent
	30
[f9245d4]	31	Note: scale and background are not included
	32
[2e44ac7]	33	Multi-shell cylinder (10 shell max):
	34
	35	#define PARAMETER_DECL \
	36	double num_shells; \
	37	double length; \
	38	double radius[10]; \
	39	double sld[10]; \
	40	double sld_solvent
	41
	42	PARAMETER_CALL(var) is the declaration of a call to the kernel.
	43
	44	Cylinder:
	45
	46	#define PARAMETER_CALL(var) \
	47	var.length, \
	48	var.radius, \
	49	var.sld, \
	50	var.sld_solvent
	51
	52	Multi-shell cylinder:
	53	#define PARAMETER_CALL(var) \
	54	var.num_shells, \
	55	var.length, \
	56	var.radius, \
	57	var.sld, \
	58	var.sld_solvent
	59
[3044216]	60	INVALID is a test for model parameters in the correct range
	61
	62	Cylinder:
	63
	64	#define INVALID(var) 0
	65
	66	BarBell:
	67
	68	#define INVALID(var) (var.bell_radius > var.radius)
	69
[f9245d4]	70	Model with complicated constraints:
	71
	72	inline bool constrained(p1, p2, p3) { return expression; }
	73	#define INVALID(var) constrained(var.p1, var.p2, var.p3)
	74
	75	IQ_FUNC could be Iq or Iqxy
	76	IQ_PARS could be q[i] or q[2i],q[2i+1]
	77
[2e44ac7]	78	Our design supports a limited number of polydispersity loops, wherein
	79	we need to cycle through the values of the polydispersity, calculate
	80	the I(q, p) for each combination of parameters, and perform a normalized
	81	weighted sum across all the weights. Parameters may be passed to the
	82	underlying calculation engine as scalars or vectors, but the polydispersity
	83	calculator treats the parameter set as one long vector.
	84
[f9245d4]	85	Let's assume we have 6 parameters in the model, with two polydisperse::
	86
	87	0: scale {scl = constant}
	88	1: background {bkg = constant}
	89	5: length {l = vector of 30pts}
	90	4: radius {r = vector of 10pts}
	91	3: sld {s = constant/(radius*2length)}
	92	2: sld_solvent {s2 = constant}
	93
	94	This generates the following call to the kernel (where x stands for an
	95	arbitrary value that is not used by the kernel evaluator):
	96
	97	NPARS = 4 // scale and background are in all models
	98	problem {
[f78a2a1]	99	pd_par = {5, 4, x, x} // parameters radius and length vary
	100	pd_length = {30, 10, 0, 0} // length has more, so it is first
	101	pd_offset = {10, 0, x, x} // length starts at index 10 in weights
	102	pd_stride = {1, 30, 300, 300} // cumulative product of pd length
	103	pd_isvol = {1, 1, x, x} // true if weight is a volume weight
[f9245d4]	104	par_offset = {2, 3, 303, 313} // parameter offsets
	105	par_coord = {0, 3, 2, 1} // bitmap of parameter dependencies
	106	fast_coord_count = 2 // two parameters vary with length distribution
	107	fast_coord_index = {5, 3, x, x, x, x}
	108	}
	109
	110	weight = { l0, .., l29, r0, .., r9}
	111	pars = { scl, bkg, l0, ..., l29, r0, r1, ..., r9,
	112	s[l0,r0], ... s[l0,r9], s[l1,r0], ... s[l29,r9] , s2}
	113
	114	nq = 130
	115	q = { q0, q1, ..., q130, x, x } # pad to 8 element boundary
	116	result = {r1, ..., r130, norm, vol, vol_norm, x, x, x, x, x, x, x}
	117
[44f39a2]	118
[2e44ac7]	119	The polydisperse parameters are stored in as an array of parameter
	120	indices, one for each polydisperse parameter, stored in pd_par[n].
	121	Non-polydisperse parameters do not appear in this array. Each polydisperse
[44f39a2]	122	parameter has a weight vector whose length is stored in pd_length[n],
[f9245d4]	123	The weights are stored in a contiguous vector of weights for all
	124	parameters, with the starting position for the each parameter stored
	125	in pd_offset[n]. The values corresponding to the weights are stored
	126	together in a separate weights[] vector, with offset stored in
	127	par_offset[pd_par[n]]. Polydisperse parameters should be stored in
	128	decreasing order of length for highest efficiency.
[2e44ac7]	129
	130	We limit the number of polydisperse dimensions to MAX_PD (currently 4).
	131	This cuts the size of the structure in half compared to allowing a
	132	separate polydispersity for each parameter. This will help a little
	133	bit for models with large numbers of parameters, such as the onion model.
	134
	135	Parameters may be coordinated. That is, we may have the value of one
	136	parameter depend on a set of other parameters, some of which may be
	137	polydisperse. For example, if sld is inversely proportional to the
	138	volume of a cylinder, and the length and radius are independently
	139	polydisperse, then for each combination of length and radius we need a
	140	separate value for the sld. The caller must provide a coordination table
	141	for each parameter containing the value for each parameter given the
[44f39a2]	142	value of the polydisperse parameters v1, v2, etc. The tables for each
[2e44ac7]	143	parameter are arranged contiguously in a vector, with offset[k] giving the
	144	starting location of parameter k in the vector. Each parameter defines
	145	coord[k] as a bit mask indicating which polydispersity parameters the
[a10da8b]	146	parameter depends upon. Usually this is zero, indicating that the parameter
[2e44ac7]	147	is independent, but for the cylinder example given, the bits for the
	148	radius and length polydispersity parameters would both be set, the result
	149	being a (#radius x #length) table, or maybe a (#length x #radius) table
	150	if length comes first in the polydispersity table.
	151
	152	NB: If we can guarantee that a compiler and OpenCL driver are available,
	153	we could instead create the coordination function on the fly for each
	154	parameter, saving memory and transfer time, but requiring a C compiler
	155	as part of the environment.
	156
	157	In ordering the polydisperse parameters by decreasing length we can
	158	iterate over the longest dispersion weight vector first. All parameters
	159	coordinated with this weight vector (the 'fast' parameters), can be
	160	updated with a simple increment to the next position in the parameter
	161	value table. The indices of these parameters is stored in fast_coord_index[],
	162	with fast_coord_count being the number of fast parameters. A total
	163	of NPARS slots is allocated to allow for the case that all parameters
	164	are coordinated with the fast index, though this will likely be mostly
	165	empty. When the fast increment count reaches the end of the weight
	166	vector, then the index of the second polydisperse parameter must be
	167	incremented, and all of its coordinated parameters updated. Because this
	168	operation is not in the inner loop, a slower algorithm can be used.
	169
[3044216]	170	If there is no polydispersity we pretend that it is polydisperisty with one
[f9245d4]	171	parameter, pd_start=0 and pd_stop=1. We may or may not short circuit the
	172	calculation in this case, depending on how much time it saves.
[3044216]	173
[2e44ac7]	174	The problem details structure can be allocated and sent in as an integer
	175	array using the read-only flag. This allows us to copy it once per fit
	176	along with the weights vector, since features such as the number of
	177	polydisperity elements per pd parameter or the coordinated won't change
	178	between function evaluations. A new parameter vector is sent for
	179	each I(q) evaluation.
	180
	181	To protect against expensive evaluations taking all the GPU resource
	182	on large fits, the entire polydispersity will not be computed at once.
	183	Instead, a start and stop location will be sent, indicating where in the
	184	polydispersity loop the calculation should start and where it should
	185	stop. We can do this for arbitrary start/stop points since we have
	186	unwound the nested loop. Instead, we use the same technique as array
	187	index translation, using div and mod to figure out the i,j,k,...
	188	indices in the virtual nested loop.
	189
	190	The results array will be initialized to zero for polydispersity loop
	191	entry zero, and preserved between calls to [start, stop] so that the
	192	results accumulate by the time the loop has completed. Background and
	193	scale will be applied when the loop reaches the end. This does require
	194	that the results array be allocated read-write, which is less efficient
	195	for the GPU, but it makes the calling sequence much more manageable.
[3044216]	196
[0cbcec4]	197	Scale and background cannot be coordinated with other polydisperse parameters
	198
[9c490bc]	199	Cutoff paramater is basically used to restrict the region where integration
	200	is peformed i.e. polydispersity hypercude is limitted spheres.
	201
[2e44ac7]	202	*/
	203
	204	#define MAX_PD 4 // MAX_PD is the max number of polydisperse parameters
	205	#define PD_2N 16 // PD_2N is the size of the coordination step table
	206
	207	typedef struct {
	208	int pd_par[MAX_PD]; // index of the nth polydispersity variable
	209	int pd_length[MAX_PD]; // length of the nth polydispersity weight vector
	210	int pd_offset[MAX_PD]; // offset of pd weights in the par & weight vector
	211	int pd_stride[MAX_PD]; // stride to move to the next index at this level
[f78a2a1]	212	int pd_isvol[MAX_PD]; // True if parameter is a volume weighting parameter
[2e44ac7]	213	int par_offset[NPARS]; // offset of par values in the par & weight vector
	214	int par_coord[NPARS]; // polydispersity coordination bitvector
	215	int fast_coord_count; // number of parameters coordinated with pd 1
	216	int fast_coord_index[NPARS]; // index of the fast coordination parameters
	217	} ProblemDetails;
	218
	219	typedef struct {
	220	PARAMETER_DECL;
	221	} ParameterBlock;
	222
[9c490bc]	223	#define KERNEL_NAME test_Iq
	224	#define FULL_KERNEL_NAME test_Iq
	225	#define IQ_FUNC Iq
	226
	227	#define IQ_PARAMETERS ignored
	228	#define IQ_FIXED_PARAMETER_DECLARATIONS const double scale, \
	229	const double background, \
	230	const double ignored
	231	#define IQ_PARAMETER_DECLARATIONS double ignored
	232	#define IQXY_KERNEL_NAME bessel_Iqxy
	233	#define IQXY_PARAMETERS ignored
	234	#define IQXY_FIXED_PARAMETER_DECLARATIONS const double scale, \
	235	const double background, \
	236	const double ignored
	237	#define IQXY_PARAMETER_DECLARATIONS double ignored
	238
	239
[f9245d4]	240	void FULL_KERNEL_NAME(
[2e44ac7]	241	int nq, // number of q values
	242	global const ProblemDetails *problem,
	243	global const double *weights,
	244	global const double *pars,
	245	global const double *q, // nq q values, with padding to boundary
	246	global double *result, // nq return values, again with padding
	247	const double cutoff, // cutoff in the polydispersity weight product
	248	const int pd_start, // where we are in the polydispersity loop
	249	const int pd_stop, // where we are stopping in the polydispersity loop
	250	)
	251	{
[3044216]	252
[10ddb64]	253	// Storage for the current parameter values. These will be updated as we
	254	// walk the polydispersity cube.
[f9245d4]	255	local ParameterBlock local_pars; // current parameter values
[2e44ac7]	256	const double *parvec = &local_pars; // Alias named parameters with a vector
	257
[f9245d4]	258	local int offset[NPARS-2];
	259
[3044216]	260	#if defined(USE_SHORTCUT_OPTIMIZATION)
	261	if (pd_length[0] == 1) {
	262	// Shouldn't need to copy!!
	263	for (int k=0; k < NPARS; k++) {
[f9245d4]	264	parvec[k] = pars[k+2]; // skip scale and background
[3044216]	265	}
	266
	267	#ifdef USE_OPENMP
	268	#pragma omp parallel for
	269	#endif
	270	for (int i=0; i < nq; i++) {
	271	{
	272	const double scattering = IQ_FUNC(IQ_PARS, IQ_PARAMETERS);
[f9245d4]	273	result[i] += pars[0]*scattering + pars[1];
[3044216]	274	}
	275	return;
	276	}
	277	#endif
	278
	279
[10ddb64]	280	// Since we are no longer looping over the entire polydispersity hypercube
	281	// for each q, we need to track the normalization values for each q in a
	282	// separate work vector.
[3044216]	283	double norm; // contains sum over weights
	284	double vol; // contains sum over volume
	285	double norm_vol; // contains weights over volume
[10ddb64]	286
[2e44ac7]	287	// Initialize the results to zero
	288	if (pd_start == 0) {
[3044216]	289	norm_vol = 0.0;
	290	norm = 0.0;
	291	vol = 0.0;
	292
[2e44ac7]	293	#ifdef USE_OPENMP
	294	#pragma omp parallel for
	295	#endif
[3044216]	296	for (int i=0; i < nq; i++) {
[2e44ac7]	297	result[i] = 0.0;
	298	}
[3044216]	299	} else {
	300	//Pulling values from previous segment
	301	norm = result[nq];
	302	vol = result[nq+1];
	303	norm_vol = results[nq+2];
[2e44ac7]	304	}
	305
[3044216]	306	// Location in the polydispersity hypercube, one index per dimension.
[a10da8b]	307	local int pd_index[PD_MAX];
	308
[f9245d4]	309	// Trigger the reset behaviour that happens at the end the fast loop
	310	// by setting the initial index >= weight vector length.
[a10da8b]	311	pd_index[0] = pd_length[0];
[2e44ac7]	312
[3044216]	313	// need product of weights at every Iq calc, so keep product of
	314	// weights from the outer loops so that weight = partial_weight * fast_weight
	315	double partial_weight = NAN; // product of weight w4w3w2 but not w1
[f78a2a1]	316	double partial_volweight = NAN;
	317	double weight = 1.0; // set to 1 in case there are no weights
	318	double vol_weight = 1.0; // set to 1 in case there are no vol weights
[3044216]	319
[2e44ac7]	320	// Loop over the weights then loop over q, accumulating values
	321	for (int loop_index=pd_start; loop_index < pd_stop; loop_index++) {
	322	// check if indices need to be updated
	323	if (pd_index[0] >= pd_length[0]) {
	324	pd_index[0] = loop_index%pd_length[0];
	325	partial_weight = 1.0;
[f78a2a1]	326	partial_volweight = 1.0;
[3044216]	327	for (int k=1; k < MAX_PD; k++) {
[2e44ac7]	328	pd_index[k] = (loop_index%pd_length[k])/pd_stride[k];
[f78a2a1]	329	const double wi = weights[pd_offset[0]+pd_index[0]];
	330	partial_weight *= wi;
	331	if (pd_isvol[k]) partial_volweight *= wi;
[2e44ac7]	332	}
	333	weight = partial_weight * weights[pd_offset[0]+pd_index[0]]
	334	for (int k=0; k < NPARS; k++) {
	335	int coord = par_coord[k];
[3044216]	336	int this_offset = par_offset[k];
[2e44ac7]	337	int block_size = 1;
	338	for (int bit=0; bit < MAX_PD && coord != 0; bit++) {
	339	if (coord&1) {
	340	this_offset += block_size * pd_index[bit];
	341	block_size *= pd_length[bit];
	342	}
[a10da8b]	343	coord /= 2;
[2e44ac7]	344	}
	345	offset[k] = this_offset;
[3044216]	346	parvec[k] = pars[this_offset];
[2e44ac7]	347	}
	348	} else {
	349	pd_index[0] += 1;
[f78a2a1]	350	const double wi = weights[pd_offset[0]+pd_index[0]];
	351	weight = partial_weight*wi;
	352	if (pd_isvol[0]) vol_weight *= wi;
[2e44ac7]	353	for (int k=0; k < problem->fast_coord_count; k++) {
	354	parvec[ fast_coord_index[k]]
	355	= pars[offset[fast_coord_index[k]] + pd_index[0]];
	356	}
	357	}
[3044216]	358	#ifdef INVALID
	359	if (INVALID(local_pars)) continue;
	360	#endif
[10ddb64]	361	if (weight > cutoff) {
[3044216]	362	norm += weight;
[f78a2a1]	363	vol += vol_weight * form_volume(VOLUME_PARAMETERS);
[3044216]	364	norm_vol += vol_weight;
	365
[10ddb64]	366	#ifdef USE_OPENMP
	367	#pragma omp parallel for
	368	#endif
[3044216]	369	for (int i=0; i < nq; i++) {
[10ddb64]	370	{
[3044216]	371	const double scattering = IQ_FUNC(IQ_PARS, IQ_PARAMETERS);
	372	//const double scattering = Iq(q[i], IQ_PARAMETERS);
	373	result[i] += weight*scattering;
	374	}
[2e44ac7]	375	}
[3044216]	376	//Makes a normalization avialable for the next round
	377	result[nq] = norm;
	378	result[nq+1] = vol;
[f35f1dd]	379	result[nq+2] = norm_vol;
[2e44ac7]	380
[3044216]	381	//End of the PD loop we can normalize
[2e44ac7]	382	if (pd_stop == pd_stride[MAX_PD-1]) {}
	383	#ifdef USE_OPENMP
	384	#pragma omp parallel for
	385	#endif
[3044216]	386	for (int i=0; i < nq; i++) {
	387	if (vol*norm_vol != 0.0) {
	388	result[i] *= norm_vol/vol;
[2e44ac7]	389	}
[f9245d4]	390	result[i] = pars[0]*result[i]/norm + pars[1];
[2e44ac7]	391	}
	392	}
	393	}
[a10da8b]	394	}
[9c490bc]	395	}

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasmodels/sasmodels/kernel_iq.c @ 9c490bc

Download in other formats: