kernel_iq.cl @ a738209

core_shell_microgelscostrafo411magnetic_modelrelease_v0.94release_v0.95ticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests

Last change on this file since a738209 was a738209, checked in by Paul Kienzle <pkienzle@…>, 8 years ago
simplify kernels by remove coordination parameter logic
Property mode set to `100644`
File size: 7.2 KB

Rev	Line
[f2f67a6]	1
	2	/*
	3	##########################################################
	4	# #
	5	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
	6	# !! !! #
	7	# !! KEEP THIS CODE CONSISTENT WITH KERNELPY.PY !! #
	8	# !! !! #
	9	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
	10	# #
	11	##########################################################
	12	*/
	13
	14	#ifndef _PAR_BLOCK_ // protected block so we can include this code twice.
	15	#define _PAR_BLOCK_
	16
	17	typedef struct {
	18	#if MAX_PD > 0
	19	int32_t pd_par[MAX_PD]; // id of the nth polydispersity variable
	20	int32_t pd_length[MAX_PD]; // length of the nth polydispersity weight vector
	21	int32_t pd_offset[MAX_PD]; // offset of pd weights in the value & weight vector
	22	int32_t pd_stride[MAX_PD]; // stride to move to the next index at this level
	23	#endif // MAX_PD > 0
[a738209]	24	int32_t pd_prod; // total number of voxels in hypercube
	25	int32_t pd_sum; // total length of the weights vector
[f2f67a6]	26	int32_t num_active; // number of non-trivial pd loops
	27	int32_t theta_par; // id of spherical correction variable
	28	} ProblemDetails;
	29
	30	typedef struct {
	31	PARAMETER_TABLE;
	32	} ParameterBlock;
	33	#endif
	34
	35
	36	kernel
	37	void KERNEL_NAME(
	38	int32_t nq, // number of q values
	39	const int32_t pd_start, // where we are in the polydispersity loop
	40	const int32_t pd_stop, // where we are stopping in the polydispersity loop
	41	global const ProblemDetails *details,
	42	global const double *values,
	43	global const double *q, // nq q values, with padding to boundary
[a738209]	44	global double *result, // nq+1 return values, again with padding
[f2f67a6]	45	const double cutoff // cutoff in the polydispersity weight product
	46	)
	47	{
[ae2b6b5]	48	// Storage for the current parameter values. These will be updated as we
[a738209]	49	// walk the polydispersity cube. local_values will be aliased to pvec.
	50	local ParameterBlock local_values;
[f2f67a6]	51
	52	// who we are and what element we are working with
	53	const int q_index = get_global_id(0);
[a738209]	54	const int thread = get_local_id(0);
[f2f67a6]	55
	56	// Fill in the initial variables
[a738209]	57	event_t e = async_work_group_copy((local double *)&local_values, values+2, NPARS, 0);
	58	wait_group_events(1, &e);
[f2f67a6]	59
	60	// Monodisperse computation
[a738209]	61	if (details->num_active == 0) {
	62	double norm, scale, background;
	63	// TODO: only needs to be done by one process...
[f2f67a6]	64	#ifdef INVALID
	65	if (INVALID(local_values)) { return; }
	66	#endif
	67
[a738209]	68	norm = CALL_VOLUME(local_values);
[f2f67a6]	69	scale = values[0];
	70	background = values[1];
	71
	72	if (q_index < nq) {
	73	double scattering = CALL_IQ(q, q_index, local_values);
	74	result[q_index] = (norm>0. ? scale*scattering/norm + background : background);
	75	}
	76	return;
	77	}
	78
	79	#if MAX_PD > 0
	80
	81	double this_result;
	82
	83	//printf("Entering polydispersity from %d to %d\n", pd_start, pd_stop);
	84	// norm will be shared across all threads.
	85
[a738209]	86	// "values" is global and can't be assigned to a local, so even though only
	87	// the alias is only needed for thread 0 it is allocated in all threads.
	88	global const double *pd_value = values+2+NPARS;
	89	global const double *pd_weight = pd_value+details->pd_sum;
	90
[f2f67a6]	91	// need product of weights at every Iq calc, so keep product of
	92	// weights from the outer loops so that weight = partial_weight * fast_weight
[a738209]	93	local double pd_norm;
	94	local double partial_weight; // product of weight w4w3w2 but not w1
	95	local double spherical_correction; // cosine correction for latitude variation
	96	local double weight; // product of partial_weightw1spherical_correction
	97	local double *pvec;
	98	local int p0_par;
	99	local int p0_length;
	100	local int p0_offset;
	101	local int p0_is_theta;
	102	local int p0_index;
[f2f67a6]	103
[ae2b6b5]	104	// Number of elements in the longest polydispersity loop
[a738209]	105	barrier(CLK_LOCAL_MEM_FENCE);
	106	if (thread == 0) {
	107	pvec = (local double *)(&local_values);
	108
	109	// Number of elements in the longest polydispersity loop
	110	p0_par = details->pd_par[0];
	111	p0_length = details->pd_length[0];
	112	p0_offset = details->pd_offset[0];
	113	p0_is_theta = (p0_par == details->theta_par);
	114
	115	// Trigger the reset behaviour that happens at the end the fast loop
	116	// by setting the initial index >= weight vector length.
	117	p0_index = p0_length;
	118
	119	// Default the spherical correction to 1.0 in case it is not otherwise set
	120	spherical_correction = 1.0;
	121
	122	// Since we are no longer looping over the entire polydispersity hypercube
	123	// for each q, we need to track the result and normalization values between
	124	// calls. This means initializing them to 0 at the start and accumulating
	125	// them between calls.
	126	pd_norm = pd_start == 0 ? 0.0 : result[nq];
	127	}
	128	barrier(CLK_LOCAL_MEM_FENCE);
[ae2b6b5]	129
[f2f67a6]	130	if (q_index < nq) {
	131	this_result = pd_start == 0 ? 0.0 : result[q_index];
	132	}
	133
	134	// Loop over the weights then loop over q, accumulating values
	135	for (int loop_index=pd_start; loop_index < pd_stop; loop_index++) {
[a738209]	136	barrier(CLK_LOCAL_MEM_FENCE);
	137	if (thread == 0) {
	138	// check if fast loop needs to be reset
	139	if (p0_index == p0_length) {
	140	//printf("should be here with %d active\n", num_active);
	141
	142	// Compute position in polydispersity hypercube and partial weight
	143	partial_weight = 1.0;
	144	for (int k=1; k < details->num_active; k++) {
	145	int pk = details->pd_par[k];
	146	int index = details->pd_offset[k] + (loop_index/details->pd_stride[k])%details->pd_length[k];
	147	pvec[pk] = pd_value[index];
	148	partial_weight *= pd_weight[index];
	149	//printf("index[%d] = %d\n",k,index);
	150	if (pk == details->theta_par) {
	151	spherical_correction = fmax(fabs(cos(M_PI_180*pvec[pk])), 1.e-6);
[f2f67a6]	152	}
	153	}
[a738209]	154	p0_index = loop_index%p0_length;
	155	//printf("\n");
[f2f67a6]	156	}
	157
[a738209]	158	// Update parameter p0
	159	weight = partial_weight*pd_weight[p0_offset + p0_index];
	160	pvec[p0_par] = pd_value[p0_offset + p0_index];
	161	if (p0_is_theta) {
	162	spherical_correction = fmax(fabs(cos(M_PI_180*pvec[p0_par])), 1.e-6);
[f2f67a6]	163	}
[a738209]	164	p0_index++;
[f2f67a6]	165	}
[a738209]	166	barrier(CLK_LOCAL_MEM_FENCE);
[f2f67a6]	167	//printf("\n");
	168
[ae2b6b5]	169	// Increment fast index
	170
[f2f67a6]	171	#ifdef INVALID
	172	if (INVALID(local_values)) continue;
	173	#endif
	174
	175	// Accumulate I(q)
	176	// Note: weight==0 must always be excluded
	177	if (weight > cutoff) {
	178	// spherical correction has some nasty effects when theta is +90 or -90
	179	// where it becomes zero. If the entirety of the correction
	180	weight *= spherical_correction;
[a738209]	181	pd_norm += weight * CALL_VOLUME(local_values);
[f2f67a6]	182
	183	const double scattering = CALL_IQ(q, q_index, local_values);
	184	this_result += weight*scattering;
	185	}
	186	}
	187
	188	if (q_index < nq) {
[a738209]	189	if (pd_stop >= details->pd_prod) {
[f2f67a6]	190	// End of the PD loop we can normalize
[ae2b6b5]	191	double scale, background;
	192	scale = values[0];
	193	background = values[1];
[a738209]	194	result[q_index] = (pd_norm>0. ? scale*this_result/pd_norm + background : background);
[f2f67a6]	195	} else {
	196	// Partial result, so remember it but don't normalize it.
	197	result[q_index] = this_result;
	198	}
[ae2b6b5]	199
	200	// Remember the updated norm.
[a738209]	201	if (q_index == 0) result[nq] = pd_norm;
[f2f67a6]	202	}
	203
	204	#endif // MAX_PD > 0
	205	}

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasmodels/sasmodels/kernel_iq.cl @ a738209

Download in other formats: