kernel_iq.cl @ 256dfe1

core_shell_microgelscostrafo411magnetic_modelrelease_v0.94release_v0.95ticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests

Last change on this file since 256dfe1 was b9c12fe5, checked in by Paul Kienzle <pkienzle@…>, 8 years ago
faster using private pvector
Property mode set to `100644`
File size: 6.5 KB

Rev	Line
[f2f67a6]	1
	2	/*
	3	##########################################################
	4	# #
	5	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
	6	# !! !! #
	7	# !! KEEP THIS CODE CONSISTENT WITH KERNELPY.PY !! #
	8	# !! !! #
	9	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
	10	# #
	11	##########################################################
	12	*/
	13
	14	#ifndef _PAR_BLOCK_ // protected block so we can include this code twice.
	15	#define _PAR_BLOCK_
	16
	17	typedef struct {
	18	#if MAX_PD > 0
	19	int32_t pd_par[MAX_PD]; // id of the nth polydispersity variable
	20	int32_t pd_length[MAX_PD]; // length of the nth polydispersity weight vector
	21	int32_t pd_offset[MAX_PD]; // offset of pd weights in the value & weight vector
	22	int32_t pd_stride[MAX_PD]; // stride to move to the next index at this level
	23	#endif // MAX_PD > 0
[a738209]	24	int32_t pd_prod; // total number of voxels in hypercube
	25	int32_t pd_sum; // total length of the weights vector
[f2f67a6]	26	int32_t num_active; // number of non-trivial pd loops
	27	int32_t theta_par; // id of spherical correction variable
	28	} ProblemDetails;
	29
	30	typedef struct {
	31	PARAMETER_TABLE;
	32	} ParameterBlock;
	33	#endif
	34
	35
	36	kernel
	37	void KERNEL_NAME(
	38	int32_t nq, // number of q values
	39	const int32_t pd_start, // where we are in the polydispersity loop
	40	const int32_t pd_stop, // where we are stopping in the polydispersity loop
	41	global const ProblemDetails *details,
	42	global const double *values,
	43	global const double *q, // nq q values, with padding to boundary
[a738209]	44	global double *result, // nq+1 return values, again with padding
[f2f67a6]	45	const double cutoff // cutoff in the polydispersity weight product
	46	)
	47	{
[ae2b6b5]	48	// Storage for the current parameter values. These will be updated as we
[a738209]	49	// walk the polydispersity cube. local_values will be aliased to pvec.
[b9c12fe5]	50	ParameterBlock local_values;
	51	double pvec = (double )&local_values;
[f2f67a6]	52
	53	// who we are and what element we are working with
	54	const int q_index = get_global_id(0);
	55
	56	// Fill in the initial variables
[b9c12fe5]	57	for (int i=0; i < NPARS; i++) {
	58	pvec[i] = values[2+i];
	59	}
[f2f67a6]	60
	61	// Monodisperse computation
[a738209]	62	if (details->num_active == 0) {
	63	double norm, scale, background;
	64	// TODO: only needs to be done by one process...
[f2f67a6]	65	#ifdef INVALID
	66	if (INVALID(local_values)) { return; }
	67	#endif
	68
[a738209]	69	norm = CALL_VOLUME(local_values);
[f2f67a6]	70	scale = values[0];
	71	background = values[1];
	72
	73	if (q_index < nq) {
	74	double scattering = CALL_IQ(q, q_index, local_values);
	75	result[q_index] = (norm>0. ? scale*scattering/norm + background : background);
	76	}
	77	return;
	78	}
	79
	80	#if MAX_PD > 0
	81
	82	double this_result;
	83
	84	//printf("Entering polydispersity from %d to %d\n", pd_start, pd_stop);
	85
[a738209]	86	global const double *pd_value = values+2+NPARS;
	87	global const double *pd_weight = pd_value+details->pd_sum;
	88
[f2f67a6]	89	// need product of weights at every Iq calc, so keep product of
	90	// weights from the outer loops so that weight = partial_weight * fast_weight
[b9c12fe5]	91	double pd_norm;
	92	double partial_weight; // product of weight w4w3w2 but not w1
	93	double spherical_correction; // cosine correction for latitude variation
	94	double weight; // product of partial_weightw1spherical_correction
	95	int p0_par;
	96	int p0_length;
	97	int p0_offset;
	98	int p0_is_theta;
	99	int p0_index;
[f2f67a6]	100
[ae2b6b5]	101	// Number of elements in the longest polydispersity loop
[b9c12fe5]	102	p0_par = details->pd_par[0];
	103	p0_length = details->pd_length[0];
	104	p0_offset = details->pd_offset[0];
	105	p0_is_theta = (p0_par == details->theta_par);
	106
	107	// Trigger the reset behaviour that happens at the end the fast loop
	108	// by setting the initial index >= weight vector length.
	109	p0_index = p0_length;
	110
	111	// Default the spherical correction to 1.0 in case it is not otherwise set
	112	spherical_correction = 1.0;
	113	weight=1.0;
	114
	115	// Since we are no longer looping over the entire polydispersity hypercube
	116	// for each q, we need to track the result and normalization values between
	117	// calls. This means initializing them to 0 at the start and accumulating
	118	// them between calls.
	119	pd_norm = pd_start == 0 ? 0.0 : result[nq];
[ae2b6b5]	120
[f2f67a6]	121	if (q_index < nq) {
	122	this_result = pd_start == 0 ? 0.0 : result[q_index];
	123	}
	124
	125	// Loop over the weights then loop over q, accumulating values
	126	for (int loop_index=pd_start; loop_index < pd_stop; loop_index++) {
[b9c12fe5]	127	// check if fast loop needs to be reset
	128	if (p0_index == p0_length) {
	129	//printf("should be here with %d active\n", num_active);
	130
	131	// Compute position in polydispersity hypercube and partial weight
	132	partial_weight = 1.0;
	133	for (int k=1; k < details->num_active; k++) {
	134	int pk = details->pd_par[k];
	135	int index = details->pd_offset[k] + (loop_index/details->pd_stride[k])%details->pd_length[k];
	136	pvec[pk] = pd_value[index];
	137	partial_weight *= pd_weight[index];
	138	//printf("index[%d] = %d\n",k,index);
	139	if (pk == details->theta_par) {
	140	spherical_correction = fmax(fabs(cos(M_PI_180*pvec[pk])), 1.e-6);
[f2f67a6]	141	}
	142	}
[b9c12fe5]	143	p0_index = loop_index%p0_length;
	144	//printf("\n");
	145	}
[f2f67a6]	146
[b9c12fe5]	147	// Update parameter p0
	148	weight = partial_weight*pd_weight[p0_offset + p0_index];
	149	pvec[p0_par] = pd_value[p0_offset + p0_index];
	150	if (p0_is_theta) {
	151	spherical_correction = fmax(fabs(cos(M_PI_180*pvec[p0_par])), 1.e-6);
[f2f67a6]	152	}
[b9c12fe5]	153	p0_index++;
[f2f67a6]	154	//printf("\n");
	155
[ae2b6b5]	156	// Increment fast index
	157
[f2f67a6]	158	#ifdef INVALID
	159	if (INVALID(local_values)) continue;
	160	#endif
	161
	162	// Accumulate I(q)
	163	// Note: weight==0 must always be excluded
	164	if (weight > cutoff) {
	165	// spherical correction has some nasty effects when theta is +90 or -90
	166	// where it becomes zero. If the entirety of the correction
	167	weight *= spherical_correction;
[a738209]	168	pd_norm += weight * CALL_VOLUME(local_values);
[f2f67a6]	169
	170	const double scattering = CALL_IQ(q, q_index, local_values);
	171	this_result += weight*scattering;
	172	}
	173	}
	174
	175	if (q_index < nq) {
[a738209]	176	if (pd_stop >= details->pd_prod) {
[f2f67a6]	177	// End of the PD loop we can normalize
[ae2b6b5]	178	double scale, background;
	179	scale = values[0];
	180	background = values[1];
[a738209]	181	result[q_index] = (pd_norm>0. ? scale*this_result/pd_norm + background : background);
[f2f67a6]	182	} else {
	183	// Partial result, so remember it but don't normalize it.
	184	result[q_index] = this_result;
	185	}
[ae2b6b5]	186
	187	// Remember the updated norm.
[a738209]	188	if (q_index == 0) result[nq] = pd_norm;
[f2f67a6]	189	}
	190
	191	#endif // MAX_PD > 0
	192	}

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasmodels/sasmodels/kernel_iq.cl @ 256dfe1

Download in other formats: