kernel_iq.cl @ f2f67a6

core_shell_microgelscostrafo411magnetic_modelrelease_v0.94release_v0.95ticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests

Last change on this file since f2f67a6 was f2f67a6, checked in by Paul Kienzle <pkienzle@…>, 8 years ago
reenable opencl; works on cpu but not gpu
Property mode set to `100644`
File size: 9.0 KB

Line
1
2	/*
3	##########################################################
4	# #
5	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
6	# !! !! #
7	# !! KEEP THIS CODE CONSISTENT WITH KERNELPY.PY !! #
8	# !! !! #
9	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
10	# #
11	##########################################################
12	*/
13
14	#ifndef _PAR_BLOCK_ // protected block so we can include this code twice.
15	#define _PAR_BLOCK_
16
17	typedef struct {
18	#if MAX_PD > 0
19	int32_t pd_par[MAX_PD]; // id of the nth polydispersity variable
20	int32_t pd_length[MAX_PD]; // length of the nth polydispersity weight vector
21	int32_t pd_offset[MAX_PD]; // offset of pd weights in the value & weight vector
22	int32_t pd_stride[MAX_PD]; // stride to move to the next index at this level
23	#endif // MAX_PD > 0
24	int32_t par_offset[NPARS]; // offset of par value blocks in the value & weight vector
25	int32_t par_coord[NPARS]; // ids of the coordination parameters
26	int32_t pd_coord[NPARS]; // polydispersity coordination bitvector
27	int32_t num_active; // number of non-trivial pd loops
28	int32_t total_pd; // total number of voxels in hypercube
29	int32_t num_coord; // number of coordinated parameters
30	int32_t theta_par; // id of spherical correction variable
31	} ProblemDetails;
32
33	typedef struct {
34	PARAMETER_TABLE;
35	} ParameterBlock;
36	#endif
37
38
39	kernel
40	void KERNEL_NAME(
41	int32_t nq, // number of q values
42	const int32_t pd_start, // where we are in the polydispersity loop
43	const int32_t pd_stop, // where we are stopping in the polydispersity loop
44	global const ProblemDetails *details,
45	global const double *weights,
46	global const double *values,
47	global const double *q, // nq q values, with padding to boundary
48	global double *result, // nq+3 return values, again with padding
49	const double cutoff // cutoff in the polydispersity weight product
50	)
51	{
52	double norm;
53
54	// who we are and what element we are working with
55	const int q_index = get_global_id(0);
56
57	// number of active loops
58	const int num_active = details->num_active;
59
60	// Storage for the current parameter values. These will be updated as we
61	// walk the polydispersity cube.
62	ParameterBlock local_values; // current parameter values
63	double pvec = (double )(&local_values); // Alias named parameters with a vector
64	// Fill in the initial variables
65	for (int k = 0; k < NPARS; k++) {
66	pvec[k] = values[details->par_offset[k]];
67	}
68
69	// Monodisperse computation
70	if (num_active == 0) {
71	#ifdef INVALID
72	if (INVALID(local_values)) { return; }
73	#endif
74
75	double scale, background;
76	norm = CALL_VOLUME(local_values);
77	scale = values[0];
78	background = values[1];
79
80	// if (i==0) result[nq] = norm; // Total volume normalization
81
82	if (q_index < nq) {
83	double scattering = CALL_IQ(q, q_index, local_values);
84	result[q_index] = (norm>0. ? scale*scattering/norm + background : background);
85	}
86	return;
87	}
88
89	#if MAX_PD > 0
90
91	// If it is the first round initialize the result to zero, otherwise
92	// assume that the previous result has been passed back.
93	// Note: doing this even in the monodisperse case in order to handle the
94	// rare case where the model parameters are invalid and zero is returned.
95	// So slightly increased cost for slightly smaller code size.
96	double this_result;
97
98	//printf("Entering polydispersity from %d to %d\n", pd_start, pd_stop);
99	// norm will be shared across all threads.
100
101	// need product of weights at every Iq calc, so keep product of
102	// weights from the outer loops so that weight = partial_weight * fast_weight
103	double partial_weight; // product of weight w4w3w2 but not w1
104	double spherical_correction; // cosine correction for latitude variation
105
106	// Location in the polydispersity hypercube, one index per dimension.
107	int pd_index[MAX_PD];
108
109	// Location of the coordinated parameters in their own sub-cubes.
110	int offset[NPARS];
111
112	// Number of elements in the longest polydispersity loop
113	const int fast_length = details->pd_length[0];
114
115	// Number of coordinated indices
116	const int num_coord = details->num_coord;
117
118	// We could in theory spread this work across different threads, but
119	// lets keep it simple;
120	norm = pd_start == 0 ? 0.0 : result[nq];
121	spherical_correction = 1.0; // the usual case.
122	// partial_weight = NAN;
123	// Trigger the reset behaviour that happens at the end the fast loop
124	// by setting the initial index >= weight vector length.
125	pd_index[0] = fast_length;
126
127	// Since we are no longer looping over the entire polydispersity hypercube
128	// for each q, we need to track the result and normalization values between
129	// calls. This means initializing them to 0 at the start and accumulating
130	// them between calls.
131	if (q_index < nq) {
132	this_result = pd_start == 0 ? 0.0 : result[q_index];
133	}
134
135	// Loop over the weights then loop over q, accumulating values
136	for (int loop_index=pd_start; loop_index < pd_stop; loop_index++) {
137	// check if fast loop needs to be reset
138	if (pd_index[0] == fast_length) {
139	//printf("should be here with %d active\n", num_active);
140
141	// Compute position in polydispersity hypercube
142	for (int k=0; k < num_active; k++) {
143	pd_index[k] = (loop_index/details->pd_stride[k])%details->pd_length[k];
144	//printf("pd_index[%d] = %d\n",k,pd_index[k]);
145	}
146
147	// need to compute the product of the weights. If the vector were really
148	// long, we could split the work into groups, with each thread taking
149	// every nth weight, but there really is no call for it here. We could
150	// also do some clever pair-wise multiplication similar to parallel
151	// prefix, but again simpler is probably faster since n is likely small.
152	// Compute partial weights
153	partial_weight = 1.0;
154	//printf("partial weight %d: ", loop_index);
155	for (int k=1; k < num_active; k++) {
156	double wi = weights[details->pd_offset[k] + pd_index[k]];
157	//printf("pd[%d]=par[%d]=%g ", k, details->pd_par[k], wi);
158	partial_weight *= wi;
159	}
160	//printf("\n");
161
162	// Update parameter offsets in weight vector
163	//printf("slow %d: ", loop_index);
164	for (int k=0; k < num_coord; k++) {
165	if (k < num_coord) {
166	int par = details->par_coord[k];
167	int coord = details->pd_coord[k];
168	int this_offset = details->par_offset[par];
169	int block_size = 1;
170	for (int bit=0; coord != 0; bit++) {
171	if (coord&1) {
172	this_offset += block_size * pd_index[bit];
173	block_size *= details->pd_length[bit];
174	}
175	coord >>= 1;
176	}
177	offset[par] = this_offset;
178	pvec[par] = values[this_offset];
179	//printf("par[%d]=v[%d]=%g \n", k, offset[k], pvec[k]);
180	// if theta is not coordinated with fast index, precompute spherical correction
181	if (par == details->theta_par && !(details->par_coord[k]&1)) {
182	spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1.e-6);
183	}
184	}
185	}
186	//printf("\n");
187	}
188
189	double weight;
190	const double wi = weights[details->pd_offset[0] + pd_index[0]];
191	weight = partial_weight*wi;
192	pd_index[0]++;
193
194	// Increment fast index
195	//printf("fast %d: ", loop_index);
196	for (int k=0; k < num_coord; k++) {
197	if (k < num_coord) {
198	if (details->pd_coord[k]&1) {
199	const int par = details->par_coord[k];
200	pvec[par] = values[offset[par]++];
201	//printf("p[%d]=v[%d]=%g ", par, offset[par]-1, pvec[par]);
202	// if theta is coordinated with fast index, compute spherical correction each time
203	if (par == details->theta_par) {
204	spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1.e-6);
205	}
206	}
207	}
208	}
209	//printf("\n");
210
211	#ifdef INVALID
212	if (INVALID(local_values)) continue;
213	#endif
214
215	// Accumulate I(q)
216	// Note: weight==0 must always be excluded
217	if (weight > cutoff) {
218	// spherical correction has some nasty effects when theta is +90 or -90
219	// where it becomes zero. If the entirety of the correction
220	weight *= spherical_correction;
221	norm += weight * CALL_VOLUME(local_values);
222
223	const double scattering = CALL_IQ(q, q_index, local_values);
224	this_result += weight*scattering;
225	}
226	}
227
228	if (q_index < nq) {
229	if (pd_stop >= details->total_pd) {
230	// End of the PD loop we can normalize
231	const double scale = values[0];
232	const double background = values[1];
233	result[q_index] = (norm>0. ? scale*this_result/norm + background : background);
234	} else {
235	// Partial result, so remember it but don't normalize it.
236	result[q_index] = this_result;
237	}
238	// Accumulate norm.
239	if (q_index == 0) result[nq] = norm;
240	}
241
242	#endif // MAX_PD > 0
243	}

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasmodels/sasmodels/kernel_iq.cl @ f2f67a6

Download in other formats: