kernel_iq.c @ b32caab

core_shell_microgelscostrafo411magnetic_modelticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests

Last change on this file since b32caab was bde38b5, checked in by Paul Kienzle <pkienzle@…>, 8 years ago
simplify kernel calling
Property mode set to `100644`
File size: 11.7 KB

Rev	Line
[2e44ac7]	1
	2	/*
	3	##########################################################
	4	# #
	5	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
	6	# !! !! #
	7	# !! KEEP THIS CODE CONSISTENT WITH KERNELPY.PY !! #
	8	# !! !! #
	9	# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
	10	# #
	11	##########################################################
	12	*/
	13
[03cac08]	14	#ifndef _PAR_BLOCK_ // protected block so we can include this code twice.
	15	#define _PAR_BLOCK_
[2e44ac7]	16
	17	typedef struct {
[60eab2a]	18	#if MAX_PD > 0
[a6f9577]	19	int32_t pd_par[MAX_PD]; // id of the nth polydispersity variable
[5cf3c33]	20	int32_t pd_length[MAX_PD]; // length of the nth polydispersity weight vector
[0a7e5eb4]	21	int32_t pd_offset[MAX_PD]; // offset of pd weights in the value & weight vector
[5cf3c33]	22	int32_t pd_stride[MAX_PD]; // stride to move to the next index at this level
[60eab2a]	23	#endif // MAX_PD > 0
[bde38b5]	24	int32_t num_eval; // total number of voxels in hypercube
	25	int32_t num_weights; // total length of the weights vector
[5ff1b03]	26	int32_t num_active; // number of non-trivial pd loops
[0a7e5eb4]	27	int32_t theta_par; // id of spherical correction variable
[2e44ac7]	28	} ProblemDetails;
	29
[bde38b5]	30	// Intel HD 4000 needs private arrays to be a multiple of 4 long
[2e44ac7]	31	typedef struct {
[56547a8]	32	PARAMETER_TABLE
[bde38b5]	33	} ParameterTable;
	34	typedef union {
	35	ParameterTable table;
	36	double vector[4*((NUM_PARS+3)/4)];
[2e44ac7]	37	} ParameterBlock;
[9eb3632]	38	#endif // _PAR_BLOCK_
[03cac08]	39
[32e3c9b]	40
[a4280bd]	41	#if defined(MAGNETIC) && NUM_MAGNETIC>0
	42
[32e3c9b]	43	// Return value restricted between low and high
	44	static double clip(double value, double low, double high)
	45	{
[b966a96]	46	return (value < low ? low : (value > high ? high : value));
[32e3c9b]	47	}
	48
	49	// Compute spin cross sections given in_spin and out_spin
	50	// To convert spin cross sections to sld b:
	51	// uu * (sld - m_sigma_x);
	52	// dd * (sld + m_sigma_x);
	53	// ud * (m_sigma_y + 1j*m_sigma_z);
	54	// du * (m_sigma_y - 1j*m_sigma_z);
[a4280bd]	55	static void set_spins(double in_spin, double out_spin, double spins[4])
[32e3c9b]	56	{
[b966a96]	57	in_spin = clip(in_spin, 0.0, 1.0);
	58	out_spin = clip(out_spin, 0.0, 1.0);
[a4280bd]	59	spins[0] = sqrt(sqrt((1.0-in_spin) * (1.0-out_spin))); // dd
	60	spins[1] = sqrt(sqrt((1.0-in_spin) * out_spin)); // du
	61	spins[2] = sqrt(sqrt(in_spin * (1.0-out_spin))); // ud
	62	spins[3] = sqrt(sqrt(in_spin * out_spin)); // uu
	63	}
	64
	65	static double mag_sld(double qx, double qy, double p,
	66	double mx, double my, double sld)
	67	{
	68	const double perp = qymx - qxmy;
	69	return sld + perp*p;
[32e3c9b]	70	}
	71
[9eb3632]	72	#endif // MAGNETIC
[2e44ac7]	73
[03cac08]	74	kernel
	75	void KERNEL_NAME(
[5cf3c33]	76	int32_t nq, // number of q values
	77	const int32_t pd_start, // where we are in the polydispersity loop
	78	const int32_t pd_stop, // where we are stopping in the polydispersity loop
[6e7ff6d]	79	global const ProblemDetails *details,
[9eb3632]	80	global const double *values,
[2e44ac7]	81	global const double *q, // nq q values, with padding to boundary
[9eb3632]	82	global double *result, // nq+1 return values, again with padding
[303d8d6]	83	const double cutoff // cutoff in the polydispersity weight product
[2e44ac7]	84	)
	85	{
[10ddb64]	86	// Storage for the current parameter values. These will be updated as we
[bde38b5]	87	// walk the polydispersity cube.
[9eb3632]	88	ParameterBlock local_values;
[2e44ac7]	89
[a4280bd]	90	#if defined(MAGNETIC) && NUM_MAGNETIC>0
[bde38b5]	91	// Location of the sld parameters in the parameter vector.
[9eb3632]	92	// These parameters are updated with the effective sld due to magnetism.
[a4280bd]	93	#if NUM_MAGNETIC > 3
[9eb3632]	94	const int32_t slds[] = { MAGNETIC_PARS };
[a4280bd]	95	#endif
[32e3c9b]	96
[9eb3632]	97	// TODO: could precompute these outside of the kernel.
[32e3c9b]	98	// Interpret polarization cross section.
[a4280bd]	99	// up_frac_i = values[NUM_PARS+2];
	100	// up_frac_f = values[NUM_PARS+3];
	101	// up_angle = values[NUM_PARS+4];
	102	double spins[4];
[32e3c9b]	103	double cos_mspin, sin_mspin;
[a4280bd]	104	set_spins(values[NUM_PARS+2], values[NUM_PARS+3], spins);
	105	SINCOS(-values[NUM_PARS+4]*M_PI_180, sin_mspin, cos_mspin);
[9eb3632]	106	#endif // MAGNETIC
[3044216]	107
[9eb3632]	108	// Fill in the initial variables
	109	// values[0] is scale
	110	// values[1] is background
	111	#ifdef USE_OPENMP
	112	#pragma omp parallel for
	113	#endif
[a4280bd]	114	for (int i=0; i < NUM_PARS; i++) {
[bde38b5]	115	local_values.vector[i] = values[2+i];
	116	//printf("p%d = %g\n",i, local_values.vector[i]);
[9eb3632]	117	}
[bde38b5]	118	//printf("NUM_VALUES:%d NUM_PARS:%d MAX_PD:%d\n", NUM_VALUES, NUM_PARS, MAX_PD);
	119	//printf("start:%d stop:%d\n", pd_start, pd_stop);
[f2f67a6]	120
[bde38b5]	121	double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
[9eb3632]	122	if (pd_start == 0) {
[2e44ac7]	123	#ifdef USE_OPENMP
	124	#pragma omp parallel for
	125	#endif
[9eb3632]	126	for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0;
[2e44ac7]	127	}
[9eb3632]	128	//printf("start %d %g %g\n", pd_start, pd_norm, result[0]);
	129
[7b7da6b]	130	#if MAX_PD>0
[bde38b5]	131	global const double *pd_value = values + NUM_VALUES;
	132	global const double *pd_weight = pd_value + details->num_weights;
[7b7da6b]	133	#endif
[9eb3632]	134
	135	// Jump into the middle of the polydispersity loop
	136	#if MAX_PD>4
	137	int n4=details->pd_length[4];
	138	int i4=(pd_start/details->pd_stride[4])%n4;
	139	const int p4=details->pd_par[4];
	140	global const double *v4 = pd_value + details->pd_offset[4];
	141	global const double *w4 = pd_weight + details->pd_offset[4];
	142	#endif
	143	#if MAX_PD>3
	144	int n3=details->pd_length[3];
	145	int i3=(pd_start/details->pd_stride[3])%n3;
	146	const int p3=details->pd_par[3];
	147	global const double *v3 = pd_value + details->pd_offset[3];
	148	global const double *w3 = pd_weight + details->pd_offset[3];
	149	//printf("offset %d: %d %d\n", 3, details->pd_offset[3], NUM_VALUES);
	150	#endif
	151	#if MAX_PD>2
	152	int n2=details->pd_length[2];
	153	int i2=(pd_start/details->pd_stride[2])%n2;
	154	const int p2=details->pd_par[2];
	155	global const double *v2 = pd_value + details->pd_offset[2];
	156	global const double *w2 = pd_weight + details->pd_offset[2];
	157	#endif
	158	#if MAX_PD>1
	159	int n1=details->pd_length[1];
	160	int i1=(pd_start/details->pd_stride[1])%n1;
	161	const int p1=details->pd_par[1];
	162	global const double *v1 = pd_value + details->pd_offset[1];
	163	global const double *w1 = pd_weight + details->pd_offset[1];
	164	#endif
	165	#if MAX_PD>0
	166	int n0=details->pd_length[0];
	167	int i0=(pd_start/details->pd_stride[0])%n0;
	168	const int p0=details->pd_par[0];
	169	global const double *v0 = pd_value + details->pd_offset[0];
	170	global const double *w0 = pd_weight + details->pd_offset[0];
[bde38b5]	171	//printf("w0:%p, values:%p, diff:%ld, %d\n",w0,values,(w0-values), NUM_VALUES);
[9eb3632]	172	#endif
[2e44ac7]	173
[5ff1b03]	174
[9eb3632]	175	#if MAX_PD>0
[0f00d95]	176	const int theta_par = details->theta_par;
[9eb3632]	177	const int fast_theta = (theta_par == p0);
	178	const int slow_theta = (theta_par >= 0 && !fast_theta);
[0f00d95]	179	double spherical_correction = 1.0;
[32e3c9b]	180	#else
[0f00d95]	181	// Note: if not polydisperse the weights cancel and we don't need the
	182	// spherical correction.
	183	const double spherical_correction = 1.0;
[32e3c9b]	184	#endif
[3044216]	185
[9eb3632]	186	int step = pd_start;
[ae2b6b5]	187
[9eb3632]	188	#if MAX_PD>4
	189	const double weight5 = 1.0;
	190	while (i4 < n4) {
[bde38b5]	191	local_values.vector[p4] = v4[i4];
[9eb3632]	192	double weight4 = w4[i4] * weight5;
[bde38b5]	193	//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 4, p4, i4, n4, local_values.vector[p4], weight4);
[9eb3632]	194	#elif MAX_PD>3
	195	const double weight4 = 1.0;
	196	#endif
	197	#if MAX_PD>3
	198	while (i3 < n3) {
[bde38b5]	199	local_values.vector[p3] = v3[i3];
[9eb3632]	200	double weight3 = w3[i3] * weight4;
[bde38b5]	201	//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 3, p3, i3, n3, local_values.vector[p3], weight3);
[9eb3632]	202	#elif MAX_PD>2
	203	const double weight3 = 1.0;
	204	#endif
	205	#if MAX_PD>2
	206	while (i2 < n2) {
[bde38b5]	207	local_values.vector[p2] = v2[i2];
[9eb3632]	208	double weight2 = w2[i2] * weight3;
[bde38b5]	209	//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 2, p2, i2, n2, local_values.vector[p2], weight2);
[9eb3632]	210	#elif MAX_PD>1
	211	const double weight2 = 1.0;
	212	#endif
	213	#if MAX_PD>1
	214	while (i1 < n1) {
[bde38b5]	215	local_values.vector[p1] = v1[i1];
[9eb3632]	216	double weight1 = w1[i1] * weight2;
[bde38b5]	217	//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 1, p1, i1, n1, local_values.vector[p1], weight1);
[9eb3632]	218	#elif MAX_PD>0
	219	const double weight1 = 1.0;
	220	#endif
	221	#if MAX_PD>0
[0f00d95]	222	if (slow_theta) { // Theta is not in inner loop
[bde38b5]	223	spherical_correction = fmax(fabs(cos(M_PI_180*local_values.vector[theta_par])), 1.e-6);
[0f00d95]	224	}
[9eb3632]	225	while(i0 < n0) {
[bde38b5]	226	local_values.vector[p0] = v0[i0];
[9eb3632]	227	double weight0 = w0[i0] * weight1;
[bde38b5]	228	//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 0, p0, i0, n0, local_values.vector[p0], weight0);
[9eb3632]	229	if (fast_theta) { // Theta is in inner loop
[bde38b5]	230	spherical_correction = fmax(fabs(cos(M_PI_180*local_values.vector[p0])), 1.e-6);
[5ff1b03]	231	}
[9eb3632]	232	#else
	233	const double weight0 = 1.0;
	234	#endif
[5ff1b03]	235
[bde38b5]	236	//printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n");
[9eb3632]	237	//printf("sphcor: %g\n", spherical_correction);
[ae2b6b5]	238
[3044216]	239	#ifdef INVALID
[bde38b5]	240	if (!INVALID(local_values.table))
[3044216]	241	#endif
[9eb3632]	242	{
	243	// Accumulate I(q)
	244	// Note: weight==0 must always be excluded
	245	if (weight0 > cutoff) {
[a3a0c5c]	246	// spherical correction is set at a minimum of 1e-6, otherwise there
	247	// would be problems looking at models with theta=90.
[9eb3632]	248	const double weight = weight0 * spherical_correction;
[bde38b5]	249	pd_norm += weight * CALL_VOLUME(local_values.table);
[9eb3632]	250
	251	#ifdef USE_OPENMP
	252	#pragma omp parallel for
	253	#endif
	254	for (int q_index=0; q_index<nq; q_index++) {
[a4280bd]	255	#if defined(MAGNETIC) && NUM_MAGNETIC > 0
[9eb3632]	256	const double qx = q[2*q_index];
	257	const double qy = q[2*q_index+1];
	258	const double qsq = qxqx + qyqy;
	259
	260	// Constant across orientation, polydispersity for given qx, qy
[a4280bd]	261	double scattering = 0.0;
	262	// TODO: what is the magnetic scattering at q=0
[9eb3632]	263	if (qsq > 1.e-16) {
[bde38b5]	264	double p[4]; // dd, du, ud, uu
[a4280bd]	265	p[0] = (qycos_mspin + qxsin_mspin)/qsq;
	266	p[3] = -p[0];
	267	p[1] = p[2] = (qysin_mspin - qxcos_mspin)/qsq;
[9eb3632]	268
[a4280bd]	269	for (int index=0; index<4; index++) {
	270	const double xs = spins[index];
	271	if (xs > 1.e-8) {
	272	const int spin_flip = (index==1) \|\| (index==2);
	273	const double pk = p[index];
	274	for (int axis=0; axis<=spin_flip; axis++) {
	275	#define M1 NUM_PARS+5
	276	#define M2 NUM_PARS+8
	277	#define M3 NUM_PARS+13
	278	#define SLD(_M_offset, _sld_offset) \
[bde38b5]	279	local_values.vector[_sld_offset] = xs * (axis \
[a4280bd]	280	? (index==1 ? -values[_M_offset+2] : values[_M_offset+2]) \
	281	: mag_sld(qx, qy, pk, values[_M_offset], values[_M_offset+1], \
	282	(spin_flip ? 0.0 : values[_sld_offset+2])))
	283	#if NUM_MAGNETIC==1
	284	SLD(M1, MAGNETIC_PAR1);
	285	#elif NUM_MAGNETIC==2
	286	SLD(M1, MAGNETIC_PAR1);
	287	SLD(M2, MAGNETIC_PAR2);
	288	#elif NUM_MAGNETIC==3
	289	SLD(M1, MAGNETIC_PAR1);
	290	SLD(M2, MAGNETIC_PAR2);
	291	SLD(M3, MAGNETIC_PAR3);
	292	#else
	293	for (int sk=0; sk<NUM_MAGNETIC; sk++) {
	294	SLD(M1+3*sk, slds[sk]);
	295	}
	296	#endif
[bde38b5]	297	scattering += CALL_IQ(q, q_index, local_values.table);
[a4280bd]	298	}
	299	}
[9eb3632]	300	}
[32e3c9b]	301	}
[9eb3632]	302	#else // !MAGNETIC
[bde38b5]	303	const double scattering = CALL_IQ(q, q_index, local_values.table);
[9eb3632]	304	#endif // !MAGNETIC
	305	//printf("q_index:%d %g %g %g %g\n",q_index, scattering, weight, spherical_correction, weight0);
	306	result[q_index] += weight * scattering;
[32e3c9b]	307	}
[3044216]	308	}
[03cac08]	309	}
[9eb3632]	310	++step;
	311	#if MAX_PD>0
	312	if (step >= pd_stop) break;
	313	++i0;
[2e44ac7]	314	}
[9eb3632]	315	i0 = 0;
	316	#endif
	317	#if MAX_PD>1
	318	if (step >= pd_stop) break;
	319	++i1;
	320	}
	321	i1 = 0;
	322	#endif
	323	#if MAX_PD>2
	324	if (step >= pd_stop) break;
	325	++i2;
[2e44ac7]	326	}
[9eb3632]	327	i2 = 0;
	328	#endif
	329	#if MAX_PD>3
	330	if (step >= pd_stop) break;
	331	++i3;
	332	}
	333	i3 = 0;
	334	#endif
	335	#if MAX_PD>4
	336	if (step >= pd_stop) break;
	337	++i4;
	338	}
	339	i4 = 0;
	340	#endif
[f2f67a6]	341
[9eb3632]	342	//printf("res: %g/%g\n", result[0], pd_norm);
[f2f67a6]	343	// Remember the updated norm.
[a738209]	344	result[nq] = pd_norm;
[2e44ac7]	345	}

Note: See TracBrowser for help on using the repository browser.

SasView

source: sasmodels/sasmodels/kernel_iq.c @ b32caab

Download in other formats: