/* ########################################################## # # # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # # !! !! # # !! KEEP THIS CODE CONSISTENT WITH KERNELPY.PY !! # # !! !! # # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # # # ########################################################## */ #ifndef _PAR_BLOCK_ // protected block so we can include this code twice. #define _PAR_BLOCK_ #define MAX_PD 4 // MAX_PD is the max number of polydisperse parameters typedef struct { int32_t pd_par[MAX_PD]; // id of the nth polydispersity variable int32_t pd_length[MAX_PD]; // length of the nth polydispersity weight vector int32_t pd_offset[MAX_PD]; // offset of pd weights in the value & weight vector int32_t pd_stride[MAX_PD]; // stride to move to the next index at this level int32_t pd_isvol[MAX_PD]; // True if parameter is a volume weighting parameter int32_t par_offset[NPARS]; // offset of par values in the value & weight vector int32_t par_coord[NPARS]; // polydispersity coordination bitvector int32_t fast_coord_pars[NPARS]; // ids of the fast coordination parameters int32_t fast_coord_count; // number of parameters coordinated with pd 1 int32_t theta_par; // id of spherical correction variable } ProblemDetails; typedef struct { PARAMETER_TABLE; } ParameterBlock; #endif kernel void KERNEL_NAME( int32_t nq, // number of q values const int32_t pd_start, // where we are in the polydispersity loop const int32_t pd_stop, // where we are stopping in the polydispersity loop global const ProblemDetails *problem, global const double *weights, global const double *values, global const double *q, // nq q values, with padding to boundary global double *result, // nq+3 return values, again with padding const double cutoff // cutoff in the polydispersity weight product ) { // Storage for the current parameter values. These will be updated as we // walk the polydispersity cube. local ParameterBlock local_values; // current parameter values double *pvec = (double *)(&local_values); // Alias named parameters with a vector local int offset[NPARS]; // NPARS excludes scale/background #if 1 // defined(USE_SHORTCUT_OPTIMIZATION) if (problem->pd_length[0] == 1) { // Shouldn't need to copy!! for (int k=0; k < NPARS; k++) { pvec[k] = values[k+2]; // skip scale and background } const double volume = CALL_VOLUME(local_values); #ifdef USE_OPENMP #pragma omp parallel for #endif for (int i=0; i < nq; i++) { const double scattering = CALL_IQ(q, i, local_values); result[i] = values[0]*scattering/volume + values[1]; } return; } printf("falling through\n"); #endif // Since we are no longer looping over the entire polydispersity hypercube // for each q, we need to track the normalization values for each q in a // separate work vector. double norm; // contains sum over weights double vol; // contains sum over volume double norm_vol; // contains weights over volume // Initialize the results to zero if (pd_start == 0) { norm_vol = 0.0; norm = 0.0; vol = 0.0; #ifdef USE_OPENMP #pragma omp parallel for #endif for (int i=0; i < nq; i++) { result[i] = 0.0; } } else { //Pulling values from previous segment norm = result[nq]; vol = result[nq+1]; norm_vol = result[nq+2]; } // Location in the polydispersity hypercube, one index per dimension. local int pd_index[MAX_PD]; // Trigger the reset behaviour that happens at the end the fast loop // by setting the initial index >= weight vector length. pd_index[0] = problem->pd_length[0]; // need product of weights at every Iq calc, so keep product of // weights from the outer loops so that weight = partial_weight * fast_weight double partial_weight = NAN; // product of weight w4*w3*w2 but not w1 double partial_volweight = NAN; double weight = 1.0; // set to 1 in case there are no weights double vol_weight = 1.0; // set to 1 in case there are no vol weights double spherical_correction = 1.0; // correction for latitude variation // Loop over the weights then loop over q, accumulating values for (int loop_index=pd_start; loop_index < pd_stop; loop_index++) { // check if indices need to be updated if (pd_index[0] >= problem->pd_length[0]) { // RESET INDICES pd_index[0] = loop_index%problem->pd_length[0]; partial_weight = 1.0; partial_volweight = 1.0; for (int k=1; k < MAX_PD; k++) { pd_index[k] = (loop_index%problem->pd_length[k])/problem->pd_stride[k]; const double wi = weights[problem->pd_offset[0]+pd_index[0]]; partial_weight *= wi; if (problem->pd_isvol[k]) partial_volweight *= wi; } for (int k=0; k < NPARS; k++) { int coord = problem->par_coord[k]; int this_offset = problem->par_offset[k]; int block_size = 1; for (int bit=0; bit < MAX_PD && coord != 0; bit++) { if (coord&1) { this_offset += block_size * pd_index[bit]; block_size *= problem->pd_length[bit]; } coord /= 2; } offset[k] = this_offset; pvec[k] = values[this_offset]; } weight = partial_weight * weights[problem->pd_offset[0]+pd_index[0]]; if (problem->theta_par >= 0) { spherical_correction = fabs(cos(M_PI_180*pvec[problem->theta_par])); } if (problem->theta_par == problem->pd_par[0]) { weight *= spherical_correction; } } else { // INCREMENT INDICES pd_index[0] += 1; const double wi = weights[problem->pd_offset[0]+pd_index[0]]; weight = partial_weight*wi; if (problem->pd_isvol[0]) vol_weight *= wi; for (int k=0; k < problem->fast_coord_count; k++) { pvec[problem->fast_coord_pars[k]] = values[offset[problem->fast_coord_pars[k]]++]; } if (problem->theta_par ==problem->pd_par[0]) { weight *= fabs(cos(M_PI_180*pvec[problem->theta_par])); } } #ifdef INVALID if (INVALID(local_values)) continue; #endif // Accumulate I(q) // Note: weight==0 must always be excluded if (weight > cutoff) { norm += weight; vol += vol_weight * CALL_VOLUME(local_values); norm_vol += vol_weight; #ifdef USE_OPENMP #pragma omp parallel for #endif for (int i=0; i < nq; i++) { const double scattering = CALL_IQ(q, i, local_values); result[i] += weight*scattering; } } } //Makes a normalization avialable for the next round result[nq] = norm; result[nq+1] = vol; result[nq+2] = norm_vol; //End of the PD loop we can normalize if (pd_stop >= problem->pd_stride[MAX_PD-1]) { #ifdef USE_OPENMP #pragma omp parallel for #endif for (int i=0; i < nq; i++) { if (vol*norm_vol != 0.0) { result[i] *= norm_vol/vol; } result[i] = values[0]*result[i]/norm + values[1]; } } }