[2e44ac7] | 1 | |
---|
| 2 | /* |
---|
| 3 | ########################################################## |
---|
| 4 | # # |
---|
| 5 | # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # |
---|
| 6 | # !! !! # |
---|
| 7 | # !! KEEP THIS CODE CONSISTENT WITH KERNELPY.PY !! # |
---|
| 8 | # !! !! # |
---|
| 9 | # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # |
---|
| 10 | # # |
---|
| 11 | ########################################################## |
---|
| 12 | */ |
---|
| 13 | |
---|
[03cac08] | 14 | #ifndef _PAR_BLOCK_ // protected block so we can include this code twice. |
---|
| 15 | #define _PAR_BLOCK_ |
---|
[2e44ac7] | 16 | |
---|
| 17 | #define MAX_PD 4 // MAX_PD is the max number of polydisperse parameters |
---|
| 18 | |
---|
| 19 | typedef struct { |
---|
[a6f9577] | 20 | int32_t pd_par[MAX_PD]; // id of the nth polydispersity variable |
---|
[5cf3c33] | 21 | int32_t pd_length[MAX_PD]; // length of the nth polydispersity weight vector |
---|
[0a7e5eb4] | 22 | int32_t pd_offset[MAX_PD]; // offset of pd weights in the value & weight vector |
---|
[5cf3c33] | 23 | int32_t pd_stride[MAX_PD]; // stride to move to the next index at this level |
---|
| 24 | int32_t pd_isvol[MAX_PD]; // True if parameter is a volume weighting parameter |
---|
[0a7e5eb4] | 25 | int32_t par_offset[NPARS]; // offset of par values in the value & weight vector |
---|
[5cf3c33] | 26 | int32_t par_coord[NPARS]; // polydispersity coordination bitvector |
---|
[a6f9577] | 27 | int32_t fast_coord_pars[NPARS]; // ids of the fast coordination parameters |
---|
[5cf3c33] | 28 | int32_t fast_coord_count; // number of parameters coordinated with pd 1 |
---|
[0a7e5eb4] | 29 | int32_t theta_par; // id of spherical correction variable |
---|
[2e44ac7] | 30 | } ProblemDetails; |
---|
| 31 | |
---|
| 32 | typedef struct { |
---|
[03cac08] | 33 | PARAMETER_TABLE; |
---|
[2e44ac7] | 34 | } ParameterBlock; |
---|
[03cac08] | 35 | #endif |
---|
| 36 | |
---|
[2e44ac7] | 37 | |
---|
[03cac08] | 38 | kernel |
---|
| 39 | void KERNEL_NAME( |
---|
[5cf3c33] | 40 | int32_t nq, // number of q values |
---|
| 41 | const int32_t pd_start, // where we are in the polydispersity loop |
---|
| 42 | const int32_t pd_stop, // where we are stopping in the polydispersity loop |
---|
[2e44ac7] | 43 | global const ProblemDetails *problem, |
---|
| 44 | global const double *weights, |
---|
[0a7e5eb4] | 45 | global const double *values, |
---|
[2e44ac7] | 46 | global const double *q, // nq q values, with padding to boundary |
---|
[03cac08] | 47 | global double *result, // nq+3 return values, again with padding |
---|
[303d8d6] | 48 | const double cutoff // cutoff in the polydispersity weight product |
---|
[2e44ac7] | 49 | ) |
---|
| 50 | { |
---|
[10ddb64] | 51 | // Storage for the current parameter values. These will be updated as we |
---|
| 52 | // walk the polydispersity cube. |
---|
[0a7e5eb4] | 53 | local ParameterBlock local_values; // current parameter values |
---|
| 54 | double *pvec = (double *)(&local_values); // Alias named parameters with a vector |
---|
[2e44ac7] | 55 | |
---|
[a6f9577] | 56 | local int offset[NPARS]; // NPARS excludes scale/background |
---|
[f9245d4] | 57 | |
---|
[e1bdc7e] | 58 | #if 1 // defined(USE_SHORTCUT_OPTIMIZATION) |
---|
| 59 | if (problem->pd_length[0] == 1) { |
---|
[3044216] | 60 | // Shouldn't need to copy!! |
---|
[39cc3be] | 61 | |
---|
[3044216] | 62 | for (int k=0; k < NPARS; k++) { |
---|
[0a7e5eb4] | 63 | pvec[k] = values[k+2]; // skip scale and background |
---|
[3044216] | 64 | } |
---|
| 65 | |
---|
[0a7e5eb4] | 66 | const double volume = CALL_VOLUME(local_values); |
---|
[3044216] | 67 | #ifdef USE_OPENMP |
---|
| 68 | #pragma omp parallel for |
---|
| 69 | #endif |
---|
| 70 | for (int i=0; i < nq; i++) { |
---|
[0a7e5eb4] | 71 | const double scattering = CALL_IQ(q, i, local_values); |
---|
| 72 | result[i] = values[0]*scattering/volume + values[1]; |
---|
[3044216] | 73 | } |
---|
| 74 | return; |
---|
| 75 | } |
---|
[e1bdc7e] | 76 | printf("falling through\n"); |
---|
[3044216] | 77 | #endif |
---|
| 78 | |
---|
| 79 | |
---|
[10ddb64] | 80 | // Since we are no longer looping over the entire polydispersity hypercube |
---|
| 81 | // for each q, we need to track the normalization values for each q in a |
---|
| 82 | // separate work vector. |
---|
[3044216] | 83 | double norm; // contains sum over weights |
---|
| 84 | double vol; // contains sum over volume |
---|
| 85 | double norm_vol; // contains weights over volume |
---|
[10ddb64] | 86 | |
---|
[2e44ac7] | 87 | // Initialize the results to zero |
---|
| 88 | if (pd_start == 0) { |
---|
[3044216] | 89 | norm_vol = 0.0; |
---|
| 90 | norm = 0.0; |
---|
| 91 | vol = 0.0; |
---|
| 92 | |
---|
[2e44ac7] | 93 | #ifdef USE_OPENMP |
---|
| 94 | #pragma omp parallel for |
---|
| 95 | #endif |
---|
[3044216] | 96 | for (int i=0; i < nq; i++) { |
---|
[2e44ac7] | 97 | result[i] = 0.0; |
---|
| 98 | } |
---|
[3044216] | 99 | } else { |
---|
| 100 | //Pulling values from previous segment |
---|
| 101 | norm = result[nq]; |
---|
| 102 | vol = result[nq+1]; |
---|
[03cac08] | 103 | norm_vol = result[nq+2]; |
---|
[2e44ac7] | 104 | } |
---|
| 105 | |
---|
[3044216] | 106 | // Location in the polydispersity hypercube, one index per dimension. |
---|
[03cac08] | 107 | local int pd_index[MAX_PD]; |
---|
[a10da8b] | 108 | |
---|
[f9245d4] | 109 | // Trigger the reset behaviour that happens at the end the fast loop |
---|
| 110 | // by setting the initial index >= weight vector length. |
---|
[03cac08] | 111 | pd_index[0] = problem->pd_length[0]; |
---|
| 112 | |
---|
[2e44ac7] | 113 | |
---|
[3044216] | 114 | // need product of weights at every Iq calc, so keep product of |
---|
| 115 | // weights from the outer loops so that weight = partial_weight * fast_weight |
---|
| 116 | double partial_weight = NAN; // product of weight w4*w3*w2 but not w1 |
---|
[f78a2a1] | 117 | double partial_volweight = NAN; |
---|
| 118 | double weight = 1.0; // set to 1 in case there are no weights |
---|
| 119 | double vol_weight = 1.0; // set to 1 in case there are no vol weights |
---|
[208f0a4] | 120 | double spherical_correction = 1.0; // correction for latitude variation |
---|
[3044216] | 121 | |
---|
[2e44ac7] | 122 | // Loop over the weights then loop over q, accumulating values |
---|
| 123 | for (int loop_index=pd_start; loop_index < pd_stop; loop_index++) { |
---|
| 124 | // check if indices need to be updated |
---|
[03cac08] | 125 | if (pd_index[0] >= problem->pd_length[0]) { |
---|
[208f0a4] | 126 | |
---|
| 127 | // RESET INDICES |
---|
[03cac08] | 128 | pd_index[0] = loop_index%problem->pd_length[0]; |
---|
[2e44ac7] | 129 | partial_weight = 1.0; |
---|
[f78a2a1] | 130 | partial_volweight = 1.0; |
---|
[3044216] | 131 | for (int k=1; k < MAX_PD; k++) { |
---|
[03cac08] | 132 | pd_index[k] = (loop_index%problem->pd_length[k])/problem->pd_stride[k]; |
---|
| 133 | const double wi = weights[problem->pd_offset[0]+pd_index[0]]; |
---|
[f78a2a1] | 134 | partial_weight *= wi; |
---|
[03cac08] | 135 | if (problem->pd_isvol[k]) partial_volweight *= wi; |
---|
[2e44ac7] | 136 | } |
---|
| 137 | for (int k=0; k < NPARS; k++) { |
---|
[03cac08] | 138 | int coord = problem->par_coord[k]; |
---|
| 139 | int this_offset = problem->par_offset[k]; |
---|
[2e44ac7] | 140 | int block_size = 1; |
---|
| 141 | for (int bit=0; bit < MAX_PD && coord != 0; bit++) { |
---|
| 142 | if (coord&1) { |
---|
| 143 | this_offset += block_size * pd_index[bit]; |
---|
[03cac08] | 144 | block_size *= problem->pd_length[bit]; |
---|
[2e44ac7] | 145 | } |
---|
[a10da8b] | 146 | coord /= 2; |
---|
[2e44ac7] | 147 | } |
---|
| 148 | offset[k] = this_offset; |
---|
[0a7e5eb4] | 149 | pvec[k] = values[this_offset]; |
---|
[03cac08] | 150 | } |
---|
| 151 | weight = partial_weight * weights[problem->pd_offset[0]+pd_index[0]]; |
---|
[0a7e5eb4] | 152 | if (problem->theta_par >= 0) { |
---|
| 153 | spherical_correction = fabs(cos(M_PI_180*pvec[problem->theta_par])); |
---|
[2e44ac7] | 154 | } |
---|
[0a7e5eb4] | 155 | if (problem->theta_par == problem->pd_par[0]) { |
---|
[03cac08] | 156 | weight *= spherical_correction; |
---|
[208f0a4] | 157 | } |
---|
| 158 | |
---|
[2e44ac7] | 159 | } else { |
---|
[208f0a4] | 160 | |
---|
| 161 | // INCREMENT INDICES |
---|
[2e44ac7] | 162 | pd_index[0] += 1; |
---|
[03cac08] | 163 | const double wi = weights[problem->pd_offset[0]+pd_index[0]]; |
---|
[f78a2a1] | 164 | weight = partial_weight*wi; |
---|
[03cac08] | 165 | if (problem->pd_isvol[0]) vol_weight *= wi; |
---|
[2e44ac7] | 166 | for (int k=0; k < problem->fast_coord_count; k++) { |
---|
[a6f9577] | 167 | pvec[problem->fast_coord_pars[k]] |
---|
[0a7e5eb4] | 168 | = values[offset[problem->fast_coord_pars[k]]++]; |
---|
[03cac08] | 169 | } |
---|
[0a7e5eb4] | 170 | if (problem->theta_par ==problem->pd_par[0]) { |
---|
| 171 | weight *= fabs(cos(M_PI_180*pvec[problem->theta_par])); |
---|
[2e44ac7] | 172 | } |
---|
| 173 | } |
---|
[208f0a4] | 174 | |
---|
[3044216] | 175 | #ifdef INVALID |
---|
[0a7e5eb4] | 176 | if (INVALID(local_values)) continue; |
---|
[3044216] | 177 | #endif |
---|
[208f0a4] | 178 | |
---|
[303d8d6] | 179 | // Accumulate I(q) |
---|
| 180 | // Note: weight==0 must always be excluded |
---|
[10ddb64] | 181 | if (weight > cutoff) { |
---|
[3044216] | 182 | norm += weight; |
---|
[0a7e5eb4] | 183 | vol += vol_weight * CALL_VOLUME(local_values); |
---|
[3044216] | 184 | norm_vol += vol_weight; |
---|
| 185 | |
---|
[10ddb64] | 186 | #ifdef USE_OPENMP |
---|
| 187 | #pragma omp parallel for |
---|
| 188 | #endif |
---|
[3044216] | 189 | for (int i=0; i < nq; i++) { |
---|
[0a7e5eb4] | 190 | const double scattering = CALL_IQ(q, i, local_values); |
---|
[3044216] | 191 | result[i] += weight*scattering; |
---|
| 192 | } |
---|
[03cac08] | 193 | } |
---|
[2e44ac7] | 194 | } |
---|
[3044216] | 195 | //Makes a normalization avialable for the next round |
---|
| 196 | result[nq] = norm; |
---|
| 197 | result[nq+1] = vol; |
---|
[f35f1dd] | 198 | result[nq+2] = norm_vol; |
---|
[2e44ac7] | 199 | |
---|
[3044216] | 200 | //End of the PD loop we can normalize |
---|
[03cac08] | 201 | if (pd_stop >= problem->pd_stride[MAX_PD-1]) { |
---|
[2e44ac7] | 202 | #ifdef USE_OPENMP |
---|
| 203 | #pragma omp parallel for |
---|
| 204 | #endif |
---|
[3044216] | 205 | for (int i=0; i < nq; i++) { |
---|
| 206 | if (vol*norm_vol != 0.0) { |
---|
| 207 | result[i] *= norm_vol/vol; |
---|
[2e44ac7] | 208 | } |
---|
[0a7e5eb4] | 209 | result[i] = values[0]*result[i]/norm + values[1]; |
---|
[2e44ac7] | 210 | } |
---|
| 211 | } |
---|
| 212 | } |
---|