Changeset 9ee2756 in sasmodels for sasmodels/kernel_iq.c
- Timestamp:
- Oct 19, 2017 12:31:30 PM (7 years ago)
- Branches:
- master, core_shell_microgels, magnetic_model, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children:
- 2c108a3
- Parents:
- 94f4543
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
sasmodels/kernel_iq.c
r94f4543 r9ee2756 1 2 1 /* 3 2 ########################################################## … … 11 10 ########################################################## 12 11 */ 12 13 // NOTE: the following macros are defined in generate.py: 14 // 15 // MAX_PD : the maximum number of dispersity loops allowed 16 // NUM_PARS : the number of parameters in the parameter table 17 // NUM_VALUES : the number of values to skip at the start of the 18 // values array before you get to the dispersity values. 19 // NUM_MAGNETIC : the number of magnetic parameters 20 // PARAMETER_TABLE : list of parameter declarations used to create the 21 // ParameterTable type. 22 // KERNEL_NAME : model_Iq, model_Iqxy or model_Imagnetic. This code is 23 // included three times, once for each kernel type. 24 // MAGNETIC_PARS : a comma-separated list of indices to the sld 25 // parameters in the parameter table. 26 // MAGNETIC_PAR1, ... : the first few indices of slds in the parameter table. 27 // MAGNETIC : defined when the magnetic kernel is being instantiated 28 // CALL_VOLUME(table) : call the form volume function 29 // CALL_IQ(q, table) : call the Iq function for 1D calcs. 30 // CALL_IQ_A(q, table) : call the Iq function with |q| for 2D data. 31 // CALL_IQ_AC(qa, qc, table) : call the Iqxy function for symmetric shapes 32 // CALL_IQ_ABC(qa, qc, table) : call the Iqxy function for asymmetric shapes 33 // INVALID(table) : test if the current point is feesible to calculate. This 34 // will be defined in the kernel definition file. 13 35 14 36 #ifndef _PAR_BLOCK_ // protected block so we can include this code twice. … … 38 60 #endif // _PAR_BLOCK_ 39 61 40 41 #if defined(MAGNETIC) && NUM_MAGNETIC>0 62 #if defined(MAGNETIC) && NUM_MAGNETIC > 0 63 // ===== Helper functions for magnetism ===== 42 64 43 65 // Return value restricted between low and high … … 63 85 } 64 86 87 // Compute the magnetic sld 65 88 static double mag_sld(double qx, double qy, double p, 66 89 double mx, double my, double sld) … … 69 92 return sld + perp*p; 70 93 } 71 //#endif // MAGNETIC 72 73 // TODO: way too hackish 74 // For the 1D kernel, CALL_IQ_[A,AC,ABC] and MAGNETIC are not defined 75 // so view_direct *IS NOT* included 76 // For the 2D kernel, CALL_IQ_[A,AC,ABC] is defined but MAGNETIC is not 77 // so view_direct *IS* included 78 // For the magnetic kernel, CALL_IQ_[A,AC,ABC] is defined, but so is MAGNETIC 79 // so view_direct *IS NOT* included 80 #else // !MAGNETIC 81 82 // ===== Implement jitter in orientation ===== 94 95 #endif 96 97 // ===== Helper functions for orientation and jitter ===== 98 83 99 // To change the definition of the angles, run explore/angles.py, which 84 100 // uses sympy to generate the equations. 85 101 86 #if defined(CALL_IQ_AC) // oriented symmetric 87 static double 88 view_direct(double qx, double qy, 89 double theta, double phi, 90 ParameterTable table) 102 #if !defined(_QAC_SECTION) && defined(CALL_IQ_AC) 103 #define _QAC_SECTION 104 105 typedef struct { 106 double R31, R32; 107 } QACRotation; 108 109 // Fill in the rotation matrix R from the view angles (theta, phi) and the 110 // jitter angles (dtheta, dphi). This matrix can be applied to all of the 111 // (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qc]' 112 static void 113 qac_rotation( 114 QACRotation *rotation, 115 double theta, double phi, 116 double dtheta, double dphi) 91 117 { 92 118 double sin_theta, cos_theta; 93 119 double sin_phi, cos_phi; 94 120 95 // reverse view 121 // reverse view matrix 96 122 SINCOS(theta*M_PI_180, sin_theta, cos_theta); 97 123 SINCOS(phi*M_PI_180, sin_phi, cos_phi); 98 const double qa = qx*cos_phi*cos_theta + qy*sin_phi*cos_theta; 99 const double qb = -qx*sin_phi + qy*cos_phi; 100 const double qc = qx*sin_theta*cos_phi + qy*sin_phi*sin_theta; 101 102 // reverse jitter after view 103 SINCOS(table.theta*M_PI_180, sin_theta, cos_theta); 104 SINCOS(table.phi*M_PI_180, sin_phi, cos_phi); 105 const double dqc = qa*sin_theta - qb*sin_phi*cos_theta + qc*cos_phi*cos_theta; 106 124 const double V11 = cos_phi*cos_theta; 125 const double V12 = sin_phi*cos_theta; 126 const double V21 = -sin_phi; 127 const double V22 = cos_phi; 128 const double V31 = sin_theta*cos_phi; 129 const double V32 = sin_phi*sin_theta; 130 131 // reverse jitter matrix 132 SINCOS(dtheta*M_PI_180, sin_theta, cos_theta); 133 SINCOS(dphi*M_PI_180, sin_phi, cos_phi); 134 const double J31 = sin_theta; 135 const double J32 = -sin_phi*cos_theta; 136 const double J33 = cos_phi*cos_theta; 137 138 // reverse matrix 139 rotation->R31 = J31*V11 + J32*V21 + J33*V31; 140 rotation->R32 = J31*V12 + J32*V22 + J33*V32; 141 } 142 143 // Apply the rotation matrix returned from qac_rotation to the point (qx,qy), 144 // returning R*[qx,qy]' = [qa,qc]' 145 static double 146 qac_apply( 147 QACRotation rotation, 148 double qx, double qy, 149 double *qa_out, double *qc_out) 150 { 151 const double dqc = rotation.R31*qx + rotation.R32*qy; 107 152 // Indirect calculation of qab, from qab^2 = |q|^2 - qc^2 108 153 const double dqa = sqrt(-dqc*dqc + qx*qx + qy*qy); 109 154 110 return CALL_IQ_AC(dqa, dqc, table); 111 } 112 113 #elif defined(CALL_IQ_ABC) // oriented asymmetric 114 115 static double 116 view_direct(double qx, double qy, 117 double theta, double phi, double psi, 118 ParameterTable table) 119 { 120 double sin_theta, cos_theta; 121 double sin_phi, cos_phi; 122 double sin_psi, cos_psi; 123 124 // reverse view 125 SINCOS(theta*M_PI_180, sin_theta, cos_theta); 126 SINCOS(phi*M_PI_180, sin_phi, cos_phi); 127 SINCOS(psi*M_PI_180, sin_psi, cos_psi); 128 const double qa = qx*(-sin_phi*sin_psi + cos_phi*cos_psi*cos_theta) + qy*(sin_phi*cos_psi*cos_theta + sin_psi*cos_phi); 129 const double qb = qx*(-sin_phi*cos_psi - sin_psi*cos_phi*cos_theta) + qy*(-sin_phi*sin_psi*cos_theta + cos_phi*cos_psi); 130 const double qc = qx*sin_theta*cos_phi + qy*sin_phi*sin_theta; 131 132 // reverse jitter after view 133 SINCOS(table.theta*M_PI_180, sin_theta, cos_theta); 134 SINCOS(table.phi*M_PI_180, sin_phi, cos_phi); 135 SINCOS(table.psi*M_PI_180, sin_psi, cos_psi); 136 const double dqa = qa*cos_psi*cos_theta + qb*(sin_phi*sin_theta*cos_psi + sin_psi*cos_phi) + qc*(sin_phi*sin_psi - sin_theta*cos_phi*cos_psi); 137 const double dqb = -qa*sin_psi*cos_theta + qb*(-sin_phi*sin_psi*sin_theta + cos_phi*cos_psi) + qc*(sin_phi*cos_psi + sin_psi*sin_theta*cos_phi); 138 const double dqc = qa*sin_theta - qb*sin_phi*cos_theta + qc*cos_phi*cos_theta; 139 140 return CALL_IQ_ABC(dqa, dqb, dqc, table); 141 } 142 /* TODO: use precalculated jitter for faster 2D calcs on DLL. 155 *qa_out = dqa; 156 *qc_out = dqc; 157 } 158 #endif // _QAC_SECTION 159 160 #if !defined(_QABC_SECTION) && defined(CALL_IQ_ABC) 161 #define _QABC_SECTION 162 163 typedef struct { 164 double R11, R12; 165 double R21, R22; 166 double R31, R32; 167 } QABCRotation; 168 169 // Fill in the rotation matrix R from the view angles (theta, phi, psi) and the 170 // jitter angles (dtheta, dphi, dpsi). This matrix can be applied to all of the 171 // (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qb,qc]' 143 172 static void 144 view_precalc( 173 qabc_rotation( 174 QABCRotation *rotation, 145 175 double theta, double phi, double psi, 146 ParameterTable table, 147 double *R11, double *R12, double *R21, 148 double *R22, double *R31, double *R32) 176 double dtheta, double dphi, double dpsi) 149 177 { 150 178 double sin_theta, cos_theta; … … 156 184 SINCOS(phi*M_PI_180, sin_phi, cos_phi); 157 185 SINCOS(psi*M_PI_180, sin_psi, cos_psi); 158 const double V11 = sin_phi*sin_psi + cos_phi*cos_psi*cos_theta;159 const double V12 = sin_phi*cos_psi*cos_theta -sin_psi*cos_phi;160 const double V21 = -sin_phi*cos_psi +sin_psi*cos_phi*cos_theta;161 const double V22 = sin_phi*sin_psi*cos_theta + cos_phi*cos_psi;186 const double V11 = -sin_phi*sin_psi + cos_phi*cos_psi*cos_theta; 187 const double V12 = sin_phi*cos_psi*cos_theta + sin_psi*cos_phi; 188 const double V21 = -sin_phi*cos_psi - sin_psi*cos_phi*cos_theta; 189 const double V22 = -sin_phi*sin_psi*cos_theta + cos_phi*cos_psi; 162 190 const double V31 = sin_theta*cos_phi; 163 191 const double V32 = sin_phi*sin_theta; 164 192 165 193 // reverse jitter matrix 166 SINCOS( table.theta*M_PI_180, sin_theta, cos_theta);167 SINCOS( table.phi*M_PI_180, sin_phi, cos_phi);168 SINCOS( table.psi*M_PI_180, sin_psi, cos_psi);194 SINCOS(dtheta*M_PI_180, sin_theta, cos_theta); 195 SINCOS(dphi*M_PI_180, sin_phi, cos_phi); 196 SINCOS(dpsi*M_PI_180, sin_psi, cos_psi); 169 197 const double J11 = cos_psi*cos_theta; 170 const double J12 = sin_phi*sin_theta*cos_psi -sin_psi*cos_phi;171 const double J13 = -sin_phi*sin_psi - sin_theta*cos_phi*cos_psi;172 const double J21 = sin_psi*cos_theta;173 const double J22 = sin_phi*sin_psi*sin_theta + cos_phi*cos_psi;174 const double J23 = sin_phi*cos_psi -sin_psi*sin_theta*cos_phi;198 const double J12 = sin_phi*sin_theta*cos_psi + sin_psi*cos_phi; 199 const double J13 = sin_phi*sin_psi - sin_theta*cos_phi*cos_psi; 200 const double J21 = -sin_psi*cos_theta; 201 const double J22 = -sin_phi*sin_psi*sin_theta + cos_phi*cos_psi; 202 const double J23 = sin_phi*cos_psi + sin_psi*sin_theta*cos_phi; 175 203 const double J31 = sin_theta; 176 204 const double J32 = -sin_phi*cos_theta; … … 178 206 179 207 // reverse matrix 180 *R11 = J11*V11 + J12*V21 + J13*V31; 181 *R12 = J11*V12 + J12*V22 + J13*V32; 182 *R21 = J21*V11 + J22*V21 + J23*V31; 183 *R22 = J21*V12 + J22*V22 + J23*V32; 184 *R31 = J31*V11 + J32*V21 + J33*V31; 185 *R32 = J31*V12 + J32*V22 + J33*V32; 186 } 187 208 rotation->R11 = J11*V11 + J12*V21 + J13*V31; 209 rotation->R12 = J11*V12 + J12*V22 + J13*V32; 210 rotation->R21 = J21*V11 + J22*V21 + J23*V31; 211 rotation->R22 = J21*V12 + J22*V22 + J23*V32; 212 rotation->R31 = J31*V11 + J32*V21 + J33*V31; 213 rotation->R32 = J31*V12 + J32*V22 + J33*V32; 214 } 215 216 // Apply the rotation matrix returned from qabc_rotation to the point (qx,qy), 217 // returning R*[qx,qy]' = [qa,qb,qc]' 188 218 static double 189 view_apply(double qx, double qy, 190 double R11, double R12, double R21, double R22, double R31, double R32,191 ParameterTable table)192 { 193 const double dqa = R11*qx + R12*qy; 194 const double dqb = R21*qx + R22*qy;195 const double dqc = R31*qx + R32*qy;196 197 CALL_IQ_ABC(dqa, dqb, dqc, table); 198 } 199 */ 200 #endif 201 202 #endif // !MAGNETIC 219 qabc_apply( 220 QABCRotation rotation, 221 double qx, double qy, 222 double *qa_out, double *qb_out, double *qc_out) 223 { 224 *qa_out = rotation.R11*qx + rotation.R12*qy; 225 *qb_out = rotation.R21*qx + rotation.R22*qy; 226 *qc_out = rotation.R31*qx + rotation.R32*qy; 227 } 228 229 #endif // _QABC_SECTION 230 231 232 // ==================== KERNEL CODE ======================== 203 233 204 234 kernel … … 214 244 ) 215 245 { 246 #ifdef USE_OPENCL 247 // who we are and what element we are working with 248 const int q_index = get_global_id(0); 249 if (q_index >= nq) return; 250 #else 251 // Define q_index here so that debugging statements can be written to work 252 // for both OpenCL and DLL using: 253 // if (q_index == 0) {printf(...);} 254 int q_index = 0; 255 #endif 256 216 257 // Storage for the current parameter values. These will be updated as we 217 258 // walk the polydispersity cube. … … 225 266 #endif 226 267 227 // TODO: could precompute these outside of the kernel.228 268 // Interpret polarization cross section. 229 269 // up_frac_i = values[NUM_PARS+2]; 230 270 // up_frac_f = values[NUM_PARS+3]; 231 271 // up_angle = values[NUM_PARS+4]; 272 // TODO: could precompute more magnetism parameters before calling the kernel. 232 273 double spins[4]; 233 274 double cos_mspin, sin_mspin; … … 235 276 SINCOS(-values[NUM_PARS+4]*M_PI_180, sin_mspin, cos_mspin); 236 277 #endif // MAGNETIC 237 238 #if defined(CALL_IQ_AC) // oriented symmetric239 const double theta = values[details->theta_par+2];240 const double phi = values[details->theta_par+3];241 #elif defined(CALL_IQ_ABC) // oriented asymmetric242 const double theta = values[details->theta_par+2];243 const double phi = values[details->theta_par+3];244 const double psi = values[details->theta_par+4];245 #endif246 278 247 279 // Fill in the initial variables … … 253 285 for (int i=0; i < NUM_PARS; i++) { 254 286 local_values.vector[i] = values[2+i]; 255 // printf("p%d = %g\n",i, local_values.vector[i]);287 //if (q_index==0) printf("p%d = %g\n",i, local_values.vector[i]); 256 288 } 257 //printf("NUM_VALUES:%d NUM_PARS:%d MAX_PD:%d\n", NUM_VALUES, NUM_PARS, MAX_PD); 258 //printf("start:%d stop:%d\n", pd_start, pd_stop); 259 260 double pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 261 if (pd_start == 0) { 262 #ifdef USE_OPENMP 263 #pragma omp parallel for 264 #endif 265 for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0; 289 //if (q_index==0) printf("NUM_VALUES:%d NUM_PARS:%d MAX_PD:%d\n", NUM_VALUES, NUM_PARS, MAX_PD); 290 //if (q_index==0) printf("start:%d stop:%d\n", pd_start, pd_stop); 291 292 // If pd_start is zero that means that we are starting a new calculation, 293 // and must initialize the result to zero. Otherwise, we are restarting 294 // the calculation from somewhere in the middle of the dispersity mesh, 295 // and we update the value rather than reset it. Similarly for the 296 // normalization factor, which is stored as the final value in the 297 // results vector (one past the number of q values). 298 // 299 // The code differs slightly between opencl and dll since opencl is only 300 // seeing one q value (stored in the variable "this_result") while the dll 301 // version must loop over all q. 302 #ifdef USE_OPENCL 303 double pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 304 double this_result = (pd_start == 0 ? 0.0 : result[q_index]); 305 #else // !USE_OPENCL 306 double pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 307 if (pd_start == 0) { 308 #ifdef USE_OPENMP 309 #pragma omp parallel for 310 #endif 311 for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0; 312 } 313 //if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]); 314 #endif // !USE_OPENCL 315 316 317 // ====== macros to set up the parts of the loop ======= 318 /* 319 Based on the level of the loop, uses C preprocessor magic to construct 320 level-specific looping variables, including these from loop level 3: 321 322 int n3 : length of loop for mesh level 3 323 int i3 : current position in the loop for level 3, which is calculated 324 from a combination of pd_start, pd_stride[3] and pd_length[3]. 325 int p3 : is the index into the parameter table for mesh level 3 326 double v3[] : pointer into dispersity array to values for loop 3 327 double w3[] : pointer into dispersity array to weights for loop 3 328 double weight3 : the product of weights from levels 3 and up, computed 329 as weight5*weight4*w3[i3]. Note that we need an outermost 330 value weight5 set to 1.0 for this to work properly. 331 332 After expansion, the loop struction will look like the following: 333 334 // --- PD_INIT(4) --- 335 const int n4 = pd_length[4]; 336 const int p4 = pd_par[4]; 337 global const double *v4 = pd_value + pd_offset[4]; 338 global const double *w4 = pd_weight + pd_offset[4]; 339 int i4 = (pd_start/pd_stride[4])%n4; // position in level 4 at pd_start 340 341 // --- PD_INIT(3) --- 342 const int n3 = pd_length[3]; 343 ... 344 int i3 = (pd_start/pd_stride[3])%n3; // position in level 3 at pd_start 345 346 PD_INIT(2) 347 PD_INIT(1) 348 PD_INIT(0) 349 350 // --- PD_OUTERMOST_WEIGHT(5) --- 351 const double weight5 = 1.0; 352 353 // --- PD_OPEN(4,5) --- 354 while (i4 < n4) { 355 parameter[p4] = v4[i4]; // set the value for pd parameter 4 at this mesh point 356 const double weight4 = w4[i4] * weight5; 357 358 // from PD_OPEN(3,4) 359 while (i3 < n3) { 360 parameter[p3] = v3[i3]; // set the value for pd parameter 3 at this mesh point 361 const double weight3 = w3[i3] * weight4; 362 363 PD_OPEN(3,2) 364 PD_OPEN(2,1) 365 PD_OPEN(0,1) 366 367 ... main loop body ... 368 369 ++step; // increment counter representing position in dispersity mesh 370 371 PD_CLOSE(0) 372 PD_CLOSE(1) 373 PD_CLOSE(2) 374 375 // --- PD_CLOSE(3) --- 376 if (step >= pd_stop) break; 377 ++i3; 378 } 379 i3 = 0; // reset loop counter for next round through the loop 380 381 // --- PD_CLOSE(4) --- 382 if (step >= pd_stop) break; 383 ++i4; 266 384 } 267 //printf("start %d %g %g\n", pd_start, pd_norm, result[0]); 268 385 i4 = 0; // reset loop counter even though no more rounds through the loop 386 387 */ 388 389 // Define looping variables 390 #define PD_INIT(_LOOP) \ 391 const int n##_LOOP = details->pd_length[_LOOP]; \ 392 const int p##_LOOP = details->pd_par[_LOOP]; \ 393 global const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \ 394 global const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \ 395 int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP; 396 397 // Jump into the middle of the polydispersity loop 398 #define PD_OPEN(_LOOP,_OUTER) \ 399 while (i##_LOOP < n##_LOOP) { \ 400 local_values.vector[p##_LOOP] = v##_LOOP[i##_LOOP]; \ 401 const double weight##_LOOP = w##_LOOP[i##_LOOP] * weight##_OUTER; 402 403 // create the variable "weight#=1.0" where # is the outermost level+1 (=MAX_PD). 404 #define _PD_OUTERMOST_WEIGHT(_n) const double weight##_n = 1.0; 405 #define PD_OUTERMOST_WEIGHT(_n) _PD_OUTERMOST_WEIGHT(_n) 406 407 // Close out the loop 408 #define PD_CLOSE(_LOOP) \ 409 if (step >= pd_stop) break; \ 410 ++i##_LOOP; \ 411 } \ 412 i##_LOOP = 0; 413 414 // The main loop body is essentially: 415 // 416 // BUILD_ROTATION // construct the rotation matrix qxy => qabc 417 // for each q 418 // FETCH_Q // set qx,qy from the q input vector 419 // APPLY_ROTATION // convert qx,qy to qa,qb,qc 420 // CALL_KERNEL // scattering = Iqxy(qa, qb, qc, p1, p2, ...) 421 // 422 // Depending on the shape type (radial, axial, triaxial), the variables 423 // and calling parameters will be slightly different. These macros 424 // capture the differences in one spot so the rest of the code is easier 425 // to read. 426 #if defined(CALL_IQ) 427 // unoriented 1D 428 double qk; 429 #define FETCH_Q do { qk = q[q_index]; } while (0) 430 #define BUILD_ROTATION do {} while(0) 431 #define APPLY_ROTATION do {} while(0) 432 #define CALL_KERNEL CALL_IQ(qk, local_values.table) 433 434 #elif defined(CALL_IQ_A) 435 // unoriented 2D 436 double qx, qy; 437 #define FETCH_Q do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0) 438 #define BUILD_ROTATION do {} while(0) 439 #define APPLY_ROTATION do {} while(0) 440 #define CALL_KERNEL CALL_IQ_A(sqrt(qx*qx+qy*qy), local_values.table) 441 442 #elif defined(CALL_IQ_AC) 443 // oriented symmetric 2D 444 double qx, qy; 445 #define FETCH_Q do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0) 446 double qa, qc; 447 QACRotation rotation; 448 // Grab the "view" angles (theta, phi) from the initial parameter table. 449 // These are separate from the "jitter" angles (dtheta, dphi) which are 450 // stored with the dispersity values and copied to the local parameter 451 // table as we go through the mesh. 452 const double theta = values[details->theta_par+2]; 453 const double phi = values[details->theta_par+3]; 454 #define BUILD_ROTATION qac_rotation(&rotation, \ 455 theta, phi, local_values.table.theta, local_values.table.phi) 456 #define APPLY_ROTATION qac_apply(rotation, qx, qy, &qa, &qc) 457 #define CALL_KERNEL CALL_IQ_AC(qa, qc, local_values.table) 458 459 #elif defined(CALL_IQ_ABC) 460 // oriented asymmetric 2D 461 double qx, qy; 462 #define FETCH_Q do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0) 463 double qa, qb, qc; 464 QABCRotation rotation; 465 // Grab the "view" angles (theta, phi, psi) from the initial parameter table. 466 // These are separate from the "jitter" angles (dtheta, dphi, dpsi) which are 467 // stored with the dispersity values and copied to the local parameter 468 // table as we go through the mesh. 469 const double theta = values[details->theta_par+2]; 470 const double phi = values[details->theta_par+3]; 471 const double psi = values[details->theta_par+4]; 472 #define BUILD_ROTATION qabc_rotation(&rotation, \ 473 theta, phi, psi, local_values.table.theta, \ 474 local_values.table.phi, local_values.table.psi) 475 #define APPLY_ROTATION qabc_apply(rotation, qx, qy, &qa, &qb, &qc) 476 #define CALL_KERNEL CALL_IQ_ABC(qa, qb, qc, local_values.table) 477 478 #endif 479 480 // ====== construct the loops ======= 481 482 // Pointers to the start of the dispersity and weight vectors, if needed. 269 483 #if MAX_PD>0 270 484 global const double *pd_value = values + NUM_VALUES; … … 272 486 #endif 273 487 274 // Jump into the middle of the polydispersity loop 488 // The variable "step" is the current position in the dispersity loop. 489 // It will be incremented each time a new point in the mesh is accumulated, 490 // and used to test whether we have reached pd_stop. 491 int step = pd_start; 492 493 // define looping variables 275 494 #if MAX_PD>4 276 int n4=details->pd_length[4]; 277 int i4=(pd_start/details->pd_stride[4])%n4; 278 const int p4=details->pd_par[4]; 279 global const double *v4 = pd_value + details->pd_offset[4]; 280 global const double *w4 = pd_weight + details->pd_offset[4]; 495 PD_INIT(4) 281 496 #endif 282 497 #if MAX_PD>3 283 int n3=details->pd_length[3]; 284 int i3=(pd_start/details->pd_stride[3])%n3; 285 const int p3=details->pd_par[3]; 286 global const double *v3 = pd_value + details->pd_offset[3]; 287 global const double *w3 = pd_weight + details->pd_offset[3]; 288 //printf("offset %d: %d %d\n", 3, details->pd_offset[3], NUM_VALUES); 498 PD_INIT(3) 289 499 #endif 290 500 #if MAX_PD>2 291 int n2=details->pd_length[2]; 292 int i2=(pd_start/details->pd_stride[2])%n2; 293 const int p2=details->pd_par[2]; 294 global const double *v2 = pd_value + details->pd_offset[2]; 295 global const double *w2 = pd_weight + details->pd_offset[2]; 501 PD_INIT(2) 296 502 #endif 297 503 #if MAX_PD>1 298 int n1=details->pd_length[1]; 299 int i1=(pd_start/details->pd_stride[1])%n1; 300 const int p1=details->pd_par[1]; 301 global const double *v1 = pd_value + details->pd_offset[1]; 302 global const double *w1 = pd_weight + details->pd_offset[1]; 504 PD_INIT(1) 303 505 #endif 304 506 #if MAX_PD>0 305 int n0=details->pd_length[0]; 306 int i0=(pd_start/details->pd_stride[0])%n0; 307 const int p0=details->pd_par[0]; 308 global const double *v0 = pd_value + details->pd_offset[0]; 309 global const double *w0 = pd_weight + details->pd_offset[0]; 310 //printf("w0:%p, values:%p, diff:%ld, %d\n",w0,values,(w0-values), NUM_VALUES); 311 #endif 312 313 314 int step = pd_start; 315 507 PD_INIT(0) 508 #endif 509 510 // open nested loops 511 PD_OUTERMOST_WEIGHT(MAX_PD) 316 512 #if MAX_PD>4 317 const double weight5 = 1.0; 318 while (i4 < n4) { 319 local_values.vector[p4] = v4[i4]; 320 double weight4 = w4[i4] * weight5; 321 //printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 4, p4, i4, n4, local_values.vector[p4], weight4); 322 #elif MAX_PD>3 323 const double weight4 = 1.0; 513 PD_OPEN(4,5) 324 514 #endif 325 515 #if MAX_PD>3 326 while (i3 < n3) { 327 local_values.vector[p3] = v3[i3]; 328 double weight3 = w3[i3] * weight4; 329 //printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 3, p3, i3, n3, local_values.vector[p3], weight3); 330 #elif MAX_PD>2 331 const double weight3 = 1.0; 516 PD_OPEN(3,4) 332 517 #endif 333 518 #if MAX_PD>2 334 while (i2 < n2) { 335 local_values.vector[p2] = v2[i2]; 336 double weight2 = w2[i2] * weight3; 337 //printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 2, p2, i2, n2, local_values.vector[p2], weight2); 338 #elif MAX_PD>1 339 const double weight2 = 1.0; 519 PD_OPEN(2,3) 340 520 #endif 341 521 #if MAX_PD>1 342 while (i1 < n1) { 343 local_values.vector[p1] = v1[i1]; 344 double weight1 = w1[i1] * weight2; 345 //printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 1, p1, i1, n1, local_values.vector[p1], weight1); 346 #elif MAX_PD>0 347 const double weight1 = 1.0; 522 PD_OPEN(1,2) 348 523 #endif 349 524 #if MAX_PD>0 350 while(i0 < n0) { 351 local_values.vector[p0] = v0[i0]; 352 double weight0 = w0[i0] * weight1; 353 //printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 0, p0, i0, n0, local_values.vector[p0], weight0); 354 #else 355 const double weight0 = 1.0; 356 #endif 357 358 //printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n"); 359 //printf("sphcor: %g\n", spherical_correction); 360 361 #ifdef INVALID 362 if (!INVALID(local_values.table)) 363 #endif 364 { 365 // Accumulate I(q) 366 // Note: weight==0 must always be excluded 367 if (weight0 > cutoff) { 368 pd_norm += weight0 * CALL_VOLUME(local_values.table); 369 370 #ifdef USE_OPENMP 371 #pragma omp parallel for 372 #endif 373 for (int q_index=0; q_index<nq; q_index++) { 374 #if defined(MAGNETIC) && NUM_MAGNETIC > 0 375 const double qx = q[2*q_index]; 376 const double qy = q[2*q_index+1]; 377 const double qsq = qx*qx + qy*qy; 378 379 // Constant across orientation, polydispersity for given qx, qy 380 double scattering = 0.0; 381 // TODO: what is the magnetic scattering at q=0 382 if (qsq > 1.e-16) { 383 double p[4]; // dd, du, ud, uu 384 p[0] = (qy*cos_mspin + qx*sin_mspin)/qsq; 385 p[3] = -p[0]; 386 p[1] = p[2] = (qy*sin_mspin - qx*cos_mspin)/qsq; 387 388 for (int index=0; index<4; index++) { 389 const double xs = spins[index]; 390 if (xs > 1.e-8) { 391 const int spin_flip = (index==1) || (index==2); 392 const double pk = p[index]; 393 for (int axis=0; axis<=spin_flip; axis++) { 394 #define M1 NUM_PARS+5 395 #define M2 NUM_PARS+8 396 #define M3 NUM_PARS+13 397 #define SLD(_M_offset, _sld_offset) \ 398 local_values.vector[_sld_offset] = xs * (axis \ 399 ? (index==1 ? -values[_M_offset+2] : values[_M_offset+2]) \ 400 : mag_sld(qx, qy, pk, values[_M_offset], values[_M_offset+1], \ 401 (spin_flip ? 0.0 : values[_sld_offset+2]))) 402 #if NUM_MAGNETIC==1 403 SLD(M1, MAGNETIC_PAR1); 404 #elif NUM_MAGNETIC==2 405 SLD(M1, MAGNETIC_PAR1); 406 SLD(M2, MAGNETIC_PAR2); 407 #elif NUM_MAGNETIC==3 408 SLD(M1, MAGNETIC_PAR1); 409 SLD(M2, MAGNETIC_PAR2); 410 SLD(M3, MAGNETIC_PAR3); 411 #else 525 PD_OPEN(0,1) 526 #endif 527 528 529 //if (q_index==0) {printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n");} 530 531 // ====== loop body ======= 532 #ifdef INVALID 533 if (!INVALID(local_values.table)) 534 #endif 535 { 536 // Accumulate I(q) 537 // Note: weight==0 must always be excluded 538 if (weight0 > cutoff) { 539 pd_norm += weight0 * CALL_VOLUME(local_values.table); 540 BUILD_ROTATION; 541 542 #ifndef USE_OPENCL 543 // DLL needs to explicitly loop over the q values. 544 #ifdef USE_OPENMP 545 #pragma omp parallel for 546 #endif 547 for (q_index=0; q_index<nq; q_index++) 548 #endif // !USE_OPENCL 549 { 550 551 FETCH_Q; 552 APPLY_ROTATION; 553 554 // ======= COMPUTE SCATTERING ========== 555 #if defined(MAGNETIC) && NUM_MAGNETIC > 0 556 // Constant across orientation, polydispersity for given qx, qy 557 double scattering = 0.0; 558 // TODO: what is the magnetic scattering at q=0 559 const double qsq = qx*qx + qy*qy; 560 if (qsq > 1.e-16) { 561 double p[4]; // dd, du, ud, uu 562 p[0] = (qy*cos_mspin + qx*sin_mspin)/qsq; 563 p[3] = -p[0]; 564 p[1] = p[2] = (qy*sin_mspin - qx*cos_mspin)/qsq; 565 566 for (int index=0; index<4; index++) { 567 const double xs = spins[index]; 568 if (xs > 1.e-8) { 569 const int spin_flip = (index==1) || (index==2); 570 const double pk = p[index]; 571 for (int axis=0; axis<=spin_flip; axis++) { 572 #define SLD(_M_offset, _sld_offset) \ 573 local_values.vector[_sld_offset] = xs * (axis \ 574 ? (index==1 ? -values[_M_offset+2] : values[_M_offset+2]) \ 575 : mag_sld(qx, qy, pk, values[_M_offset], values[_M_offset+1], \ 576 (spin_flip ? 0.0 : values[_sld_offset+2]))) 577 #define M1 NUM_PARS+5 578 #if NUM_MAGNETIC==1 579 SLD(M1, MAGNETIC_PAR1); 580 #elif NUM_MAGNETIC==2 581 SLD(M1, MAGNETIC_PAR1); 582 SLD(M1+3, MAGNETIC_PAR2); 583 #elif NUM_MAGNETIC==3 584 SLD(M1, MAGNETIC_PAR1); 585 SLD(M1+3, MAGNETIC_PAR2); 586 SLD(M1+6, MAGNETIC_PAR3); 587 #else 412 588 for (int sk=0; sk<NUM_MAGNETIC; sk++) { 413 589 SLD(M1+3*sk, slds[sk]); 414 590 } 415 #endif 416 # if defined(CALL_IQ_A) // unoriented 417 scattering += CALL_IQ_A(sqrt(qsq), local_values.table); 418 # elif defined(CALL_IQ_AC) // oriented symmetric 419 scattering += view_direct(qx, qy, theta, phi, local_values.table); 420 # elif defined(CALL_IQ_ABC) // oriented asymmetric 421 scattering += view_direct(qx, qy, theta, phi, psi, local_values.table); 422 # endif 423 } 591 #endif 592 #undef SLD 593 scattering += CALL_KERNEL; 424 594 } 425 595 } 426 596 } 427 #elif defined(CALL_IQ) // 1d, not MAGNETIC428 const double scattering = CALL_IQ(q[q_index], local_values.table);429 #else // 2d data, not magnetic430 const double qx = q[2*q_index];431 const double qy = q[2*q_index+1];432 # if defined(CALL_IQ_A) // unoriented433 const double scattering = CALL_IQ_A(sqrt(qx*qx+qy*qy), local_values.table);434 # elif defined(CALL_IQ_AC) // oriented symmetric435 const double scattering = view_direct(qx, qy, theta, phi, local_values.table);436 # elif defined(CALL_IQ_ABC) // oriented asymmetric437 const double scattering = view_direct(qx, qy, theta, phi, psi, local_values.table);438 # endif439 #endif // !MAGNETIC440 //printf("q_index:%d %g %g %g %g\n",q_index, scattering, weight, spherical_correction, weight0);441 result[q_index] += weight0 * scattering;442 597 } 598 #else // !MAGNETIC 599 const double scattering = CALL_KERNEL; 600 #endif // !MAGNETIC 601 //printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight, weight0); 602 603 #ifdef USE_OPENCL 604 this_result += weight0 * scattering; 605 #else // !USE_OPENCL 606 result[q_index] += weight0 * scattering; 607 #endif // !USE_OPENCL 443 608 } 444 609 } 445 ++step; 610 } 611 612 // close nested loops 613 ++step; 446 614 #if MAX_PD>0 447 if (step >= pd_stop) break; 448 ++i0; 449 } 450 i0 = 0; 615 PD_CLOSE(0) 451 616 #endif 452 617 #if MAX_PD>1 453 if (step >= pd_stop) break; 454 ++i1; 455 } 456 i1 = 0; 618 PD_CLOSE(1) 457 619 #endif 458 620 #if MAX_PD>2 459 if (step >= pd_stop) break; 460 ++i2; 461 } 462 i2 = 0; 621 PD_CLOSE(2) 463 622 #endif 464 623 #if MAX_PD>3 465 if (step >= pd_stop) break; 466 ++i3; 467 } 468 i3 = 0; 624 PD_CLOSE(3) 469 625 #endif 470 626 #if MAX_PD>4 471 if (step >= pd_stop) break; 472 ++i4; 473 } 474 i4 = 0; 475 #endif 476 627 PD_CLOSE(4) 628 #endif 629 630 // clear the macros in preparation for the next kernel 631 #undef PD_INIT 632 #undef PD_OPEN 633 #undef PD_CLOSE 634 #undef FETCH_Q 635 #undef BUILD_ROTATION 636 #undef APPLY_ROTATION 637 #undef CALL_KERNEL 638 639 // Remember the current result and the updated norm. 640 #ifdef USE_OPENCL 641 result[q_index] = this_result; 642 if (q_index == 0) result[nq] = pd_norm; 643 //if (q_index == 0) printf("res: %g/%g\n", result[0], pd_norm); 644 #else // !USE_OPENCL 645 result[nq] = pd_norm; 477 646 //printf("res: %g/%g\n", result[0], pd_norm); 478 // Remember the updated norm. 479 result[nq] = pd_norm; 480 } 647 #endif // !USE_OPENCL 648 }
Note: See TracChangeset
for help on using the changeset viewer.