Changes in sasmodels/kernel_iq.c [bde38b5:2c108a3] in sasmodels
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
sasmodels/kernel_iq.c
rbde38b5 r2c108a3 1 2 1 /* 3 2 ########################################################## … … 12 11 */ 13 12 13 // NOTE: the following macros are defined in generate.py: 14 // 15 // MAX_PD : the maximum number of dispersity loops allowed 16 // NUM_PARS : the number of parameters in the parameter table 17 // NUM_VALUES : the number of values to skip at the start of the 18 // values array before you get to the dispersity values. 19 // PARAMETER_TABLE : list of parameter declarations used to create the 20 // ParameterTable type. 21 // KERNEL_NAME : model_Iq, model_Iqxy or model_Imagnetic. This code is 22 // included three times, once for each kernel type. 23 // MAGNETIC : defined when the magnetic kernel is being instantiated 24 // NUM_MAGNETIC : the number of magnetic parameters 25 // MAGNETIC_PARS : a comma-separated list of indices to the sld 26 // parameters in the parameter table. 27 // CALL_VOLUME(table) : call the form volume function 28 // CALL_IQ(q, table) : call the Iq function for 1D calcs. 29 // CALL_IQ_A(q, table) : call the Iq function with |q| for 2D data. 30 // CALL_IQ_AC(qa, qc, table) : call the Iqxy function for symmetric shapes 31 // CALL_IQ_ABC(qa, qc, table) : call the Iqxy function for asymmetric shapes 32 // INVALID(table) : test if the current point is feesible to calculate. This 33 // will be defined in the kernel definition file. 34 14 35 #ifndef _PAR_BLOCK_ // protected block so we can include this code twice. 15 36 #define _PAR_BLOCK_ … … 17 38 typedef struct { 18 39 #if MAX_PD > 0 19 int32_t pd_par[MAX_PD]; // id of the nth polydispersity variable20 int32_t pd_length[MAX_PD]; // length of the nth polydispersity weight vector40 int32_t pd_par[MAX_PD]; // id of the nth dispersity variable 41 int32_t pd_length[MAX_PD]; // length of the nth dispersity weight vector 21 42 int32_t pd_offset[MAX_PD]; // offset of pd weights in the value & weight vector 22 43 int32_t pd_stride[MAX_PD]; // stride to move to the next index at this level … … 25 46 int32_t num_weights; // total length of the weights vector 26 47 int32_t num_active; // number of non-trivial pd loops 27 int32_t theta_par; // id of spherical correction variable48 int32_t theta_par; // id of first orientation variable 28 49 } ProblemDetails; 29 50 … … 38 59 #endif // _PAR_BLOCK_ 39 60 40 41 #if defined(MAGNETIC) && NUM_MAGNETIC>0 61 #if defined(MAGNETIC) && NUM_MAGNETIC > 0 62 // ===== Helper functions for magnetism ===== 42 63 43 64 // Return value restricted between low and high … … 51 72 // uu * (sld - m_sigma_x); 52 73 // dd * (sld + m_sigma_x); 53 // ud * (m_sigma_y + 1j*m_sigma_z); 54 // du * (m_sigma_y - 1j*m_sigma_z); 55 static void set_spins(double in_spin, double out_spin, double spins[4]) 74 // ud * (m_sigma_y - 1j*m_sigma_z); 75 // du * (m_sigma_y + 1j*m_sigma_z); 76 // weights for spin crosssections: dd du real, ud real, uu, du imag, ud imag 77 static void set_spin_weights(double in_spin, double out_spin, double spins[4]) 56 78 { 57 79 in_spin = clip(in_spin, 0.0, 1.0); 58 80 out_spin = clip(out_spin, 0.0, 1.0); 59 81 spins[0] = sqrt(sqrt((1.0-in_spin) * (1.0-out_spin))); // dd 60 spins[1] = sqrt(sqrt((1.0-in_spin) * out_spin)); // du 61 spins[2] = sqrt(sqrt(in_spin * (1.0-out_spin))); // ud 82 spins[1] = sqrt(sqrt((1.0-in_spin) * out_spin)); // du real 83 spins[2] = sqrt(sqrt(in_spin * (1.0-out_spin))); // ud real 62 84 spins[3] = sqrt(sqrt(in_spin * out_spin)); // uu 63 } 64 65 static double mag_sld(double qx, double qy, double p, 66 double mx, double my, double sld) 67 { 85 spins[4] = spins[1]; // du imag 86 spins[5] = spins[2]; // ud imag 87 } 88 89 // Compute the magnetic sld 90 static double mag_sld( 91 const int xs, // 0=dd, 1=du real, 2=ud real, 3=uu, 4=du imag, 5=up imag 92 const double qx, const double qy, 93 const double px, const double py, 94 const double sld, 95 const double mx, const double my, const double mz 96 ) 97 { 98 if (xs < 4) { 68 99 const double perp = qy*mx - qx*my; 69 return sld + perp*p; 70 } 71 72 #endif // MAGNETIC 100 switch (xs) { 101 case 0: // uu => sld - D M_perpx 102 return sld - px*perp; 103 case 1: // ud real => -D M_perpy 104 return py*perp; 105 case 2: // du real => -D M_perpy 106 return py*perp; 107 case 3: // dd real => sld + D M_perpx 108 return sld + px*perp; 109 } 110 } else { 111 if (xs== 4) { 112 return -mz; // ud imag => -D M_perpz 113 } else { // index == 5 114 return mz; // du imag => D M_perpz 115 } 116 } 117 } 118 119 120 #endif 121 122 // ===== Helper functions for orientation and jitter ===== 123 124 // To change the definition of the angles, run explore/angles.py, which 125 // uses sympy to generate the equations. 126 127 #if !defined(_QAC_SECTION) && defined(CALL_IQ_AC) 128 #define _QAC_SECTION 129 130 typedef struct { 131 double R31, R32; 132 } QACRotation; 133 134 // Fill in the rotation matrix R from the view angles (theta, phi) and the 135 // jitter angles (dtheta, dphi). This matrix can be applied to all of the 136 // (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qc]' 137 static void 138 qac_rotation( 139 QACRotation *rotation, 140 double theta, double phi, 141 double dtheta, double dphi) 142 { 143 double sin_theta, cos_theta; 144 double sin_phi, cos_phi; 145 146 // reverse view matrix 147 SINCOS(theta*M_PI_180, sin_theta, cos_theta); 148 SINCOS(phi*M_PI_180, sin_phi, cos_phi); 149 const double V11 = cos_phi*cos_theta; 150 const double V12 = sin_phi*cos_theta; 151 const double V21 = -sin_phi; 152 const double V22 = cos_phi; 153 const double V31 = sin_theta*cos_phi; 154 const double V32 = sin_phi*sin_theta; 155 156 // reverse jitter matrix 157 SINCOS(dtheta*M_PI_180, sin_theta, cos_theta); 158 SINCOS(dphi*M_PI_180, sin_phi, cos_phi); 159 const double J31 = sin_theta; 160 const double J32 = -sin_phi*cos_theta; 161 const double J33 = cos_phi*cos_theta; 162 163 // reverse matrix 164 rotation->R31 = J31*V11 + J32*V21 + J33*V31; 165 rotation->R32 = J31*V12 + J32*V22 + J33*V32; 166 } 167 168 // Apply the rotation matrix returned from qac_rotation to the point (qx,qy), 169 // returning R*[qx,qy]' = [qa,qc]' 170 static double 171 qac_apply( 172 QACRotation rotation, 173 double qx, double qy, 174 double *qa_out, double *qc_out) 175 { 176 const double dqc = rotation.R31*qx + rotation.R32*qy; 177 // Indirect calculation of qab, from qab^2 = |q|^2 - qc^2 178 const double dqa = sqrt(-dqc*dqc + qx*qx + qy*qy); 179 180 *qa_out = dqa; 181 *qc_out = dqc; 182 } 183 #endif // _QAC_SECTION 184 185 #if !defined(_QABC_SECTION) && defined(CALL_IQ_ABC) 186 #define _QABC_SECTION 187 188 typedef struct { 189 double R11, R12; 190 double R21, R22; 191 double R31, R32; 192 } QABCRotation; 193 194 // Fill in the rotation matrix R from the view angles (theta, phi, psi) and the 195 // jitter angles (dtheta, dphi, dpsi). This matrix can be applied to all of the 196 // (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qb,qc]' 197 static void 198 qabc_rotation( 199 QABCRotation *rotation, 200 double theta, double phi, double psi, 201 double dtheta, double dphi, double dpsi) 202 { 203 double sin_theta, cos_theta; 204 double sin_phi, cos_phi; 205 double sin_psi, cos_psi; 206 207 // reverse view matrix 208 SINCOS(theta*M_PI_180, sin_theta, cos_theta); 209 SINCOS(phi*M_PI_180, sin_phi, cos_phi); 210 SINCOS(psi*M_PI_180, sin_psi, cos_psi); 211 const double V11 = -sin_phi*sin_psi + cos_phi*cos_psi*cos_theta; 212 const double V12 = sin_phi*cos_psi*cos_theta + sin_psi*cos_phi; 213 const double V21 = -sin_phi*cos_psi - sin_psi*cos_phi*cos_theta; 214 const double V22 = -sin_phi*sin_psi*cos_theta + cos_phi*cos_psi; 215 const double V31 = sin_theta*cos_phi; 216 const double V32 = sin_phi*sin_theta; 217 218 // reverse jitter matrix 219 SINCOS(dtheta*M_PI_180, sin_theta, cos_theta); 220 SINCOS(dphi*M_PI_180, sin_phi, cos_phi); 221 SINCOS(dpsi*M_PI_180, sin_psi, cos_psi); 222 const double J11 = cos_psi*cos_theta; 223 const double J12 = sin_phi*sin_theta*cos_psi + sin_psi*cos_phi; 224 const double J13 = sin_phi*sin_psi - sin_theta*cos_phi*cos_psi; 225 const double J21 = -sin_psi*cos_theta; 226 const double J22 = -sin_phi*sin_psi*sin_theta + cos_phi*cos_psi; 227 const double J23 = sin_phi*cos_psi + sin_psi*sin_theta*cos_phi; 228 const double J31 = sin_theta; 229 const double J32 = -sin_phi*cos_theta; 230 const double J33 = cos_phi*cos_theta; 231 232 // reverse matrix 233 rotation->R11 = J11*V11 + J12*V21 + J13*V31; 234 rotation->R12 = J11*V12 + J12*V22 + J13*V32; 235 rotation->R21 = J21*V11 + J22*V21 + J23*V31; 236 rotation->R22 = J21*V12 + J22*V22 + J23*V32; 237 rotation->R31 = J31*V11 + J32*V21 + J33*V31; 238 rotation->R32 = J31*V12 + J32*V22 + J33*V32; 239 } 240 241 // Apply the rotation matrix returned from qabc_rotation to the point (qx,qy), 242 // returning R*[qx,qy]' = [qa,qb,qc]' 243 static double 244 qabc_apply( 245 QABCRotation rotation, 246 double qx, double qy, 247 double *qa_out, double *qb_out, double *qc_out) 248 { 249 *qa_out = rotation.R11*qx + rotation.R12*qy; 250 *qb_out = rotation.R21*qx + rotation.R22*qy; 251 *qc_out = rotation.R31*qx + rotation.R32*qy; 252 } 253 254 #endif // _QABC_SECTION 255 256 257 // ==================== KERNEL CODE ======================== 73 258 74 259 kernel 75 260 void KERNEL_NAME( 76 261 int32_t nq, // number of q values 77 const int32_t pd_start, // where we are in the polydispersity loop78 const int32_t pd_stop, // where we are stopping in the polydispersity loop262 const int32_t pd_start, // where we are in the dispersity loop 263 const int32_t pd_stop, // where we are stopping in the dispersity loop 79 264 global const ProblemDetails *details, 80 265 global const double *values, 81 266 global const double *q, // nq q values, with padding to boundary 82 267 global double *result, // nq+1 return values, again with padding 83 const double cutoff // cutoff in the polydispersity weight product268 const double cutoff // cutoff in the dispersity weight product 84 269 ) 85 270 { 271 #ifdef USE_OPENCL 272 // who we are and what element we are working with 273 const int q_index = get_global_id(0); 274 if (q_index >= nq) return; 275 #else 276 // Define q_index here so that debugging statements can be written to work 277 // for both OpenCL and DLL using: 278 // if (q_index == 0) {printf(...);} 279 int q_index = 0; 280 #endif 281 86 282 // Storage for the current parameter values. These will be updated as we 87 // walk the polydispersity cube.283 // walk the dispersity mesh. 88 284 ParameterBlock local_values; 89 285 … … 91 287 // Location of the sld parameters in the parameter vector. 92 288 // These parameters are updated with the effective sld due to magnetism. 93 #if NUM_MAGNETIC > 394 289 const int32_t slds[] = { MAGNETIC_PARS }; 95 #endif 96 97 // TODO: could precompute these outside of the kernel. 290 98 291 // Interpret polarization cross section. 99 292 // up_frac_i = values[NUM_PARS+2]; 100 293 // up_frac_f = values[NUM_PARS+3]; 101 294 // up_angle = values[NUM_PARS+4]; 102 double spins[4]; 295 // TODO: could precompute more magnetism parameters before calling the kernel. 296 double spins[8]; // uu, ud real, du real, dd, ud imag, du imag, fill, fill 103 297 double cos_mspin, sin_mspin; 104 set_spin s(values[NUM_PARS+2], values[NUM_PARS+3], spins);298 set_spin_weights(values[NUM_PARS+2], values[NUM_PARS+3], spins); 105 299 SINCOS(-values[NUM_PARS+4]*M_PI_180, sin_mspin, cos_mspin); 106 300 #endif // MAGNETIC … … 114 308 for (int i=0; i < NUM_PARS; i++) { 115 309 local_values.vector[i] = values[2+i]; 116 // printf("p%d = %g\n",i, local_values.vector[i]);310 //if (q_index==0) printf("p%d = %g\n",i, local_values.vector[i]); 117 311 } 118 //printf("NUM_VALUES:%d NUM_PARS:%d MAX_PD:%d\n", NUM_VALUES, NUM_PARS, MAX_PD); 119 //printf("start:%d stop:%d\n", pd_start, pd_stop); 120 121 double pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 122 if (pd_start == 0) { 123 #ifdef USE_OPENMP 124 #pragma omp parallel for 125 #endif 126 for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0; 312 //if (q_index==0) printf("NUM_VALUES:%d NUM_PARS:%d MAX_PD:%d\n", NUM_VALUES, NUM_PARS, MAX_PD); 313 //if (q_index==0) printf("start:%d stop:%d\n", pd_start, pd_stop); 314 315 // If pd_start is zero that means that we are starting a new calculation, 316 // and must initialize the result to zero. Otherwise, we are restarting 317 // the calculation from somewhere in the middle of the dispersity mesh, 318 // and we update the value rather than reset it. Similarly for the 319 // normalization factor, which is stored as the final value in the 320 // results vector (one past the number of q values). 321 // 322 // The code differs slightly between opencl and dll since opencl is only 323 // seeing one q value (stored in the variable "this_result") while the dll 324 // version must loop over all q. 325 #ifdef USE_OPENCL 326 double pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 327 double this_result = (pd_start == 0 ? 0.0 : result[q_index]); 328 #else // !USE_OPENCL 329 double pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 330 if (pd_start == 0) { 331 #ifdef USE_OPENMP 332 #pragma omp parallel for 333 #endif 334 for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0; 335 } 336 //if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]); 337 #endif // !USE_OPENCL 338 339 340 // ====== macros to set up the parts of the loop ======= 341 /* 342 Based on the level of the loop, uses C preprocessor magic to construct 343 level-specific looping variables, including these from loop level 3: 344 345 int n3 : length of loop for mesh level 3 346 int i3 : current position in the loop for level 3, which is calculated 347 from a combination of pd_start, pd_stride[3] and pd_length[3]. 348 int p3 : is the index into the parameter table for mesh level 3 349 double v3[] : pointer into dispersity array to values for loop 3 350 double w3[] : pointer into dispersity array to weights for loop 3 351 double weight3 : the product of weights from levels 3 and up, computed 352 as weight5*weight4*w3[i3]. Note that we need an outermost 353 value weight5 set to 1.0 for this to work properly. 354 355 After expansion, the loop struction will look like the following: 356 357 // --- PD_INIT(4) --- 358 const int n4 = pd_length[4]; 359 const int p4 = pd_par[4]; 360 global const double *v4 = pd_value + pd_offset[4]; 361 global const double *w4 = pd_weight + pd_offset[4]; 362 int i4 = (pd_start/pd_stride[4])%n4; // position in level 4 at pd_start 363 364 // --- PD_INIT(3) --- 365 const int n3 = pd_length[3]; 366 ... 367 int i3 = (pd_start/pd_stride[3])%n3; // position in level 3 at pd_start 368 369 PD_INIT(2) 370 PD_INIT(1) 371 PD_INIT(0) 372 373 // --- PD_OUTERMOST_WEIGHT(5) --- 374 const double weight5 = 1.0; 375 376 // --- PD_OPEN(4,5) --- 377 while (i4 < n4) { 378 parameter[p4] = v4[i4]; // set the value for pd parameter 4 at this mesh point 379 const double weight4 = w4[i4] * weight5; 380 381 // from PD_OPEN(3,4) 382 while (i3 < n3) { 383 parameter[p3] = v3[i3]; // set the value for pd parameter 3 at this mesh point 384 const double weight3 = w3[i3] * weight4; 385 386 PD_OPEN(3,2) 387 PD_OPEN(2,1) 388 PD_OPEN(0,1) 389 390 ... main loop body ... 391 392 ++step; // increment counter representing position in dispersity mesh 393 394 PD_CLOSE(0) 395 PD_CLOSE(1) 396 PD_CLOSE(2) 397 398 // --- PD_CLOSE(3) --- 399 if (step >= pd_stop) break; 400 ++i3; 401 } 402 i3 = 0; // reset loop counter for next round through the loop 403 404 // --- PD_CLOSE(4) --- 405 if (step >= pd_stop) break; 406 ++i4; 127 407 } 128 //printf("start %d %g %g\n", pd_start, pd_norm, result[0]); 129 408 i4 = 0; // reset loop counter even though no more rounds through the loop 409 410 */ 411 412 // Define looping variables 413 #define PD_INIT(_LOOP) \ 414 const int n##_LOOP = details->pd_length[_LOOP]; \ 415 const int p##_LOOP = details->pd_par[_LOOP]; \ 416 global const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \ 417 global const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \ 418 int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP; 419 420 // Jump into the middle of the dispersity loop 421 #define PD_OPEN(_LOOP,_OUTER) \ 422 while (i##_LOOP < n##_LOOP) { \ 423 local_values.vector[p##_LOOP] = v##_LOOP[i##_LOOP]; \ 424 const double weight##_LOOP = w##_LOOP[i##_LOOP] * weight##_OUTER; 425 426 // create the variable "weight#=1.0" where # is the outermost level+1 (=MAX_PD). 427 #define _PD_OUTERMOST_WEIGHT(_n) const double weight##_n = 1.0; 428 #define PD_OUTERMOST_WEIGHT(_n) _PD_OUTERMOST_WEIGHT(_n) 429 430 // Close out the loop 431 #define PD_CLOSE(_LOOP) \ 432 if (step >= pd_stop) break; \ 433 ++i##_LOOP; \ 434 } \ 435 i##_LOOP = 0; 436 437 // The main loop body is essentially: 438 // 439 // BUILD_ROTATION // construct the rotation matrix qxy => qabc 440 // for each q 441 // FETCH_Q // set qx,qy from the q input vector 442 // APPLY_ROTATION // convert qx,qy to qa,qb,qc 443 // CALL_KERNEL // scattering = Iqxy(qa, qb, qc, p1, p2, ...) 444 // 445 // Depending on the shape type (radial, axial, triaxial), the variables 446 // and calling parameters will be slightly different. These macros 447 // capture the differences in one spot so the rest of the code is easier 448 // to read. 449 #if defined(CALL_IQ) 450 // unoriented 1D 451 double qk; 452 #define FETCH_Q() do { qk = q[q_index]; } while (0) 453 #define BUILD_ROTATION() do {} while(0) 454 #define APPLY_ROTATION() do {} while(0) 455 #define CALL_KERNEL() CALL_IQ(qk, local_values.table) 456 457 #elif defined(CALL_IQ_A) 458 // unoriented 2D 459 double qx, qy; 460 #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0) 461 #define BUILD_ROTATION() do {} while(0) 462 #define APPLY_ROTATION() do {} while(0) 463 #define CALL_KERNEL() CALL_IQ_A(sqrt(qx*qx+qy*qy), local_values.table) 464 465 #elif defined(CALL_IQ_AC) 466 // oriented symmetric 2D 467 double qx, qy; 468 #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0) 469 double qa, qc; 470 QACRotation rotation; 471 // Grab the "view" angles (theta, phi) from the initial parameter table. 472 // These are separate from the "jitter" angles (dtheta, dphi) which are 473 // stored with the dispersity values and copied to the local parameter 474 // table as we go through the mesh. 475 const double theta = values[details->theta_par+2]; 476 const double phi = values[details->theta_par+3]; 477 #define BUILD_ROTATION() qac_rotation(&rotation, \ 478 theta, phi, local_values.table.theta, local_values.table.phi) 479 #define APPLY_ROTATION() qac_apply(rotation, qx, qy, &qa, &qc) 480 #define CALL_KERNEL() CALL_IQ_AC(qa, qc, local_values.table) 481 482 #elif defined(CALL_IQ_ABC) 483 // oriented asymmetric 2D 484 double qx, qy; 485 #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0) 486 double qa, qb, qc; 487 QABCRotation rotation; 488 // Grab the "view" angles (theta, phi, psi) from the initial parameter table. 489 // These are separate from the "jitter" angles (dtheta, dphi, dpsi) which are 490 // stored with the dispersity values and copied to the local parameter 491 // table as we go through the mesh. 492 const double theta = values[details->theta_par+2]; 493 const double phi = values[details->theta_par+3]; 494 const double psi = values[details->theta_par+4]; 495 #define BUILD_ROTATION() qabc_rotation(&rotation, \ 496 theta, phi, psi, local_values.table.theta, \ 497 local_values.table.phi, local_values.table.psi) 498 #define APPLY_ROTATION() qabc_apply(rotation, qx, qy, &qa, &qb, &qc) 499 #define CALL_KERNEL() CALL_IQ_ABC(qa, qb, qc, local_values.table) 500 501 #endif 502 503 // ====== construct the loops ======= 504 505 // Pointers to the start of the dispersity and weight vectors, if needed. 130 506 #if MAX_PD>0 131 507 global const double *pd_value = values + NUM_VALUES; … … 133 509 #endif 134 510 135 // Jump into the middle of the polydispersity loop 511 // The variable "step" is the current position in the dispersity loop. 512 // It will be incremented each time a new point in the mesh is accumulated, 513 // and used to test whether we have reached pd_stop. 514 int step = pd_start; 515 516 // define looping variables 136 517 #if MAX_PD>4 137 int n4=details->pd_length[4]; 138 int i4=(pd_start/details->pd_stride[4])%n4; 139 const int p4=details->pd_par[4]; 140 global const double *v4 = pd_value + details->pd_offset[4]; 141 global const double *w4 = pd_weight + details->pd_offset[4]; 518 PD_INIT(4) 142 519 #endif 143 520 #if MAX_PD>3 144 int n3=details->pd_length[3]; 145 int i3=(pd_start/details->pd_stride[3])%n3; 146 const int p3=details->pd_par[3]; 147 global const double *v3 = pd_value + details->pd_offset[3]; 148 global const double *w3 = pd_weight + details->pd_offset[3]; 149 //printf("offset %d: %d %d\n", 3, details->pd_offset[3], NUM_VALUES); 521 PD_INIT(3) 150 522 #endif 151 523 #if MAX_PD>2 152 int n2=details->pd_length[2]; 153 int i2=(pd_start/details->pd_stride[2])%n2; 154 const int p2=details->pd_par[2]; 155 global const double *v2 = pd_value + details->pd_offset[2]; 156 global const double *w2 = pd_weight + details->pd_offset[2]; 524 PD_INIT(2) 157 525 #endif 158 526 #if MAX_PD>1 159 int n1=details->pd_length[1]; 160 int i1=(pd_start/details->pd_stride[1])%n1; 161 const int p1=details->pd_par[1]; 162 global const double *v1 = pd_value + details->pd_offset[1]; 163 global const double *w1 = pd_weight + details->pd_offset[1]; 527 PD_INIT(1) 164 528 #endif 165 529 #if MAX_PD>0 166 int n0=details->pd_length[0]; 167 int i0=(pd_start/details->pd_stride[0])%n0; 168 const int p0=details->pd_par[0]; 169 global const double *v0 = pd_value + details->pd_offset[0]; 170 global const double *w0 = pd_weight + details->pd_offset[0]; 171 //printf("w0:%p, values:%p, diff:%ld, %d\n",w0,values,(w0-values), NUM_VALUES); 172 #endif 173 174 530 PD_INIT(0) 531 #endif 532 533 // open nested loops 534 PD_OUTERMOST_WEIGHT(MAX_PD) 535 #if MAX_PD>4 536 PD_OPEN(4,5) 537 #endif 538 #if MAX_PD>3 539 PD_OPEN(3,4) 540 #endif 541 #if MAX_PD>2 542 PD_OPEN(2,3) 543 #endif 544 #if MAX_PD>1 545 PD_OPEN(1,2) 546 #endif 175 547 #if MAX_PD>0 176 const int theta_par = details->theta_par; 177 const int fast_theta = (theta_par == p0); 178 const int slow_theta = (theta_par >= 0 && !fast_theta); 179 double spherical_correction = 1.0; 180 #else 181 // Note: if not polydisperse the weights cancel and we don't need the 182 // spherical correction. 183 const double spherical_correction = 1.0; 184 #endif 185 186 int step = pd_start; 187 188 #if MAX_PD>4 189 const double weight5 = 1.0; 190 while (i4 < n4) { 191 local_values.vector[p4] = v4[i4]; 192 double weight4 = w4[i4] * weight5; 193 //printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 4, p4, i4, n4, local_values.vector[p4], weight4); 194 #elif MAX_PD>3 195 const double weight4 = 1.0; 196 #endif 197 #if MAX_PD>3 198 while (i3 < n3) { 199 local_values.vector[p3] = v3[i3]; 200 double weight3 = w3[i3] * weight4; 201 //printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 3, p3, i3, n3, local_values.vector[p3], weight3); 202 #elif MAX_PD>2 203 const double weight3 = 1.0; 204 #endif 205 #if MAX_PD>2 206 while (i2 < n2) { 207 local_values.vector[p2] = v2[i2]; 208 double weight2 = w2[i2] * weight3; 209 //printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 2, p2, i2, n2, local_values.vector[p2], weight2); 210 #elif MAX_PD>1 211 const double weight2 = 1.0; 212 #endif 213 #if MAX_PD>1 214 while (i1 < n1) { 215 local_values.vector[p1] = v1[i1]; 216 double weight1 = w1[i1] * weight2; 217 //printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 1, p1, i1, n1, local_values.vector[p1], weight1); 218 #elif MAX_PD>0 219 const double weight1 = 1.0; 220 #endif 221 #if MAX_PD>0 222 if (slow_theta) { // Theta is not in inner loop 223 spherical_correction = fmax(fabs(cos(M_PI_180*local_values.vector[theta_par])), 1.e-6); 224 } 225 while(i0 < n0) { 226 local_values.vector[p0] = v0[i0]; 227 double weight0 = w0[i0] * weight1; 228 //printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 0, p0, i0, n0, local_values.vector[p0], weight0); 229 if (fast_theta) { // Theta is in inner loop 230 spherical_correction = fmax(fabs(cos(M_PI_180*local_values.vector[p0])), 1.e-6); 231 } 232 #else 233 const double weight0 = 1.0; 234 #endif 235 236 //printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n"); 237 //printf("sphcor: %g\n", spherical_correction); 238 239 #ifdef INVALID 240 if (!INVALID(local_values.table)) 241 #endif 242 { 243 // Accumulate I(q) 244 // Note: weight==0 must always be excluded 245 if (weight0 > cutoff) { 246 // spherical correction is set at a minimum of 1e-6, otherwise there 247 // would be problems looking at models with theta=90. 248 const double weight = weight0 * spherical_correction; 249 pd_norm += weight * CALL_VOLUME(local_values.table); 250 251 #ifdef USE_OPENMP 252 #pragma omp parallel for 253 #endif 254 for (int q_index=0; q_index<nq; q_index++) { 255 #if defined(MAGNETIC) && NUM_MAGNETIC > 0 256 const double qx = q[2*q_index]; 257 const double qy = q[2*q_index+1]; 548 PD_OPEN(0,1) 549 #endif 550 551 552 //if (q_index==0) {printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n");} 553 554 // ====== loop body ======= 555 #ifdef INVALID 556 if (!INVALID(local_values.table)) 557 #endif 558 { 559 // Accumulate I(q) 560 // Note: weight==0 must always be excluded 561 if (weight0 > cutoff) { 562 pd_norm += weight0 * CALL_VOLUME(local_values.table); 563 BUILD_ROTATION(); 564 565 #ifndef USE_OPENCL 566 // DLL needs to explicitly loop over the q values. 567 #ifdef USE_OPENMP 568 #pragma omp parallel for 569 #endif 570 for (q_index=0; q_index<nq; q_index++) 571 #endif // !USE_OPENCL 572 { 573 574 FETCH_Q(); 575 APPLY_ROTATION(); 576 577 // ======= COMPUTE SCATTERING ========== 578 #if defined(MAGNETIC) && NUM_MAGNETIC > 0 579 // Compute the scattering from the magnetic cross sections. 580 double scattering = 0.0; 258 581 const double qsq = qx*qx + qy*qy; 259 260 // Constant across orientation, polydispersity for given qx, qy261 double scattering = 0.0;262 // TODO: what is the magnetic scattering at q=0263 582 if (qsq > 1.e-16) { 264 double p[4]; // dd, du, ud, uu 265 p[0] = (qy*cos_mspin + qx*sin_mspin)/qsq; 266 p[3] = -p[0]; 267 p[1] = p[2] = (qy*sin_mspin - qx*cos_mspin)/qsq; 268 269 for (int index=0; index<4; index++) { 270 const double xs = spins[index]; 271 if (xs > 1.e-8) { 272 const int spin_flip = (index==1) || (index==2); 273 const double pk = p[index]; 274 for (int axis=0; axis<=spin_flip; axis++) { 275 #define M1 NUM_PARS+5 276 #define M2 NUM_PARS+8 277 #define M3 NUM_PARS+13 278 #define SLD(_M_offset, _sld_offset) \ 279 local_values.vector[_sld_offset] = xs * (axis \ 280 ? (index==1 ? -values[_M_offset+2] : values[_M_offset+2]) \ 281 : mag_sld(qx, qy, pk, values[_M_offset], values[_M_offset+1], \ 282 (spin_flip ? 0.0 : values[_sld_offset+2]))) 283 #if NUM_MAGNETIC==1 284 SLD(M1, MAGNETIC_PAR1); 285 #elif NUM_MAGNETIC==2 286 SLD(M1, MAGNETIC_PAR1); 287 SLD(M2, MAGNETIC_PAR2); 288 #elif NUM_MAGNETIC==3 289 SLD(M1, MAGNETIC_PAR1); 290 SLD(M2, MAGNETIC_PAR2); 291 SLD(M3, MAGNETIC_PAR3); 292 #else 293 for (int sk=0; sk<NUM_MAGNETIC; sk++) { 294 SLD(M1+3*sk, slds[sk]); 295 } 296 #endif 297 scattering += CALL_IQ(q, q_index, local_values.table); 583 // TODO: what is the magnetic scattering at q=0 584 const double px = (qy*cos_mspin + qx*sin_mspin)/qsq; 585 const double py = (qy*sin_mspin - qx*cos_mspin)/qsq; 586 587 // loop over uu, ud real, du real, dd, ud imag, du imag 588 for (int xs=0; xs<6; xs++) { 589 const double xs_weight = spins[xs]; 590 if (xs_weight > 1.e-8) { 591 // Since the cross section weight is significant, set the slds 592 // to the effective slds for this cross section, call the 593 // kernel, and add according to weight. 594 for (int sk=0; sk<NUM_MAGNETIC; sk++) { 595 const int32_t mag_index = NUM_PARS+5 + 3*sk; 596 const int32_t sld_index = slds[sk]; 597 const double mx = values[mag_index]; 598 const double my = values[mag_index+1]; 599 const double mz = values[mag_index+2]; 600 local_values.vector[sld_index] = 601 mag_sld(xs, qx, qy, px, py, values[sld_index+2], mx, my, mz); 298 602 } 603 scattering += xs_weight * CALL_KERNEL(); 299 604 } 300 605 } 301 606 } 302 #else // !MAGNETIC 303 const double scattering = CALL_IQ(q, q_index, local_values.table); 304 #endif // !MAGNETIC 305 //printf("q_index:%d %g %g %g %g\n",q_index, scattering, weight, spherical_correction, weight0); 306 result[q_index] += weight * scattering; 307 } 607 #else // !MAGNETIC 608 const double scattering = CALL_KERNEL(); 609 #endif // !MAGNETIC 610 //printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight, weight0); 611 612 #ifdef USE_OPENCL 613 this_result += weight0 * scattering; 614 #else // !USE_OPENCL 615 result[q_index] += weight0 * scattering; 616 #endif // !USE_OPENCL 308 617 } 309 618 } 310 ++step; 619 } 620 621 // close nested loops 622 ++step; 311 623 #if MAX_PD>0 312 if (step >= pd_stop) break; 313 ++i0; 314 } 315 i0 = 0; 624 PD_CLOSE(0) 316 625 #endif 317 626 #if MAX_PD>1 318 if (step >= pd_stop) break; 319 ++i1; 320 } 321 i1 = 0; 627 PD_CLOSE(1) 322 628 #endif 323 629 #if MAX_PD>2 324 if (step >= pd_stop) break; 325 ++i2; 326 } 327 i2 = 0; 630 PD_CLOSE(2) 328 631 #endif 329 632 #if MAX_PD>3 330 if (step >= pd_stop) break; 331 ++i3; 332 } 333 i3 = 0; 633 PD_CLOSE(3) 334 634 #endif 335 635 #if MAX_PD>4 336 if (step >= pd_stop) break; 337 ++i4; 338 } 339 i4 = 0; 340 #endif 341 636 PD_CLOSE(4) 637 #endif 638 639 // Remember the current result and the updated norm. 640 #ifdef USE_OPENCL 641 result[q_index] = this_result; 642 if (q_index == 0) result[nq] = pd_norm; 643 //if (q_index == 0) printf("res: %g/%g\n", result[0], pd_norm); 644 #else // !USE_OPENCL 645 result[nq] = pd_norm; 342 646 //printf("res: %g/%g\n", result[0], pd_norm); 343 // Remember the updated norm. 344 result[nq] = pd_norm; 345 } 647 #endif // !USE_OPENCL 648 649 // clear the macros in preparation for the next kernel 650 #undef PD_INIT 651 #undef PD_OPEN 652 #undef PD_CLOSE 653 #undef FETCH_Q 654 #undef BUILD_ROTATION 655 #undef APPLY_ROTATION 656 #undef CALL_KERNEL 657 }
Note: See TracChangeset
for help on using the changeset viewer.