Changes in sasmodels/kernel_iq.c [bde38b5:2c108a3] in sasmodels


Ignore:
File:
1 edited

Legend:

Unmodified
Added
Removed
  • sasmodels/kernel_iq.c

    rbde38b5 r2c108a3  
    1  
    21/* 
    32    ########################################################## 
     
    1211*/ 
    1312 
     13// NOTE: the following macros are defined in generate.py: 
     14// 
     15//  MAX_PD : the maximum number of dispersity loops allowed 
     16//  NUM_PARS : the number of parameters in the parameter table 
     17//  NUM_VALUES : the number of values to skip at the start of the 
     18//      values array before you get to the dispersity values. 
     19//  PARAMETER_TABLE : list of parameter declarations used to create the 
     20//      ParameterTable type. 
     21//  KERNEL_NAME : model_Iq, model_Iqxy or model_Imagnetic.  This code is 
     22//      included three times, once for each kernel type. 
     23//  MAGNETIC : defined when the magnetic kernel is being instantiated 
     24//  NUM_MAGNETIC : the number of magnetic parameters 
     25//  MAGNETIC_PARS : a comma-separated list of indices to the sld 
     26//      parameters in the parameter table. 
     27//  CALL_VOLUME(table) : call the form volume function 
     28//  CALL_IQ(q, table) : call the Iq function for 1D calcs. 
     29//  CALL_IQ_A(q, table) : call the Iq function with |q| for 2D data. 
     30//  CALL_IQ_AC(qa, qc, table) : call the Iqxy function for symmetric shapes 
     31//  CALL_IQ_ABC(qa, qc, table) : call the Iqxy function for asymmetric shapes 
     32//  INVALID(table) : test if the current point is feesible to calculate.  This 
     33//      will be defined in the kernel definition file. 
     34 
    1435#ifndef _PAR_BLOCK_ // protected block so we can include this code twice. 
    1536#define _PAR_BLOCK_ 
     
    1738typedef struct { 
    1839#if MAX_PD > 0 
    19     int32_t pd_par[MAX_PD];     // id of the nth polydispersity variable 
    20     int32_t pd_length[MAX_PD];  // length of the nth polydispersity weight vector 
     40    int32_t pd_par[MAX_PD];     // id of the nth dispersity variable 
     41    int32_t pd_length[MAX_PD];  // length of the nth dispersity weight vector 
    2142    int32_t pd_offset[MAX_PD];  // offset of pd weights in the value & weight vector 
    2243    int32_t pd_stride[MAX_PD];  // stride to move to the next index at this level 
     
    2546    int32_t num_weights;        // total length of the weights vector 
    2647    int32_t num_active;         // number of non-trivial pd loops 
    27     int32_t theta_par;          // id of spherical correction variable 
     48    int32_t theta_par;          // id of first orientation variable 
    2849} ProblemDetails; 
    2950 
     
    3859#endif // _PAR_BLOCK_ 
    3960 
    40  
    41 #if defined(MAGNETIC) && NUM_MAGNETIC>0 
     61#if defined(MAGNETIC) && NUM_MAGNETIC > 0 
     62// ===== Helper functions for magnetism ===== 
    4263 
    4364// Return value restricted between low and high 
     
    5172//     uu * (sld - m_sigma_x); 
    5273//     dd * (sld + m_sigma_x); 
    53 //     ud * (m_sigma_y + 1j*m_sigma_z); 
    54 //     du * (m_sigma_y - 1j*m_sigma_z); 
    55 static void set_spins(double in_spin, double out_spin, double spins[4]) 
     74//     ud * (m_sigma_y - 1j*m_sigma_z); 
     75//     du * (m_sigma_y + 1j*m_sigma_z); 
     76// weights for spin crosssections: dd du real, ud real, uu, du imag, ud imag 
     77static void set_spin_weights(double in_spin, double out_spin, double spins[4]) 
    5678{ 
    5779  in_spin = clip(in_spin, 0.0, 1.0); 
    5880  out_spin = clip(out_spin, 0.0, 1.0); 
    5981  spins[0] = sqrt(sqrt((1.0-in_spin) * (1.0-out_spin))); // dd 
    60   spins[1] = sqrt(sqrt((1.0-in_spin) * out_spin));       // du 
    61   spins[2] = sqrt(sqrt(in_spin * (1.0-out_spin)));       // ud 
     82  spins[1] = sqrt(sqrt((1.0-in_spin) * out_spin));       // du real 
     83  spins[2] = sqrt(sqrt(in_spin * (1.0-out_spin)));       // ud real 
    6284  spins[3] = sqrt(sqrt(in_spin * out_spin));             // uu 
    63 } 
    64  
    65 static double mag_sld(double qx, double qy, double p, 
    66                        double mx, double my, double sld) 
    67 { 
     85  spins[4] = spins[1]; // du imag 
     86  spins[5] = spins[2]; // ud imag 
     87} 
     88 
     89// Compute the magnetic sld 
     90static double mag_sld( 
     91  const int xs, // 0=dd, 1=du real, 2=ud real, 3=uu, 4=du imag, 5=up imag 
     92  const double qx, const double qy, 
     93  const double px, const double py, 
     94  const double sld, 
     95  const double mx, const double my, const double mz 
     96) 
     97{ 
     98  if (xs < 4) { 
    6899    const double perp = qy*mx - qx*my; 
    69     return sld + perp*p; 
    70 } 
    71  
    72 #endif // MAGNETIC 
     100    switch (xs) { 
     101      case 0: // uu => sld - D M_perpx 
     102          return sld - px*perp; 
     103      case 1: // ud real => -D M_perpy 
     104          return py*perp; 
     105      case 2: // du real => -D M_perpy 
     106          return py*perp; 
     107      case 3: // dd real => sld + D M_perpx 
     108          return sld + px*perp; 
     109    } 
     110  } else { 
     111    if (xs== 4) { 
     112      return -mz;  // ud imag => -D M_perpz 
     113    } else { // index == 5 
     114      return mz;   // du imag => D M_perpz 
     115    } 
     116  } 
     117} 
     118 
     119 
     120#endif 
     121 
     122// ===== Helper functions for orientation and jitter ===== 
     123 
     124// To change the definition of the angles, run explore/angles.py, which 
     125// uses sympy to generate the equations. 
     126 
     127#if !defined(_QAC_SECTION) && defined(CALL_IQ_AC) 
     128#define _QAC_SECTION 
     129 
     130typedef struct { 
     131    double R31, R32; 
     132} QACRotation; 
     133 
     134// Fill in the rotation matrix R from the view angles (theta, phi) and the 
     135// jitter angles (dtheta, dphi).  This matrix can be applied to all of the 
     136// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qc]' 
     137static void 
     138qac_rotation( 
     139    QACRotation *rotation, 
     140    double theta, double phi, 
     141    double dtheta, double dphi) 
     142{ 
     143    double sin_theta, cos_theta; 
     144    double sin_phi, cos_phi; 
     145 
     146    // reverse view matrix 
     147    SINCOS(theta*M_PI_180, sin_theta, cos_theta); 
     148    SINCOS(phi*M_PI_180, sin_phi, cos_phi); 
     149    const double V11 = cos_phi*cos_theta; 
     150    const double V12 = sin_phi*cos_theta; 
     151    const double V21 = -sin_phi; 
     152    const double V22 = cos_phi; 
     153    const double V31 = sin_theta*cos_phi; 
     154    const double V32 = sin_phi*sin_theta; 
     155 
     156    // reverse jitter matrix 
     157    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta); 
     158    SINCOS(dphi*M_PI_180, sin_phi, cos_phi); 
     159    const double J31 = sin_theta; 
     160    const double J32 = -sin_phi*cos_theta; 
     161    const double J33 = cos_phi*cos_theta; 
     162 
     163    // reverse matrix 
     164    rotation->R31 = J31*V11 + J32*V21 + J33*V31; 
     165    rotation->R32 = J31*V12 + J32*V22 + J33*V32; 
     166} 
     167 
     168// Apply the rotation matrix returned from qac_rotation to the point (qx,qy), 
     169// returning R*[qx,qy]' = [qa,qc]' 
     170static double 
     171qac_apply( 
     172    QACRotation rotation, 
     173    double qx, double qy, 
     174    double *qa_out, double *qc_out) 
     175{ 
     176    const double dqc = rotation.R31*qx + rotation.R32*qy; 
     177    // Indirect calculation of qab, from qab^2 = |q|^2 - qc^2 
     178    const double dqa = sqrt(-dqc*dqc + qx*qx + qy*qy); 
     179 
     180    *qa_out = dqa; 
     181    *qc_out = dqc; 
     182} 
     183#endif // _QAC_SECTION 
     184 
     185#if !defined(_QABC_SECTION) && defined(CALL_IQ_ABC) 
     186#define _QABC_SECTION 
     187 
     188typedef struct { 
     189    double R11, R12; 
     190    double R21, R22; 
     191    double R31, R32; 
     192} QABCRotation; 
     193 
     194// Fill in the rotation matrix R from the view angles (theta, phi, psi) and the 
     195// jitter angles (dtheta, dphi, dpsi).  This matrix can be applied to all of the 
     196// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qb,qc]' 
     197static void 
     198qabc_rotation( 
     199    QABCRotation *rotation, 
     200    double theta, double phi, double psi, 
     201    double dtheta, double dphi, double dpsi) 
     202{ 
     203    double sin_theta, cos_theta; 
     204    double sin_phi, cos_phi; 
     205    double sin_psi, cos_psi; 
     206 
     207    // reverse view matrix 
     208    SINCOS(theta*M_PI_180, sin_theta, cos_theta); 
     209    SINCOS(phi*M_PI_180, sin_phi, cos_phi); 
     210    SINCOS(psi*M_PI_180, sin_psi, cos_psi); 
     211    const double V11 = -sin_phi*sin_psi + cos_phi*cos_psi*cos_theta; 
     212    const double V12 = sin_phi*cos_psi*cos_theta + sin_psi*cos_phi; 
     213    const double V21 = -sin_phi*cos_psi - sin_psi*cos_phi*cos_theta; 
     214    const double V22 = -sin_phi*sin_psi*cos_theta + cos_phi*cos_psi; 
     215    const double V31 = sin_theta*cos_phi; 
     216    const double V32 = sin_phi*sin_theta; 
     217 
     218    // reverse jitter matrix 
     219    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta); 
     220    SINCOS(dphi*M_PI_180, sin_phi, cos_phi); 
     221    SINCOS(dpsi*M_PI_180, sin_psi, cos_psi); 
     222    const double J11 = cos_psi*cos_theta; 
     223    const double J12 = sin_phi*sin_theta*cos_psi + sin_psi*cos_phi; 
     224    const double J13 = sin_phi*sin_psi - sin_theta*cos_phi*cos_psi; 
     225    const double J21 = -sin_psi*cos_theta; 
     226    const double J22 = -sin_phi*sin_psi*sin_theta + cos_phi*cos_psi; 
     227    const double J23 = sin_phi*cos_psi + sin_psi*sin_theta*cos_phi; 
     228    const double J31 = sin_theta; 
     229    const double J32 = -sin_phi*cos_theta; 
     230    const double J33 = cos_phi*cos_theta; 
     231 
     232    // reverse matrix 
     233    rotation->R11 = J11*V11 + J12*V21 + J13*V31; 
     234    rotation->R12 = J11*V12 + J12*V22 + J13*V32; 
     235    rotation->R21 = J21*V11 + J22*V21 + J23*V31; 
     236    rotation->R22 = J21*V12 + J22*V22 + J23*V32; 
     237    rotation->R31 = J31*V11 + J32*V21 + J33*V31; 
     238    rotation->R32 = J31*V12 + J32*V22 + J33*V32; 
     239} 
     240 
     241// Apply the rotation matrix returned from qabc_rotation to the point (qx,qy), 
     242// returning R*[qx,qy]' = [qa,qb,qc]' 
     243static double 
     244qabc_apply( 
     245    QABCRotation rotation, 
     246    double qx, double qy, 
     247    double *qa_out, double *qb_out, double *qc_out) 
     248{ 
     249    *qa_out = rotation.R11*qx + rotation.R12*qy; 
     250    *qb_out = rotation.R21*qx + rotation.R22*qy; 
     251    *qc_out = rotation.R31*qx + rotation.R32*qy; 
     252} 
     253 
     254#endif // _QABC_SECTION 
     255 
     256 
     257// ==================== KERNEL CODE ======================== 
    73258 
    74259kernel 
    75260void KERNEL_NAME( 
    76261    int32_t nq,                 // number of q values 
    77     const int32_t pd_start,     // where we are in the polydispersity loop 
    78     const int32_t pd_stop,      // where we are stopping in the polydispersity loop 
     262    const int32_t pd_start,     // where we are in the dispersity loop 
     263    const int32_t pd_stop,      // where we are stopping in the dispersity loop 
    79264    global const ProblemDetails *details, 
    80265    global const double *values, 
    81266    global const double *q, // nq q values, with padding to boundary 
    82267    global double *result,  // nq+1 return values, again with padding 
    83     const double cutoff     // cutoff in the polydispersity weight product 
     268    const double cutoff     // cutoff in the dispersity weight product 
    84269    ) 
    85270{ 
     271#ifdef USE_OPENCL 
     272  // who we are and what element we are working with 
     273  const int q_index = get_global_id(0); 
     274  if (q_index >= nq) return; 
     275#else 
     276  // Define q_index here so that debugging statements can be written to work 
     277  // for both OpenCL and DLL using: 
     278  //    if (q_index == 0) {printf(...);} 
     279  int q_index = 0; 
     280#endif 
     281 
    86282  // Storage for the current parameter values.  These will be updated as we 
    87   // walk the polydispersity cube. 
     283  // walk the dispersity mesh. 
    88284  ParameterBlock local_values; 
    89285 
     
    91287  // Location of the sld parameters in the parameter vector. 
    92288  // These parameters are updated with the effective sld due to magnetism. 
    93   #if NUM_MAGNETIC > 3 
    94289  const int32_t slds[] = { MAGNETIC_PARS }; 
    95   #endif 
    96  
    97   // TODO: could precompute these outside of the kernel. 
     290 
    98291  // Interpret polarization cross section. 
    99292  //     up_frac_i = values[NUM_PARS+2]; 
    100293  //     up_frac_f = values[NUM_PARS+3]; 
    101294  //     up_angle = values[NUM_PARS+4]; 
    102   double spins[4]; 
     295  // TODO: could precompute more magnetism parameters before calling the kernel. 
     296  double spins[8];  // uu, ud real, du real, dd, ud imag, du imag, fill, fill 
    103297  double cos_mspin, sin_mspin; 
    104   set_spins(values[NUM_PARS+2], values[NUM_PARS+3], spins); 
     298  set_spin_weights(values[NUM_PARS+2], values[NUM_PARS+3], spins); 
    105299  SINCOS(-values[NUM_PARS+4]*M_PI_180, sin_mspin, cos_mspin); 
    106300#endif // MAGNETIC 
     
    114308  for (int i=0; i < NUM_PARS; i++) { 
    115309    local_values.vector[i] = values[2+i]; 
    116 //printf("p%d = %g\n",i, local_values.vector[i]); 
     310//if (q_index==0) printf("p%d = %g\n",i, local_values.vector[i]); 
    117311  } 
    118 //printf("NUM_VALUES:%d  NUM_PARS:%d  MAX_PD:%d\n", NUM_VALUES, NUM_PARS, MAX_PD); 
    119 //printf("start:%d  stop:%d\n", pd_start, pd_stop); 
    120  
    121   double pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 
    122   if (pd_start == 0) { 
    123     #ifdef USE_OPENMP 
    124     #pragma omp parallel for 
    125     #endif 
    126     for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0; 
     312//if (q_index==0) printf("NUM_VALUES:%d  NUM_PARS:%d  MAX_PD:%d\n", NUM_VALUES, NUM_PARS, MAX_PD); 
     313//if (q_index==0) printf("start:%d  stop:%d\n", pd_start, pd_stop); 
     314 
     315  // If pd_start is zero that means that we are starting a new calculation, 
     316  // and must initialize the result to zero.  Otherwise, we are restarting 
     317  // the calculation from somewhere in the middle of the dispersity mesh, 
     318  // and we update the value rather than reset it. Similarly for the 
     319  // normalization factor, which is stored as the final value in the 
     320  // results vector (one past the number of q values). 
     321  // 
     322  // The code differs slightly between opencl and dll since opencl is only 
     323  // seeing one q value (stored in the variable "this_result") while the dll 
     324  // version must loop over all q. 
     325  #ifdef USE_OPENCL 
     326    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 
     327    double this_result = (pd_start == 0 ? 0.0 : result[q_index]); 
     328  #else // !USE_OPENCL 
     329    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 
     330    if (pd_start == 0) { 
     331      #ifdef USE_OPENMP 
     332      #pragma omp parallel for 
     333      #endif 
     334      for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0; 
     335    } 
     336//if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]); 
     337#endif // !USE_OPENCL 
     338 
     339 
     340// ====== macros to set up the parts of the loop ======= 
     341/* 
     342Based on the level of the loop, uses C preprocessor magic to construct 
     343level-specific looping variables, including these from loop level 3: 
     344 
     345  int n3 : length of loop for mesh level 3 
     346  int i3 : current position in the loop for level 3, which is calculated 
     347       from a combination of pd_start, pd_stride[3] and pd_length[3]. 
     348  int p3 : is the index into the parameter table for mesh level 3 
     349  double v3[] : pointer into dispersity array to values for loop 3 
     350  double w3[] : pointer into dispersity array to weights for loop 3 
     351  double weight3 : the product of weights from levels 3 and up, computed 
     352       as weight5*weight4*w3[i3].  Note that we need an outermost 
     353       value weight5 set to 1.0 for this to work properly. 
     354 
     355After expansion, the loop struction will look like the following: 
     356 
     357  // --- PD_INIT(4) --- 
     358  const int n4 = pd_length[4]; 
     359  const int p4 = pd_par[4]; 
     360  global const double *v4 = pd_value + pd_offset[4]; 
     361  global const double *w4 = pd_weight + pd_offset[4]; 
     362  int i4 = (pd_start/pd_stride[4])%n4;  // position in level 4 at pd_start 
     363 
     364  // --- PD_INIT(3) --- 
     365  const int n3 = pd_length[3]; 
     366  ... 
     367  int i3 = (pd_start/pd_stride[3])%n3;  // position in level 3 at pd_start 
     368 
     369  PD_INIT(2) 
     370  PD_INIT(1) 
     371  PD_INIT(0) 
     372 
     373  // --- PD_OUTERMOST_WEIGHT(5) --- 
     374  const double weight5 = 1.0; 
     375 
     376  // --- PD_OPEN(4,5) --- 
     377  while (i4 < n4) { 
     378    parameter[p4] = v4[i4];  // set the value for pd parameter 4 at this mesh point 
     379    const double weight4 = w4[i4] * weight5; 
     380 
     381    // from PD_OPEN(3,4) 
     382    while (i3 < n3) { 
     383      parameter[p3] = v3[i3];  // set the value for pd parameter 3 at this mesh point 
     384      const double weight3 = w3[i3] * weight4; 
     385 
     386      PD_OPEN(3,2) 
     387      PD_OPEN(2,1) 
     388      PD_OPEN(0,1) 
     389 
     390      ... main loop body ... 
     391 
     392      ++step;  // increment counter representing position in dispersity mesh 
     393 
     394      PD_CLOSE(0) 
     395      PD_CLOSE(1) 
     396      PD_CLOSE(2) 
     397 
     398      // --- PD_CLOSE(3) --- 
     399      if (step >= pd_stop) break; 
     400      ++i3; 
     401    } 
     402    i3 = 0; // reset loop counter for next round through the loop 
     403 
     404    // --- PD_CLOSE(4) --- 
     405    if (step >= pd_stop) break; 
     406    ++i4; 
    127407  } 
    128 //printf("start %d %g %g\n", pd_start, pd_norm, result[0]); 
    129  
     408  i4 = 0; // reset loop counter even though no more rounds through the loop 
     409 
     410*/ 
     411 
     412// Define looping variables 
     413#define PD_INIT(_LOOP) \ 
     414  const int n##_LOOP = details->pd_length[_LOOP]; \ 
     415  const int p##_LOOP = details->pd_par[_LOOP]; \ 
     416  global const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \ 
     417  global const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \ 
     418  int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP; 
     419 
     420// Jump into the middle of the dispersity loop 
     421#define PD_OPEN(_LOOP,_OUTER) \ 
     422  while (i##_LOOP < n##_LOOP) { \ 
     423    local_values.vector[p##_LOOP] = v##_LOOP[i##_LOOP]; \ 
     424    const double weight##_LOOP = w##_LOOP[i##_LOOP] * weight##_OUTER; 
     425 
     426// create the variable "weight#=1.0" where # is the outermost level+1 (=MAX_PD). 
     427#define _PD_OUTERMOST_WEIGHT(_n) const double weight##_n = 1.0; 
     428#define PD_OUTERMOST_WEIGHT(_n) _PD_OUTERMOST_WEIGHT(_n) 
     429 
     430// Close out the loop 
     431#define PD_CLOSE(_LOOP) \ 
     432    if (step >= pd_stop) break; \ 
     433    ++i##_LOOP; \ 
     434  } \ 
     435  i##_LOOP = 0; 
     436 
     437// The main loop body is essentially: 
     438// 
     439//    BUILD_ROTATION      // construct the rotation matrix qxy => qabc 
     440//    for each q 
     441//        FETCH_Q         // set qx,qy from the q input vector 
     442//        APPLY_ROTATION  // convert qx,qy to qa,qb,qc 
     443//        CALL_KERNEL     // scattering = Iqxy(qa, qb, qc, p1, p2, ...) 
     444// 
     445// Depending on the shape type (radial, axial, triaxial), the variables 
     446// and calling parameters will be slightly different.  These macros 
     447// capture the differences in one spot so the rest of the code is easier 
     448// to read. 
     449#if defined(CALL_IQ) 
     450  // unoriented 1D 
     451  double qk; 
     452  #define FETCH_Q() do { qk = q[q_index]; } while (0) 
     453  #define BUILD_ROTATION() do {} while(0) 
     454  #define APPLY_ROTATION() do {} while(0) 
     455  #define CALL_KERNEL() CALL_IQ(qk, local_values.table) 
     456 
     457#elif defined(CALL_IQ_A) 
     458  // unoriented 2D 
     459  double qx, qy; 
     460  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0) 
     461  #define BUILD_ROTATION() do {} while(0) 
     462  #define APPLY_ROTATION() do {} while(0) 
     463  #define CALL_KERNEL() CALL_IQ_A(sqrt(qx*qx+qy*qy), local_values.table) 
     464 
     465#elif defined(CALL_IQ_AC) 
     466  // oriented symmetric 2D 
     467  double qx, qy; 
     468  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0) 
     469  double qa, qc; 
     470  QACRotation rotation; 
     471  // Grab the "view" angles (theta, phi) from the initial parameter table. 
     472  // These are separate from the "jitter" angles (dtheta, dphi) which are 
     473  // stored with the dispersity values and copied to the local parameter 
     474  // table as we go through the mesh. 
     475  const double theta = values[details->theta_par+2]; 
     476  const double phi = values[details->theta_par+3]; 
     477  #define BUILD_ROTATION() qac_rotation(&rotation, \ 
     478    theta, phi, local_values.table.theta, local_values.table.phi) 
     479  #define APPLY_ROTATION() qac_apply(rotation, qx, qy, &qa, &qc) 
     480  #define CALL_KERNEL() CALL_IQ_AC(qa, qc, local_values.table) 
     481 
     482#elif defined(CALL_IQ_ABC) 
     483  // oriented asymmetric 2D 
     484  double qx, qy; 
     485  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0) 
     486  double qa, qb, qc; 
     487  QABCRotation rotation; 
     488  // Grab the "view" angles (theta, phi, psi) from the initial parameter table. 
     489  // These are separate from the "jitter" angles (dtheta, dphi, dpsi) which are 
     490  // stored with the dispersity values and copied to the local parameter 
     491  // table as we go through the mesh. 
     492  const double theta = values[details->theta_par+2]; 
     493  const double phi = values[details->theta_par+3]; 
     494  const double psi = values[details->theta_par+4]; 
     495  #define BUILD_ROTATION() qabc_rotation(&rotation, \ 
     496    theta, phi, psi, local_values.table.theta, \ 
     497    local_values.table.phi, local_values.table.psi) 
     498  #define APPLY_ROTATION() qabc_apply(rotation, qx, qy, &qa, &qb, &qc) 
     499  #define CALL_KERNEL() CALL_IQ_ABC(qa, qb, qc, local_values.table) 
     500 
     501#endif 
     502 
     503// ====== construct the loops ======= 
     504 
     505// Pointers to the start of the dispersity and weight vectors, if needed. 
    130506#if MAX_PD>0 
    131507  global const double *pd_value = values + NUM_VALUES; 
     
    133509#endif 
    134510 
    135   // Jump into the middle of the polydispersity loop 
     511// The variable "step" is the current position in the dispersity loop. 
     512// It will be incremented each time a new point in the mesh is accumulated, 
     513// and used to test whether we have reached pd_stop. 
     514int step = pd_start; 
     515 
     516// define looping variables 
    136517#if MAX_PD>4 
    137   int n4=details->pd_length[4]; 
    138   int i4=(pd_start/details->pd_stride[4])%n4; 
    139   const int p4=details->pd_par[4]; 
    140   global const double *v4 = pd_value + details->pd_offset[4]; 
    141   global const double *w4 = pd_weight + details->pd_offset[4]; 
     518  PD_INIT(4) 
    142519#endif 
    143520#if MAX_PD>3 
    144   int n3=details->pd_length[3]; 
    145   int i3=(pd_start/details->pd_stride[3])%n3; 
    146   const int p3=details->pd_par[3]; 
    147   global const double *v3 = pd_value + details->pd_offset[3]; 
    148   global const double *w3 = pd_weight + details->pd_offset[3]; 
    149 //printf("offset %d: %d %d\n", 3, details->pd_offset[3], NUM_VALUES); 
     521  PD_INIT(3) 
    150522#endif 
    151523#if MAX_PD>2 
    152   int n2=details->pd_length[2]; 
    153   int i2=(pd_start/details->pd_stride[2])%n2; 
    154   const int p2=details->pd_par[2]; 
    155   global const double *v2 = pd_value + details->pd_offset[2]; 
    156   global const double *w2 = pd_weight + details->pd_offset[2]; 
     524  PD_INIT(2) 
    157525#endif 
    158526#if MAX_PD>1 
    159   int n1=details->pd_length[1]; 
    160   int i1=(pd_start/details->pd_stride[1])%n1; 
    161   const int p1=details->pd_par[1]; 
    162   global const double *v1 = pd_value + details->pd_offset[1]; 
    163   global const double *w1 = pd_weight + details->pd_offset[1]; 
     527  PD_INIT(1) 
    164528#endif 
    165529#if MAX_PD>0 
    166   int n0=details->pd_length[0]; 
    167   int i0=(pd_start/details->pd_stride[0])%n0; 
    168   const int p0=details->pd_par[0]; 
    169   global const double *v0 = pd_value + details->pd_offset[0]; 
    170   global const double *w0 = pd_weight + details->pd_offset[0]; 
    171 //printf("w0:%p, values:%p, diff:%ld, %d\n",w0,values,(w0-values), NUM_VALUES); 
    172 #endif 
    173  
    174  
     530  PD_INIT(0) 
     531#endif 
     532 
     533// open nested loops 
     534PD_OUTERMOST_WEIGHT(MAX_PD) 
     535#if MAX_PD>4 
     536  PD_OPEN(4,5) 
     537#endif 
     538#if MAX_PD>3 
     539  PD_OPEN(3,4) 
     540#endif 
     541#if MAX_PD>2 
     542  PD_OPEN(2,3) 
     543#endif 
     544#if MAX_PD>1 
     545  PD_OPEN(1,2) 
     546#endif 
    175547#if MAX_PD>0 
    176   const int theta_par = details->theta_par; 
    177   const int fast_theta = (theta_par == p0); 
    178   const int slow_theta = (theta_par >= 0 && !fast_theta); 
    179   double spherical_correction = 1.0; 
    180 #else 
    181   // Note: if not polydisperse the weights cancel and we don't need the 
    182   // spherical correction. 
    183   const double spherical_correction = 1.0; 
    184 #endif 
    185  
    186   int step = pd_start; 
    187  
    188 #if MAX_PD>4 
    189   const double weight5 = 1.0; 
    190   while (i4 < n4) { 
    191     local_values.vector[p4] = v4[i4]; 
    192     double weight4 = w4[i4] * weight5; 
    193 //printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 4, p4, i4, n4, local_values.vector[p4], weight4); 
    194 #elif MAX_PD>3 
    195     const double weight4 = 1.0; 
    196 #endif 
    197 #if MAX_PD>3 
    198   while (i3 < n3) { 
    199     local_values.vector[p3] = v3[i3]; 
    200     double weight3 = w3[i3] * weight4; 
    201 //printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 3, p3, i3, n3, local_values.vector[p3], weight3); 
    202 #elif MAX_PD>2 
    203     const double weight3 = 1.0; 
    204 #endif 
    205 #if MAX_PD>2 
    206   while (i2 < n2) { 
    207     local_values.vector[p2] = v2[i2]; 
    208     double weight2 = w2[i2] * weight3; 
    209 //printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 2, p2, i2, n2, local_values.vector[p2], weight2); 
    210 #elif MAX_PD>1 
    211     const double weight2 = 1.0; 
    212 #endif 
    213 #if MAX_PD>1 
    214   while (i1 < n1) { 
    215     local_values.vector[p1] = v1[i1]; 
    216     double weight1 = w1[i1] * weight2; 
    217 //printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 1, p1, i1, n1, local_values.vector[p1], weight1); 
    218 #elif MAX_PD>0 
    219     const double weight1 = 1.0; 
    220 #endif 
    221 #if MAX_PD>0 
    222   if (slow_theta) { // Theta is not in inner loop 
    223     spherical_correction = fmax(fabs(cos(M_PI_180*local_values.vector[theta_par])), 1.e-6); 
    224   } 
    225   while(i0 < n0) { 
    226     local_values.vector[p0] = v0[i0]; 
    227     double weight0 = w0[i0] * weight1; 
    228 //printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 0, p0, i0, n0, local_values.vector[p0], weight0); 
    229     if (fast_theta) { // Theta is in inner loop 
    230       spherical_correction = fmax(fabs(cos(M_PI_180*local_values.vector[p0])), 1.e-6); 
    231     } 
    232 #else 
    233     const double weight0 = 1.0; 
    234 #endif 
    235  
    236 //printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n"); 
    237 //printf("sphcor: %g\n", spherical_correction); 
    238  
    239     #ifdef INVALID 
    240     if (!INVALID(local_values.table)) 
    241     #endif 
    242     { 
    243       // Accumulate I(q) 
    244       // Note: weight==0 must always be excluded 
    245       if (weight0 > cutoff) { 
    246         // spherical correction is set at a minimum of 1e-6, otherwise there 
    247         // would be problems looking at models with theta=90. 
    248         const double weight = weight0 * spherical_correction; 
    249         pd_norm += weight * CALL_VOLUME(local_values.table); 
    250  
    251         #ifdef USE_OPENMP 
    252         #pragma omp parallel for 
    253         #endif 
    254         for (int q_index=0; q_index<nq; q_index++) { 
    255 #if defined(MAGNETIC) && NUM_MAGNETIC > 0 
    256           const double qx = q[2*q_index]; 
    257           const double qy = q[2*q_index+1]; 
     548  PD_OPEN(0,1) 
     549#endif 
     550 
     551 
     552//if (q_index==0) {printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n");} 
     553 
     554  // ====== loop body ======= 
     555  #ifdef INVALID 
     556  if (!INVALID(local_values.table)) 
     557  #endif 
     558  { 
     559    // Accumulate I(q) 
     560    // Note: weight==0 must always be excluded 
     561    if (weight0 > cutoff) { 
     562      pd_norm += weight0 * CALL_VOLUME(local_values.table); 
     563      BUILD_ROTATION(); 
     564 
     565#ifndef USE_OPENCL 
     566      // DLL needs to explicitly loop over the q values. 
     567      #ifdef USE_OPENMP 
     568      #pragma omp parallel for 
     569      #endif 
     570      for (q_index=0; q_index<nq; q_index++) 
     571#endif // !USE_OPENCL 
     572      { 
     573 
     574        FETCH_Q(); 
     575        APPLY_ROTATION(); 
     576 
     577        // ======= COMPUTE SCATTERING ========== 
     578        #if defined(MAGNETIC) && NUM_MAGNETIC > 0 
     579          // Compute the scattering from the magnetic cross sections. 
     580          double scattering = 0.0; 
    258581          const double qsq = qx*qx + qy*qy; 
    259  
    260           // Constant across orientation, polydispersity for given qx, qy 
    261           double scattering = 0.0; 
    262           // TODO: what is the magnetic scattering at q=0 
    263582          if (qsq > 1.e-16) { 
    264             double p[4];  // dd, du, ud, uu 
    265             p[0] = (qy*cos_mspin + qx*sin_mspin)/qsq; 
    266             p[3] = -p[0]; 
    267             p[1] = p[2] = (qy*sin_mspin - qx*cos_mspin)/qsq; 
    268  
    269             for (int index=0; index<4; index++) { 
    270               const double xs = spins[index]; 
    271               if (xs > 1.e-8) { 
    272                 const int spin_flip = (index==1) || (index==2); 
    273                 const double pk = p[index]; 
    274                 for (int axis=0; axis<=spin_flip; axis++) { 
    275                   #define M1 NUM_PARS+5 
    276                   #define M2 NUM_PARS+8 
    277                   #define M3 NUM_PARS+13 
    278                   #define SLD(_M_offset, _sld_offset) \ 
    279                       local_values.vector[_sld_offset] = xs * (axis \ 
    280                       ? (index==1 ? -values[_M_offset+2] : values[_M_offset+2]) \ 
    281                       : mag_sld(qx, qy, pk, values[_M_offset], values[_M_offset+1], \ 
    282                                 (spin_flip ? 0.0 : values[_sld_offset+2]))) 
    283                   #if NUM_MAGNETIC==1 
    284                       SLD(M1, MAGNETIC_PAR1); 
    285                   #elif NUM_MAGNETIC==2 
    286                       SLD(M1, MAGNETIC_PAR1); 
    287                       SLD(M2, MAGNETIC_PAR2); 
    288                   #elif NUM_MAGNETIC==3 
    289                       SLD(M1, MAGNETIC_PAR1); 
    290                       SLD(M2, MAGNETIC_PAR2); 
    291                       SLD(M3, MAGNETIC_PAR3); 
    292                   #else 
    293                   for (int sk=0; sk<NUM_MAGNETIC; sk++) { 
    294                       SLD(M1+3*sk, slds[sk]); 
    295                   } 
    296                   #endif 
    297                   scattering += CALL_IQ(q, q_index, local_values.table); 
     583            // TODO: what is the magnetic scattering at q=0 
     584            const double px = (qy*cos_mspin + qx*sin_mspin)/qsq; 
     585            const double py = (qy*sin_mspin - qx*cos_mspin)/qsq; 
     586 
     587            // loop over uu, ud real, du real, dd, ud imag, du imag 
     588            for (int xs=0; xs<6; xs++) { 
     589              const double xs_weight = spins[xs]; 
     590              if (xs_weight > 1.e-8) { 
     591                // Since the cross section weight is significant, set the slds 
     592                // to the effective slds for this cross section, call the 
     593                // kernel, and add according to weight. 
     594                for (int sk=0; sk<NUM_MAGNETIC; sk++) { 
     595                  const int32_t mag_index = NUM_PARS+5 + 3*sk; 
     596                  const int32_t sld_index = slds[sk]; 
     597                  const double mx = values[mag_index]; 
     598                  const double my = values[mag_index+1]; 
     599                  const double mz = values[mag_index+2]; 
     600                  local_values.vector[sld_index] = 
     601                    mag_sld(xs, qx, qy, px, py, values[sld_index+2], mx, my, mz); 
    298602                } 
     603                scattering += xs_weight * CALL_KERNEL(); 
    299604              } 
    300605            } 
    301606          } 
    302 #else  // !MAGNETIC 
    303           const double scattering = CALL_IQ(q, q_index, local_values.table); 
    304 #endif // !MAGNETIC 
    305 //printf("q_index:%d %g %g %g %g\n",q_index, scattering, weight, spherical_correction, weight0); 
    306           result[q_index] += weight * scattering; 
    307         } 
     607        #else  // !MAGNETIC 
     608          const double scattering = CALL_KERNEL(); 
     609        #endif // !MAGNETIC 
     610//printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight, weight0); 
     611 
     612        #ifdef USE_OPENCL 
     613          this_result += weight0 * scattering; 
     614        #else // !USE_OPENCL 
     615          result[q_index] += weight0 * scattering; 
     616        #endif // !USE_OPENCL 
    308617      } 
    309618    } 
    310     ++step; 
     619  } 
     620 
     621// close nested loops 
     622++step; 
    311623#if MAX_PD>0 
    312     if (step >= pd_stop) break; 
    313     ++i0; 
    314   } 
    315   i0 = 0; 
     624  PD_CLOSE(0) 
    316625#endif 
    317626#if MAX_PD>1 
    318     if (step >= pd_stop) break; 
    319     ++i1; 
    320   } 
    321   i1 = 0; 
     627  PD_CLOSE(1) 
    322628#endif 
    323629#if MAX_PD>2 
    324     if (step >= pd_stop) break; 
    325     ++i2; 
    326   } 
    327   i2 = 0; 
     630  PD_CLOSE(2) 
    328631#endif 
    329632#if MAX_PD>3 
    330     if (step >= pd_stop) break; 
    331     ++i3; 
    332   } 
    333   i3 = 0; 
     633  PD_CLOSE(3) 
    334634#endif 
    335635#if MAX_PD>4 
    336     if (step >= pd_stop) break; 
    337     ++i4; 
    338   } 
    339   i4 = 0; 
    340 #endif 
    341  
     636  PD_CLOSE(4) 
     637#endif 
     638 
     639// Remember the current result and the updated norm. 
     640#ifdef USE_OPENCL 
     641  result[q_index] = this_result; 
     642  if (q_index == 0) result[nq] = pd_norm; 
     643//if (q_index == 0) printf("res: %g/%g\n", result[0], pd_norm); 
     644#else // !USE_OPENCL 
     645  result[nq] = pd_norm; 
    342646//printf("res: %g/%g\n", result[0], pd_norm); 
    343   // Remember the updated norm. 
    344   result[nq] = pd_norm; 
    345 } 
     647#endif // !USE_OPENCL 
     648 
     649// clear the macros in preparation for the next kernel 
     650#undef PD_INIT 
     651#undef PD_OPEN 
     652#undef PD_CLOSE 
     653#undef FETCH_Q 
     654#undef BUILD_ROTATION 
     655#undef APPLY_ROTATION 
     656#undef CALL_KERNEL 
     657} 
Note: See TracChangeset for help on using the changeset viewer.