Changeset 9ee2756 in sasmodels for sasmodels/kernel_iq.c


Ignore:
Timestamp:
Oct 19, 2017 12:31:30 PM (7 years ago)
Author:
Paul Kienzle <pkienzle@…>
Branches:
master, core_shell_microgels, magnetic_model, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
Children:
2c108a3
Parents:
94f4543
Message:

simplify kernel wrapper code and combine OpenCL with DLL in one file

File:
1 edited

Legend:

Unmodified
Added
Removed
  • sasmodels/kernel_iq.c

    r94f4543 r9ee2756  
    1  
    21/* 
    32    ########################################################## 
     
    1110    ########################################################## 
    1211*/ 
     12 
     13// NOTE: the following macros are defined in generate.py: 
     14// 
     15//  MAX_PD : the maximum number of dispersity loops allowed 
     16//  NUM_PARS : the number of parameters in the parameter table 
     17//  NUM_VALUES : the number of values to skip at the start of the 
     18//      values array before you get to the dispersity values. 
     19//  NUM_MAGNETIC : the number of magnetic parameters 
     20//  PARAMETER_TABLE : list of parameter declarations used to create the 
     21//      ParameterTable type. 
     22//  KERNEL_NAME : model_Iq, model_Iqxy or model_Imagnetic.  This code is 
     23//      included three times, once for each kernel type. 
     24//  MAGNETIC_PARS : a comma-separated list of indices to the sld 
     25//      parameters in the parameter table. 
     26//  MAGNETIC_PAR1, ... : the first few indices of slds in the parameter table. 
     27//  MAGNETIC : defined when the magnetic kernel is being instantiated 
     28//  CALL_VOLUME(table) : call the form volume function 
     29//  CALL_IQ(q, table) : call the Iq function for 1D calcs. 
     30//  CALL_IQ_A(q, table) : call the Iq function with |q| for 2D data. 
     31//  CALL_IQ_AC(qa, qc, table) : call the Iqxy function for symmetric shapes 
     32//  CALL_IQ_ABC(qa, qc, table) : call the Iqxy function for asymmetric shapes 
     33//  INVALID(table) : test if the current point is feesible to calculate.  This 
     34//      will be defined in the kernel definition file. 
    1335 
    1436#ifndef _PAR_BLOCK_ // protected block so we can include this code twice. 
     
    3860#endif // _PAR_BLOCK_ 
    3961 
    40  
    41 #if defined(MAGNETIC) && NUM_MAGNETIC>0 
     62#if defined(MAGNETIC) && NUM_MAGNETIC > 0 
     63// ===== Helper functions for magnetism ===== 
    4264 
    4365// Return value restricted between low and high 
     
    6385} 
    6486 
     87// Compute the magnetic sld 
    6588static double mag_sld(double qx, double qy, double p, 
    6689                       double mx, double my, double sld) 
     
    6992    return sld + perp*p; 
    7093} 
    71 //#endif // MAGNETIC 
    72  
    73 // TODO: way too hackish 
    74 // For the 1D kernel, CALL_IQ_[A,AC,ABC] and MAGNETIC are not defined 
    75 // so view_direct *IS NOT* included 
    76 // For the 2D kernel, CALL_IQ_[A,AC,ABC] is defined but MAGNETIC is not 
    77 // so view_direct *IS* included 
    78 // For the magnetic kernel, CALL_IQ_[A,AC,ABC] is defined, but so is MAGNETIC 
    79 // so view_direct *IS NOT* included 
    80 #else // !MAGNETIC 
    81  
    82 // ===== Implement jitter in orientation ===== 
     94 
     95#endif 
     96 
     97// ===== Helper functions for orientation and jitter ===== 
     98 
    8399// To change the definition of the angles, run explore/angles.py, which 
    84100// uses sympy to generate the equations. 
    85101 
    86 #if defined(CALL_IQ_AC) // oriented symmetric 
    87 static double 
    88 view_direct(double qx, double qy, 
    89              double theta, double phi, 
    90              ParameterTable table) 
     102#if !defined(_QAC_SECTION) && defined(CALL_IQ_AC) 
     103#define _QAC_SECTION 
     104 
     105typedef struct { 
     106    double R31, R32; 
     107} QACRotation; 
     108 
     109// Fill in the rotation matrix R from the view angles (theta, phi) and the 
     110// jitter angles (dtheta, dphi).  This matrix can be applied to all of the 
     111// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qc]' 
     112static void 
     113qac_rotation( 
     114    QACRotation *rotation, 
     115    double theta, double phi, 
     116    double dtheta, double dphi) 
    91117{ 
    92118    double sin_theta, cos_theta; 
    93119    double sin_phi, cos_phi; 
    94120 
    95     // reverse view 
     121    // reverse view matrix 
    96122    SINCOS(theta*M_PI_180, sin_theta, cos_theta); 
    97123    SINCOS(phi*M_PI_180, sin_phi, cos_phi); 
    98     const double qa = qx*cos_phi*cos_theta + qy*sin_phi*cos_theta; 
    99     const double qb = -qx*sin_phi + qy*cos_phi; 
    100     const double qc = qx*sin_theta*cos_phi + qy*sin_phi*sin_theta; 
    101  
    102     // reverse jitter after view 
    103     SINCOS(table.theta*M_PI_180, sin_theta, cos_theta); 
    104     SINCOS(table.phi*M_PI_180, sin_phi, cos_phi); 
    105     const double dqc = qa*sin_theta - qb*sin_phi*cos_theta + qc*cos_phi*cos_theta; 
    106  
     124    const double V11 = cos_phi*cos_theta; 
     125    const double V12 = sin_phi*cos_theta; 
     126    const double V21 = -sin_phi; 
     127    const double V22 = cos_phi; 
     128    const double V31 = sin_theta*cos_phi; 
     129    const double V32 = sin_phi*sin_theta; 
     130 
     131    // reverse jitter matrix 
     132    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta); 
     133    SINCOS(dphi*M_PI_180, sin_phi, cos_phi); 
     134    const double J31 = sin_theta; 
     135    const double J32 = -sin_phi*cos_theta; 
     136    const double J33 = cos_phi*cos_theta; 
     137 
     138    // reverse matrix 
     139    rotation->R31 = J31*V11 + J32*V21 + J33*V31; 
     140    rotation->R32 = J31*V12 + J32*V22 + J33*V32; 
     141} 
     142 
     143// Apply the rotation matrix returned from qac_rotation to the point (qx,qy), 
     144// returning R*[qx,qy]' = [qa,qc]' 
     145static double 
     146qac_apply( 
     147    QACRotation rotation, 
     148    double qx, double qy, 
     149    double *qa_out, double *qc_out) 
     150{ 
     151    const double dqc = rotation.R31*qx + rotation.R32*qy; 
    107152    // Indirect calculation of qab, from qab^2 = |q|^2 - qc^2 
    108153    const double dqa = sqrt(-dqc*dqc + qx*qx + qy*qy); 
    109154 
    110     return CALL_IQ_AC(dqa, dqc, table); 
    111 } 
    112  
    113 #elif defined(CALL_IQ_ABC) // oriented asymmetric 
    114  
    115 static double 
    116 view_direct(double qx, double qy, 
    117              double theta, double phi, double psi, 
    118              ParameterTable table) 
    119 { 
    120     double sin_theta, cos_theta; 
    121     double sin_phi, cos_phi; 
    122     double sin_psi, cos_psi; 
    123  
    124     // reverse view 
    125     SINCOS(theta*M_PI_180, sin_theta, cos_theta); 
    126     SINCOS(phi*M_PI_180, sin_phi, cos_phi); 
    127     SINCOS(psi*M_PI_180, sin_psi, cos_psi); 
    128     const double qa = qx*(-sin_phi*sin_psi + cos_phi*cos_psi*cos_theta) + qy*(sin_phi*cos_psi*cos_theta + sin_psi*cos_phi); 
    129     const double qb = qx*(-sin_phi*cos_psi - sin_psi*cos_phi*cos_theta) + qy*(-sin_phi*sin_psi*cos_theta + cos_phi*cos_psi); 
    130     const double qc = qx*sin_theta*cos_phi + qy*sin_phi*sin_theta; 
    131  
    132     // reverse jitter after view 
    133     SINCOS(table.theta*M_PI_180, sin_theta, cos_theta); 
    134     SINCOS(table.phi*M_PI_180, sin_phi, cos_phi); 
    135     SINCOS(table.psi*M_PI_180, sin_psi, cos_psi); 
    136     const double dqa = qa*cos_psi*cos_theta + qb*(sin_phi*sin_theta*cos_psi + sin_psi*cos_phi) + qc*(sin_phi*sin_psi - sin_theta*cos_phi*cos_psi); 
    137     const double dqb = -qa*sin_psi*cos_theta + qb*(-sin_phi*sin_psi*sin_theta + cos_phi*cos_psi) + qc*(sin_phi*cos_psi + sin_psi*sin_theta*cos_phi); 
    138     const double dqc = qa*sin_theta - qb*sin_phi*cos_theta + qc*cos_phi*cos_theta; 
    139  
    140     return CALL_IQ_ABC(dqa, dqb, dqc, table); 
    141 } 
    142 /* TODO: use precalculated jitter for faster 2D calcs on DLL. 
     155    *qa_out = dqa; 
     156    *qc_out = dqc; 
     157} 
     158#endif // _QAC_SECTION 
     159 
     160#if !defined(_QABC_SECTION) && defined(CALL_IQ_ABC) 
     161#define _QABC_SECTION 
     162 
     163typedef struct { 
     164    double R11, R12; 
     165    double R21, R22; 
     166    double R31, R32; 
     167} QABCRotation; 
     168 
     169// Fill in the rotation matrix R from the view angles (theta, phi, psi) and the 
     170// jitter angles (dtheta, dphi, dpsi).  This matrix can be applied to all of the 
     171// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qb,qc]' 
    143172static void 
    144 view_precalc( 
     173qabc_rotation( 
     174    QABCRotation *rotation, 
    145175    double theta, double phi, double psi, 
    146     ParameterTable table, 
    147     double *R11, double *R12, double *R21, 
    148     double *R22, double *R31, double *R32) 
     176    double dtheta, double dphi, double dpsi) 
    149177{ 
    150178    double sin_theta, cos_theta; 
     
    156184    SINCOS(phi*M_PI_180, sin_phi, cos_phi); 
    157185    SINCOS(psi*M_PI_180, sin_psi, cos_psi); 
    158     const double V11 = sin_phi*sin_psi + cos_phi*cos_psi*cos_theta; 
    159     const double V12 = sin_phi*cos_psi*cos_theta - sin_psi*cos_phi; 
    160     const double V21 = -sin_phi*cos_psi + sin_psi*cos_phi*cos_theta; 
    161     const double V22 = sin_phi*sin_psi*cos_theta + cos_phi*cos_psi; 
     186    const double V11 = -sin_phi*sin_psi + cos_phi*cos_psi*cos_theta; 
     187    const double V12 = sin_phi*cos_psi*cos_theta + sin_psi*cos_phi; 
     188    const double V21 = -sin_phi*cos_psi - sin_psi*cos_phi*cos_theta; 
     189    const double V22 = -sin_phi*sin_psi*cos_theta + cos_phi*cos_psi; 
    162190    const double V31 = sin_theta*cos_phi; 
    163191    const double V32 = sin_phi*sin_theta; 
    164192 
    165193    // reverse jitter matrix 
    166     SINCOS(table.theta*M_PI_180, sin_theta, cos_theta); 
    167     SINCOS(table.phi*M_PI_180, sin_phi, cos_phi); 
    168     SINCOS(table.psi*M_PI_180, sin_psi, cos_psi); 
     194    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta); 
     195    SINCOS(dphi*M_PI_180, sin_phi, cos_phi); 
     196    SINCOS(dpsi*M_PI_180, sin_psi, cos_psi); 
    169197    const double J11 = cos_psi*cos_theta; 
    170     const double J12 = sin_phi*sin_theta*cos_psi - sin_psi*cos_phi; 
    171     const double J13 = -sin_phi*sin_psi - sin_theta*cos_phi*cos_psi; 
    172     const double J21 = sin_psi*cos_theta; 
    173     const double J22 = sin_phi*sin_psi*sin_theta + cos_phi*cos_psi; 
    174     const double J23 = sin_phi*cos_psi - sin_psi*sin_theta*cos_phi; 
     198    const double J12 = sin_phi*sin_theta*cos_psi + sin_psi*cos_phi; 
     199    const double J13 = sin_phi*sin_psi - sin_theta*cos_phi*cos_psi; 
     200    const double J21 = -sin_psi*cos_theta; 
     201    const double J22 = -sin_phi*sin_psi*sin_theta + cos_phi*cos_psi; 
     202    const double J23 = sin_phi*cos_psi + sin_psi*sin_theta*cos_phi; 
    175203    const double J31 = sin_theta; 
    176204    const double J32 = -sin_phi*cos_theta; 
     
    178206 
    179207    // reverse matrix 
    180     *R11 = J11*V11 + J12*V21 + J13*V31; 
    181     *R12 = J11*V12 + J12*V22 + J13*V32; 
    182     *R21 = J21*V11 + J22*V21 + J23*V31; 
    183     *R22 = J21*V12 + J22*V22 + J23*V32; 
    184     *R31 = J31*V11 + J32*V21 + J33*V31; 
    185     *R32 = J31*V12 + J32*V22 + J33*V32; 
    186 } 
    187  
     208    rotation->R11 = J11*V11 + J12*V21 + J13*V31; 
     209    rotation->R12 = J11*V12 + J12*V22 + J13*V32; 
     210    rotation->R21 = J21*V11 + J22*V21 + J23*V31; 
     211    rotation->R22 = J21*V12 + J22*V22 + J23*V32; 
     212    rotation->R31 = J31*V11 + J32*V21 + J33*V31; 
     213    rotation->R32 = J31*V12 + J32*V22 + J33*V32; 
     214} 
     215 
     216// Apply the rotation matrix returned from qabc_rotation to the point (qx,qy), 
     217// returning R*[qx,qy]' = [qa,qb,qc]' 
    188218static double 
    189 view_apply(double qx, double qy, 
    190     double R11, double R12, double R21, double R22, double R31, double R32, 
    191     ParameterTable table) 
    192 { 
    193     const double dqa = R11*qx + R12*qy; 
    194     const double dqb = R21*qx + R22*qy; 
    195     const double dqc = R31*qx + R32*qy; 
    196  
    197     CALL_IQ_ABC(dqa, dqb, dqc, table); 
    198 } 
    199 */ 
    200 #endif 
    201  
    202 #endif // !MAGNETIC 
     219qabc_apply( 
     220    QABCRotation rotation, 
     221    double qx, double qy, 
     222    double *qa_out, double *qb_out, double *qc_out) 
     223{ 
     224    *qa_out = rotation.R11*qx + rotation.R12*qy; 
     225    *qb_out = rotation.R21*qx + rotation.R22*qy; 
     226    *qc_out = rotation.R31*qx + rotation.R32*qy; 
     227} 
     228 
     229#endif // _QABC_SECTION 
     230 
     231 
     232// ==================== KERNEL CODE ======================== 
    203233 
    204234kernel 
     
    214244    ) 
    215245{ 
     246#ifdef USE_OPENCL 
     247  // who we are and what element we are working with 
     248  const int q_index = get_global_id(0); 
     249  if (q_index >= nq) return; 
     250#else 
     251  // Define q_index here so that debugging statements can be written to work 
     252  // for both OpenCL and DLL using: 
     253  //    if (q_index == 0) {printf(...);} 
     254  int q_index = 0; 
     255#endif 
     256 
    216257  // Storage for the current parameter values.  These will be updated as we 
    217258  // walk the polydispersity cube. 
     
    225266  #endif 
    226267 
    227   // TODO: could precompute these outside of the kernel. 
    228268  // Interpret polarization cross section. 
    229269  //     up_frac_i = values[NUM_PARS+2]; 
    230270  //     up_frac_f = values[NUM_PARS+3]; 
    231271  //     up_angle = values[NUM_PARS+4]; 
     272  // TODO: could precompute more magnetism parameters before calling the kernel. 
    232273  double spins[4]; 
    233274  double cos_mspin, sin_mspin; 
     
    235276  SINCOS(-values[NUM_PARS+4]*M_PI_180, sin_mspin, cos_mspin); 
    236277#endif // MAGNETIC 
    237  
    238 #if defined(CALL_IQ_AC) // oriented symmetric 
    239   const double theta = values[details->theta_par+2]; 
    240   const double phi = values[details->theta_par+3]; 
    241 #elif defined(CALL_IQ_ABC) // oriented asymmetric 
    242   const double theta = values[details->theta_par+2]; 
    243   const double phi = values[details->theta_par+3]; 
    244   const double psi = values[details->theta_par+4]; 
    245 #endif 
    246278 
    247279  // Fill in the initial variables 
     
    253285  for (int i=0; i < NUM_PARS; i++) { 
    254286    local_values.vector[i] = values[2+i]; 
    255 //printf("p%d = %g\n",i, local_values.vector[i]); 
     287//if (q_index==0) printf("p%d = %g\n",i, local_values.vector[i]); 
    256288  } 
    257 //printf("NUM_VALUES:%d  NUM_PARS:%d  MAX_PD:%d\n", NUM_VALUES, NUM_PARS, MAX_PD); 
    258 //printf("start:%d  stop:%d\n", pd_start, pd_stop); 
    259  
    260   double pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 
    261   if (pd_start == 0) { 
    262     #ifdef USE_OPENMP 
    263     #pragma omp parallel for 
    264     #endif 
    265     for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0; 
     289//if (q_index==0) printf("NUM_VALUES:%d  NUM_PARS:%d  MAX_PD:%d\n", NUM_VALUES, NUM_PARS, MAX_PD); 
     290//if (q_index==0) printf("start:%d  stop:%d\n", pd_start, pd_stop); 
     291 
     292  // If pd_start is zero that means that we are starting a new calculation, 
     293  // and must initialize the result to zero.  Otherwise, we are restarting 
     294  // the calculation from somewhere in the middle of the dispersity mesh, 
     295  // and we update the value rather than reset it. Similarly for the 
     296  // normalization factor, which is stored as the final value in the 
     297  // results vector (one past the number of q values). 
     298  // 
     299  // The code differs slightly between opencl and dll since opencl is only 
     300  // seeing one q value (stored in the variable "this_result") while the dll 
     301  // version must loop over all q. 
     302  #ifdef USE_OPENCL 
     303    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 
     304    double this_result = (pd_start == 0 ? 0.0 : result[q_index]); 
     305  #else // !USE_OPENCL 
     306    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 
     307    if (pd_start == 0) { 
     308      #ifdef USE_OPENMP 
     309      #pragma omp parallel for 
     310      #endif 
     311      for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0; 
     312    } 
     313//if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]); 
     314#endif // !USE_OPENCL 
     315 
     316 
     317// ====== macros to set up the parts of the loop ======= 
     318/* 
     319Based on the level of the loop, uses C preprocessor magic to construct 
     320level-specific looping variables, including these from loop level 3: 
     321 
     322  int n3 : length of loop for mesh level 3 
     323  int i3 : current position in the loop for level 3, which is calculated 
     324       from a combination of pd_start, pd_stride[3] and pd_length[3]. 
     325  int p3 : is the index into the parameter table for mesh level 3 
     326  double v3[] : pointer into dispersity array to values for loop 3 
     327  double w3[] : pointer into dispersity array to weights for loop 3 
     328  double weight3 : the product of weights from levels 3 and up, computed 
     329       as weight5*weight4*w3[i3].  Note that we need an outermost 
     330       value weight5 set to 1.0 for this to work properly. 
     331 
     332After expansion, the loop struction will look like the following: 
     333 
     334  // --- PD_INIT(4) --- 
     335  const int n4 = pd_length[4]; 
     336  const int p4 = pd_par[4]; 
     337  global const double *v4 = pd_value + pd_offset[4]; 
     338  global const double *w4 = pd_weight + pd_offset[4]; 
     339  int i4 = (pd_start/pd_stride[4])%n4;  // position in level 4 at pd_start 
     340 
     341  // --- PD_INIT(3) --- 
     342  const int n3 = pd_length[3]; 
     343  ... 
     344  int i3 = (pd_start/pd_stride[3])%n3;  // position in level 3 at pd_start 
     345 
     346  PD_INIT(2) 
     347  PD_INIT(1) 
     348  PD_INIT(0) 
     349 
     350  // --- PD_OUTERMOST_WEIGHT(5) --- 
     351  const double weight5 = 1.0; 
     352 
     353  // --- PD_OPEN(4,5) --- 
     354  while (i4 < n4) { 
     355    parameter[p4] = v4[i4];  // set the value for pd parameter 4 at this mesh point 
     356    const double weight4 = w4[i4] * weight5; 
     357 
     358    // from PD_OPEN(3,4) 
     359    while (i3 < n3) { 
     360      parameter[p3] = v3[i3];  // set the value for pd parameter 3 at this mesh point 
     361      const double weight3 = w3[i3] * weight4; 
     362 
     363      PD_OPEN(3,2) 
     364      PD_OPEN(2,1) 
     365      PD_OPEN(0,1) 
     366 
     367      ... main loop body ... 
     368 
     369      ++step;  // increment counter representing position in dispersity mesh 
     370 
     371      PD_CLOSE(0) 
     372      PD_CLOSE(1) 
     373      PD_CLOSE(2) 
     374 
     375      // --- PD_CLOSE(3) --- 
     376      if (step >= pd_stop) break; 
     377      ++i3; 
     378    } 
     379    i3 = 0; // reset loop counter for next round through the loop 
     380 
     381    // --- PD_CLOSE(4) --- 
     382    if (step >= pd_stop) break; 
     383    ++i4; 
    266384  } 
    267 //printf("start %d %g %g\n", pd_start, pd_norm, result[0]); 
    268  
     385  i4 = 0; // reset loop counter even though no more rounds through the loop 
     386 
     387*/ 
     388 
     389// Define looping variables 
     390#define PD_INIT(_LOOP) \ 
     391  const int n##_LOOP = details->pd_length[_LOOP]; \ 
     392  const int p##_LOOP = details->pd_par[_LOOP]; \ 
     393  global const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \ 
     394  global const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \ 
     395  int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP; 
     396 
     397// Jump into the middle of the polydispersity loop 
     398#define PD_OPEN(_LOOP,_OUTER) \ 
     399  while (i##_LOOP < n##_LOOP) { \ 
     400    local_values.vector[p##_LOOP] = v##_LOOP[i##_LOOP]; \ 
     401    const double weight##_LOOP = w##_LOOP[i##_LOOP] * weight##_OUTER; 
     402 
     403// create the variable "weight#=1.0" where # is the outermost level+1 (=MAX_PD). 
     404#define _PD_OUTERMOST_WEIGHT(_n) const double weight##_n = 1.0; 
     405#define PD_OUTERMOST_WEIGHT(_n) _PD_OUTERMOST_WEIGHT(_n) 
     406 
     407// Close out the loop 
     408#define PD_CLOSE(_LOOP) \ 
     409    if (step >= pd_stop) break; \ 
     410    ++i##_LOOP; \ 
     411  } \ 
     412  i##_LOOP = 0; 
     413 
     414// The main loop body is essentially: 
     415// 
     416//    BUILD_ROTATION      // construct the rotation matrix qxy => qabc 
     417//    for each q 
     418//        FETCH_Q         // set qx,qy from the q input vector 
     419//        APPLY_ROTATION  // convert qx,qy to qa,qb,qc 
     420//        CALL_KERNEL     // scattering = Iqxy(qa, qb, qc, p1, p2, ...) 
     421// 
     422// Depending on the shape type (radial, axial, triaxial), the variables 
     423// and calling parameters will be slightly different.  These macros 
     424// capture the differences in one spot so the rest of the code is easier 
     425// to read. 
     426#if defined(CALL_IQ) 
     427  // unoriented 1D 
     428  double qk; 
     429  #define FETCH_Q do { qk = q[q_index]; } while (0) 
     430  #define BUILD_ROTATION do {} while(0) 
     431  #define APPLY_ROTATION do {} while(0) 
     432  #define CALL_KERNEL CALL_IQ(qk, local_values.table) 
     433 
     434#elif defined(CALL_IQ_A) 
     435  // unoriented 2D 
     436  double qx, qy; 
     437  #define FETCH_Q do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0) 
     438  #define BUILD_ROTATION do {} while(0) 
     439  #define APPLY_ROTATION do {} while(0) 
     440  #define CALL_KERNEL CALL_IQ_A(sqrt(qx*qx+qy*qy), local_values.table) 
     441 
     442#elif defined(CALL_IQ_AC) 
     443  // oriented symmetric 2D 
     444  double qx, qy; 
     445  #define FETCH_Q do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0) 
     446  double qa, qc; 
     447  QACRotation rotation; 
     448  // Grab the "view" angles (theta, phi) from the initial parameter table. 
     449  // These are separate from the "jitter" angles (dtheta, dphi) which are 
     450  // stored with the dispersity values and copied to the local parameter 
     451  // table as we go through the mesh. 
     452  const double theta = values[details->theta_par+2]; 
     453  const double phi = values[details->theta_par+3]; 
     454  #define BUILD_ROTATION qac_rotation(&rotation, \ 
     455    theta, phi, local_values.table.theta, local_values.table.phi) 
     456  #define APPLY_ROTATION qac_apply(rotation, qx, qy, &qa, &qc) 
     457  #define CALL_KERNEL CALL_IQ_AC(qa, qc, local_values.table) 
     458 
     459#elif defined(CALL_IQ_ABC) 
     460  // oriented asymmetric 2D 
     461  double qx, qy; 
     462  #define FETCH_Q do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0) 
     463  double qa, qb, qc; 
     464  QABCRotation rotation; 
     465  // Grab the "view" angles (theta, phi, psi) from the initial parameter table. 
     466  // These are separate from the "jitter" angles (dtheta, dphi, dpsi) which are 
     467  // stored with the dispersity values and copied to the local parameter 
     468  // table as we go through the mesh. 
     469  const double theta = values[details->theta_par+2]; 
     470  const double phi = values[details->theta_par+3]; 
     471  const double psi = values[details->theta_par+4]; 
     472  #define BUILD_ROTATION qabc_rotation(&rotation, \ 
     473    theta, phi, psi, local_values.table.theta, \ 
     474    local_values.table.phi, local_values.table.psi) 
     475  #define APPLY_ROTATION qabc_apply(rotation, qx, qy, &qa, &qb, &qc) 
     476  #define CALL_KERNEL CALL_IQ_ABC(qa, qb, qc, local_values.table) 
     477 
     478#endif 
     479 
     480// ====== construct the loops ======= 
     481 
     482// Pointers to the start of the dispersity and weight vectors, if needed. 
    269483#if MAX_PD>0 
    270484  global const double *pd_value = values + NUM_VALUES; 
     
    272486#endif 
    273487 
    274   // Jump into the middle of the polydispersity loop 
     488// The variable "step" is the current position in the dispersity loop. 
     489// It will be incremented each time a new point in the mesh is accumulated, 
     490// and used to test whether we have reached pd_stop. 
     491int step = pd_start; 
     492 
     493// define looping variables 
    275494#if MAX_PD>4 
    276   int n4=details->pd_length[4]; 
    277   int i4=(pd_start/details->pd_stride[4])%n4; 
    278   const int p4=details->pd_par[4]; 
    279   global const double *v4 = pd_value + details->pd_offset[4]; 
    280   global const double *w4 = pd_weight + details->pd_offset[4]; 
     495  PD_INIT(4) 
    281496#endif 
    282497#if MAX_PD>3 
    283   int n3=details->pd_length[3]; 
    284   int i3=(pd_start/details->pd_stride[3])%n3; 
    285   const int p3=details->pd_par[3]; 
    286   global const double *v3 = pd_value + details->pd_offset[3]; 
    287   global const double *w3 = pd_weight + details->pd_offset[3]; 
    288 //printf("offset %d: %d %d\n", 3, details->pd_offset[3], NUM_VALUES); 
     498  PD_INIT(3) 
    289499#endif 
    290500#if MAX_PD>2 
    291   int n2=details->pd_length[2]; 
    292   int i2=(pd_start/details->pd_stride[2])%n2; 
    293   const int p2=details->pd_par[2]; 
    294   global const double *v2 = pd_value + details->pd_offset[2]; 
    295   global const double *w2 = pd_weight + details->pd_offset[2]; 
     501  PD_INIT(2) 
    296502#endif 
    297503#if MAX_PD>1 
    298   int n1=details->pd_length[1]; 
    299   int i1=(pd_start/details->pd_stride[1])%n1; 
    300   const int p1=details->pd_par[1]; 
    301   global const double *v1 = pd_value + details->pd_offset[1]; 
    302   global const double *w1 = pd_weight + details->pd_offset[1]; 
     504  PD_INIT(1) 
    303505#endif 
    304506#if MAX_PD>0 
    305   int n0=details->pd_length[0]; 
    306   int i0=(pd_start/details->pd_stride[0])%n0; 
    307   const int p0=details->pd_par[0]; 
    308   global const double *v0 = pd_value + details->pd_offset[0]; 
    309   global const double *w0 = pd_weight + details->pd_offset[0]; 
    310 //printf("w0:%p, values:%p, diff:%ld, %d\n",w0,values,(w0-values), NUM_VALUES); 
    311 #endif 
    312  
    313  
    314   int step = pd_start; 
    315  
     507  PD_INIT(0) 
     508#endif 
     509 
     510// open nested loops 
     511PD_OUTERMOST_WEIGHT(MAX_PD) 
    316512#if MAX_PD>4 
    317   const double weight5 = 1.0; 
    318   while (i4 < n4) { 
    319     local_values.vector[p4] = v4[i4]; 
    320     double weight4 = w4[i4] * weight5; 
    321 //printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 4, p4, i4, n4, local_values.vector[p4], weight4); 
    322 #elif MAX_PD>3 
    323     const double weight4 = 1.0; 
     513  PD_OPEN(4,5) 
    324514#endif 
    325515#if MAX_PD>3 
    326   while (i3 < n3) { 
    327     local_values.vector[p3] = v3[i3]; 
    328     double weight3 = w3[i3] * weight4; 
    329 //printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 3, p3, i3, n3, local_values.vector[p3], weight3); 
    330 #elif MAX_PD>2 
    331     const double weight3 = 1.0; 
     516  PD_OPEN(3,4) 
    332517#endif 
    333518#if MAX_PD>2 
    334   while (i2 < n2) { 
    335     local_values.vector[p2] = v2[i2]; 
    336     double weight2 = w2[i2] * weight3; 
    337 //printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 2, p2, i2, n2, local_values.vector[p2], weight2); 
    338 #elif MAX_PD>1 
    339     const double weight2 = 1.0; 
     519  PD_OPEN(2,3) 
    340520#endif 
    341521#if MAX_PD>1 
    342   while (i1 < n1) { 
    343     local_values.vector[p1] = v1[i1]; 
    344     double weight1 = w1[i1] * weight2; 
    345 //printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 1, p1, i1, n1, local_values.vector[p1], weight1); 
    346 #elif MAX_PD>0 
    347     const double weight1 = 1.0; 
     522  PD_OPEN(1,2) 
    348523#endif 
    349524#if MAX_PD>0 
    350   while(i0 < n0) { 
    351     local_values.vector[p0] = v0[i0]; 
    352     double weight0 = w0[i0] * weight1; 
    353 //printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 0, p0, i0, n0, local_values.vector[p0], weight0); 
    354 #else 
    355     const double weight0 = 1.0; 
    356 #endif 
    357  
    358 //printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n"); 
    359 //printf("sphcor: %g\n", spherical_correction); 
    360  
    361     #ifdef INVALID 
    362     if (!INVALID(local_values.table)) 
    363     #endif 
    364     { 
    365       // Accumulate I(q) 
    366       // Note: weight==0 must always be excluded 
    367       if (weight0 > cutoff) { 
    368         pd_norm += weight0 * CALL_VOLUME(local_values.table); 
    369  
    370         #ifdef USE_OPENMP 
    371         #pragma omp parallel for 
    372         #endif 
    373         for (int q_index=0; q_index<nq; q_index++) { 
    374 #if defined(MAGNETIC) && NUM_MAGNETIC > 0 
    375           const double qx = q[2*q_index]; 
    376           const double qy = q[2*q_index+1]; 
    377           const double qsq = qx*qx + qy*qy; 
    378  
    379           // Constant across orientation, polydispersity for given qx, qy 
    380           double scattering = 0.0; 
    381           // TODO: what is the magnetic scattering at q=0 
    382           if (qsq > 1.e-16) { 
    383             double p[4];  // dd, du, ud, uu 
    384             p[0] = (qy*cos_mspin + qx*sin_mspin)/qsq; 
    385             p[3] = -p[0]; 
    386             p[1] = p[2] = (qy*sin_mspin - qx*cos_mspin)/qsq; 
    387  
    388             for (int index=0; index<4; index++) { 
    389               const double xs = spins[index]; 
    390               if (xs > 1.e-8) { 
    391                 const int spin_flip = (index==1) || (index==2); 
    392                 const double pk = p[index]; 
    393                 for (int axis=0; axis<=spin_flip; axis++) { 
    394                   #define M1 NUM_PARS+5 
    395                   #define M2 NUM_PARS+8 
    396                   #define M3 NUM_PARS+13 
    397                   #define SLD(_M_offset, _sld_offset) \ 
    398                       local_values.vector[_sld_offset] = xs * (axis \ 
    399                       ? (index==1 ? -values[_M_offset+2] : values[_M_offset+2]) \ 
    400                       : mag_sld(qx, qy, pk, values[_M_offset], values[_M_offset+1], \ 
    401                                 (spin_flip ? 0.0 : values[_sld_offset+2]))) 
    402                   #if NUM_MAGNETIC==1 
    403                       SLD(M1, MAGNETIC_PAR1); 
    404                   #elif NUM_MAGNETIC==2 
    405                       SLD(M1, MAGNETIC_PAR1); 
    406                       SLD(M2, MAGNETIC_PAR2); 
    407                   #elif NUM_MAGNETIC==3 
    408                       SLD(M1, MAGNETIC_PAR1); 
    409                       SLD(M2, MAGNETIC_PAR2); 
    410                       SLD(M3, MAGNETIC_PAR3); 
    411                   #else 
     525  PD_OPEN(0,1) 
     526#endif 
     527 
     528 
     529//if (q_index==0) {printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n");} 
     530 
     531  // ====== loop body ======= 
     532  #ifdef INVALID 
     533  if (!INVALID(local_values.table)) 
     534  #endif 
     535  { 
     536    // Accumulate I(q) 
     537    // Note: weight==0 must always be excluded 
     538    if (weight0 > cutoff) { 
     539      pd_norm += weight0 * CALL_VOLUME(local_values.table); 
     540      BUILD_ROTATION; 
     541 
     542#ifndef USE_OPENCL 
     543      // DLL needs to explicitly loop over the q values. 
     544      #ifdef USE_OPENMP 
     545      #pragma omp parallel for 
     546      #endif 
     547      for (q_index=0; q_index<nq; q_index++) 
     548#endif // !USE_OPENCL 
     549      { 
     550 
     551        FETCH_Q; 
     552        APPLY_ROTATION; 
     553 
     554        // ======= COMPUTE SCATTERING ========== 
     555        #if defined(MAGNETIC) && NUM_MAGNETIC > 0 
     556        // Constant across orientation, polydispersity for given qx, qy 
     557        double scattering = 0.0; 
     558        // TODO: what is the magnetic scattering at q=0 
     559        const double qsq = qx*qx + qy*qy; 
     560        if (qsq > 1.e-16) { 
     561          double p[4];  // dd, du, ud, uu 
     562          p[0] = (qy*cos_mspin + qx*sin_mspin)/qsq; 
     563          p[3] = -p[0]; 
     564          p[1] = p[2] = (qy*sin_mspin - qx*cos_mspin)/qsq; 
     565 
     566          for (int index=0; index<4; index++) { 
     567            const double xs = spins[index]; 
     568            if (xs > 1.e-8) { 
     569              const int spin_flip = (index==1) || (index==2); 
     570              const double pk = p[index]; 
     571              for (int axis=0; axis<=spin_flip; axis++) { 
     572                #define SLD(_M_offset, _sld_offset) \ 
     573                    local_values.vector[_sld_offset] = xs * (axis \ 
     574                    ? (index==1 ? -values[_M_offset+2] : values[_M_offset+2]) \ 
     575                    : mag_sld(qx, qy, pk, values[_M_offset], values[_M_offset+1], \ 
     576                              (spin_flip ? 0.0 : values[_sld_offset+2]))) 
     577                #define M1 NUM_PARS+5 
     578                #if NUM_MAGNETIC==1 
     579                  SLD(M1, MAGNETIC_PAR1); 
     580                #elif NUM_MAGNETIC==2 
     581                  SLD(M1, MAGNETIC_PAR1); 
     582                  SLD(M1+3, MAGNETIC_PAR2); 
     583                #elif NUM_MAGNETIC==3 
     584                  SLD(M1, MAGNETIC_PAR1); 
     585                  SLD(M1+3, MAGNETIC_PAR2); 
     586                  SLD(M1+6, MAGNETIC_PAR3); 
     587                #else 
    412588                  for (int sk=0; sk<NUM_MAGNETIC; sk++) { 
    413589                      SLD(M1+3*sk, slds[sk]); 
    414590                  } 
    415                   #endif 
    416 #  if defined(CALL_IQ_A) // unoriented 
    417                   scattering += CALL_IQ_A(sqrt(qsq), local_values.table); 
    418 #  elif defined(CALL_IQ_AC) // oriented symmetric 
    419                   scattering += view_direct(qx, qy, theta, phi, local_values.table); 
    420 #  elif defined(CALL_IQ_ABC) // oriented asymmetric 
    421                   scattering += view_direct(qx, qy, theta, phi, psi, local_values.table); 
    422 #  endif 
    423                 } 
     591                #endif 
     592                #undef SLD 
     593                scattering += CALL_KERNEL; 
    424594              } 
    425595            } 
    426596          } 
    427 #elif defined(CALL_IQ) // 1d, not MAGNETIC 
    428           const double scattering = CALL_IQ(q[q_index], local_values.table); 
    429 #else  // 2d data, not magnetic 
    430           const double qx = q[2*q_index]; 
    431           const double qy = q[2*q_index+1]; 
    432 #  if defined(CALL_IQ_A) // unoriented 
    433           const double scattering = CALL_IQ_A(sqrt(qx*qx+qy*qy), local_values.table); 
    434 #  elif defined(CALL_IQ_AC) // oriented symmetric 
    435           const double scattering = view_direct(qx, qy, theta, phi, local_values.table); 
    436 #  elif defined(CALL_IQ_ABC) // oriented asymmetric 
    437           const double scattering = view_direct(qx, qy, theta, phi, psi, local_values.table); 
    438 #  endif 
    439 #endif // !MAGNETIC 
    440 //printf("q_index:%d %g %g %g %g\n",q_index, scattering, weight, spherical_correction, weight0); 
    441           result[q_index] += weight0 * scattering; 
    442597        } 
     598        #else  // !MAGNETIC 
     599        const double scattering = CALL_KERNEL; 
     600        #endif // !MAGNETIC 
     601//printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight, weight0); 
     602 
     603        #ifdef USE_OPENCL 
     604        this_result += weight0 * scattering; 
     605        #else // !USE_OPENCL 
     606        result[q_index] += weight0 * scattering; 
     607        #endif // !USE_OPENCL 
    443608      } 
    444609    } 
    445     ++step; 
     610  } 
     611 
     612// close nested loops 
     613++step; 
    446614#if MAX_PD>0 
    447     if (step >= pd_stop) break; 
    448     ++i0; 
    449   } 
    450   i0 = 0; 
     615  PD_CLOSE(0) 
    451616#endif 
    452617#if MAX_PD>1 
    453     if (step >= pd_stop) break; 
    454     ++i1; 
    455   } 
    456   i1 = 0; 
     618  PD_CLOSE(1) 
    457619#endif 
    458620#if MAX_PD>2 
    459     if (step >= pd_stop) break; 
    460     ++i2; 
    461   } 
    462   i2 = 0; 
     621  PD_CLOSE(2) 
    463622#endif 
    464623#if MAX_PD>3 
    465     if (step >= pd_stop) break; 
    466     ++i3; 
    467   } 
    468   i3 = 0; 
     624  PD_CLOSE(3) 
    469625#endif 
    470626#if MAX_PD>4 
    471     if (step >= pd_stop) break; 
    472     ++i4; 
    473   } 
    474   i4 = 0; 
    475 #endif 
    476  
     627  PD_CLOSE(4) 
     628#endif 
     629 
     630// clear the macros in preparation for the next kernel 
     631#undef PD_INIT 
     632#undef PD_OPEN 
     633#undef PD_CLOSE 
     634#undef FETCH_Q 
     635#undef BUILD_ROTATION 
     636#undef APPLY_ROTATION 
     637#undef CALL_KERNEL 
     638 
     639// Remember the current result and the updated norm. 
     640#ifdef USE_OPENCL 
     641  result[q_index] = this_result; 
     642  if (q_index == 0) result[nq] = pd_norm; 
     643//if (q_index == 0) printf("res: %g/%g\n", result[0], pd_norm); 
     644#else // !USE_OPENCL 
     645  result[nq] = pd_norm; 
    477646//printf("res: %g/%g\n", result[0], pd_norm); 
    478   // Remember the updated norm. 
    479   result[nq] = pd_norm; 
    480 } 
     647#endif // !USE_OPENCL 
     648} 
Note: See TracChangeset for help on using the changeset viewer.