source: sasmodels/sasmodels/kernel_iq.c @ b0de252

core_shell_microgelsmagnetic_modelticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since b0de252 was b0de252, checked in by pkienzle, 5 years ago

improve control over cuda context

  • Property mode set to 100644
File size: 26.4 KB
Line 
1/*
2    ##########################################################
3    #                                                        #
4    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
5    #   !!                                              !!   #
6    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
7    #   !!                                              !!   #
8    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
9    #                                                        #
10    ##########################################################
11*/
12
13// NOTE: the following macros are defined in generate.py:
14//
15//  MAX_PD : the maximum number of dispersity loops allowed for this model,
16//      which will be at most modelinfo.MAX_PD.
17//  NUM_PARS : the number of parameters in the parameter table
18//  NUM_VALUES : the number of values to skip at the start of the
19//      values array before you get to the dispersity values.
20//  PARAMETER_TABLE : list of parameter declarations used to create the
21//      ParameterTable type.
22//  KERNEL_NAME : model_Iq, model_Iqxy or model_Imagnetic.  This code is
23//      included three times, once for each kernel type.
24//  MAGNETIC : defined when the magnetic kernel is being instantiated
25//  NUM_MAGNETIC : the number of magnetic parameters
26//  MAGNETIC_PARS : a comma-separated list of indices to the sld
27//      parameters in the parameter table.
28//  CALL_VOLUME(table) : call the form volume function
29//  CALL_IQ(q, table) : call the Iq function for 1D calcs.
30//  CALL_IQ_A(q, table) : call the Iq function with |q| for 2D data.
31//  CALL_IQ_AC(qa, qc, table) : call the Iqxy function for symmetric shapes
32//  CALL_IQ_ABC(qa, qc, table) : call the Iqxy function for asymmetric shapes
33//  CALL_IQ_XY(qx, qy, table) : call the Iqxy function for arbitrary models
34//  INVALID(table) : test if the current point is feesible to calculate.  This
35//      will be defined in the kernel definition file.
36//  PROJECTION : equirectangular=1, sinusoidal=2
37//      see explore/jitter.py for definitions.
38
39#ifndef _PAR_BLOCK_ // protected block so we can include this code twice.
40#define _PAR_BLOCK_
41
42typedef struct {
43#if MAX_PD > 0
44    int32_t pd_par[MAX_PD];     // id of the nth dispersity variable
45    int32_t pd_length[MAX_PD];  // length of the nth dispersity weight vector
46    int32_t pd_offset[MAX_PD];  // offset of pd weights in the value & weight vector
47    int32_t pd_stride[MAX_PD];  // stride to move to the next index at this level
48#endif // MAX_PD > 0
49    int32_t num_eval;           // total number of voxels in hypercube
50    int32_t num_weights;        // total length of the weights vector
51    int32_t num_active;         // number of non-trivial pd loops
52    int32_t theta_par;          // id of first orientation variable
53} ProblemDetails;
54
55// Intel HD 4000 needs private arrays to be a multiple of 4 long
56typedef struct {
57    PARAMETER_TABLE
58} ParameterTable;
59typedef union {
60    ParameterTable table;
61    double vector[4*((NUM_PARS+3)/4)];
62} ParameterBlock;
63#endif // _PAR_BLOCK_
64
65#if defined(MAGNETIC) && NUM_MAGNETIC > 0
66// ===== Helper functions for magnetism =====
67
68// Return value restricted between low and high
69__device__
70static double clip(double value, double low, double high)
71{
72  return (value < low ? low : (value > high ? high : value));
73}
74
75// Compute spin cross sections given in_spin and out_spin
76// To convert spin cross sections to sld b:
77//     uu * (sld - m_sigma_x);
78//     dd * (sld + m_sigma_x);
79//     ud * (m_sigma_y - 1j*m_sigma_z);
80//     du * (m_sigma_y + 1j*m_sigma_z);
81// weights for spin crosssections: dd du real, ud real, uu, du imag, ud imag
82__device__
83static void set_spin_weights(double in_spin, double out_spin, double weight[6])
84{
85  in_spin = clip(in_spin, 0.0, 1.0);
86  out_spin = clip(out_spin, 0.0, 1.0);
87  // Previous version of this function took the square root of the weights,
88  // under the assumption that
89  //
90  //     w*I(q, rho1, rho2, ...) = I(q, sqrt(w)*rho1, sqrt(w)*rho2, ...)
91  //
92  // However, since the weights are applied to the final intensity and
93  // are not interned inside the I(q) function, we want the full
94  // weight and not the square root.  Any function using
95  // set_spin_weights as part of calculating an amplitude will need to
96  // manually take that square root, but there is currently no such
97  // function.
98  weight[0] = (1.0-in_spin) * (1.0-out_spin); // dd
99  weight[1] = (1.0-in_spin) * out_spin;       // du
100  weight[2] = in_spin * (1.0-out_spin);       // ud
101  weight[3] = in_spin * out_spin;             // uu
102  weight[4] = weight[1]; // du.imag
103  weight[5] = weight[2]; // ud.imag
104}
105
106// Compute the magnetic sld
107__device__
108static double mag_sld(
109  const unsigned int xs, // 0=dd, 1=du.real, 2=ud.real, 3=uu, 4=du.imag, 5=ud.imag
110  const double qx, const double qy,
111  const double px, const double py,
112  const double sld,
113  const double mx, const double my, const double mz
114)
115{
116  if (xs < 4) {
117    const double perp = qy*mx - qx*my;
118    switch (xs) {
119      default: // keep compiler happy; condition ensures xs in [0,1,2,3]
120      case 0: // uu => sld - D M_perpx
121          return sld - px*perp;
122      case 1: // ud.real => -D M_perpy
123          return py*perp;
124      case 2: // du.real => -D M_perpy
125          return py*perp;
126      case 3: // dd => sld + D M_perpx
127          return sld + px*perp;
128    }
129  } else {
130    if (xs== 4) {
131      return -mz;  // du.imag => +D M_perpz
132    } else { // index == 5
133      return +mz;  // ud.imag => -D M_perpz
134    }
135  }
136}
137
138
139#endif
140
141// ===== Helper functions for orientation and jitter =====
142
143// To change the definition of the angles, run explore/angles.py, which
144// uses sympy to generate the equations.
145
146#if !defined(_QAC_SECTION) && defined(CALL_IQ_AC)
147#define _QAC_SECTION
148
149typedef struct {
150    double R31, R32;
151} QACRotation;
152
153// Fill in the rotation matrix R from the view angles (theta, phi) and the
154// jitter angles (dtheta, dphi).  This matrix can be applied to all of the
155// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qc]'
156__device__
157static void
158qac_rotation(
159    QACRotation *rotation,
160    double theta, double phi,
161    double dtheta, double dphi)
162{
163    double sin_theta, cos_theta;
164    double sin_phi, cos_phi;
165
166    // reverse view matrix
167    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
168    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
169    const double V11 = cos_phi*cos_theta;
170    const double V12 = sin_phi*cos_theta;
171    const double V21 = -sin_phi;
172    const double V22 = cos_phi;
173    const double V31 = sin_theta*cos_phi;
174    const double V32 = sin_phi*sin_theta;
175
176    // reverse jitter matrix
177    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
178    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
179    const double J31 = sin_theta;
180    const double J32 = -sin_phi*cos_theta;
181    const double J33 = cos_phi*cos_theta;
182
183    // reverse matrix
184    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
185    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
186}
187
188// Apply the rotation matrix returned from qac_rotation to the point (qx,qy),
189// returning R*[qx,qy]' = [qa,qc]'
190__device__
191static void
192qac_apply(
193    QACRotation *rotation,
194    double qx, double qy,
195    double *qab_out, double *qc_out)
196{
197    // Indirect calculation of qab, from qab^2 = |q|^2 - qc^2
198    const double dqc = rotation->R31*qx + rotation->R32*qy;
199    const double dqab_sq = -dqc*dqc + qx*qx + qy*qy;
200    //*qab_out = sqrt(fabs(dqab_sq));
201    *qab_out = dqab_sq > 0.0 ? sqrt(dqab_sq) : 0.0;
202    *qc_out = dqc;
203}
204#endif // _QAC_SECTION
205
206#if !defined(_QABC_SECTION) && defined(CALL_IQ_ABC)
207#define _QABC_SECTION
208
209typedef struct {
210    double R11, R12;
211    double R21, R22;
212    double R31, R32;
213} QABCRotation;
214
215// Fill in the rotation matrix R from the view angles (theta, phi, psi) and the
216// jitter angles (dtheta, dphi, dpsi).  This matrix can be applied to all of the
217// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qb,qc]'
218__device__
219static void
220qabc_rotation(
221    QABCRotation *rotation,
222    double theta, double phi, double psi,
223    double dtheta, double dphi, double dpsi)
224{
225    double sin_theta, cos_theta;
226    double sin_phi, cos_phi;
227    double sin_psi, cos_psi;
228
229    // reverse view matrix
230    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
231    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
232    SINCOS(psi*M_PI_180, sin_psi, cos_psi);
233    const double V11 = -sin_phi*sin_psi + cos_phi*cos_psi*cos_theta;
234    const double V12 = sin_phi*cos_psi*cos_theta + sin_psi*cos_phi;
235    const double V21 = -sin_phi*cos_psi - sin_psi*cos_phi*cos_theta;
236    const double V22 = -sin_phi*sin_psi*cos_theta + cos_phi*cos_psi;
237    const double V31 = sin_theta*cos_phi;
238    const double V32 = sin_phi*sin_theta;
239
240    // reverse jitter matrix
241    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
242    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
243    SINCOS(dpsi*M_PI_180, sin_psi, cos_psi);
244    const double J11 = cos_psi*cos_theta;
245    const double J12 = sin_phi*sin_theta*cos_psi + sin_psi*cos_phi;
246    const double J13 = sin_phi*sin_psi - sin_theta*cos_phi*cos_psi;
247    const double J21 = -sin_psi*cos_theta;
248    const double J22 = -sin_phi*sin_psi*sin_theta + cos_phi*cos_psi;
249    const double J23 = sin_phi*cos_psi + sin_psi*sin_theta*cos_phi;
250    const double J31 = sin_theta;
251    const double J32 = -sin_phi*cos_theta;
252    const double J33 = cos_phi*cos_theta;
253
254    // reverse matrix
255    rotation->R11 = J11*V11 + J12*V21 + J13*V31;
256    rotation->R12 = J11*V12 + J12*V22 + J13*V32;
257    rotation->R21 = J21*V11 + J22*V21 + J23*V31;
258    rotation->R22 = J21*V12 + J22*V22 + J23*V32;
259    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
260    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
261}
262
263// Apply the rotation matrix returned from qabc_rotation to the point (qx,qy),
264// returning R*[qx,qy]' = [qa,qb,qc]'
265__device__
266static void
267qabc_apply(
268    QABCRotation *rotation,
269    double qx, double qy,
270    double *qa_out, double *qb_out, double *qc_out)
271{
272    *qa_out = rotation->R11*qx + rotation->R12*qy;
273    *qb_out = rotation->R21*qx + rotation->R22*qy;
274    *qc_out = rotation->R31*qx + rotation->R32*qy;
275}
276
277#endif // _QABC_SECTION
278
279
280// ==================== KERNEL CODE ========================
281
282kernel
283void KERNEL_NAME(
284    int32_t nq,                 // number of q values
285    const int32_t pd_start,     // where we are in the dispersity loop
286    const int32_t pd_stop,      // where we are stopping in the dispersity loop
287    global_par const ProblemDetails *details,
288    global_par const double *values,
289    global_par const double *q, // nq q values, with padding to boundary
290    global_par double *result,  // nq+1 return values, again with padding
291    const double cutoff     // cutoff in the dispersity weight product
292    )
293{
294#if defined(USE_GPU)
295  // who we are and what element we are working with
296  #if defined(USE_OPENCL)
297  const int q_index = get_global_id(0);
298  #else // USE_CUDA
299  const int q_index = threadIdx.x + blockIdx.x * blockDim.x;
300  #endif
301  if (q_index >= nq) return;
302#else
303  // Define q_index here so that debugging statements can be written to work
304  // for both OpenCL and DLL using:
305  //    if (q_index == 0) {printf(...);}
306  int q_index = 0;
307#endif
308
309  // ** Fill in the local values table **
310  // Storage for the current parameter values.
311  // These will be updated as we walk the dispersity mesh.
312  ParameterBlock local_values;
313  //   values[0] is scale
314  //   values[1] is background
315  #ifdef USE_OPENMP
316  #pragma omp parallel for
317  #endif
318  for (int i=0; i < NUM_PARS; i++) {
319    local_values.vector[i] = values[2+i];
320    //if (q_index==0) printf("p%d = %g\n",i, local_values.vector[i]);
321  }
322  //if (q_index==0) printf("NUM_VALUES:%d  NUM_PARS:%d  MAX_PD:%d\n", NUM_VALUES, NUM_PARS, MAX_PD);
323  //if (q_index==0) printf("start:%d  stop:%d\n", pd_start, pd_stop);
324
325  // ** Precompute magnatism values **
326#if defined(MAGNETIC) && NUM_MAGNETIC>0
327  // Location of the sld parameters in the parameter vector.
328  // These parameters are updated with the effective sld due to magnetism.
329  const int32_t slds[] = { MAGNETIC_PARS };
330
331  // Interpret polarization cross section.
332  //     up_frac_i = values[NUM_PARS+2];
333  //     up_frac_f = values[NUM_PARS+3];
334  //     up_angle = values[NUM_PARS+4];
335  // TODO: could precompute more magnetism parameters before calling the kernel.
336  double xs_weights[8];  // uu, ud real, du real, dd, ud imag, du imag, fill, fill
337  double cos_mspin, sin_mspin;
338  set_spin_weights(values[NUM_PARS+2], values[NUM_PARS+3], xs_weights);
339  SINCOS(-values[NUM_PARS+4]*M_PI_180, sin_mspin, cos_mspin);
340#endif // MAGNETIC
341
342  // ** Fill in the initial results **
343  // If pd_start is zero that means that we are starting a new calculation,
344  // and must initialize the result to zero.  Otherwise, we are restarting
345  // the calculation from somewhere in the middle of the dispersity mesh,
346  // and we update the value rather than reset it. Similarly for the
347  // normalization factor, which is stored as the final value in the
348  // results vector (one past the number of q values).
349  //
350  // The code differs slightly between opencl and dll since opencl is only
351  // seeing one q value (stored in the variable "this_result") while the dll
352  // version must loop over all q.
353  #if defined(USE_GPU)
354    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
355    double this_result = (pd_start == 0 ? 0.0 : result[q_index]);
356  #else // !USE_GPU
357    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
358    if (pd_start == 0) {
359      #ifdef USE_OPENMP
360      #pragma omp parallel for
361      #endif
362      for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0;
363    }
364    //if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]);
365#endif // !USE_GPU
366
367
368// ====== macros to set up the parts of the loop =======
369/*
370Based on the level of the loop, uses C preprocessor magic to construct
371level-specific looping variables, including these from loop level 3:
372
373  int n3 : length of loop for mesh level 3
374  int i3 : current position in the loop for level 3, which is calculated
375       from a combination of pd_start, pd_stride[3] and pd_length[3].
376  int p3 : is the index into the parameter table for mesh level 3
377  double v3[] : pointer into dispersity array to values for loop 3
378  double w3[] : pointer into dispersity array to weights for loop 3
379  double weight3 : the product of weights from levels 3 and up, computed
380       as weight5*weight4*w3[i3].  Note that we need an outermost
381       value weight5 set to 1.0 for this to work properly.
382
383After expansion, the loop struction will look like the following:
384
385  // --- PD_INIT(4) ---
386  const int n4 = pd_length[4];
387  const int p4 = pd_par[4];
388  global_var const double *v4 = pd_value + pd_offset[4];
389  global_var const double *w4 = pd_weight + pd_offset[4];
390  int i4 = (pd_start/pd_stride[4])%n4;  // position in level 4 at pd_start
391
392  // --- PD_INIT(3) ---
393  const int n3 = pd_length[3];
394  ...
395  int i3 = (pd_start/pd_stride[3])%n3;  // position in level 3 at pd_start
396
397  PD_INIT(2)
398  PD_INIT(1)
399  PD_INIT(0)
400
401  // --- PD_OUTERMOST_WEIGHT(5) ---
402  const double weight5 = 1.0;
403
404  // --- PD_OPEN(4,5) ---
405  while (i4 < n4) {
406    parameter[p4] = v4[i4];  // set the value for pd parameter 4 at this mesh point
407    const double weight4 = w4[i4] * weight5;
408
409    // from PD_OPEN(3,4)
410    while (i3 < n3) {
411      parameter[p3] = v3[i3];  // set the value for pd parameter 3 at this mesh point
412      const double weight3 = w3[i3] * weight4;
413
414      PD_OPEN(3,2)
415      PD_OPEN(2,1)
416      PD_OPEN(0,1)
417
418      // ... main loop body ...
419      APPLY_PROJECTION    // convert jitter values to spherical coords
420      BUILD_ROTATION      // construct the rotation matrix qxy => qabc
421      for each q
422          FETCH_Q         // set qx,qy from the q input vector
423          APPLY_ROTATION  // convert qx,qy to qa,qb,qc
424          CALL_KERNEL     // scattering = Iqxy(qa, qb, qc, p1, p2, ...)
425
426      ++step;  // increment counter representing position in dispersity mesh
427
428      PD_CLOSE(0)
429      PD_CLOSE(1)
430      PD_CLOSE(2)
431
432      // --- PD_CLOSE(3) ---
433      if (step >= pd_stop) break;
434      ++i3;
435    }
436    i3 = 0; // reset loop counter for next round through the loop
437
438    // --- PD_CLOSE(4) ---
439    if (step >= pd_stop) break;
440    ++i4;
441  }
442  i4 = 0; // reset loop counter even though no more rounds through the loop
443
444*/
445
446
447// ** prepare inner loops **
448
449// Depending on the shape type (radial, axial, triaxial), the variables
450// and calling parameters in the loop body will be slightly different.
451// Macros capture the differences in one spot so the rest of the code
452// is easier to read. The code below both declares variables for the
453// inner loop and defines the macros that use them.
454
455#if defined(CALL_IQ)
456  // unoriented 1D
457  double qk;
458  #define FETCH_Q() do { qk = q[q_index]; } while (0)
459  #define BUILD_ROTATION() do {} while(0)
460  #define APPLY_ROTATION() do {} while(0)
461  #define CALL_KERNEL() CALL_IQ(qk, local_values.table)
462
463#elif defined(CALL_IQ_A)
464  // unoriented 2D
465  double qx, qy;
466  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
467  #define BUILD_ROTATION() do {} while(0)
468  #define APPLY_ROTATION() do {} while(0)
469  #define CALL_KERNEL() CALL_IQ_A(sqrt(qx*qx+qy*qy), local_values.table)
470
471#elif defined(CALL_IQ_AC)
472  // oriented symmetric 2D
473  double qx, qy;
474  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
475  double qa, qc;
476  QACRotation rotation;
477  // theta, phi, dtheta, dphi are defined below in projection to avoid repeated code.
478  #define BUILD_ROTATION() qac_rotation(&rotation, theta, phi, dtheta, dphi);
479  #define APPLY_ROTATION() qac_apply(&rotation, qx, qy, &qa, &qc)
480  #define CALL_KERNEL() CALL_IQ_AC(qa, qc, local_values.table)
481
482#elif defined(CALL_IQ_ABC)
483  // oriented asymmetric 2D
484  double qx, qy;
485  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
486  double qa, qb, qc;
487  QABCRotation rotation;
488  // theta, phi, dtheta, dphi are defined below in projection to avoid repeated code.
489  // psi and dpsi are only for IQ_ABC, so they are processed here.
490  const double psi = values[details->theta_par+4];
491  local_values.table.psi = 0.;
492  #define BUILD_ROTATION() qabc_rotation(&rotation, theta, phi, psi, dtheta, dphi, local_values.table.psi)
493  #define APPLY_ROTATION() qabc_apply(&rotation, qx, qy, &qa, &qb, &qc)
494  #define CALL_KERNEL() CALL_IQ_ABC(qa, qb, qc, local_values.table)
495#elif defined(CALL_IQ_XY)
496  // direct call to qx,qy calculator
497  double qx, qy;
498  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
499  #define BUILD_ROTATION() do {} while(0)
500  #define APPLY_ROTATION() do {} while(0)
501  #define CALL_KERNEL() CALL_IQ_XY(qx, qy, local_values.table)
502#endif
503
504// Define APPLY_PROJECTION depending on model symmetries. We do this outside
505// the previous if block so that we don't need to repeat the identical
506// logic in the IQ_AC and IQ_ABC branches.  This will become more important
507// if we implement more projections, or more complicated projections.
508#if defined(CALL_IQ) || defined(CALL_IQ_A)  // no orientation
509  #define APPLY_PROJECTION() const double weight=weight0
510#elif defined(CALL_IQ_XY) // pass orientation to the model
511  // CRUFT: support oriented model which define Iqxy rather than Iqac or Iqabc
512  // Need to plug the values for the orientation angles back into parameter
513  // table in case they were overridden by the orientation offset.  This
514  // means that orientation dispersity will not work for these models, but
515  // it was broken anyway, so no matter.  Still want to provide Iqxy in case
516  // the user model wants full control of orientation/magnetism.
517  #if defined(HAVE_PSI)
518    const double theta = values[details->theta_par+2];
519    const double phi = values[details->theta_par+3];
520    const double psi = values[details->theta_par+4];
521    double weight;
522    #define APPLY_PROJECTION() do { \
523      local_values.table.theta = theta; \
524      local_values.table.phi = phi; \
525      local_values.table.psi = psi; \
526      weight=weight0; \
527    } while (0)
528  #elif defined(HAVE_THETA)
529    const double theta = values[details->theta_par+2];
530    const double phi = values[details->theta_par+3];
531    double weight;
532    #define APPLY_PROJECTION() do { \
533      local_values.table.theta = theta; \
534      local_values.table.phi = phi; \
535      weight=weight0; \
536    } while (0)
537  #else
538    #define APPLY_PROJECTION() const double weight=weight0
539  #endif
540#else // apply jitter and view before calling the model
541  // Grab the "view" angles (theta, phi, psi) from the initial parameter table.
542  const double theta = values[details->theta_par+2];
543  const double phi = values[details->theta_par+3];
544  // Make sure jitter angle defaults to zero if there is no jitter distribution
545  local_values.table.theta = 0.;
546  local_values.table.phi = 0.;
547  // The "jitter" angles (dtheta, dphi, dpsi) are stored with the
548  // dispersity values and copied to the local parameter table as
549  // we go through the mesh.
550  double dtheta, dphi, weight;
551  #if PROJECTION == 1 // equirectangular
552    #define APPLY_PROJECTION() do { \
553      dtheta = local_values.table.theta; \
554      dphi = local_values.table.phi; \
555      weight = fabs(cos(dtheta*M_PI_180)) * weight0; \
556    } while (0)
557  #elif PROJECTION == 2 // sinusoidal
558    #define APPLY_PROJECTION() do { \
559      dtheta = local_values.table.theta; \
560      dphi = local_values.table.phi; \
561      weight = weight0; \
562      if (dtheta != 90.0) dphi /= cos(dtheta*M_PI_180); \
563      else if (dphi != 0.0) weight = 0.; \
564      if (fabs(dphi) >= 180.) weight = 0.; \
565    } while (0)
566  #endif
567#endif // done defining APPLY_PROJECTION
568
569// ** define looping macros **
570
571// Define looping variables
572#define PD_INIT(_LOOP) \
573  const int n##_LOOP = details->pd_length[_LOOP]; \
574  const int p##_LOOP = details->pd_par[_LOOP]; \
575  global_var const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \
576  global_var const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \
577  int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP;
578
579// Jump into the middle of the dispersity loop
580#define PD_OPEN(_LOOP,_OUTER) \
581  while (i##_LOOP < n##_LOOP) { \
582    local_values.vector[p##_LOOP] = v##_LOOP[i##_LOOP]; \
583    const double weight##_LOOP = w##_LOOP[i##_LOOP] * weight##_OUTER;
584
585// create the variable "weight#=1.0" where # is the outermost level+1 (=MAX_PD).
586#define _PD_OUTERMOST_WEIGHT(_n) const double weight##_n = 1.0;
587#define PD_OUTERMOST_WEIGHT(_n) _PD_OUTERMOST_WEIGHT(_n)
588
589// Close out the loop
590#define PD_CLOSE(_LOOP) \
591    if (step >= pd_stop) break; \
592    ++i##_LOOP; \
593  } \
594  i##_LOOP = 0;
595
596// ====== construct the loops =======
597
598// Pointers to the start of the dispersity and weight vectors, if needed.
599#if MAX_PD>0
600  global_var const double *pd_value = values + NUM_VALUES;
601  global_var const double *pd_weight = pd_value + details->num_weights;
602#endif
603
604// The variable "step" is the current position in the dispersity loop.
605// It will be incremented each time a new point in the mesh is accumulated,
606// and used to test whether we have reached pd_stop.
607int step = pd_start;
608
609// *** define loops for each of 0, 1, 2, ..., modelinfo.MAX_PD-1 ***
610
611// define looping variables
612#if MAX_PD>4
613  PD_INIT(4)
614#endif
615#if MAX_PD>3
616  PD_INIT(3)
617#endif
618#if MAX_PD>2
619  PD_INIT(2)
620#endif
621#if MAX_PD>1
622  PD_INIT(1)
623#endif
624#if MAX_PD>0
625  PD_INIT(0)
626#endif
627
628// open nested loops
629PD_OUTERMOST_WEIGHT(MAX_PD)
630#if MAX_PD>4
631  PD_OPEN(4,5)
632#endif
633#if MAX_PD>3
634  PD_OPEN(3,4)
635#endif
636#if MAX_PD>2
637  PD_OPEN(2,3)
638#endif
639#if MAX_PD>1
640  PD_OPEN(1,2)
641#endif
642#if MAX_PD>0
643  PD_OPEN(0,1)
644#endif
645
646//if (q_index==0) {printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n");}
647
648  // ====== loop body =======
649  #ifdef INVALID
650  if (!INVALID(local_values.table))
651  #endif
652  {
653     APPLY_PROJECTION();
654
655    // Accumulate I(q)
656    // Note: weight==0 must always be excluded
657    if (weight > cutoff) {
658      pd_norm += weight * CALL_VOLUME(local_values.table);
659      BUILD_ROTATION();
660
661#if !defined(USE_GPU)
662      // DLL needs to explicitly loop over the q values.
663      #ifdef USE_OPENMP
664      #pragma omp parallel for
665      #endif
666      for (q_index=0; q_index<nq; q_index++)
667#endif // !USE_GPU
668      {
669
670        FETCH_Q();
671        APPLY_ROTATION();
672
673        // ======= COMPUTE SCATTERING ==========
674        #if defined(MAGNETIC) && NUM_MAGNETIC > 0
675          // Compute the scattering from the magnetic cross sections.
676          double scattering = 0.0;
677          const double qsq = qx*qx + qy*qy;
678          if (qsq > 1.e-16) {
679            // TODO: what is the magnetic scattering at q=0
680            const double px = (qy*cos_mspin + qx*sin_mspin)/qsq;
681            const double py = (qy*sin_mspin - qx*cos_mspin)/qsq;
682
683            // loop over uu, ud real, du real, dd, ud imag, du imag
684            for (unsigned int xs=0; xs<6; xs++) {
685              const double xs_weight = xs_weights[xs];
686              if (xs_weight > 1.e-8) {
687                // Since the cross section weight is significant, set the slds
688                // to the effective slds for this cross section, call the
689                // kernel, and add according to weight.
690                for (int sk=0; sk<NUM_MAGNETIC; sk++) {
691                  const int32_t mag_index = NUM_PARS+5 + 3*sk;
692                  const int32_t sld_index = slds[sk];
693                  const double mx = values[mag_index];
694                  const double my = values[mag_index+1];
695                  const double mz = values[mag_index+2];
696                  local_values.vector[sld_index] =
697                    mag_sld(xs, qx, qy, px, py, values[sld_index+2], mx, my, mz);
698//if (q_index==0) printf("%d: (qx,qy)=(%g,%g) xs=%d sld%d=%g p=(%g,%g) m=(%g,%g,%g)\n",
699//  q_index, qx, qy, xs, sk, local_values.vector[sld_index], px, py, mx, my, mz);
700                }
701                scattering += xs_weight * CALL_KERNEL();
702              }
703            }
704          }
705        #else  // !MAGNETIC
706          const double scattering = CALL_KERNEL();
707        #endif // !MAGNETIC
708//printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight0);
709
710        #if defined(USE_GPU)
711          this_result += weight * scattering;
712        #else // !USE_GPU
713          result[q_index] += weight * scattering;
714        #endif // !USE_GPU
715      }
716    }
717  }
718
719// close nested loops
720++step;
721#if MAX_PD>0
722  PD_CLOSE(0)
723#endif
724#if MAX_PD>1
725  PD_CLOSE(1)
726#endif
727#if MAX_PD>2
728  PD_CLOSE(2)
729#endif
730#if MAX_PD>3
731  PD_CLOSE(3)
732#endif
733#if MAX_PD>4
734  PD_CLOSE(4)
735#endif
736
737// Remember the current result and the updated norm.
738#if defined(USE_GPU)
739  result[q_index] = this_result;
740  if (q_index == 0) result[nq] = pd_norm;
741//if (q_index == 0) printf("res: %g/%g\n", result[0], pd_norm);
742#else // !USE_GPU
743  result[nq] = pd_norm;
744//printf("res: %g/%g\n", result[0], pd_norm);
745#endif // !USE_GPU
746
747// ** clear the macros in preparation for the next kernel **
748#undef PD_INIT
749#undef PD_OPEN
750#undef PD_CLOSE
751#undef FETCH_Q
752#undef APPLY_PROJECTION
753#undef BUILD_ROTATION
754#undef APPLY_ROTATION
755#undef CALL_KERNEL
756}
Note: See TracBrowser for help on using the repository browser.