source: sasmodels/sasmodels/kernel_iq.c @ 74e9b5f

core_shell_microgelsmagnetic_modelticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since 74e9b5f was 74e9b5f, checked in by pkienzle, 6 years ago

autotag functions as device functions for cuda. Refs #1076.

  • Property mode set to 100644
File size: 26.3 KB
Line 
1/*
2    ##########################################################
3    #                                                        #
4    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
5    #   !!                                              !!   #
6    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
7    #   !!                                              !!   #
8    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
9    #                                                        #
10    ##########################################################
11*/
12
13// NOTE: the following macros are defined in generate.py:
14//
15//  MAX_PD : the maximum number of dispersity loops allowed for this model,
16//      which will be at most modelinfo.MAX_PD.
17//  NUM_PARS : the number of parameters in the parameter table
18//  NUM_VALUES : the number of values to skip at the start of the
19//      values array before you get to the dispersity values.
20//  PARAMETER_TABLE : list of parameter declarations used to create the
21//      ParameterTable type.
22//  KERNEL_NAME : model_Iq, model_Iqxy or model_Imagnetic.  This code is
23//      included three times, once for each kernel type.
24//  MAGNETIC : defined when the magnetic kernel is being instantiated
25//  NUM_MAGNETIC : the number of magnetic parameters
26//  MAGNETIC_PARS : a comma-separated list of indices to the sld
27//      parameters in the parameter table.
28//  CALL_VOLUME(table) : call the form volume function
29//  CALL_IQ(q, table) : call the Iq function for 1D calcs.
30//  CALL_IQ_A(q, table) : call the Iq function with |q| for 2D data.
31//  CALL_IQ_AC(qa, qc, table) : call the Iqxy function for symmetric shapes
32//  CALL_IQ_ABC(qa, qc, table) : call the Iqxy function for asymmetric shapes
33//  CALL_IQ_XY(qx, qy, table) : call the Iqxy function for arbitrary models
34//  INVALID(table) : test if the current point is feesible to calculate.  This
35//      will be defined in the kernel definition file.
36//  PROJECTION : equirectangular=1, sinusoidal=2
37//      see explore/jitter.py for definitions.
38
39#ifndef _PAR_BLOCK_ // protected block so we can include this code twice.
40#define _PAR_BLOCK_
41
42typedef struct {
43#if MAX_PD > 0
44    int32_t pd_par[MAX_PD];     // id of the nth dispersity variable
45    int32_t pd_length[MAX_PD];  // length of the nth dispersity weight vector
46    int32_t pd_offset[MAX_PD];  // offset of pd weights in the value & weight vector
47    int32_t pd_stride[MAX_PD];  // stride to move to the next index at this level
48#endif // MAX_PD > 0
49    int32_t num_eval;           // total number of voxels in hypercube
50    int32_t num_weights;        // total length of the weights vector
51    int32_t num_active;         // number of non-trivial pd loops
52    int32_t theta_par;          // id of first orientation variable
53} ProblemDetails;
54
55// Intel HD 4000 needs private arrays to be a multiple of 4 long
56typedef struct {
57    PARAMETER_TABLE
58} ParameterTable;
59typedef union {
60    ParameterTable table;
61    double vector[4*((NUM_PARS+3)/4)];
62} ParameterBlock;
63#endif // _PAR_BLOCK_
64
65#if defined(MAGNETIC) && NUM_MAGNETIC > 0
66// ===== Helper functions for magnetism =====
67
68// Return value restricted between low and high
69static double clip(double value, double low, double high)
70{
71  return (value < low ? low : (value > high ? high : value));
72}
73
74// Compute spin cross sections given in_spin and out_spin
75// To convert spin cross sections to sld b:
76//     uu * (sld - m_sigma_x);
77//     dd * (sld + m_sigma_x);
78//     ud * (m_sigma_y - 1j*m_sigma_z);
79//     du * (m_sigma_y + 1j*m_sigma_z);
80// weights for spin crosssections: dd du real, ud real, uu, du imag, ud imag
81static void set_spin_weights(double in_spin, double out_spin, double weight[6])
82{
83  in_spin = clip(in_spin, 0.0, 1.0);
84  out_spin = clip(out_spin, 0.0, 1.0);
85  // Previous version of this function took the square root of the weights,
86  // under the assumption that
87  //
88  //     w*I(q, rho1, rho2, ...) = I(q, sqrt(w)*rho1, sqrt(w)*rho2, ...)
89  //
90  // However, since the weights are applied to the final intensity and
91  // are not interned inside the I(q) function, we want the full
92  // weight and not the square root.  Any function using
93  // set_spin_weights as part of calculating an amplitude will need to
94  // manually take that square root, but there is currently no such
95  // function.
96  weight[0] = (1.0-in_spin) * (1.0-out_spin); // dd
97  weight[1] = (1.0-in_spin) * out_spin;       // du
98  weight[2] = in_spin * (1.0-out_spin);       // ud
99  weight[3] = in_spin * out_spin;             // uu
100  weight[4] = weight[1]; // du.imag
101  weight[5] = weight[2]; // ud.imag
102}
103
104// Compute the magnetic sld
105static double mag_sld(
106  const unsigned int xs, // 0=dd, 1=du.real, 2=ud.real, 3=uu, 4=du.imag, 5=ud.imag
107  const double qx, const double qy,
108  const double px, const double py,
109  const double sld,
110  const double mx, const double my, const double mz
111)
112{
113  if (xs < 4) {
114    const double perp = qy*mx - qx*my;
115    switch (xs) {
116      default: // keep compiler happy; condition ensures xs in [0,1,2,3]
117      case 0: // uu => sld - D M_perpx
118          return sld - px*perp;
119      case 1: // ud.real => -D M_perpy
120          return py*perp;
121      case 2: // du.real => -D M_perpy
122          return py*perp;
123      case 3: // dd => sld + D M_perpx
124          return sld + px*perp;
125    }
126  } else {
127    if (xs== 4) {
128      return -mz;  // du.imag => +D M_perpz
129    } else { // index == 5
130      return +mz;  // ud.imag => -D M_perpz
131    }
132  }
133}
134
135
136#endif
137
138// ===== Helper functions for orientation and jitter =====
139
140// To change the definition of the angles, run explore/angles.py, which
141// uses sympy to generate the equations.
142
143#if !defined(_QAC_SECTION) && defined(CALL_IQ_AC)
144#define _QAC_SECTION
145
146typedef struct {
147    double R31, R32;
148} QACRotation;
149
150// Fill in the rotation matrix R from the view angles (theta, phi) and the
151// jitter angles (dtheta, dphi).  This matrix can be applied to all of the
152// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qc]'
153static void
154qac_rotation(
155    QACRotation *rotation,
156    double theta, double phi,
157    double dtheta, double dphi)
158{
159    double sin_theta, cos_theta;
160    double sin_phi, cos_phi;
161
162    // reverse view matrix
163    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
164    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
165    const double V11 = cos_phi*cos_theta;
166    const double V12 = sin_phi*cos_theta;
167    const double V21 = -sin_phi;
168    const double V22 = cos_phi;
169    const double V31 = sin_theta*cos_phi;
170    const double V32 = sin_phi*sin_theta;
171
172    // reverse jitter matrix
173    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
174    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
175    const double J31 = sin_theta;
176    const double J32 = -sin_phi*cos_theta;
177    const double J33 = cos_phi*cos_theta;
178
179    // reverse matrix
180    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
181    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
182}
183
184// Apply the rotation matrix returned from qac_rotation to the point (qx,qy),
185// returning R*[qx,qy]' = [qa,qc]'
186static void
187qac_apply(
188    QACRotation *rotation,
189    double qx, double qy,
190    double *qab_out, double *qc_out)
191{
192    // Indirect calculation of qab, from qab^2 = |q|^2 - qc^2
193    const double dqc = rotation->R31*qx + rotation->R32*qy;
194    const double dqab_sq = -dqc*dqc + qx*qx + qy*qy;
195    //*qab_out = sqrt(fabs(dqab_sq));
196    *qab_out = dqab_sq > 0.0 ? sqrt(dqab_sq) : 0.0;
197    *qc_out = dqc;
198}
199#endif // _QAC_SECTION
200
201#if !defined(_QABC_SECTION) && defined(CALL_IQ_ABC)
202#define _QABC_SECTION
203
204typedef struct {
205    double R11, R12;
206    double R21, R22;
207    double R31, R32;
208} QABCRotation;
209
210// Fill in the rotation matrix R from the view angles (theta, phi, psi) and the
211// jitter angles (dtheta, dphi, dpsi).  This matrix can be applied to all of the
212// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qb,qc]'
213static void
214qabc_rotation(
215    QABCRotation *rotation,
216    double theta, double phi, double psi,
217    double dtheta, double dphi, double dpsi)
218{
219    double sin_theta, cos_theta;
220    double sin_phi, cos_phi;
221    double sin_psi, cos_psi;
222
223    // reverse view matrix
224    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
225    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
226    SINCOS(psi*M_PI_180, sin_psi, cos_psi);
227    const double V11 = -sin_phi*sin_psi + cos_phi*cos_psi*cos_theta;
228    const double V12 = sin_phi*cos_psi*cos_theta + sin_psi*cos_phi;
229    const double V21 = -sin_phi*cos_psi - sin_psi*cos_phi*cos_theta;
230    const double V22 = -sin_phi*sin_psi*cos_theta + cos_phi*cos_psi;
231    const double V31 = sin_theta*cos_phi;
232    const double V32 = sin_phi*sin_theta;
233
234    // reverse jitter matrix
235    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
236    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
237    SINCOS(dpsi*M_PI_180, sin_psi, cos_psi);
238    const double J11 = cos_psi*cos_theta;
239    const double J12 = sin_phi*sin_theta*cos_psi + sin_psi*cos_phi;
240    const double J13 = sin_phi*sin_psi - sin_theta*cos_phi*cos_psi;
241    const double J21 = -sin_psi*cos_theta;
242    const double J22 = -sin_phi*sin_psi*sin_theta + cos_phi*cos_psi;
243    const double J23 = sin_phi*cos_psi + sin_psi*sin_theta*cos_phi;
244    const double J31 = sin_theta;
245    const double J32 = -sin_phi*cos_theta;
246    const double J33 = cos_phi*cos_theta;
247
248    // reverse matrix
249    rotation->R11 = J11*V11 + J12*V21 + J13*V31;
250    rotation->R12 = J11*V12 + J12*V22 + J13*V32;
251    rotation->R21 = J21*V11 + J22*V21 + J23*V31;
252    rotation->R22 = J21*V12 + J22*V22 + J23*V32;
253    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
254    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
255}
256
257// Apply the rotation matrix returned from qabc_rotation to the point (qx,qy),
258// returning R*[qx,qy]' = [qa,qb,qc]'
259static void
260qabc_apply(
261    QABCRotation *rotation,
262    double qx, double qy,
263    double *qa_out, double *qb_out, double *qc_out)
264{
265    *qa_out = rotation->R11*qx + rotation->R12*qy;
266    *qb_out = rotation->R21*qx + rotation->R22*qy;
267    *qc_out = rotation->R31*qx + rotation->R32*qy;
268}
269
270#endif // _QABC_SECTION
271
272
273// ==================== KERNEL CODE ========================
274
275kernel
276void KERNEL_NAME(
277    int32_t nq,                 // number of q values
278    const int32_t pd_start,     // where we are in the dispersity loop
279    const int32_t pd_stop,      // where we are stopping in the dispersity loop
280    pglobal const ProblemDetails *details,
281    pglobal const double *values,
282    pglobal const double *q, // nq q values, with padding to boundary
283    pglobal double *result,  // nq+1 return values, again with padding
284    const double cutoff     // cutoff in the dispersity weight product
285    )
286{
287#if defined(USE_GPU)
288  // who we are and what element we are working with
289  #if defined(USE_OPENCL)
290  const int q_index = get_global_id(0);
291  #else // USE_CUDA
292  const int q_index = threadIdx.x + blockIdx.x * blockDim.x;
293  #endif
294  if (q_index >= nq) return;
295#else
296  // Define q_index here so that debugging statements can be written to work
297  // for both OpenCL and DLL using:
298  //    if (q_index == 0) {printf(...);}
299  int q_index = 0;
300#endif
301
302  // ** Fill in the local values table **
303  // Storage for the current parameter values.
304  // These will be updated as we walk the dispersity mesh.
305  ParameterBlock local_values;
306  //   values[0] is scale
307  //   values[1] is background
308  #ifdef USE_OPENMP
309  #pragma omp parallel for
310  #endif
311  for (int i=0; i < NUM_PARS; i++) {
312    local_values.vector[i] = values[2+i];
313    //if (q_index==0) printf("p%d = %g\n",i, local_values.vector[i]);
314  }
315  //if (q_index==0) printf("NUM_VALUES:%d  NUM_PARS:%d  MAX_PD:%d\n", NUM_VALUES, NUM_PARS, MAX_PD);
316  //if (q_index==0) printf("start:%d  stop:%d\n", pd_start, pd_stop);
317
318  // ** Precompute magnatism values **
319#if defined(MAGNETIC) && NUM_MAGNETIC>0
320  // Location of the sld parameters in the parameter vector.
321  // These parameters are updated with the effective sld due to magnetism.
322  const int32_t slds[] = { MAGNETIC_PARS };
323
324  // Interpret polarization cross section.
325  //     up_frac_i = values[NUM_PARS+2];
326  //     up_frac_f = values[NUM_PARS+3];
327  //     up_angle = values[NUM_PARS+4];
328  // TODO: could precompute more magnetism parameters before calling the kernel.
329  double xs_weights[8];  // uu, ud real, du real, dd, ud imag, du imag, fill, fill
330  double cos_mspin, sin_mspin;
331  set_spin_weights(values[NUM_PARS+2], values[NUM_PARS+3], xs_weights);
332  SINCOS(-values[NUM_PARS+4]*M_PI_180, sin_mspin, cos_mspin);
333#endif // MAGNETIC
334
335  // ** Fill in the initial results **
336  // If pd_start is zero that means that we are starting a new calculation,
337  // and must initialize the result to zero.  Otherwise, we are restarting
338  // the calculation from somewhere in the middle of the dispersity mesh,
339  // and we update the value rather than reset it. Similarly for the
340  // normalization factor, which is stored as the final value in the
341  // results vector (one past the number of q values).
342  //
343  // The code differs slightly between opencl and dll since opencl is only
344  // seeing one q value (stored in the variable "this_result") while the dll
345  // version must loop over all q.
346  #if defined(USE_GPU)
347    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
348    double this_result = (pd_start == 0 ? 0.0 : result[q_index]);
349  #else // !USE_GPU
350    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
351    if (pd_start == 0) {
352      #ifdef USE_OPENMP
353      #pragma omp parallel for
354      #endif
355      for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0;
356    }
357    //if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]);
358#endif // !USE_GPU
359
360
361// ====== macros to set up the parts of the loop =======
362/*
363Based on the level of the loop, uses C preprocessor magic to construct
364level-specific looping variables, including these from loop level 3:
365
366  int n3 : length of loop for mesh level 3
367  int i3 : current position in the loop for level 3, which is calculated
368       from a combination of pd_start, pd_stride[3] and pd_length[3].
369  int p3 : is the index into the parameter table for mesh level 3
370  double v3[] : pointer into dispersity array to values for loop 3
371  double w3[] : pointer into dispersity array to weights for loop 3
372  double weight3 : the product of weights from levels 3 and up, computed
373       as weight5*weight4*w3[i3].  Note that we need an outermost
374       value weight5 set to 1.0 for this to work properly.
375
376After expansion, the loop struction will look like the following:
377
378  // --- PD_INIT(4) ---
379  const int n4 = pd_length[4];
380  const int p4 = pd_par[4];
381  pglobal const double *v4 = pd_value + pd_offset[4];
382  pglobal const double *w4 = pd_weight + pd_offset[4];
383  int i4 = (pd_start/pd_stride[4])%n4;  // position in level 4 at pd_start
384
385  // --- PD_INIT(3) ---
386  const int n3 = pd_length[3];
387  ...
388  int i3 = (pd_start/pd_stride[3])%n3;  // position in level 3 at pd_start
389
390  PD_INIT(2)
391  PD_INIT(1)
392  PD_INIT(0)
393
394  // --- PD_OUTERMOST_WEIGHT(5) ---
395  const double weight5 = 1.0;
396
397  // --- PD_OPEN(4,5) ---
398  while (i4 < n4) {
399    parameter[p4] = v4[i4];  // set the value for pd parameter 4 at this mesh point
400    const double weight4 = w4[i4] * weight5;
401
402    // from PD_OPEN(3,4)
403    while (i3 < n3) {
404      parameter[p3] = v3[i3];  // set the value for pd parameter 3 at this mesh point
405      const double weight3 = w3[i3] * weight4;
406
407      PD_OPEN(3,2)
408      PD_OPEN(2,1)
409      PD_OPEN(0,1)
410
411      // ... main loop body ...
412      APPLY_PROJECTION    // convert jitter values to spherical coords
413      BUILD_ROTATION      // construct the rotation matrix qxy => qabc
414      for each q
415          FETCH_Q         // set qx,qy from the q input vector
416          APPLY_ROTATION  // convert qx,qy to qa,qb,qc
417          CALL_KERNEL     // scattering = Iqxy(qa, qb, qc, p1, p2, ...)
418
419      ++step;  // increment counter representing position in dispersity mesh
420
421      PD_CLOSE(0)
422      PD_CLOSE(1)
423      PD_CLOSE(2)
424
425      // --- PD_CLOSE(3) ---
426      if (step >= pd_stop) break;
427      ++i3;
428    }
429    i3 = 0; // reset loop counter for next round through the loop
430
431    // --- PD_CLOSE(4) ---
432    if (step >= pd_stop) break;
433    ++i4;
434  }
435  i4 = 0; // reset loop counter even though no more rounds through the loop
436
437*/
438
439
440// ** prepare inner loops **
441
442// Depending on the shape type (radial, axial, triaxial), the variables
443// and calling parameters in the loop body will be slightly different.
444// Macros capture the differences in one spot so the rest of the code
445// is easier to read. The code below both declares variables for the
446// inner loop and defines the macros that use them.
447
448#if defined(CALL_IQ)
449  // unoriented 1D
450  double qk;
451  #define FETCH_Q() do { qk = q[q_index]; } while (0)
452  #define BUILD_ROTATION() do {} while(0)
453  #define APPLY_ROTATION() do {} while(0)
454  #define CALL_KERNEL() CALL_IQ(qk, local_values.table)
455
456#elif defined(CALL_IQ_A)
457  // unoriented 2D
458  double qx, qy;
459  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
460  #define BUILD_ROTATION() do {} while(0)
461  #define APPLY_ROTATION() do {} while(0)
462  #define CALL_KERNEL() CALL_IQ_A(sqrt(qx*qx+qy*qy), local_values.table)
463
464#elif defined(CALL_IQ_AC)
465  // oriented symmetric 2D
466  double qx, qy;
467  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
468  double qa, qc;
469  QACRotation rotation;
470  // theta, phi, dtheta, dphi are defined below in projection to avoid repeated code.
471  #define BUILD_ROTATION() qac_rotation(&rotation, theta, phi, dtheta, dphi);
472  #define APPLY_ROTATION() qac_apply(&rotation, qx, qy, &qa, &qc)
473  #define CALL_KERNEL() CALL_IQ_AC(qa, qc, local_values.table)
474
475#elif defined(CALL_IQ_ABC)
476  // oriented asymmetric 2D
477  double qx, qy;
478  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
479  double qa, qb, qc;
480  QABCRotation rotation;
481  // theta, phi, dtheta, dphi are defined below in projection to avoid repeated code.
482  // psi and dpsi are only for IQ_ABC, so they are processed here.
483  const double psi = values[details->theta_par+4];
484  local_values.table.psi = 0.;
485  #define BUILD_ROTATION() qabc_rotation(&rotation, theta, phi, psi, dtheta, dphi, local_values.table.psi)
486  #define APPLY_ROTATION() qabc_apply(&rotation, qx, qy, &qa, &qb, &qc)
487  #define CALL_KERNEL() CALL_IQ_ABC(qa, qb, qc, local_values.table)
488#elif defined(CALL_IQ_XY)
489  // direct call to qx,qy calculator
490  double qx, qy;
491  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
492  #define BUILD_ROTATION() do {} while(0)
493  #define APPLY_ROTATION() do {} while(0)
494  #define CALL_KERNEL() CALL_IQ_XY(qx, qy, local_values.table)
495#endif
496
497// Define APPLY_PROJECTION depending on model symmetries. We do this outside
498// the previous if block so that we don't need to repeat the identical
499// logic in the IQ_AC and IQ_ABC branches.  This will become more important
500// if we implement more projections, or more complicated projections.
501#if defined(CALL_IQ) || defined(CALL_IQ_A)  // no orientation
502  #define APPLY_PROJECTION() const double weight=weight0
503#elif defined(CALL_IQ_XY) // pass orientation to the model
504  // CRUFT: support oriented model which define Iqxy rather than Iqac or Iqabc
505  // Need to plug the values for the orientation angles back into parameter
506  // table in case they were overridden by the orientation offset.  This
507  // means that orientation dispersity will not work for these models, but
508  // it was broken anyway, so no matter.  Still want to provide Iqxy in case
509  // the user model wants full control of orientation/magnetism.
510  #if defined(HAVE_PSI)
511    const double theta = values[details->theta_par+2];
512    const double phi = values[details->theta_par+3];
513    const double psi = values[details->theta_par+4];
514    double weight;
515    #define APPLY_PROJECTION() do { \
516      local_values.table.theta = theta; \
517      local_values.table.phi = phi; \
518      local_values.table.psi = psi; \
519      weight=weight0; \
520    } while (0)
521  #elif defined(HAVE_THETA)
522    const double theta = values[details->theta_par+2];
523    const double phi = values[details->theta_par+3];
524    double weight;
525    #define APPLY_PROJECTION() do { \
526      local_values.table.theta = theta; \
527      local_values.table.phi = phi; \
528      weight=weight0; \
529    } while (0)
530  #else
531    #define APPLY_PROJECTION() const double weight=weight0
532  #endif
533#else // apply jitter and view before calling the model
534  // Grab the "view" angles (theta, phi, psi) from the initial parameter table.
535  const double theta = values[details->theta_par+2];
536  const double phi = values[details->theta_par+3];
537  // Make sure jitter angle defaults to zero if there is no jitter distribution
538  local_values.table.theta = 0.;
539  local_values.table.phi = 0.;
540  // The "jitter" angles (dtheta, dphi, dpsi) are stored with the
541  // dispersity values and copied to the local parameter table as
542  // we go through the mesh.
543  double dtheta, dphi, weight;
544  #if PROJECTION == 1 // equirectangular
545    #define APPLY_PROJECTION() do { \
546      dtheta = local_values.table.theta; \
547      dphi = local_values.table.phi; \
548      weight = fabs(cos(dtheta*M_PI_180)) * weight0; \
549    } while (0)
550  #elif PROJECTION == 2 // sinusoidal
551    #define APPLY_PROJECTION() do { \
552      dtheta = local_values.table.theta; \
553      dphi = local_values.table.phi; \
554      weight = weight0; \
555      if (dtheta != 90.0) dphi /= cos(dtheta*M_PI_180); \
556      else if (dphi != 0.0) weight = 0.; \
557      if (fabs(dphi) >= 180.) weight = 0.; \
558    } while (0)
559  #endif
560#endif // done defining APPLY_PROJECTION
561
562// ** define looping macros **
563
564// Define looping variables
565#define PD_INIT(_LOOP) \
566  const int n##_LOOP = details->pd_length[_LOOP]; \
567  const int p##_LOOP = details->pd_par[_LOOP]; \
568  pglobal const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \
569  pglobal const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \
570  int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP;
571
572// Jump into the middle of the dispersity loop
573#define PD_OPEN(_LOOP,_OUTER) \
574  while (i##_LOOP < n##_LOOP) { \
575    local_values.vector[p##_LOOP] = v##_LOOP[i##_LOOP]; \
576    const double weight##_LOOP = w##_LOOP[i##_LOOP] * weight##_OUTER;
577
578// create the variable "weight#=1.0" where # is the outermost level+1 (=MAX_PD).
579#define _PD_OUTERMOST_WEIGHT(_n) const double weight##_n = 1.0;
580#define PD_OUTERMOST_WEIGHT(_n) _PD_OUTERMOST_WEIGHT(_n)
581
582// Close out the loop
583#define PD_CLOSE(_LOOP) \
584    if (step >= pd_stop) break; \
585    ++i##_LOOP; \
586  } \
587  i##_LOOP = 0;
588
589// ====== construct the loops =======
590
591// Pointers to the start of the dispersity and weight vectors, if needed.
592#if MAX_PD>0
593  pglobal const double *pd_value = values + NUM_VALUES;
594  pglobal const double *pd_weight = pd_value + details->num_weights;
595#endif
596
597// The variable "step" is the current position in the dispersity loop.
598// It will be incremented each time a new point in the mesh is accumulated,
599// and used to test whether we have reached pd_stop.
600int step = pd_start;
601
602// *** define loops for each of 0, 1, 2, ..., modelinfo.MAX_PD-1 ***
603
604// define looping variables
605#if MAX_PD>4
606  PD_INIT(4)
607#endif
608#if MAX_PD>3
609  PD_INIT(3)
610#endif
611#if MAX_PD>2
612  PD_INIT(2)
613#endif
614#if MAX_PD>1
615  PD_INIT(1)
616#endif
617#if MAX_PD>0
618  PD_INIT(0)
619#endif
620
621// open nested loops
622PD_OUTERMOST_WEIGHT(MAX_PD)
623#if MAX_PD>4
624  PD_OPEN(4,5)
625#endif
626#if MAX_PD>3
627  PD_OPEN(3,4)
628#endif
629#if MAX_PD>2
630  PD_OPEN(2,3)
631#endif
632#if MAX_PD>1
633  PD_OPEN(1,2)
634#endif
635#if MAX_PD>0
636  PD_OPEN(0,1)
637#endif
638
639//if (q_index==0) {printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n");}
640
641  // ====== loop body =======
642  #ifdef INVALID
643  if (!INVALID(local_values.table))
644  #endif
645  {
646     APPLY_PROJECTION();
647
648    // Accumulate I(q)
649    // Note: weight==0 must always be excluded
650    if (weight > cutoff) {
651      pd_norm += weight * CALL_VOLUME(local_values.table);
652      BUILD_ROTATION();
653
654#if !defined(USE_GPU)
655      // DLL needs to explicitly loop over the q values.
656      #ifdef USE_OPENMP
657      #pragma omp parallel for
658      #endif
659      for (q_index=0; q_index<nq; q_index++)
660#endif // !USE_GPU
661      {
662
663        FETCH_Q();
664        APPLY_ROTATION();
665
666        // ======= COMPUTE SCATTERING ==========
667        #if defined(MAGNETIC) && NUM_MAGNETIC > 0
668          // Compute the scattering from the magnetic cross sections.
669          double scattering = 0.0;
670          const double qsq = qx*qx + qy*qy;
671          if (qsq > 1.e-16) {
672            // TODO: what is the magnetic scattering at q=0
673            const double px = (qy*cos_mspin + qx*sin_mspin)/qsq;
674            const double py = (qy*sin_mspin - qx*cos_mspin)/qsq;
675
676            // loop over uu, ud real, du real, dd, ud imag, du imag
677            for (unsigned int xs=0; xs<6; xs++) {
678              const double xs_weight = xs_weights[xs];
679              if (xs_weight > 1.e-8) {
680                // Since the cross section weight is significant, set the slds
681                // to the effective slds for this cross section, call the
682                // kernel, and add according to weight.
683                for (int sk=0; sk<NUM_MAGNETIC; sk++) {
684                  const int32_t mag_index = NUM_PARS+5 + 3*sk;
685                  const int32_t sld_index = slds[sk];
686                  const double mx = values[mag_index];
687                  const double my = values[mag_index+1];
688                  const double mz = values[mag_index+2];
689                  local_values.vector[sld_index] =
690                    mag_sld(xs, qx, qy, px, py, values[sld_index+2], mx, my, mz);
691//if (q_index==0) printf("%d: (qx,qy)=(%g,%g) xs=%d sld%d=%g p=(%g,%g) m=(%g,%g,%g)\n",
692//  q_index, qx, qy, xs, sk, local_values.vector[sld_index], px, py, mx, my, mz);
693                }
694                scattering += xs_weight * CALL_KERNEL();
695              }
696            }
697          }
698        #else  // !MAGNETIC
699          const double scattering = CALL_KERNEL();
700        #endif // !MAGNETIC
701//printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight0);
702
703        #if defined(USE_GPU)
704          this_result += weight * scattering;
705        #else // !USE_GPU
706          result[q_index] += weight * scattering;
707        #endif // !USE_GPU
708      }
709    }
710  }
711
712// close nested loops
713++step;
714#if MAX_PD>0
715  PD_CLOSE(0)
716#endif
717#if MAX_PD>1
718  PD_CLOSE(1)
719#endif
720#if MAX_PD>2
721  PD_CLOSE(2)
722#endif
723#if MAX_PD>3
724  PD_CLOSE(3)
725#endif
726#if MAX_PD>4
727  PD_CLOSE(4)
728#endif
729
730// Remember the current result and the updated norm.
731#if defined(USE_GPU)
732  result[q_index] = this_result;
733  if (q_index == 0) result[nq] = pd_norm;
734//if (q_index == 0) printf("res: %g/%g\n", result[0], pd_norm);
735#else // !USE_GPU
736  result[nq] = pd_norm;
737//printf("res: %g/%g\n", result[0], pd_norm);
738#endif // !USE_GPU
739
740// ** clear the macros in preparation for the next kernel **
741#undef PD_INIT
742#undef PD_OPEN
743#undef PD_CLOSE
744#undef FETCH_Q
745#undef APPLY_PROJECTION
746#undef BUILD_ROTATION
747#undef APPLY_ROTATION
748#undef CALL_KERNEL
749}
Note: See TracBrowser for help on using the repository browser.