source: sasmodels/sasmodels/kernel_iq.c @ 47fb816

core_shell_microgelsmagnetic_modelticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since 47fb816 was 47fb816, checked in by Paul Kienzle <pkienzle@…>, 6 years ago

Merge branch 'master' into cuda-test

  • Property mode set to 100644
File size: 26.4 KB
Line 
1/*
2    ##########################################################
3    #                                                        #
4    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
5    #   !!                                              !!   #
6    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
7    #   !!                                              !!   #
8    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
9    #                                                        #
10    ##########################################################
11*/
12
13// NOTE: the following macros are defined in generate.py:
14//
15//  MAX_PD : the maximum number of dispersity loops allowed for this model,
16//      which will be at most modelinfo.MAX_PD.
17//  NUM_PARS : the number of parameters in the parameter table
18//  NUM_VALUES : the number of values to skip at the start of the
19//      values array before you get to the dispersity values.
20//  PARAMETER_TABLE : list of parameter declarations used to create the
21//      ParameterTable type.
22//  KERNEL_NAME : model_Iq, model_Iqxy or model_Imagnetic.  This code is
23//      included three times, once for each kernel type.
24//  MAGNETIC : defined when the magnetic kernel is being instantiated
25//  NUM_MAGNETIC : the number of magnetic parameters
26//  MAGNETIC_PARS : a comma-separated list of indices to the sld
27//      parameters in the parameter table.
28//  CALL_VOLUME(table) : call the form volume function
29//  CALL_IQ(q, table) : call the Iq function for 1D calcs.
30//  CALL_IQ_A(q, table) : call the Iq function with |q| for 2D data.
31//  CALL_IQ_AC(qa, qc, table) : call the Iqxy function for symmetric shapes
32//  CALL_IQ_ABC(qa, qc, table) : call the Iqxy function for asymmetric shapes
33//  CALL_IQ_XY(qx, qy, table) : call the Iqxy function for arbitrary models
34//  INVALID(table) : test if the current point is feesible to calculate.  This
35//      will be defined in the kernel definition file.
36//  PROJECTION : equirectangular=1, sinusoidal=2
37//      see explore/jitter.py for definitions.
38
39#ifndef _PAR_BLOCK_ // protected block so we can include this code twice.
40#define _PAR_BLOCK_
41
42typedef struct {
43#if MAX_PD > 0
44    int32_t pd_par[MAX_PD];     // id of the nth dispersity variable
45    int32_t pd_length[MAX_PD];  // length of the nth dispersity weight vector
46    int32_t pd_offset[MAX_PD];  // offset of pd weights in the value & weight vector
47    int32_t pd_stride[MAX_PD];  // stride to move to the next index at this level
48#endif // MAX_PD > 0
49    int32_t num_eval;           // total number of voxels in hypercube
50    int32_t num_weights;        // total length of the weights vector
51    int32_t num_active;         // number of non-trivial pd loops
52    int32_t theta_par;          // id of first orientation variable
53} ProblemDetails;
54
55// Intel HD 4000 needs private arrays to be a multiple of 4 long
56typedef struct {
57    PARAMETER_TABLE
58} ParameterTable;
59typedef union {
60    ParameterTable table;
61    double vector[4*((NUM_PARS+3)/4)];
62} ParameterBlock;
63#endif // _PAR_BLOCK_
64
65#if defined(MAGNETIC) && NUM_MAGNETIC > 0
66// ===== Helper functions for magnetism =====
67
68// Return value restricted between low and high
69__device__
70static double clip(double value, double low, double high)
71{
72  return (value < low ? low : (value > high ? high : value));
73}
74
75// Compute spin cross sections given in_spin and out_spin
76// To convert spin cross sections to sld b:
77//     uu * (sld - m_sigma_x);
78//     dd * (sld + m_sigma_x);
79//     ud * (m_sigma_y - 1j*m_sigma_z);
80//     du * (m_sigma_y + 1j*m_sigma_z);
81// weights for spin crosssections: dd du real, ud real, uu, du imag, ud imag
82static void set_spin_weights(double in_spin, double out_spin, double weight[6])
83{
84  in_spin = clip(in_spin, 0.0, 1.0);
85  out_spin = clip(out_spin, 0.0, 1.0);
86  // Previous version of this function took the square root of the weights,
87  // under the assumption that
88  //
89  //     w*I(q, rho1, rho2, ...) = I(q, sqrt(w)*rho1, sqrt(w)*rho2, ...)
90  //
91  // However, since the weights are applied to the final intensity and
92  // are not interned inside the I(q) function, we want the full
93  // weight and not the square root.  Any function using
94  // set_spin_weights as part of calculating an amplitude will need to
95  // manually take that square root, but there is currently no such
96  // function.
97  weight[0] = (1.0-in_spin) * (1.0-out_spin); // dd
98  weight[1] = (1.0-in_spin) * out_spin;       // du
99  weight[2] = in_spin * (1.0-out_spin);       // ud
100  weight[3] = in_spin * out_spin;             // uu
101  weight[4] = weight[1]; // du.imag
102  weight[5] = weight[2]; // ud.imag
103}
104
105// Compute the magnetic sld
106__device__
107static double mag_sld(
108  const unsigned int xs, // 0=dd, 1=du.real, 2=ud.real, 3=uu, 4=du.imag, 5=ud.imag
109  const double qx, const double qy,
110  const double px, const double py,
111  const double sld,
112  const double mx, const double my, const double mz
113)
114{
115  if (xs < 4) {
116    const double perp = qy*mx - qx*my;
117    switch (xs) {
118      default: // keep compiler happy; condition ensures xs in [0,1,2,3]
119      case 0: // uu => sld - D M_perpx
120          return sld - px*perp;
121      case 1: // ud.real => -D M_perpy
122          return py*perp;
123      case 2: // du.real => -D M_perpy
124          return py*perp;
125      case 3: // dd => sld + D M_perpx
126          return sld + px*perp;
127    }
128  } else {
129    if (xs== 4) {
130      return -mz;  // du.imag => +D M_perpz
131    } else { // index == 5
132      return +mz;  // ud.imag => -D M_perpz
133    }
134  }
135}
136
137
138#endif
139
140// ===== Helper functions for orientation and jitter =====
141
142// To change the definition of the angles, run explore/angles.py, which
143// uses sympy to generate the equations.
144
145#if !defined(_QAC_SECTION) && defined(CALL_IQ_AC)
146#define _QAC_SECTION
147
148typedef struct {
149    double R31, R32;
150} QACRotation;
151
152// Fill in the rotation matrix R from the view angles (theta, phi) and the
153// jitter angles (dtheta, dphi).  This matrix can be applied to all of the
154// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qc]'
155__device__
156static void
157qac_rotation(
158    QACRotation *rotation,
159    double theta, double phi,
160    double dtheta, double dphi)
161{
162    double sin_theta, cos_theta;
163    double sin_phi, cos_phi;
164
165    // reverse view matrix
166    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
167    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
168    const double V11 = cos_phi*cos_theta;
169    const double V12 = sin_phi*cos_theta;
170    const double V21 = -sin_phi;
171    const double V22 = cos_phi;
172    const double V31 = sin_theta*cos_phi;
173    const double V32 = sin_phi*sin_theta;
174
175    // reverse jitter matrix
176    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
177    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
178    const double J31 = sin_theta;
179    const double J32 = -sin_phi*cos_theta;
180    const double J33 = cos_phi*cos_theta;
181
182    // reverse matrix
183    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
184    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
185}
186
187// Apply the rotation matrix returned from qac_rotation to the point (qx,qy),
188// returning R*[qx,qy]' = [qa,qc]'
189__device__
190static void
191qac_apply(
192    QACRotation *rotation,
193    double qx, double qy,
194    double *qab_out, double *qc_out)
195{
196    // Indirect calculation of qab, from qab^2 = |q|^2 - qc^2
197    const double dqc = rotation->R31*qx + rotation->R32*qy;
198    const double dqab_sq = -dqc*dqc + qx*qx + qy*qy;
199    //*qab_out = sqrt(fabs(dqab_sq));
200    *qab_out = dqab_sq > 0.0 ? sqrt(dqab_sq) : 0.0;
201    *qc_out = dqc;
202}
203#endif // _QAC_SECTION
204
205#if !defined(_QABC_SECTION) && defined(CALL_IQ_ABC)
206#define _QABC_SECTION
207
208typedef struct {
209    double R11, R12;
210    double R21, R22;
211    double R31, R32;
212} QABCRotation;
213
214// Fill in the rotation matrix R from the view angles (theta, phi, psi) and the
215// jitter angles (dtheta, dphi, dpsi).  This matrix can be applied to all of the
216// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qb,qc]'
217__device__
218static void
219qabc_rotation(
220    QABCRotation *rotation,
221    double theta, double phi, double psi,
222    double dtheta, double dphi, double dpsi)
223{
224    double sin_theta, cos_theta;
225    double sin_phi, cos_phi;
226    double sin_psi, cos_psi;
227
228    // reverse view matrix
229    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
230    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
231    SINCOS(psi*M_PI_180, sin_psi, cos_psi);
232    const double V11 = -sin_phi*sin_psi + cos_phi*cos_psi*cos_theta;
233    const double V12 = sin_phi*cos_psi*cos_theta + sin_psi*cos_phi;
234    const double V21 = -sin_phi*cos_psi - sin_psi*cos_phi*cos_theta;
235    const double V22 = -sin_phi*sin_psi*cos_theta + cos_phi*cos_psi;
236    const double V31 = sin_theta*cos_phi;
237    const double V32 = sin_phi*sin_theta;
238
239    // reverse jitter matrix
240    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
241    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
242    SINCOS(dpsi*M_PI_180, sin_psi, cos_psi);
243    const double J11 = cos_psi*cos_theta;
244    const double J12 = sin_phi*sin_theta*cos_psi + sin_psi*cos_phi;
245    const double J13 = sin_phi*sin_psi - sin_theta*cos_phi*cos_psi;
246    const double J21 = -sin_psi*cos_theta;
247    const double J22 = -sin_phi*sin_psi*sin_theta + cos_phi*cos_psi;
248    const double J23 = sin_phi*cos_psi + sin_psi*sin_theta*cos_phi;
249    const double J31 = sin_theta;
250    const double J32 = -sin_phi*cos_theta;
251    const double J33 = cos_phi*cos_theta;
252
253    // reverse matrix
254    rotation->R11 = J11*V11 + J12*V21 + J13*V31;
255    rotation->R12 = J11*V12 + J12*V22 + J13*V32;
256    rotation->R21 = J21*V11 + J22*V21 + J23*V31;
257    rotation->R22 = J21*V12 + J22*V22 + J23*V32;
258    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
259    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
260}
261
262// Apply the rotation matrix returned from qabc_rotation to the point (qx,qy),
263// returning R*[qx,qy]' = [qa,qb,qc]'
264__device__
265static void
266qabc_apply(
267    QABCRotation *rotation,
268    double qx, double qy,
269    double *qa_out, double *qb_out, double *qc_out)
270{
271    *qa_out = rotation->R11*qx + rotation->R12*qy;
272    *qb_out = rotation->R21*qx + rotation->R22*qy;
273    *qc_out = rotation->R31*qx + rotation->R32*qy;
274}
275
276#endif // _QABC_SECTION
277
278
279// ==================== KERNEL CODE ========================
280
281kernel
282void KERNEL_NAME(
283    int32_t nq,                 // number of q values
284    const int32_t pd_start,     // where we are in the dispersity loop
285    const int32_t pd_stop,      // where we are stopping in the dispersity loop
286    global_par const ProblemDetails *details,
287    global_par const double *values,
288    global_par const double *q, // nq q values, with padding to boundary
289    global_par double *result,  // nq+1 return values, again with padding
290    const double cutoff     // cutoff in the dispersity weight product
291    )
292{
293#if defined(USE_GPU)
294  // who we are and what element we are working with
295  #if defined(USE_OPENCL)
296  const int q_index = get_global_id(0);
297  #else // USE_CUDA
298  const int q_index = threadIdx.x + blockIdx.x * blockDim.x;
299  #endif
300  if (q_index >= nq) return;
301#else
302  // Define q_index here so that debugging statements can be written to work
303  // for both OpenCL and DLL using:
304  //    if (q_index == 0) {printf(...);}
305  int q_index = 0;
306#endif
307
308  // ** Fill in the local values table **
309  // Storage for the current parameter values.
310  // These will be updated as we walk the dispersity mesh.
311  ParameterBlock local_values;
312  //   values[0] is scale
313  //   values[1] is background
314  #ifdef USE_OPENMP
315  #pragma omp parallel for
316  #endif
317  for (int i=0; i < NUM_PARS; i++) {
318    local_values.vector[i] = values[2+i];
319    //if (q_index==0) printf("p%d = %g\n",i, local_values.vector[i]);
320  }
321  //if (q_index==0) printf("NUM_VALUES:%d  NUM_PARS:%d  MAX_PD:%d\n", NUM_VALUES, NUM_PARS, MAX_PD);
322  //if (q_index==0) printf("start:%d  stop:%d\n", pd_start, pd_stop);
323
324  // ** Precompute magnatism values **
325#if defined(MAGNETIC) && NUM_MAGNETIC>0
326  // Location of the sld parameters in the parameter vector.
327  // These parameters are updated with the effective sld due to magnetism.
328  const int32_t slds[] = { MAGNETIC_PARS };
329
330  // Interpret polarization cross section.
331  //     up_frac_i = values[NUM_PARS+2];
332  //     up_frac_f = values[NUM_PARS+3];
333  //     up_angle = values[NUM_PARS+4];
334  // TODO: could precompute more magnetism parameters before calling the kernel.
335  double xs_weights[8];  // uu, ud real, du real, dd, ud imag, du imag, fill, fill
336  double cos_mspin, sin_mspin;
337  set_spin_weights(values[NUM_PARS+2], values[NUM_PARS+3], xs_weights);
338  SINCOS(-values[NUM_PARS+4]*M_PI_180, sin_mspin, cos_mspin);
339#endif // MAGNETIC
340
341  // ** Fill in the initial results **
342  // If pd_start is zero that means that we are starting a new calculation,
343  // and must initialize the result to zero.  Otherwise, we are restarting
344  // the calculation from somewhere in the middle of the dispersity mesh,
345  // and we update the value rather than reset it. Similarly for the
346  // normalization factor, which is stored as the final value in the
347  // results vector (one past the number of q values).
348  //
349  // The code differs slightly between opencl and dll since opencl is only
350  // seeing one q value (stored in the variable "this_result") while the dll
351  // version must loop over all q.
352  #if defined(USE_GPU)
353    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
354    double this_result = (pd_start == 0 ? 0.0 : result[q_index]);
355  #else // !USE_GPU
356    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
357    if (pd_start == 0) {
358      #ifdef USE_OPENMP
359      #pragma omp parallel for
360      #endif
361      for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0;
362    }
363    //if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]);
364#endif // !USE_GPU
365
366
367// ====== macros to set up the parts of the loop =======
368/*
369Based on the level of the loop, uses C preprocessor magic to construct
370level-specific looping variables, including these from loop level 3:
371
372  int n3 : length of loop for mesh level 3
373  int i3 : current position in the loop for level 3, which is calculated
374       from a combination of pd_start, pd_stride[3] and pd_length[3].
375  int p3 : is the index into the parameter table for mesh level 3
376  double v3[] : pointer into dispersity array to values for loop 3
377  double w3[] : pointer into dispersity array to weights for loop 3
378  double weight3 : the product of weights from levels 3 and up, computed
379       as weight5*weight4*w3[i3].  Note that we need an outermost
380       value weight5 set to 1.0 for this to work properly.
381
382After expansion, the loop struction will look like the following:
383
384  // --- PD_INIT(4) ---
385  const int n4 = pd_length[4];
386  const int p4 = pd_par[4];
387  global_var const double *v4 = pd_value + pd_offset[4];
388  global_var const double *w4 = pd_weight + pd_offset[4];
389  int i4 = (pd_start/pd_stride[4])%n4;  // position in level 4 at pd_start
390
391  // --- PD_INIT(3) ---
392  const int n3 = pd_length[3];
393  ...
394  int i3 = (pd_start/pd_stride[3])%n3;  // position in level 3 at pd_start
395
396  PD_INIT(2)
397  PD_INIT(1)
398  PD_INIT(0)
399
400  // --- PD_OUTERMOST_WEIGHT(5) ---
401  const double weight5 = 1.0;
402
403  // --- PD_OPEN(4,5) ---
404  while (i4 < n4) {
405    parameter[p4] = v4[i4];  // set the value for pd parameter 4 at this mesh point
406    const double weight4 = w4[i4] * weight5;
407
408    // from PD_OPEN(3,4)
409    while (i3 < n3) {
410      parameter[p3] = v3[i3];  // set the value for pd parameter 3 at this mesh point
411      const double weight3 = w3[i3] * weight4;
412
413      PD_OPEN(3,2)
414      PD_OPEN(2,1)
415      PD_OPEN(0,1)
416
417      // ... main loop body ...
418      APPLY_PROJECTION    // convert jitter values to spherical coords
419      BUILD_ROTATION      // construct the rotation matrix qxy => qabc
420      for each q
421          FETCH_Q         // set qx,qy from the q input vector
422          APPLY_ROTATION  // convert qx,qy to qa,qb,qc
423          CALL_KERNEL     // scattering = Iqxy(qa, qb, qc, p1, p2, ...)
424
425      ++step;  // increment counter representing position in dispersity mesh
426
427      PD_CLOSE(0)
428      PD_CLOSE(1)
429      PD_CLOSE(2)
430
431      // --- PD_CLOSE(3) ---
432      if (step >= pd_stop) break;
433      ++i3;
434    }
435    i3 = 0; // reset loop counter for next round through the loop
436
437    // --- PD_CLOSE(4) ---
438    if (step >= pd_stop) break;
439    ++i4;
440  }
441  i4 = 0; // reset loop counter even though no more rounds through the loop
442
443*/
444
445
446// ** prepare inner loops **
447
448// Depending on the shape type (radial, axial, triaxial), the variables
449// and calling parameters in the loop body will be slightly different.
450// Macros capture the differences in one spot so the rest of the code
451// is easier to read. The code below both declares variables for the
452// inner loop and defines the macros that use them.
453
454#if defined(CALL_IQ)
455  // unoriented 1D
456  double qk;
457  #define FETCH_Q() do { qk = q[q_index]; } while (0)
458  #define BUILD_ROTATION() do {} while(0)
459  #define APPLY_ROTATION() do {} while(0)
460  #define CALL_KERNEL() CALL_IQ(qk, local_values.table)
461
462#elif defined(CALL_IQ_A)
463  // unoriented 2D
464  double qx, qy;
465  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
466  #define BUILD_ROTATION() do {} while(0)
467  #define APPLY_ROTATION() do {} while(0)
468  #define CALL_KERNEL() CALL_IQ_A(sqrt(qx*qx+qy*qy), local_values.table)
469
470#elif defined(CALL_IQ_AC)
471  // oriented symmetric 2D
472  double qx, qy;
473  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
474  double qa, qc;
475  QACRotation rotation;
476  // theta, phi, dtheta, dphi are defined below in projection to avoid repeated code.
477  #define BUILD_ROTATION() qac_rotation(&rotation, theta, phi, dtheta, dphi);
478  #define APPLY_ROTATION() qac_apply(&rotation, qx, qy, &qa, &qc)
479  #define CALL_KERNEL() CALL_IQ_AC(qa, qc, local_values.table)
480
481#elif defined(CALL_IQ_ABC)
482  // oriented asymmetric 2D
483  double qx, qy;
484  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
485  double qa, qb, qc;
486  QABCRotation rotation;
487  // theta, phi, dtheta, dphi are defined below in projection to avoid repeated code.
488  // psi and dpsi are only for IQ_ABC, so they are processed here.
489  const double psi = values[details->theta_par+4];
490  local_values.table.psi = 0.;
491  #define BUILD_ROTATION() qabc_rotation(&rotation, theta, phi, psi, dtheta, dphi, local_values.table.psi)
492  #define APPLY_ROTATION() qabc_apply(&rotation, qx, qy, &qa, &qb, &qc)
493  #define CALL_KERNEL() CALL_IQ_ABC(qa, qb, qc, local_values.table)
494#elif defined(CALL_IQ_XY)
495  // direct call to qx,qy calculator
496  double qx, qy;
497  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
498  #define BUILD_ROTATION() do {} while(0)
499  #define APPLY_ROTATION() do {} while(0)
500  #define CALL_KERNEL() CALL_IQ_XY(qx, qy, local_values.table)
501#endif
502
503// Define APPLY_PROJECTION depending on model symmetries. We do this outside
504// the previous if block so that we don't need to repeat the identical
505// logic in the IQ_AC and IQ_ABC branches.  This will become more important
506// if we implement more projections, or more complicated projections.
507#if defined(CALL_IQ) || defined(CALL_IQ_A)  // no orientation
508  #define APPLY_PROJECTION() const double weight=weight0
509#elif defined(CALL_IQ_XY) // pass orientation to the model
510  // CRUFT: support oriented model which define Iqxy rather than Iqac or Iqabc
511  // Need to plug the values for the orientation angles back into parameter
512  // table in case they were overridden by the orientation offset.  This
513  // means that orientation dispersity will not work for these models, but
514  // it was broken anyway, so no matter.  Still want to provide Iqxy in case
515  // the user model wants full control of orientation/magnetism.
516  #if defined(HAVE_PSI)
517    const double theta = values[details->theta_par+2];
518    const double phi = values[details->theta_par+3];
519    const double psi = values[details->theta_par+4];
520    double weight;
521    #define APPLY_PROJECTION() do { \
522      local_values.table.theta = theta; \
523      local_values.table.phi = phi; \
524      local_values.table.psi = psi; \
525      weight=weight0; \
526    } while (0)
527  #elif defined(HAVE_THETA)
528    const double theta = values[details->theta_par+2];
529    const double phi = values[details->theta_par+3];
530    double weight;
531    #define APPLY_PROJECTION() do { \
532      local_values.table.theta = theta; \
533      local_values.table.phi = phi; \
534      weight=weight0; \
535    } while (0)
536  #else
537    #define APPLY_PROJECTION() const double weight=weight0
538  #endif
539#else // apply jitter and view before calling the model
540  // Grab the "view" angles (theta, phi, psi) from the initial parameter table.
541  const double theta = values[details->theta_par+2];
542  const double phi = values[details->theta_par+3];
543  // Make sure jitter angle defaults to zero if there is no jitter distribution
544  local_values.table.theta = 0.;
545  local_values.table.phi = 0.;
546  // The "jitter" angles (dtheta, dphi, dpsi) are stored with the
547  // dispersity values and copied to the local parameter table as
548  // we go through the mesh.
549  double dtheta, dphi, weight;
550  #if PROJECTION == 1 // equirectangular
551    #define APPLY_PROJECTION() do { \
552      dtheta = local_values.table.theta; \
553      dphi = local_values.table.phi; \
554      weight = fabs(cos(dtheta*M_PI_180)) * weight0; \
555    } while (0)
556  #elif PROJECTION == 2 // sinusoidal
557    #define APPLY_PROJECTION() do { \
558      dtheta = local_values.table.theta; \
559      dphi = local_values.table.phi; \
560      weight = weight0; \
561      if (dtheta != 90.0) dphi /= cos(dtheta*M_PI_180); \
562      else if (dphi != 0.0) weight = 0.; \
563      if (fabs(dphi) >= 180.) weight = 0.; \
564    } while (0)
565  #endif
566#endif // done defining APPLY_PROJECTION
567
568// ** define looping macros **
569
570// Define looping variables
571#define PD_INIT(_LOOP) \
572  const int n##_LOOP = details->pd_length[_LOOP]; \
573  const int p##_LOOP = details->pd_par[_LOOP]; \
574  global_var const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \
575  global_var const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \
576  int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP;
577
578// Jump into the middle of the dispersity loop
579#define PD_OPEN(_LOOP,_OUTER) \
580  while (i##_LOOP < n##_LOOP) { \
581    local_values.vector[p##_LOOP] = v##_LOOP[i##_LOOP]; \
582    const double weight##_LOOP = w##_LOOP[i##_LOOP] * weight##_OUTER;
583
584// create the variable "weight#=1.0" where # is the outermost level+1 (=MAX_PD).
585#define _PD_OUTERMOST_WEIGHT(_n) const double weight##_n = 1.0;
586#define PD_OUTERMOST_WEIGHT(_n) _PD_OUTERMOST_WEIGHT(_n)
587
588// Close out the loop
589#define PD_CLOSE(_LOOP) \
590    if (step >= pd_stop) break; \
591    ++i##_LOOP; \
592  } \
593  i##_LOOP = 0;
594
595// ====== construct the loops =======
596
597// Pointers to the start of the dispersity and weight vectors, if needed.
598#if MAX_PD>0
599  global_var const double *pd_value = values + NUM_VALUES;
600  global_var const double *pd_weight = pd_value + details->num_weights;
601#endif
602
603// The variable "step" is the current position in the dispersity loop.
604// It will be incremented each time a new point in the mesh is accumulated,
605// and used to test whether we have reached pd_stop.
606int step = pd_start;
607
608// *** define loops for each of 0, 1, 2, ..., modelinfo.MAX_PD-1 ***
609
610// define looping variables
611#if MAX_PD>4
612  PD_INIT(4)
613#endif
614#if MAX_PD>3
615  PD_INIT(3)
616#endif
617#if MAX_PD>2
618  PD_INIT(2)
619#endif
620#if MAX_PD>1
621  PD_INIT(1)
622#endif
623#if MAX_PD>0
624  PD_INIT(0)
625#endif
626
627// open nested loops
628PD_OUTERMOST_WEIGHT(MAX_PD)
629#if MAX_PD>4
630  PD_OPEN(4,5)
631#endif
632#if MAX_PD>3
633  PD_OPEN(3,4)
634#endif
635#if MAX_PD>2
636  PD_OPEN(2,3)
637#endif
638#if MAX_PD>1
639  PD_OPEN(1,2)
640#endif
641#if MAX_PD>0
642  PD_OPEN(0,1)
643#endif
644
645//if (q_index==0) {printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n");}
646
647  // ====== loop body =======
648  #ifdef INVALID
649  if (!INVALID(local_values.table))
650  #endif
651  {
652     APPLY_PROJECTION();
653
654    // Accumulate I(q)
655    // Note: weight==0 must always be excluded
656    if (weight > cutoff) {
657      pd_norm += weight * CALL_VOLUME(local_values.table);
658      BUILD_ROTATION();
659
660#if !defined(USE_GPU)
661      // DLL needs to explicitly loop over the q values.
662      #ifdef USE_OPENMP
663      #pragma omp parallel for
664      #endif
665      for (q_index=0; q_index<nq; q_index++)
666#endif // !USE_GPU
667      {
668
669        FETCH_Q();
670        APPLY_ROTATION();
671
672        // ======= COMPUTE SCATTERING ==========
673        #if defined(MAGNETIC) && NUM_MAGNETIC > 0
674          // Compute the scattering from the magnetic cross sections.
675          double scattering = 0.0;
676          const double qsq = qx*qx + qy*qy;
677          if (qsq > 1.e-16) {
678            // TODO: what is the magnetic scattering at q=0
679            const double px = (qy*cos_mspin + qx*sin_mspin)/qsq;
680            const double py = (qy*sin_mspin - qx*cos_mspin)/qsq;
681
682            // loop over uu, ud real, du real, dd, ud imag, du imag
683            for (unsigned int xs=0; xs<6; xs++) {
684              const double xs_weight = xs_weights[xs];
685              if (xs_weight > 1.e-8) {
686                // Since the cross section weight is significant, set the slds
687                // to the effective slds for this cross section, call the
688                // kernel, and add according to weight.
689                for (int sk=0; sk<NUM_MAGNETIC; sk++) {
690                  const int32_t mag_index = NUM_PARS+5 + 3*sk;
691                  const int32_t sld_index = slds[sk];
692                  const double mx = values[mag_index];
693                  const double my = values[mag_index+1];
694                  const double mz = values[mag_index+2];
695                  local_values.vector[sld_index] =
696                    mag_sld(xs, qx, qy, px, py, values[sld_index+2], mx, my, mz);
697//if (q_index==0) printf("%d: (qx,qy)=(%g,%g) xs=%d sld%d=%g p=(%g,%g) m=(%g,%g,%g)\n",
698//  q_index, qx, qy, xs, sk, local_values.vector[sld_index], px, py, mx, my, mz);
699                }
700                scattering += xs_weight * CALL_KERNEL();
701              }
702            }
703          }
704        #else  // !MAGNETIC
705          const double scattering = CALL_KERNEL();
706        #endif // !MAGNETIC
707//printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight0);
708
709        #if defined(USE_GPU)
710          this_result += weight * scattering;
711        #else // !USE_GPU
712          result[q_index] += weight * scattering;
713        #endif // !USE_GPU
714      }
715    }
716  }
717
718// close nested loops
719++step;
720#if MAX_PD>0
721  PD_CLOSE(0)
722#endif
723#if MAX_PD>1
724  PD_CLOSE(1)
725#endif
726#if MAX_PD>2
727  PD_CLOSE(2)
728#endif
729#if MAX_PD>3
730  PD_CLOSE(3)
731#endif
732#if MAX_PD>4
733  PD_CLOSE(4)
734#endif
735
736// Remember the current result and the updated norm.
737#if defined(USE_GPU)
738  result[q_index] = this_result;
739  if (q_index == 0) result[nq] = pd_norm;
740//if (q_index == 0) printf("res: %g/%g\n", result[0], pd_norm);
741#else // !USE_GPU
742  result[nq] = pd_norm;
743//printf("res: %g/%g\n", result[0], pd_norm);
744#endif // !USE_GPU
745
746// ** clear the macros in preparation for the next kernel **
747#undef PD_INIT
748#undef PD_OPEN
749#undef PD_CLOSE
750#undef FETCH_Q
751#undef APPLY_PROJECTION
752#undef BUILD_ROTATION
753#undef APPLY_ROTATION
754#undef CALL_KERNEL
755}
Note: See TracBrowser for help on using the repository browser.