source: sasmodels/sasmodels/kernel_iq.c @ d86f0fc

core_shell_microgelsmagnetic_modelticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since d86f0fc was d86f0fc, checked in by Paul Kienzle <pkienzle@…>, 6 years ago

lint reduction

  • Property mode set to 100644
File size: 25.4 KB
Line 
1/*
2    ##########################################################
3    #                                                        #
4    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
5    #   !!                                              !!   #
6    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
7    #   !!                                              !!   #
8    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
9    #                                                        #
10    ##########################################################
11*/
12
13// NOTE: the following macros are defined in generate.py:
14//
15//  MAX_PD : the maximum number of dispersity loops allowed for this model,
16//      which will be at most modelinfo.MAX_PD.
17//  NUM_PARS : the number of parameters in the parameter table
18//  NUM_VALUES : the number of values to skip at the start of the
19//      values array before you get to the dispersity values.
20//  PARAMETER_TABLE : list of parameter declarations used to create the
21//      ParameterTable type.
22//  KERNEL_NAME : model_Iq, model_Iqxy or model_Imagnetic.  This code is
23//      included three times, once for each kernel type.
24//  MAGNETIC : defined when the magnetic kernel is being instantiated
25//  NUM_MAGNETIC : the number of magnetic parameters
26//  MAGNETIC_PARS : a comma-separated list of indices to the sld
27//      parameters in the parameter table.
28//  CALL_VOLUME(table) : call the form volume function
29//  CALL_IQ(q, table) : call the Iq function for 1D calcs.
30//  CALL_IQ_A(q, table) : call the Iq function with |q| for 2D data.
31//  CALL_IQ_AC(qa, qc, table) : call the Iqxy function for symmetric shapes
32//  CALL_IQ_ABC(qa, qc, table) : call the Iqxy function for asymmetric shapes
33//  CALL_IQ_XY(qx, qy, table) : call the Iqxy function for arbitrary models
34//  INVALID(table) : test if the current point is feesible to calculate.  This
35//      will be defined in the kernel definition file.
36//  PROJECTION : equirectangular=1, sinusoidal=2
37//      see explore/jitter.py for definitions.
38
39#ifndef _PAR_BLOCK_ // protected block so we can include this code twice.
40#define _PAR_BLOCK_
41
42typedef struct {
43#if MAX_PD > 0
44    int32_t pd_par[MAX_PD];     // id of the nth dispersity variable
45    int32_t pd_length[MAX_PD];  // length of the nth dispersity weight vector
46    int32_t pd_offset[MAX_PD];  // offset of pd weights in the value & weight vector
47    int32_t pd_stride[MAX_PD];  // stride to move to the next index at this level
48#endif // MAX_PD > 0
49    int32_t num_eval;           // total number of voxels in hypercube
50    int32_t num_weights;        // total length of the weights vector
51    int32_t num_active;         // number of non-trivial pd loops
52    int32_t theta_par;          // id of first orientation variable
53} ProblemDetails;
54
55// Intel HD 4000 needs private arrays to be a multiple of 4 long
56typedef struct {
57    PARAMETER_TABLE
58} ParameterTable;
59typedef union {
60    ParameterTable table;
61    double vector[4*((NUM_PARS+3)/4)];
62} ParameterBlock;
63#endif // _PAR_BLOCK_
64
65#if defined(MAGNETIC) && NUM_MAGNETIC > 0
66// ===== Helper functions for magnetism =====
67
68// Return value restricted between low and high
69static double clip(double value, double low, double high)
70{
71  return (value < low ? low : (value > high ? high : value));
72}
73
74// Compute spin cross sections given in_spin and out_spin
75// To convert spin cross sections to sld b:
76//     uu * (sld - m_sigma_x);
77//     dd * (sld + m_sigma_x);
78//     ud * (m_sigma_y - 1j*m_sigma_z);
79//     du * (m_sigma_y + 1j*m_sigma_z);
80// weights for spin crosssections: dd du real, ud real, uu, du imag, ud imag
81static void set_spin_weights(double in_spin, double out_spin, double spins[4])
82{
83  in_spin = clip(in_spin, 0.0, 1.0);
84  out_spin = clip(out_spin, 0.0, 1.0);
85  spins[0] = sqrt(sqrt((1.0-in_spin) * (1.0-out_spin))); // dd
86  spins[1] = sqrt(sqrt((1.0-in_spin) * out_spin));       // du real
87  spins[2] = sqrt(sqrt(in_spin * (1.0-out_spin)));       // ud real
88  spins[3] = sqrt(sqrt(in_spin * out_spin));             // uu
89  spins[4] = spins[1]; // du imag
90  spins[5] = spins[2]; // ud imag
91}
92
93// Compute the magnetic sld
94static double mag_sld(
95  const unsigned int xs, // 0=dd, 1=du real, 2=ud real, 3=uu, 4=du imag, 5=up imag
96  const double qx, const double qy,
97  const double px, const double py,
98  const double sld,
99  const double mx, const double my, const double mz
100)
101{
102  if (xs < 4) {
103    const double perp = qy*mx - qx*my;
104    switch (xs) {
105      default: // keep compiler happy; condition ensures xs in [0,1,2,3]
106      case 0: // uu => sld - D M_perpx
107          return sld - px*perp;
108      case 1: // ud real => -D M_perpy
109          return py*perp;
110      case 2: // du real => -D M_perpy
111          return py*perp;
112      case 3: // dd real => sld + D M_perpx
113          return sld + px*perp;
114    }
115  } else {
116    if (xs== 4) {
117      return -mz;  // ud imag => -D M_perpz
118    } else { // index == 5
119      return mz;   // du imag => D M_perpz
120    }
121  }
122}
123
124
125#endif
126
127// ===== Helper functions for orientation and jitter =====
128
129// To change the definition of the angles, run explore/angles.py, which
130// uses sympy to generate the equations.
131
132#if !defined(_QAC_SECTION) && defined(CALL_IQ_AC)
133#define _QAC_SECTION
134
135typedef struct {
136    double R31, R32;
137} QACRotation;
138
139// Fill in the rotation matrix R from the view angles (theta, phi) and the
140// jitter angles (dtheta, dphi).  This matrix can be applied to all of the
141// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qc]'
142static void
143qac_rotation(
144    QACRotation *rotation,
145    double theta, double phi,
146    double dtheta, double dphi)
147{
148    double sin_theta, cos_theta;
149    double sin_phi, cos_phi;
150
151    // reverse view matrix
152    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
153    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
154    const double V11 = cos_phi*cos_theta;
155    const double V12 = sin_phi*cos_theta;
156    const double V21 = -sin_phi;
157    const double V22 = cos_phi;
158    const double V31 = sin_theta*cos_phi;
159    const double V32 = sin_phi*sin_theta;
160
161    // reverse jitter matrix
162    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
163    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
164    const double J31 = sin_theta;
165    const double J32 = -sin_phi*cos_theta;
166    const double J33 = cos_phi*cos_theta;
167
168    // reverse matrix
169    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
170    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
171}
172
173// Apply the rotation matrix returned from qac_rotation to the point (qx,qy),
174// returning R*[qx,qy]' = [qa,qc]'
175static void
176qac_apply(
177    QACRotation *rotation,
178    double qx, double qy,
179    double *qa_out, double *qc_out)
180{
181    const double dqc = rotation->R31*qx + rotation->R32*qy;
182    // Indirect calculation of qab, from qab^2 = |q|^2 - qc^2
183    const double dqa = sqrt(-dqc*dqc + qx*qx + qy*qy);
184
185    *qa_out = dqa;
186    *qc_out = dqc;
187}
188#endif // _QAC_SECTION
189
190#if !defined(_QABC_SECTION) && defined(CALL_IQ_ABC)
191#define _QABC_SECTION
192
193typedef struct {
194    double R11, R12;
195    double R21, R22;
196    double R31, R32;
197} QABCRotation;
198
199// Fill in the rotation matrix R from the view angles (theta, phi, psi) and the
200// jitter angles (dtheta, dphi, dpsi).  This matrix can be applied to all of the
201// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qb,qc]'
202static void
203qabc_rotation(
204    QABCRotation *rotation,
205    double theta, double phi, double psi,
206    double dtheta, double dphi, double dpsi)
207{
208    double sin_theta, cos_theta;
209    double sin_phi, cos_phi;
210    double sin_psi, cos_psi;
211
212    // reverse view matrix
213    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
214    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
215    SINCOS(psi*M_PI_180, sin_psi, cos_psi);
216    const double V11 = -sin_phi*sin_psi + cos_phi*cos_psi*cos_theta;
217    const double V12 = sin_phi*cos_psi*cos_theta + sin_psi*cos_phi;
218    const double V21 = -sin_phi*cos_psi - sin_psi*cos_phi*cos_theta;
219    const double V22 = -sin_phi*sin_psi*cos_theta + cos_phi*cos_psi;
220    const double V31 = sin_theta*cos_phi;
221    const double V32 = sin_phi*sin_theta;
222
223    // reverse jitter matrix
224    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
225    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
226    SINCOS(dpsi*M_PI_180, sin_psi, cos_psi);
227    const double J11 = cos_psi*cos_theta;
228    const double J12 = sin_phi*sin_theta*cos_psi + sin_psi*cos_phi;
229    const double J13 = sin_phi*sin_psi - sin_theta*cos_phi*cos_psi;
230    const double J21 = -sin_psi*cos_theta;
231    const double J22 = -sin_phi*sin_psi*sin_theta + cos_phi*cos_psi;
232    const double J23 = sin_phi*cos_psi + sin_psi*sin_theta*cos_phi;
233    const double J31 = sin_theta;
234    const double J32 = -sin_phi*cos_theta;
235    const double J33 = cos_phi*cos_theta;
236
237    // reverse matrix
238    rotation->R11 = J11*V11 + J12*V21 + J13*V31;
239    rotation->R12 = J11*V12 + J12*V22 + J13*V32;
240    rotation->R21 = J21*V11 + J22*V21 + J23*V31;
241    rotation->R22 = J21*V12 + J22*V22 + J23*V32;
242    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
243    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
244}
245
246// Apply the rotation matrix returned from qabc_rotation to the point (qx,qy),
247// returning R*[qx,qy]' = [qa,qb,qc]'
248static void
249qabc_apply(
250    QABCRotation *rotation,
251    double qx, double qy,
252    double *qa_out, double *qb_out, double *qc_out)
253{
254    *qa_out = rotation->R11*qx + rotation->R12*qy;
255    *qb_out = rotation->R21*qx + rotation->R22*qy;
256    *qc_out = rotation->R31*qx + rotation->R32*qy;
257}
258
259#endif // _QABC_SECTION
260
261
262// ==================== KERNEL CODE ========================
263
264kernel
265void KERNEL_NAME(
266    int32_t nq,                 // number of q values
267    const int32_t pd_start,     // where we are in the dispersity loop
268    const int32_t pd_stop,      // where we are stopping in the dispersity loop
269    global const ProblemDetails *details,
270    global const double *values,
271    global const double *q, // nq q values, with padding to boundary
272    global double *result,  // nq+1 return values, again with padding
273    const double cutoff     // cutoff in the dispersity weight product
274    )
275{
276#ifdef USE_OPENCL
277  // who we are and what element we are working with
278  const int q_index = get_global_id(0);
279  if (q_index >= nq) return;
280#else
281  // Define q_index here so that debugging statements can be written to work
282  // for both OpenCL and DLL using:
283  //    if (q_index == 0) {printf(...);}
284  int q_index = 0;
285#endif
286
287  // ** Fill in the local values table **
288  // Storage for the current parameter values.
289  // These will be updated as we walk the dispersity mesh.
290  ParameterBlock local_values;
291  //   values[0] is scale
292  //   values[1] is background
293  #ifdef USE_OPENMP
294  #pragma omp parallel for
295  #endif
296  for (int i=0; i < NUM_PARS; i++) {
297    local_values.vector[i] = values[2+i];
298    //if (q_index==0) printf("p%d = %g\n",i, local_values.vector[i]);
299  }
300  //if (q_index==0) printf("NUM_VALUES:%d  NUM_PARS:%d  MAX_PD:%d\n", NUM_VALUES, NUM_PARS, MAX_PD);
301  //if (q_index==0) printf("start:%d  stop:%d\n", pd_start, pd_stop);
302
303  // ** Precompute magnatism values **
304#if defined(MAGNETIC) && NUM_MAGNETIC>0
305  // Location of the sld parameters in the parameter vector.
306  // These parameters are updated with the effective sld due to magnetism.
307  const int32_t slds[] = { MAGNETIC_PARS };
308
309  // Interpret polarization cross section.
310  //     up_frac_i = values[NUM_PARS+2];
311  //     up_frac_f = values[NUM_PARS+3];
312  //     up_angle = values[NUM_PARS+4];
313  // TODO: could precompute more magnetism parameters before calling the kernel.
314  double spins[8];  // uu, ud real, du real, dd, ud imag, du imag, fill, fill
315  double cos_mspin, sin_mspin;
316  set_spin_weights(values[NUM_PARS+2], values[NUM_PARS+3], spins);
317  SINCOS(-values[NUM_PARS+4]*M_PI_180, sin_mspin, cos_mspin);
318#endif // MAGNETIC
319
320  // ** Fill in the initial results **
321  // If pd_start is zero that means that we are starting a new calculation,
322  // and must initialize the result to zero.  Otherwise, we are restarting
323  // the calculation from somewhere in the middle of the dispersity mesh,
324  // and we update the value rather than reset it. Similarly for the
325  // normalization factor, which is stored as the final value in the
326  // results vector (one past the number of q values).
327  //
328  // The code differs slightly between opencl and dll since opencl is only
329  // seeing one q value (stored in the variable "this_result") while the dll
330  // version must loop over all q.
331  #ifdef USE_OPENCL
332    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
333    double this_result = (pd_start == 0 ? 0.0 : result[q_index]);
334  #else // !USE_OPENCL
335    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
336    if (pd_start == 0) {
337      #ifdef USE_OPENMP
338      #pragma omp parallel for
339      #endif
340      for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0;
341    }
342    //if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]);
343#endif // !USE_OPENCL
344
345
346// ====== macros to set up the parts of the loop =======
347/*
348Based on the level of the loop, uses C preprocessor magic to construct
349level-specific looping variables, including these from loop level 3:
350
351  int n3 : length of loop for mesh level 3
352  int i3 : current position in the loop for level 3, which is calculated
353       from a combination of pd_start, pd_stride[3] and pd_length[3].
354  int p3 : is the index into the parameter table for mesh level 3
355  double v3[] : pointer into dispersity array to values for loop 3
356  double w3[] : pointer into dispersity array to weights for loop 3
357  double weight3 : the product of weights from levels 3 and up, computed
358       as weight5*weight4*w3[i3].  Note that we need an outermost
359       value weight5 set to 1.0 for this to work properly.
360
361After expansion, the loop struction will look like the following:
362
363  // --- PD_INIT(4) ---
364  const int n4 = pd_length[4];
365  const int p4 = pd_par[4];
366  global const double *v4 = pd_value + pd_offset[4];
367  global const double *w4 = pd_weight + pd_offset[4];
368  int i4 = (pd_start/pd_stride[4])%n4;  // position in level 4 at pd_start
369
370  // --- PD_INIT(3) ---
371  const int n3 = pd_length[3];
372  ...
373  int i3 = (pd_start/pd_stride[3])%n3;  // position in level 3 at pd_start
374
375  PD_INIT(2)
376  PD_INIT(1)
377  PD_INIT(0)
378
379  // --- PD_OUTERMOST_WEIGHT(5) ---
380  const double weight5 = 1.0;
381
382  // --- PD_OPEN(4,5) ---
383  while (i4 < n4) {
384    parameter[p4] = v4[i4];  // set the value for pd parameter 4 at this mesh point
385    const double weight4 = w4[i4] * weight5;
386
387    // from PD_OPEN(3,4)
388    while (i3 < n3) {
389      parameter[p3] = v3[i3];  // set the value for pd parameter 3 at this mesh point
390      const double weight3 = w3[i3] * weight4;
391
392      PD_OPEN(3,2)
393      PD_OPEN(2,1)
394      PD_OPEN(0,1)
395
396      // ... main loop body ...
397      APPLY_PROJECTION    // convert jitter values to spherical coords
398      BUILD_ROTATION      // construct the rotation matrix qxy => qabc
399      for each q
400          FETCH_Q         // set qx,qy from the q input vector
401          APPLY_ROTATION  // convert qx,qy to qa,qb,qc
402          CALL_KERNEL     // scattering = Iqxy(qa, qb, qc, p1, p2, ...)
403
404      ++step;  // increment counter representing position in dispersity mesh
405
406      PD_CLOSE(0)
407      PD_CLOSE(1)
408      PD_CLOSE(2)
409
410      // --- PD_CLOSE(3) ---
411      if (step >= pd_stop) break;
412      ++i3;
413    }
414    i3 = 0; // reset loop counter for next round through the loop
415
416    // --- PD_CLOSE(4) ---
417    if (step >= pd_stop) break;
418    ++i4;
419  }
420  i4 = 0; // reset loop counter even though no more rounds through the loop
421
422*/
423
424
425// ** prepare inner loops **
426
427// Depending on the shape type (radial, axial, triaxial), the variables
428// and calling parameters in the loop body will be slightly different.
429// Macros capture the differences in one spot so the rest of the code
430// is easier to read. The code below both declares variables for the
431// inner loop and defines the macros that use them.
432
433#if defined(CALL_IQ)
434  // unoriented 1D
435  double qk;
436  #define FETCH_Q() do { qk = q[q_index]; } while (0)
437  #define BUILD_ROTATION() do {} while(0)
438  #define APPLY_ROTATION() do {} while(0)
439  #define CALL_KERNEL() CALL_IQ(qk, local_values.table)
440
441#elif defined(CALL_IQ_A)
442  // unoriented 2D
443  double qx, qy;
444  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
445  #define BUILD_ROTATION() do {} while(0)
446  #define APPLY_ROTATION() do {} while(0)
447  #define CALL_KERNEL() CALL_IQ_A(sqrt(qx*qx+qy*qy), local_values.table)
448
449#elif defined(CALL_IQ_AC)
450  // oriented symmetric 2D
451  double qx, qy;
452  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
453  double qa, qc;
454  QACRotation rotation;
455  // theta, phi, dtheta, dphi are defined below in projection to avoid repeated code.
456  #define BUILD_ROTATION() qac_rotation(&rotation, theta, phi, dtheta, dphi);
457  #define APPLY_ROTATION() qac_apply(&rotation, qx, qy, &qa, &qc)
458  #define CALL_KERNEL() CALL_IQ_AC(qa, qc, local_values.table)
459
460#elif defined(CALL_IQ_ABC)
461  // oriented asymmetric 2D
462  double qx, qy;
463  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
464  double qa, qb, qc;
465  QABCRotation rotation;
466  // theta, phi, dtheta, dphi are defined below in projection to avoid repeated code.
467  // psi and dpsi are only for IQ_ABC, so they are processed here.
468  const double psi = values[details->theta_par+4];
469  local_values.table.psi = 0.;
470  #define BUILD_ROTATION() qabc_rotation(&rotation, theta, phi, psi, dtheta, dphi, local_values.table.psi)
471  #define APPLY_ROTATION() qabc_apply(&rotation, qx, qy, &qa, &qb, &qc)
472  #define CALL_KERNEL() CALL_IQ_ABC(qa, qb, qc, local_values.table)
473#elif defined(CALL_IQ_XY)
474  // direct call to qx,qy calculator
475  double qx, qy;
476  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
477  #define BUILD_ROTATION() do {} while(0)
478  #define APPLY_ROTATION() do {} while(0)
479  #define CALL_KERNEL() CALL_IQ_XY(qx, qy, local_values.table)
480#endif
481
482// Define APPLY_PROJECTION depending on model symmetries. We do this outside
483// the previous if block so that we don't need to repeat the identical
484// logic in the IQ_AC and IQ_ABC branches.  This will become more important
485// if we implement more projections, or more complicated projections.
486#if defined(CALL_IQ) || defined(CALL_IQ_A)  // no orientation
487  #define APPLY_PROJECTION() const double weight=weight0
488#elif defined(CALL_IQ_XY) // pass orientation to the model
489  // CRUFT: support oriented model which define Iqxy rather than Iqac or Iqabc
490  // Need to plug the values for the orientation angles back into parameter
491  // table in case they were overridden by the orientation offset.  This
492  // means that orientation dispersity will not work for these models, but
493  // it was broken anyway, so no matter.  Still want to provide Iqxy in case
494  // the user model wants full control of orientation/magnetism.
495  #if defined(HAVE_PSI)
496    const double theta = values[details->theta_par+2];
497    const double phi = values[details->theta_par+3];
498    const double psi = values[details->theta_par+4];
499    double weight;
500    #define APPLY_PROJECTION() do { \
501      local_values.table.theta = theta; \
502      local_values.table.phi = phi; \
503      local_values.table.psi = psi; \
504      weight=weight0; \
505    } while (0)
506  #elif defined(HAVE_THETA)
507    const double theta = values[details->theta_par+2];
508    const double phi = values[details->theta_par+3];
509    double weight;
510    #define APPLY_PROJECTION() do { \
511      local_values.table.theta = theta; \
512      local_values.table.phi = phi; \
513      weight=weight0; \
514    } while (0)
515  #else
516    #define APPLY_PROJECTION() const double weight=weight0
517  #endif
518#else // apply jitter and view before calling the model
519  // Grab the "view" angles (theta, phi, psi) from the initial parameter table.
520  const double theta = values[details->theta_par+2];
521  const double phi = values[details->theta_par+3];
522  // Make sure jitter angle defaults to zero if there is no jitter distribution
523  local_values.table.theta = 0.;
524  local_values.table.phi = 0.;
525  // The "jitter" angles (dtheta, dphi, dpsi) are stored with the
526  // dispersity values and copied to the local parameter table as
527  // we go through the mesh.
528  double dtheta, dphi, weight;
529  #if PROJECTION == 1 // equirectangular
530    #define APPLY_PROJECTION() do { \
531      dtheta = local_values.table.theta; \
532      dphi = local_values.table.phi; \
533      weight = fabs(cos(dtheta*M_PI_180)) * weight0; \
534    } while (0)
535  #elif PROJECTION == 2 // sinusoidal
536    #define APPLY_PROJECTION() do { \
537      dtheta = local_values.table.theta; \
538      dphi = local_values.table.phi; \
539      weight = weight0; \
540      if (dtheta != 90.0) dphi /= cos(dtheta*M_PI_180); \
541      else if (dphi != 0.0) weight = 0.; \
542      if (fabs(dphi) >= 180.) weight = 0.; \
543    } while (0)
544  #endif
545#endif // done defining APPLY_PROJECTION
546
547// ** define looping macros **
548
549// Define looping variables
550#define PD_INIT(_LOOP) \
551  const int n##_LOOP = details->pd_length[_LOOP]; \
552  const int p##_LOOP = details->pd_par[_LOOP]; \
553  global const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \
554  global const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \
555  int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP;
556
557// Jump into the middle of the dispersity loop
558#define PD_OPEN(_LOOP,_OUTER) \
559  while (i##_LOOP < n##_LOOP) { \
560    local_values.vector[p##_LOOP] = v##_LOOP[i##_LOOP]; \
561    const double weight##_LOOP = w##_LOOP[i##_LOOP] * weight##_OUTER;
562
563// create the variable "weight#=1.0" where # is the outermost level+1 (=MAX_PD).
564#define _PD_OUTERMOST_WEIGHT(_n) const double weight##_n = 1.0;
565#define PD_OUTERMOST_WEIGHT(_n) _PD_OUTERMOST_WEIGHT(_n)
566
567// Close out the loop
568#define PD_CLOSE(_LOOP) \
569    if (step >= pd_stop) break; \
570    ++i##_LOOP; \
571  } \
572  i##_LOOP = 0;
573
574// ====== construct the loops =======
575
576// Pointers to the start of the dispersity and weight vectors, if needed.
577#if MAX_PD>0
578  global const double *pd_value = values + NUM_VALUES;
579  global const double *pd_weight = pd_value + details->num_weights;
580#endif
581
582// The variable "step" is the current position in the dispersity loop.
583// It will be incremented each time a new point in the mesh is accumulated,
584// and used to test whether we have reached pd_stop.
585int step = pd_start;
586
587// *** define loops for each of 0, 1, 2, ..., modelinfo.MAX_PD-1 ***
588
589// define looping variables
590#if MAX_PD>4
591  PD_INIT(4)
592#endif
593#if MAX_PD>3
594  PD_INIT(3)
595#endif
596#if MAX_PD>2
597  PD_INIT(2)
598#endif
599#if MAX_PD>1
600  PD_INIT(1)
601#endif
602#if MAX_PD>0
603  PD_INIT(0)
604#endif
605
606// open nested loops
607PD_OUTERMOST_WEIGHT(MAX_PD)
608#if MAX_PD>4
609  PD_OPEN(4,5)
610#endif
611#if MAX_PD>3
612  PD_OPEN(3,4)
613#endif
614#if MAX_PD>2
615  PD_OPEN(2,3)
616#endif
617#if MAX_PD>1
618  PD_OPEN(1,2)
619#endif
620#if MAX_PD>0
621  PD_OPEN(0,1)
622#endif
623
624//if (q_index==0) {printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n");}
625
626  // ====== loop body =======
627  #ifdef INVALID
628  if (!INVALID(local_values.table))
629  #endif
630  {
631     APPLY_PROJECTION();
632
633    // Accumulate I(q)
634    // Note: weight==0 must always be excluded
635    if (weight > cutoff) {
636      pd_norm += weight * CALL_VOLUME(local_values.table);
637      BUILD_ROTATION();
638
639#ifndef USE_OPENCL
640      // DLL needs to explicitly loop over the q values.
641      #ifdef USE_OPENMP
642      #pragma omp parallel for
643      #endif
644      for (q_index=0; q_index<nq; q_index++)
645#endif // !USE_OPENCL
646      {
647
648        FETCH_Q();
649        APPLY_ROTATION();
650
651        // ======= COMPUTE SCATTERING ==========
652        #if defined(MAGNETIC) && NUM_MAGNETIC > 0
653          // Compute the scattering from the magnetic cross sections.
654          double scattering = 0.0;
655          const double qsq = qx*qx + qy*qy;
656          if (qsq > 1.e-16) {
657            // TODO: what is the magnetic scattering at q=0
658            const double px = (qy*cos_mspin + qx*sin_mspin)/qsq;
659            const double py = (qy*sin_mspin - qx*cos_mspin)/qsq;
660
661            // loop over uu, ud real, du real, dd, ud imag, du imag
662            for (unsigned int xs=0; xs<6; xs++) {
663              const double xs_weight = spins[xs];
664              if (xs_weight > 1.e-8) {
665                // Since the cross section weight is significant, set the slds
666                // to the effective slds for this cross section, call the
667                // kernel, and add according to weight.
668                for (int sk=0; sk<NUM_MAGNETIC; sk++) {
669                  const int32_t mag_index = NUM_PARS+5 + 3*sk;
670                  const int32_t sld_index = slds[sk];
671                  const double mx = values[mag_index];
672                  const double my = values[mag_index+1];
673                  const double mz = values[mag_index+2];
674                  local_values.vector[sld_index] =
675                    mag_sld(xs, qx, qy, px, py, values[sld_index+2], mx, my, mz);
676                }
677                scattering += xs_weight * CALL_KERNEL();
678              }
679            }
680          }
681        #else  // !MAGNETIC
682          const double scattering = CALL_KERNEL();
683        #endif // !MAGNETIC
684//printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight0);
685
686        #ifdef USE_OPENCL
687          this_result += weight * scattering;
688        #else // !USE_OPENCL
689          result[q_index] += weight * scattering;
690        #endif // !USE_OPENCL
691      }
692    }
693  }
694
695// close nested loops
696++step;
697#if MAX_PD>0
698  PD_CLOSE(0)
699#endif
700#if MAX_PD>1
701  PD_CLOSE(1)
702#endif
703#if MAX_PD>2
704  PD_CLOSE(2)
705#endif
706#if MAX_PD>3
707  PD_CLOSE(3)
708#endif
709#if MAX_PD>4
710  PD_CLOSE(4)
711#endif
712
713// Remember the current result and the updated norm.
714#ifdef USE_OPENCL
715  result[q_index] = this_result;
716  if (q_index == 0) result[nq] = pd_norm;
717//if (q_index == 0) printf("res: %g/%g\n", result[0], pd_norm);
718#else // !USE_OPENCL
719  result[nq] = pd_norm;
720//printf("res: %g/%g\n", result[0], pd_norm);
721#endif // !USE_OPENCL
722
723// ** clear the macros in preparation for the next kernel **
724#undef PD_INIT
725#undef PD_OPEN
726#undef PD_CLOSE
727#undef FETCH_Q
728#undef APPLY_PROJECTION
729#undef BUILD_ROTATION
730#undef APPLY_ROTATION
731#undef CALL_KERNEL
732}
Note: See TracBrowser for help on using the repository browser.