source: sasmodels/sasmodels/kernel_iq.c @ ef07e95

core_shell_microgelsmagnetic_modelticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since ef07e95 was 6aee3ab, checked in by Paul Kienzle <pkienzle@…>, 6 years ago

make sure that jitter angle defaults to zero if no polydispersity

  • Property mode set to 100644
File size: 23.6 KB
Line 
1/*
2    ##########################################################
3    #                                                        #
4    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
5    #   !!                                              !!   #
6    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
7    #   !!                                              !!   #
8    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
9    #                                                        #
10    ##########################################################
11*/
12
13// NOTE: the following macros are defined in generate.py:
14//
15//  MAX_PD : the maximum number of dispersity loops allowed for this model,
16//      which will be at most modelinfo.MAX_PD.
17//  NUM_PARS : the number of parameters in the parameter table
18//  NUM_VALUES : the number of values to skip at the start of the
19//      values array before you get to the dispersity values.
20//  PARAMETER_TABLE : list of parameter declarations used to create the
21//      ParameterTable type.
22//  KERNEL_NAME : model_Iq, model_Iqxy or model_Imagnetic.  This code is
23//      included three times, once for each kernel type.
24//  MAGNETIC : defined when the magnetic kernel is being instantiated
25//  NUM_MAGNETIC : the number of magnetic parameters
26//  MAGNETIC_PARS : a comma-separated list of indices to the sld
27//      parameters in the parameter table.
28//  CALL_VOLUME(table) : call the form volume function
29//  CALL_IQ(q, table) : call the Iq function for 1D calcs.
30//  CALL_IQ_A(q, table) : call the Iq function with |q| for 2D data.
31//  CALL_IQ_AC(qa, qc, table) : call the Iqxy function for symmetric shapes
32//  CALL_IQ_ABC(qa, qc, table) : call the Iqxy function for asymmetric shapes
33//  INVALID(table) : test if the current point is feesible to calculate.  This
34//      will be defined in the kernel definition file.
35//  PROJECTION : equirectangular=1, sinusoidal=2
36//      see explore/jitter.py for definitions.
37
38#ifndef _PAR_BLOCK_ // protected block so we can include this code twice.
39#define _PAR_BLOCK_
40
41typedef struct {
42#if MAX_PD > 0
43    int32_t pd_par[MAX_PD];     // id of the nth dispersity variable
44    int32_t pd_length[MAX_PD];  // length of the nth dispersity weight vector
45    int32_t pd_offset[MAX_PD];  // offset of pd weights in the value & weight vector
46    int32_t pd_stride[MAX_PD];  // stride to move to the next index at this level
47#endif // MAX_PD > 0
48    int32_t num_eval;           // total number of voxels in hypercube
49    int32_t num_weights;        // total length of the weights vector
50    int32_t num_active;         // number of non-trivial pd loops
51    int32_t theta_par;          // id of first orientation variable
52} ProblemDetails;
53
54// Intel HD 4000 needs private arrays to be a multiple of 4 long
55typedef struct {
56    PARAMETER_TABLE
57} ParameterTable;
58typedef union {
59    ParameterTable table;
60    double vector[4*((NUM_PARS+3)/4)];
61} ParameterBlock;
62#endif // _PAR_BLOCK_
63
64#if defined(MAGNETIC) && NUM_MAGNETIC > 0
65// ===== Helper functions for magnetism =====
66
67// Return value restricted between low and high
68static double clip(double value, double low, double high)
69{
70  return (value < low ? low : (value > high ? high : value));
71}
72
73// Compute spin cross sections given in_spin and out_spin
74// To convert spin cross sections to sld b:
75//     uu * (sld - m_sigma_x);
76//     dd * (sld + m_sigma_x);
77//     ud * (m_sigma_y - 1j*m_sigma_z);
78//     du * (m_sigma_y + 1j*m_sigma_z);
79// weights for spin crosssections: dd du real, ud real, uu, du imag, ud imag
80static void set_spin_weights(double in_spin, double out_spin, double spins[4])
81{
82  in_spin = clip(in_spin, 0.0, 1.0);
83  out_spin = clip(out_spin, 0.0, 1.0);
84  spins[0] = sqrt(sqrt((1.0-in_spin) * (1.0-out_spin))); // dd
85  spins[1] = sqrt(sqrt((1.0-in_spin) * out_spin));       // du real
86  spins[2] = sqrt(sqrt(in_spin * (1.0-out_spin)));       // ud real
87  spins[3] = sqrt(sqrt(in_spin * out_spin));             // uu
88  spins[4] = spins[1]; // du imag
89  spins[5] = spins[2]; // ud imag
90}
91
92// Compute the magnetic sld
93static double mag_sld(
94  const int xs, // 0=dd, 1=du real, 2=ud real, 3=uu, 4=du imag, 5=up imag
95  const double qx, const double qy,
96  const double px, const double py,
97  const double sld,
98  const double mx, const double my, const double mz
99)
100{
101  if (xs < 4) {
102    const double perp = qy*mx - qx*my;
103    switch (xs) {
104      case 0: // uu => sld - D M_perpx
105          return sld - px*perp;
106      case 1: // ud real => -D M_perpy
107          return py*perp;
108      case 2: // du real => -D M_perpy
109          return py*perp;
110      case 3: // dd real => sld + D M_perpx
111          return sld + px*perp;
112    }
113  } else {
114    if (xs== 4) {
115      return -mz;  // ud imag => -D M_perpz
116    } else { // index == 5
117      return mz;   // du imag => D M_perpz
118    }
119  }
120}
121
122
123#endif
124
125// ===== Helper functions for orientation and jitter =====
126
127// To change the definition of the angles, run explore/angles.py, which
128// uses sympy to generate the equations.
129
130#if !defined(_QAC_SECTION) && defined(CALL_IQ_AC)
131#define _QAC_SECTION
132
133typedef struct {
134    double R31, R32;
135} QACRotation;
136
137// Fill in the rotation matrix R from the view angles (theta, phi) and the
138// jitter angles (dtheta, dphi).  This matrix can be applied to all of the
139// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qc]'
140static void
141qac_rotation(
142    QACRotation *rotation,
143    double theta, double phi,
144    double dtheta, double dphi)
145{
146    double sin_theta, cos_theta;
147    double sin_phi, cos_phi;
148
149    // reverse view matrix
150    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
151    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
152    const double V11 = cos_phi*cos_theta;
153    const double V12 = sin_phi*cos_theta;
154    const double V21 = -sin_phi;
155    const double V22 = cos_phi;
156    const double V31 = sin_theta*cos_phi;
157    const double V32 = sin_phi*sin_theta;
158
159    // reverse jitter matrix
160    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
161    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
162    const double J31 = sin_theta;
163    const double J32 = -sin_phi*cos_theta;
164    const double J33 = cos_phi*cos_theta;
165
166    // reverse matrix
167    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
168    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
169}
170
171// Apply the rotation matrix returned from qac_rotation to the point (qx,qy),
172// returning R*[qx,qy]' = [qa,qc]'
173static double
174qac_apply(
175    QACRotation rotation,
176    double qx, double qy,
177    double *qa_out, double *qc_out)
178{
179    const double dqc = rotation.R31*qx + rotation.R32*qy;
180    // Indirect calculation of qab, from qab^2 = |q|^2 - qc^2
181    const double dqa = sqrt(-dqc*dqc + qx*qx + qy*qy);
182
183    *qa_out = dqa;
184    *qc_out = dqc;
185}
186#endif // _QAC_SECTION
187
188#if !defined(_QABC_SECTION) && defined(CALL_IQ_ABC)
189#define _QABC_SECTION
190
191typedef struct {
192    double R11, R12;
193    double R21, R22;
194    double R31, R32;
195} QABCRotation;
196
197// Fill in the rotation matrix R from the view angles (theta, phi, psi) and the
198// jitter angles (dtheta, dphi, dpsi).  This matrix can be applied to all of the
199// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qb,qc]'
200static void
201qabc_rotation(
202    QABCRotation *rotation,
203    double theta, double phi, double psi,
204    double dtheta, double dphi, double dpsi)
205{
206    double sin_theta, cos_theta;
207    double sin_phi, cos_phi;
208    double sin_psi, cos_psi;
209
210    // reverse view matrix
211    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
212    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
213    SINCOS(psi*M_PI_180, sin_psi, cos_psi);
214    const double V11 = -sin_phi*sin_psi + cos_phi*cos_psi*cos_theta;
215    const double V12 = sin_phi*cos_psi*cos_theta + sin_psi*cos_phi;
216    const double V21 = -sin_phi*cos_psi - sin_psi*cos_phi*cos_theta;
217    const double V22 = -sin_phi*sin_psi*cos_theta + cos_phi*cos_psi;
218    const double V31 = sin_theta*cos_phi;
219    const double V32 = sin_phi*sin_theta;
220
221    // reverse jitter matrix
222    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
223    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
224    SINCOS(dpsi*M_PI_180, sin_psi, cos_psi);
225    const double J11 = cos_psi*cos_theta;
226    const double J12 = sin_phi*sin_theta*cos_psi + sin_psi*cos_phi;
227    const double J13 = sin_phi*sin_psi - sin_theta*cos_phi*cos_psi;
228    const double J21 = -sin_psi*cos_theta;
229    const double J22 = -sin_phi*sin_psi*sin_theta + cos_phi*cos_psi;
230    const double J23 = sin_phi*cos_psi + sin_psi*sin_theta*cos_phi;
231    const double J31 = sin_theta;
232    const double J32 = -sin_phi*cos_theta;
233    const double J33 = cos_phi*cos_theta;
234
235    // reverse matrix
236    rotation->R11 = J11*V11 + J12*V21 + J13*V31;
237    rotation->R12 = J11*V12 + J12*V22 + J13*V32;
238    rotation->R21 = J21*V11 + J22*V21 + J23*V31;
239    rotation->R22 = J21*V12 + J22*V22 + J23*V32;
240    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
241    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
242}
243
244// Apply the rotation matrix returned from qabc_rotation to the point (qx,qy),
245// returning R*[qx,qy]' = [qa,qb,qc]'
246static double
247qabc_apply(
248    QABCRotation rotation,
249    double qx, double qy,
250    double *qa_out, double *qb_out, double *qc_out)
251{
252    *qa_out = rotation.R11*qx + rotation.R12*qy;
253    *qb_out = rotation.R21*qx + rotation.R22*qy;
254    *qc_out = rotation.R31*qx + rotation.R32*qy;
255}
256
257#endif // _QABC_SECTION
258
259
260// ==================== KERNEL CODE ========================
261
262kernel
263void KERNEL_NAME(
264    int32_t nq,                 // number of q values
265    const int32_t pd_start,     // where we are in the dispersity loop
266    const int32_t pd_stop,      // where we are stopping in the dispersity loop
267    global const ProblemDetails *details,
268    global const double *values,
269    global const double *q, // nq q values, with padding to boundary
270    global double *result,  // nq+1 return values, again with padding
271    const double cutoff     // cutoff in the dispersity weight product
272    )
273{
274#ifdef USE_OPENCL
275  // who we are and what element we are working with
276  const int q_index = get_global_id(0);
277  if (q_index >= nq) return;
278#else
279  // Define q_index here so that debugging statements can be written to work
280  // for both OpenCL and DLL using:
281  //    if (q_index == 0) {printf(...);}
282  int q_index = 0;
283#endif
284
285  // ** Fill in the local values table **
286  // Storage for the current parameter values.
287  // These will be updated as we walk the dispersity mesh.
288  ParameterBlock local_values;
289  //   values[0] is scale
290  //   values[1] is background
291  #ifdef USE_OPENMP
292  #pragma omp parallel for
293  #endif
294  for (int i=0; i < NUM_PARS; i++) {
295    local_values.vector[i] = values[2+i];
296    //if (q_index==0) printf("p%d = %g\n",i, local_values.vector[i]);
297  }
298  //if (q_index==0) printf("NUM_VALUES:%d  NUM_PARS:%d  MAX_PD:%d\n", NUM_VALUES, NUM_PARS, MAX_PD);
299  //if (q_index==0) printf("start:%d  stop:%d\n", pd_start, pd_stop);
300
301  // ** Precompute magnatism values **
302#if defined(MAGNETIC) && NUM_MAGNETIC>0
303  // Location of the sld parameters in the parameter vector.
304  // These parameters are updated with the effective sld due to magnetism.
305  const int32_t slds[] = { MAGNETIC_PARS };
306
307  // Interpret polarization cross section.
308  //     up_frac_i = values[NUM_PARS+2];
309  //     up_frac_f = values[NUM_PARS+3];
310  //     up_angle = values[NUM_PARS+4];
311  // TODO: could precompute more magnetism parameters before calling the kernel.
312  double spins[8];  // uu, ud real, du real, dd, ud imag, du imag, fill, fill
313  double cos_mspin, sin_mspin;
314  set_spin_weights(values[NUM_PARS+2], values[NUM_PARS+3], spins);
315  SINCOS(-values[NUM_PARS+4]*M_PI_180, sin_mspin, cos_mspin);
316#endif // MAGNETIC
317
318  // ** Fill in the initial results **
319  // If pd_start is zero that means that we are starting a new calculation,
320  // and must initialize the result to zero.  Otherwise, we are restarting
321  // the calculation from somewhere in the middle of the dispersity mesh,
322  // and we update the value rather than reset it. Similarly for the
323  // normalization factor, which is stored as the final value in the
324  // results vector (one past the number of q values).
325  //
326  // The code differs slightly between opencl and dll since opencl is only
327  // seeing one q value (stored in the variable "this_result") while the dll
328  // version must loop over all q.
329  #ifdef USE_OPENCL
330    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
331    double this_result = (pd_start == 0 ? 0.0 : result[q_index]);
332  #else // !USE_OPENCL
333    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
334    if (pd_start == 0) {
335      #ifdef USE_OPENMP
336      #pragma omp parallel for
337      #endif
338      for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0;
339    }
340    //if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]);
341#endif // !USE_OPENCL
342
343
344// ====== macros to set up the parts of the loop =======
345/*
346Based on the level of the loop, uses C preprocessor magic to construct
347level-specific looping variables, including these from loop level 3:
348
349  int n3 : length of loop for mesh level 3
350  int i3 : current position in the loop for level 3, which is calculated
351       from a combination of pd_start, pd_stride[3] and pd_length[3].
352  int p3 : is the index into the parameter table for mesh level 3
353  double v3[] : pointer into dispersity array to values for loop 3
354  double w3[] : pointer into dispersity array to weights for loop 3
355  double weight3 : the product of weights from levels 3 and up, computed
356       as weight5*weight4*w3[i3].  Note that we need an outermost
357       value weight5 set to 1.0 for this to work properly.
358
359After expansion, the loop struction will look like the following:
360
361  // --- PD_INIT(4) ---
362  const int n4 = pd_length[4];
363  const int p4 = pd_par[4];
364  global const double *v4 = pd_value + pd_offset[4];
365  global const double *w4 = pd_weight + pd_offset[4];
366  int i4 = (pd_start/pd_stride[4])%n4;  // position in level 4 at pd_start
367
368  // --- PD_INIT(3) ---
369  const int n3 = pd_length[3];
370  ...
371  int i3 = (pd_start/pd_stride[3])%n3;  // position in level 3 at pd_start
372
373  PD_INIT(2)
374  PD_INIT(1)
375  PD_INIT(0)
376
377  // --- PD_OUTERMOST_WEIGHT(5) ---
378  const double weight5 = 1.0;
379
380  // --- PD_OPEN(4,5) ---
381  while (i4 < n4) {
382    parameter[p4] = v4[i4];  // set the value for pd parameter 4 at this mesh point
383    const double weight4 = w4[i4] * weight5;
384
385    // from PD_OPEN(3,4)
386    while (i3 < n3) {
387      parameter[p3] = v3[i3];  // set the value for pd parameter 3 at this mesh point
388      const double weight3 = w3[i3] * weight4;
389
390      PD_OPEN(3,2)
391      PD_OPEN(2,1)
392      PD_OPEN(0,1)
393
394      // ... main loop body ...
395      APPLY_PROJECTION    // convert jitter values to spherical coords
396      BUILD_ROTATION      // construct the rotation matrix qxy => qabc
397      for each q
398          FETCH_Q         // set qx,qy from the q input vector
399          APPLY_ROTATION  // convert qx,qy to qa,qb,qc
400          CALL_KERNEL     // scattering = Iqxy(qa, qb, qc, p1, p2, ...)
401
402      ++step;  // increment counter representing position in dispersity mesh
403
404      PD_CLOSE(0)
405      PD_CLOSE(1)
406      PD_CLOSE(2)
407
408      // --- PD_CLOSE(3) ---
409      if (step >= pd_stop) break;
410      ++i3;
411    }
412    i3 = 0; // reset loop counter for next round through the loop
413
414    // --- PD_CLOSE(4) ---
415    if (step >= pd_stop) break;
416    ++i4;
417  }
418  i4 = 0; // reset loop counter even though no more rounds through the loop
419
420*/
421
422
423// ** prepare inner loops **
424
425// Depending on the shape type (radial, axial, triaxial), the variables
426// and calling parameters in the loop body will be slightly different.
427// Macros capture the differences in one spot so the rest of the code
428// is easier to read. The code below both declares variables for the
429// inner loop and defines the macros that use them.
430
431#if defined(CALL_IQ)
432  // unoriented 1D
433  double qk;
434  #define FETCH_Q() do { qk = q[q_index]; } while (0)
435  #define BUILD_ROTATION() do {} while(0)
436  #define APPLY_ROTATION() do {} while(0)
437  #define CALL_KERNEL() CALL_IQ(qk, local_values.table)
438
439#elif defined(CALL_IQ_A)
440  // unoriented 2D
441  double qx, qy;
442  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
443  #define BUILD_ROTATION() do {} while(0)
444  #define APPLY_ROTATION() do {} while(0)
445  #define CALL_KERNEL() CALL_IQ_A(sqrt(qx*qx+qy*qy), local_values.table)
446
447#elif defined(CALL_IQ_AC)
448  // oriented symmetric 2D
449  double qx, qy;
450  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
451  double qa, qc;
452  QACRotation rotation;
453  // theta, phi, dtheta, dphi are defined below in projection to avoid repeated code.
454  #define BUILD_ROTATION() qac_rotation(&rotation, theta, phi, dtheta, dphi);
455  #define APPLY_ROTATION() qac_apply(rotation, qx, qy, &qa, &qc)
456  #define CALL_KERNEL() CALL_IQ_AC(qa, qc, local_values.table)
457
458#elif defined(CALL_IQ_ABC)
459  // oriented asymmetric 2D
460  double qx, qy;
461  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
462  double qa, qb, qc;
463  QABCRotation rotation;
464  // theta, phi, dtheta, dphi are defined below in projection to avoid repeated code.
465  // psi and dpsi are only for IQ_ABC, so they are processed here.
466  const double psi = values[details->theta_par+4];
467  local_values.table.psi = 0.;
468  #define BUILD_ROTATION() qabc_rotation(&rotation, theta, phi, psi, dtheta, dphi, local_values.table.psi)
469  #define APPLY_ROTATION() qabc_apply(rotation, qx, qy, &qa, &qb, &qc)
470  #define CALL_KERNEL() CALL_IQ_ABC(qa, qb, qc, local_values.table)
471#endif
472
473// Doing jitter projection code outside the previous if block so that we don't
474// need to repeat the identical logic in the IQ_AC and IQ_ABC branches.  This
475// will become more important if we implement more projections, or more
476// complicated projections.
477#if defined(CALL_IQ) || defined(CALL_IQ_A)
478  #define APPLY_PROJECTION() const double weight=weight0
479#else // !spherosymmetric projection
480  // Grab the "view" angles (theta, phi, psi) from the initial parameter table.
481  const double theta = values[details->theta_par+2];
482  const double phi = values[details->theta_par+3];
483  // Make sure jitter angle defaults to zero if there is no jitter distribution
484  local_values.table.theta = 0.;
485  local_values.table.phi = 0.;
486  // The "jitter" angles (dtheta, dphi, dpsi) are stored with the
487  // dispersity values and copied to the local parameter table as
488  // we go through the mesh.
489  double dtheta, dphi, weight;
490  #if PROJECTION == 1
491    #define APPLY_PROJECTION() do { \
492      dtheta = local_values.table.theta; \
493      dphi = local_values.table.phi; \
494      weight = fabs(cos(dtheta*M_PI_180)) * weight0; \
495    } while (0)
496  #elif PROJECTION == 2
497    #define APPLY_PROJECTION() do { \
498      dtheta = local_values.table.theta; \
499      dphi = local_values.table.phi; \
500      weight = weight0; \
501      if (dtheta != 90.0) dphi /= cos(dtheta*M_PI_180); \
502      else if (dphi != 0.0) weight = 0.; \
503      if (fabs(dphi) >= 180.) weight = 0.; \
504    } while (0)
505  #endif
506#endif // !spherosymmetric projection
507
508// ** define looping macros **
509
510// Define looping variables
511#define PD_INIT(_LOOP) \
512  const int n##_LOOP = details->pd_length[_LOOP]; \
513  const int p##_LOOP = details->pd_par[_LOOP]; \
514  global const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \
515  global const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \
516  int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP;
517
518// Jump into the middle of the dispersity loop
519#define PD_OPEN(_LOOP,_OUTER) \
520  while (i##_LOOP < n##_LOOP) { \
521    local_values.vector[p##_LOOP] = v##_LOOP[i##_LOOP]; \
522    const double weight##_LOOP = w##_LOOP[i##_LOOP] * weight##_OUTER;
523
524// create the variable "weight#=1.0" where # is the outermost level+1 (=MAX_PD).
525#define _PD_OUTERMOST_WEIGHT(_n) const double weight##_n = 1.0;
526#define PD_OUTERMOST_WEIGHT(_n) _PD_OUTERMOST_WEIGHT(_n)
527
528// Close out the loop
529#define PD_CLOSE(_LOOP) \
530    if (step >= pd_stop) break; \
531    ++i##_LOOP; \
532  } \
533  i##_LOOP = 0;
534
535// ====== construct the loops =======
536
537// Pointers to the start of the dispersity and weight vectors, if needed.
538#if MAX_PD>0
539  global const double *pd_value = values + NUM_VALUES;
540  global const double *pd_weight = pd_value + details->num_weights;
541#endif
542
543// The variable "step" is the current position in the dispersity loop.
544// It will be incremented each time a new point in the mesh is accumulated,
545// and used to test whether we have reached pd_stop.
546int step = pd_start;
547
548// *** define loops for each of 0, 1, 2, ..., modelinfo.MAX_PD-1 ***
549
550// define looping variables
551#if MAX_PD>4
552  PD_INIT(4)
553#endif
554#if MAX_PD>3
555  PD_INIT(3)
556#endif
557#if MAX_PD>2
558  PD_INIT(2)
559#endif
560#if MAX_PD>1
561  PD_INIT(1)
562#endif
563#if MAX_PD>0
564  PD_INIT(0)
565#endif
566
567// open nested loops
568PD_OUTERMOST_WEIGHT(MAX_PD)
569#if MAX_PD>4
570  PD_OPEN(4,5)
571#endif
572#if MAX_PD>3
573  PD_OPEN(3,4)
574#endif
575#if MAX_PD>2
576  PD_OPEN(2,3)
577#endif
578#if MAX_PD>1
579  PD_OPEN(1,2)
580#endif
581#if MAX_PD>0
582  PD_OPEN(0,1)
583#endif
584
585//if (q_index==0) {printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n");}
586
587  // ====== loop body =======
588  #ifdef INVALID
589  if (!INVALID(local_values.table))
590  #endif
591  {
592     APPLY_PROJECTION();
593
594    // Accumulate I(q)
595    // Note: weight==0 must always be excluded
596    if (weight > cutoff) {
597      pd_norm += weight * CALL_VOLUME(local_values.table);
598      BUILD_ROTATION();
599
600#ifndef USE_OPENCL
601      // DLL needs to explicitly loop over the q values.
602      #ifdef USE_OPENMP
603      #pragma omp parallel for
604      #endif
605      for (q_index=0; q_index<nq; q_index++)
606#endif // !USE_OPENCL
607      {
608
609        FETCH_Q();
610        APPLY_ROTATION();
611
612        // ======= COMPUTE SCATTERING ==========
613        #if defined(MAGNETIC) && NUM_MAGNETIC > 0
614          // Compute the scattering from the magnetic cross sections.
615          double scattering = 0.0;
616          const double qsq = qx*qx + qy*qy;
617          if (qsq > 1.e-16) {
618            // TODO: what is the magnetic scattering at q=0
619            const double px = (qy*cos_mspin + qx*sin_mspin)/qsq;
620            const double py = (qy*sin_mspin - qx*cos_mspin)/qsq;
621
622            // loop over uu, ud real, du real, dd, ud imag, du imag
623            for (int xs=0; xs<6; xs++) {
624              const double xs_weight = spins[xs];
625              if (xs_weight > 1.e-8) {
626                // Since the cross section weight is significant, set the slds
627                // to the effective slds for this cross section, call the
628                // kernel, and add according to weight.
629                for (int sk=0; sk<NUM_MAGNETIC; sk++) {
630                  const int32_t mag_index = NUM_PARS+5 + 3*sk;
631                  const int32_t sld_index = slds[sk];
632                  const double mx = values[mag_index];
633                  const double my = values[mag_index+1];
634                  const double mz = values[mag_index+2];
635                  local_values.vector[sld_index] =
636                    mag_sld(xs, qx, qy, px, py, values[sld_index+2], mx, my, mz);
637                }
638                scattering += xs_weight * CALL_KERNEL();
639              }
640            }
641          }
642        #else  // !MAGNETIC
643          const double scattering = CALL_KERNEL();
644        #endif // !MAGNETIC
645//printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight0);
646
647        #ifdef USE_OPENCL
648          this_result += weight * scattering;
649        #else // !USE_OPENCL
650          result[q_index] += weight * scattering;
651        #endif // !USE_OPENCL
652      }
653    }
654  }
655
656// close nested loops
657++step;
658#if MAX_PD>0
659  PD_CLOSE(0)
660#endif
661#if MAX_PD>1
662  PD_CLOSE(1)
663#endif
664#if MAX_PD>2
665  PD_CLOSE(2)
666#endif
667#if MAX_PD>3
668  PD_CLOSE(3)
669#endif
670#if MAX_PD>4
671  PD_CLOSE(4)
672#endif
673
674// Remember the current result and the updated norm.
675#ifdef USE_OPENCL
676  result[q_index] = this_result;
677  if (q_index == 0) result[nq] = pd_norm;
678//if (q_index == 0) printf("res: %g/%g\n", result[0], pd_norm);
679#else // !USE_OPENCL
680  result[nq] = pd_norm;
681//printf("res: %g/%g\n", result[0], pd_norm);
682#endif // !USE_OPENCL
683
684// ** clear the macros in preparation for the next kernel **
685#undef PD_INIT
686#undef PD_OPEN
687#undef PD_CLOSE
688#undef FETCH_Q
689#undef APPLY_PROJECTION
690#undef BUILD_ROTATION
691#undef APPLY_ROTATION
692#undef CALL_KERNEL
693}
Note: See TracBrowser for help on using the repository browser.