source: sasmodels/sasmodels/kernel_iq.c @ 924a119

core_shell_microgelsmagnetic_modelticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since 924a119 was 924a119, checked in by GitHub <noreply@…>, 6 years ago

Merge pull request #57 from SasView?/ticket-1043

use Iqac/Iqabc? for the new orientation interface but Iqxy for the old. Fixes #1043

  • Property mode set to 100644
File size: 25.3 KB
Line 
1/*
2    ##########################################################
3    #                                                        #
4    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
5    #   !!                                              !!   #
6    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
7    #   !!                                              !!   #
8    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
9    #                                                        #
10    ##########################################################
11*/
12
13// NOTE: the following macros are defined in generate.py:
14//
15//  MAX_PD : the maximum number of dispersity loops allowed for this model,
16//      which will be at most modelinfo.MAX_PD.
17//  NUM_PARS : the number of parameters in the parameter table
18//  NUM_VALUES : the number of values to skip at the start of the
19//      values array before you get to the dispersity values.
20//  PARAMETER_TABLE : list of parameter declarations used to create the
21//      ParameterTable type.
22//  KERNEL_NAME : model_Iq, model_Iqxy or model_Imagnetic.  This code is
23//      included three times, once for each kernel type.
24//  MAGNETIC : defined when the magnetic kernel is being instantiated
25//  NUM_MAGNETIC : the number of magnetic parameters
26//  MAGNETIC_PARS : a comma-separated list of indices to the sld
27//      parameters in the parameter table.
28//  CALL_VOLUME(table) : call the form volume function
29//  CALL_IQ(q, table) : call the Iq function for 1D calcs.
30//  CALL_IQ_A(q, table) : call the Iq function with |q| for 2D data.
31//  CALL_IQ_AC(qa, qc, table) : call the Iqxy function for symmetric shapes
32//  CALL_IQ_ABC(qa, qc, table) : call the Iqxy function for asymmetric shapes
33//  CALL_IQ_XY(qx, qy, table) : call the Iqxy function for arbitrary models
34//  INVALID(table) : test if the current point is feesible to calculate.  This
35//      will be defined in the kernel definition file.
36//  PROJECTION : equirectangular=1, sinusoidal=2
37//      see explore/jitter.py for definitions.
38
39#ifndef _PAR_BLOCK_ // protected block so we can include this code twice.
40#define _PAR_BLOCK_
41
42typedef struct {
43#if MAX_PD > 0
44    int32_t pd_par[MAX_PD];     // id of the nth dispersity variable
45    int32_t pd_length[MAX_PD];  // length of the nth dispersity weight vector
46    int32_t pd_offset[MAX_PD];  // offset of pd weights in the value & weight vector
47    int32_t pd_stride[MAX_PD];  // stride to move to the next index at this level
48#endif // MAX_PD > 0
49    int32_t num_eval;           // total number of voxels in hypercube
50    int32_t num_weights;        // total length of the weights vector
51    int32_t num_active;         // number of non-trivial pd loops
52    int32_t theta_par;          // id of first orientation variable
53} ProblemDetails;
54
55// Intel HD 4000 needs private arrays to be a multiple of 4 long
56typedef struct {
57    PARAMETER_TABLE
58} ParameterTable;
59typedef union {
60    ParameterTable table;
61    double vector[4*((NUM_PARS+3)/4)];
62} ParameterBlock;
63#endif // _PAR_BLOCK_
64
65#if defined(MAGNETIC) && NUM_MAGNETIC > 0
66// ===== Helper functions for magnetism =====
67
68// Return value restricted between low and high
69static double clip(double value, double low, double high)
70{
71  return (value < low ? low : (value > high ? high : value));
72}
73
74// Compute spin cross sections given in_spin and out_spin
75// To convert spin cross sections to sld b:
76//     uu * (sld - m_sigma_x);
77//     dd * (sld + m_sigma_x);
78//     ud * (m_sigma_y - 1j*m_sigma_z);
79//     du * (m_sigma_y + 1j*m_sigma_z);
80// weights for spin crosssections: dd du real, ud real, uu, du imag, ud imag
81static void set_spin_weights(double in_spin, double out_spin, double spins[4])
82{
83  in_spin = clip(in_spin, 0.0, 1.0);
84  out_spin = clip(out_spin, 0.0, 1.0);
85  spins[0] = sqrt(sqrt((1.0-in_spin) * (1.0-out_spin))); // dd
86  spins[1] = sqrt(sqrt((1.0-in_spin) * out_spin));       // du real
87  spins[2] = sqrt(sqrt(in_spin * (1.0-out_spin)));       // ud real
88  spins[3] = sqrt(sqrt(in_spin * out_spin));             // uu
89  spins[4] = spins[1]; // du imag
90  spins[5] = spins[2]; // ud imag
91}
92
93// Compute the magnetic sld
94static double mag_sld(
95  const int xs, // 0=dd, 1=du real, 2=ud real, 3=uu, 4=du imag, 5=up imag
96  const double qx, const double qy,
97  const double px, const double py,
98  const double sld,
99  const double mx, const double my, const double mz
100)
101{
102  if (xs < 4) {
103    const double perp = qy*mx - qx*my;
104    switch (xs) {
105      case 0: // uu => sld - D M_perpx
106          return sld - px*perp;
107      case 1: // ud real => -D M_perpy
108          return py*perp;
109      case 2: // du real => -D M_perpy
110          return py*perp;
111      case 3: // dd real => sld + D M_perpx
112          return sld + px*perp;
113    }
114  } else {
115    if (xs== 4) {
116      return -mz;  // ud imag => -D M_perpz
117    } else { // index == 5
118      return mz;   // du imag => D M_perpz
119    }
120  }
121}
122
123
124#endif
125
126// ===== Helper functions for orientation and jitter =====
127
128// To change the definition of the angles, run explore/angles.py, which
129// uses sympy to generate the equations.
130
131#if !defined(_QAC_SECTION) && defined(CALL_IQ_AC)
132#define _QAC_SECTION
133
134typedef struct {
135    double R31, R32;
136} QACRotation;
137
138// Fill in the rotation matrix R from the view angles (theta, phi) and the
139// jitter angles (dtheta, dphi).  This matrix can be applied to all of the
140// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qc]'
141static void
142qac_rotation(
143    QACRotation *rotation,
144    double theta, double phi,
145    double dtheta, double dphi)
146{
147    double sin_theta, cos_theta;
148    double sin_phi, cos_phi;
149
150    // reverse view matrix
151    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
152    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
153    const double V11 = cos_phi*cos_theta;
154    const double V12 = sin_phi*cos_theta;
155    const double V21 = -sin_phi;
156    const double V22 = cos_phi;
157    const double V31 = sin_theta*cos_phi;
158    const double V32 = sin_phi*sin_theta;
159
160    // reverse jitter matrix
161    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
162    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
163    const double J31 = sin_theta;
164    const double J32 = -sin_phi*cos_theta;
165    const double J33 = cos_phi*cos_theta;
166
167    // reverse matrix
168    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
169    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
170}
171
172// Apply the rotation matrix returned from qac_rotation to the point (qx,qy),
173// returning R*[qx,qy]' = [qa,qc]'
174static double
175qac_apply(
176    QACRotation *rotation,
177    double qx, double qy,
178    double *qa_out, double *qc_out)
179{
180    const double dqc = rotation->R31*qx + rotation->R32*qy;
181    // Indirect calculation of qab, from qab^2 = |q|^2 - qc^2
182    const double dqa = sqrt(-dqc*dqc + qx*qx + qy*qy);
183
184    *qa_out = dqa;
185    *qc_out = dqc;
186}
187#endif // _QAC_SECTION
188
189#if !defined(_QABC_SECTION) && defined(CALL_IQ_ABC)
190#define _QABC_SECTION
191
192typedef struct {
193    double R11, R12;
194    double R21, R22;
195    double R31, R32;
196} QABCRotation;
197
198// Fill in the rotation matrix R from the view angles (theta, phi, psi) and the
199// jitter angles (dtheta, dphi, dpsi).  This matrix can be applied to all of the
200// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qb,qc]'
201static void
202qabc_rotation(
203    QABCRotation *rotation,
204    double theta, double phi, double psi,
205    double dtheta, double dphi, double dpsi)
206{
207    double sin_theta, cos_theta;
208    double sin_phi, cos_phi;
209    double sin_psi, cos_psi;
210
211    // reverse view matrix
212    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
213    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
214    SINCOS(psi*M_PI_180, sin_psi, cos_psi);
215    const double V11 = -sin_phi*sin_psi + cos_phi*cos_psi*cos_theta;
216    const double V12 = sin_phi*cos_psi*cos_theta + sin_psi*cos_phi;
217    const double V21 = -sin_phi*cos_psi - sin_psi*cos_phi*cos_theta;
218    const double V22 = -sin_phi*sin_psi*cos_theta + cos_phi*cos_psi;
219    const double V31 = sin_theta*cos_phi;
220    const double V32 = sin_phi*sin_theta;
221
222    // reverse jitter matrix
223    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
224    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
225    SINCOS(dpsi*M_PI_180, sin_psi, cos_psi);
226    const double J11 = cos_psi*cos_theta;
227    const double J12 = sin_phi*sin_theta*cos_psi + sin_psi*cos_phi;
228    const double J13 = sin_phi*sin_psi - sin_theta*cos_phi*cos_psi;
229    const double J21 = -sin_psi*cos_theta;
230    const double J22 = -sin_phi*sin_psi*sin_theta + cos_phi*cos_psi;
231    const double J23 = sin_phi*cos_psi + sin_psi*sin_theta*cos_phi;
232    const double J31 = sin_theta;
233    const double J32 = -sin_phi*cos_theta;
234    const double J33 = cos_phi*cos_theta;
235
236    // reverse matrix
237    rotation->R11 = J11*V11 + J12*V21 + J13*V31;
238    rotation->R12 = J11*V12 + J12*V22 + J13*V32;
239    rotation->R21 = J21*V11 + J22*V21 + J23*V31;
240    rotation->R22 = J21*V12 + J22*V22 + J23*V32;
241    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
242    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
243}
244
245// Apply the rotation matrix returned from qabc_rotation to the point (qx,qy),
246// returning R*[qx,qy]' = [qa,qb,qc]'
247static double
248qabc_apply(
249    QABCRotation *rotation,
250    double qx, double qy,
251    double *qa_out, double *qb_out, double *qc_out)
252{
253    *qa_out = rotation->R11*qx + rotation->R12*qy;
254    *qb_out = rotation->R21*qx + rotation->R22*qy;
255    *qc_out = rotation->R31*qx + rotation->R32*qy;
256}
257
258#endif // _QABC_SECTION
259
260
261// ==================== KERNEL CODE ========================
262
263kernel
264void KERNEL_NAME(
265    int32_t nq,                 // number of q values
266    const int32_t pd_start,     // where we are in the dispersity loop
267    const int32_t pd_stop,      // where we are stopping in the dispersity loop
268    global const ProblemDetails *details,
269    global const double *values,
270    global const double *q, // nq q values, with padding to boundary
271    global double *result,  // nq+1 return values, again with padding
272    const double cutoff     // cutoff in the dispersity weight product
273    )
274{
275#ifdef USE_OPENCL
276  // who we are and what element we are working with
277  const int q_index = get_global_id(0);
278  if (q_index >= nq) return;
279#else
280  // Define q_index here so that debugging statements can be written to work
281  // for both OpenCL and DLL using:
282  //    if (q_index == 0) {printf(...);}
283  int q_index = 0;
284#endif
285
286  // ** Fill in the local values table **
287  // Storage for the current parameter values.
288  // These will be updated as we walk the dispersity mesh.
289  ParameterBlock local_values;
290  //   values[0] is scale
291  //   values[1] is background
292  #ifdef USE_OPENMP
293  #pragma omp parallel for
294  #endif
295  for (int i=0; i < NUM_PARS; i++) {
296    local_values.vector[i] = values[2+i];
297    //if (q_index==0) printf("p%d = %g\n",i, local_values.vector[i]);
298  }
299  //if (q_index==0) printf("NUM_VALUES:%d  NUM_PARS:%d  MAX_PD:%d\n", NUM_VALUES, NUM_PARS, MAX_PD);
300  //if (q_index==0) printf("start:%d  stop:%d\n", pd_start, pd_stop);
301
302  // ** Precompute magnatism values **
303#if defined(MAGNETIC) && NUM_MAGNETIC>0
304  // Location of the sld parameters in the parameter vector.
305  // These parameters are updated with the effective sld due to magnetism.
306  const int32_t slds[] = { MAGNETIC_PARS };
307
308  // Interpret polarization cross section.
309  //     up_frac_i = values[NUM_PARS+2];
310  //     up_frac_f = values[NUM_PARS+3];
311  //     up_angle = values[NUM_PARS+4];
312  // TODO: could precompute more magnetism parameters before calling the kernel.
313  double spins[8];  // uu, ud real, du real, dd, ud imag, du imag, fill, fill
314  double cos_mspin, sin_mspin;
315  set_spin_weights(values[NUM_PARS+2], values[NUM_PARS+3], spins);
316  SINCOS(-values[NUM_PARS+4]*M_PI_180, sin_mspin, cos_mspin);
317#endif // MAGNETIC
318
319  // ** Fill in the initial results **
320  // If pd_start is zero that means that we are starting a new calculation,
321  // and must initialize the result to zero.  Otherwise, we are restarting
322  // the calculation from somewhere in the middle of the dispersity mesh,
323  // and we update the value rather than reset it. Similarly for the
324  // normalization factor, which is stored as the final value in the
325  // results vector (one past the number of q values).
326  //
327  // The code differs slightly between opencl and dll since opencl is only
328  // seeing one q value (stored in the variable "this_result") while the dll
329  // version must loop over all q.
330  #ifdef USE_OPENCL
331    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
332    double this_result = (pd_start == 0 ? 0.0 : result[q_index]);
333  #else // !USE_OPENCL
334    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
335    if (pd_start == 0) {
336      #ifdef USE_OPENMP
337      #pragma omp parallel for
338      #endif
339      for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0;
340    }
341    //if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]);
342#endif // !USE_OPENCL
343
344
345// ====== macros to set up the parts of the loop =======
346/*
347Based on the level of the loop, uses C preprocessor magic to construct
348level-specific looping variables, including these from loop level 3:
349
350  int n3 : length of loop for mesh level 3
351  int i3 : current position in the loop for level 3, which is calculated
352       from a combination of pd_start, pd_stride[3] and pd_length[3].
353  int p3 : is the index into the parameter table for mesh level 3
354  double v3[] : pointer into dispersity array to values for loop 3
355  double w3[] : pointer into dispersity array to weights for loop 3
356  double weight3 : the product of weights from levels 3 and up, computed
357       as weight5*weight4*w3[i3].  Note that we need an outermost
358       value weight5 set to 1.0 for this to work properly.
359
360After expansion, the loop struction will look like the following:
361
362  // --- PD_INIT(4) ---
363  const int n4 = pd_length[4];
364  const int p4 = pd_par[4];
365  global const double *v4 = pd_value + pd_offset[4];
366  global const double *w4 = pd_weight + pd_offset[4];
367  int i4 = (pd_start/pd_stride[4])%n4;  // position in level 4 at pd_start
368
369  // --- PD_INIT(3) ---
370  const int n3 = pd_length[3];
371  ...
372  int i3 = (pd_start/pd_stride[3])%n3;  // position in level 3 at pd_start
373
374  PD_INIT(2)
375  PD_INIT(1)
376  PD_INIT(0)
377
378  // --- PD_OUTERMOST_WEIGHT(5) ---
379  const double weight5 = 1.0;
380
381  // --- PD_OPEN(4,5) ---
382  while (i4 < n4) {
383    parameter[p4] = v4[i4];  // set the value for pd parameter 4 at this mesh point
384    const double weight4 = w4[i4] * weight5;
385
386    // from PD_OPEN(3,4)
387    while (i3 < n3) {
388      parameter[p3] = v3[i3];  // set the value for pd parameter 3 at this mesh point
389      const double weight3 = w3[i3] * weight4;
390
391      PD_OPEN(3,2)
392      PD_OPEN(2,1)
393      PD_OPEN(0,1)
394
395      // ... main loop body ...
396      APPLY_PROJECTION    // convert jitter values to spherical coords
397      BUILD_ROTATION      // construct the rotation matrix qxy => qabc
398      for each q
399          FETCH_Q         // set qx,qy from the q input vector
400          APPLY_ROTATION  // convert qx,qy to qa,qb,qc
401          CALL_KERNEL     // scattering = Iqxy(qa, qb, qc, p1, p2, ...)
402
403      ++step;  // increment counter representing position in dispersity mesh
404
405      PD_CLOSE(0)
406      PD_CLOSE(1)
407      PD_CLOSE(2)
408
409      // --- PD_CLOSE(3) ---
410      if (step >= pd_stop) break;
411      ++i3;
412    }
413    i3 = 0; // reset loop counter for next round through the loop
414
415    // --- PD_CLOSE(4) ---
416    if (step >= pd_stop) break;
417    ++i4;
418  }
419  i4 = 0; // reset loop counter even though no more rounds through the loop
420
421*/
422
423
424// ** prepare inner loops **
425
426// Depending on the shape type (radial, axial, triaxial), the variables
427// and calling parameters in the loop body will be slightly different.
428// Macros capture the differences in one spot so the rest of the code
429// is easier to read. The code below both declares variables for the
430// inner loop and defines the macros that use them.
431
432#if defined(CALL_IQ)
433  // unoriented 1D
434  double qk;
435  #define FETCH_Q() do { qk = q[q_index]; } while (0)
436  #define BUILD_ROTATION() do {} while(0)
437  #define APPLY_ROTATION() do {} while(0)
438  #define CALL_KERNEL() CALL_IQ(qk, local_values.table)
439
440#elif defined(CALL_IQ_A)
441  // unoriented 2D
442  double qx, qy;
443  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
444  #define BUILD_ROTATION() do {} while(0)
445  #define APPLY_ROTATION() do {} while(0)
446  #define CALL_KERNEL() CALL_IQ_A(sqrt(qx*qx+qy*qy), local_values.table)
447
448#elif defined(CALL_IQ_AC)
449  // oriented symmetric 2D
450  double qx, qy;
451  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
452  double qa, qc;
453  QACRotation rotation;
454  // theta, phi, dtheta, dphi are defined below in projection to avoid repeated code.
455  #define BUILD_ROTATION() qac_rotation(&rotation, theta, phi, dtheta, dphi);
456  #define APPLY_ROTATION() qac_apply(&rotation, qx, qy, &qa, &qc)
457  #define CALL_KERNEL() CALL_IQ_AC(qa, qc, local_values.table)
458
459#elif defined(CALL_IQ_ABC)
460  // oriented asymmetric 2D
461  double qx, qy;
462  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
463  double qa, qb, qc;
464  QABCRotation rotation;
465  // theta, phi, dtheta, dphi are defined below in projection to avoid repeated code.
466  // psi and dpsi are only for IQ_ABC, so they are processed here.
467  const double psi = values[details->theta_par+4];
468  local_values.table.psi = 0.;
469  #define BUILD_ROTATION() qabc_rotation(&rotation, theta, phi, psi, dtheta, dphi, local_values.table.psi)
470  #define APPLY_ROTATION() qabc_apply(&rotation, qx, qy, &qa, &qb, &qc)
471  #define CALL_KERNEL() CALL_IQ_ABC(qa, qb, qc, local_values.table)
472#elif defined(CALL_IQ_XY)
473  // direct call to qx,qy calculator
474  double qx, qy;
475  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
476  #define BUILD_ROTATION() do {} while(0)
477  #define APPLY_ROTATION() do {} while(0)
478  #define CALL_KERNEL() CALL_IQ_XY(qx, qy, local_values.table)
479#endif
480
481// Define APPLY_PROJECTION depending on model symmetries. We do this outside
482// the previous if block so that we don't need to repeat the identical
483// logic in the IQ_AC and IQ_ABC branches.  This will become more important
484// if we implement more projections, or more complicated projections.
485#if defined(CALL_IQ) || defined(CALL_IQ_A)  // no orientation
486  #define APPLY_PROJECTION() const double weight=weight0
487#elif defined(CALL_IQ_XY) // pass orientation to the model
488  // CRUFT: support oriented model which define Iqxy rather than Iqac or Iqabc
489  // Need to plug the values for the orientation angles back into parameter
490  // table in case they were overridden by the orientation offset.  This
491  // means that orientation dispersity will not work for these models, but
492  // it was broken anyway, so no matter.  Still want to provide Iqxy in case
493  // the user model wants full control of orientation/magnetism.
494  #if defined(HAVE_PSI)
495    const double theta = values[details->theta_par+2];
496    const double phi = values[details->theta_par+3];
497    const double psi = values[details->theta_par+4];
498    double weight;
499    #define APPLY_PROJECTION() do { \
500      local_values.table.theta = theta; \
501      local_values.table.phi = phi; \
502      local_values.table.psi = psi; \
503      weight=weight0; \
504    } while (0)
505  #elif defined(HAVE_THETA)
506    const double theta = values[details->theta_par+2];
507    const double phi = values[details->theta_par+3];
508    double weight;
509    #define APPLY_PROJECTION() do { \
510      local_values.table.theta = theta; \
511      local_values.table.phi = phi; \
512      weight=weight0; \
513    } while (0)
514  #else
515    #define APPLY_PROJECTION() const double weight=weight0
516  #endif
517#else // apply jitter and view before calling the model
518  // Grab the "view" angles (theta, phi, psi) from the initial parameter table.
519  const double theta = values[details->theta_par+2];
520  const double phi = values[details->theta_par+3];
521  // Make sure jitter angle defaults to zero if there is no jitter distribution
522  local_values.table.theta = 0.;
523  local_values.table.phi = 0.;
524  // The "jitter" angles (dtheta, dphi, dpsi) are stored with the
525  // dispersity values and copied to the local parameter table as
526  // we go through the mesh.
527  double dtheta, dphi, weight;
528  #if PROJECTION == 1 // equirectangular
529    #define APPLY_PROJECTION() do { \
530      dtheta = local_values.table.theta; \
531      dphi = local_values.table.phi; \
532      weight = fabs(cos(dtheta*M_PI_180)) * weight0; \
533    } while (0)
534  #elif PROJECTION == 2 // sinusoidal
535    #define APPLY_PROJECTION() do { \
536      dtheta = local_values.table.theta; \
537      dphi = local_values.table.phi; \
538      weight = weight0; \
539      if (dtheta != 90.0) dphi /= cos(dtheta*M_PI_180); \
540      else if (dphi != 0.0) weight = 0.; \
541      if (fabs(dphi) >= 180.) weight = 0.; \
542    } while (0)
543  #endif
544#endif // done defining APPLY_PROJECTION
545
546// ** define looping macros **
547
548// Define looping variables
549#define PD_INIT(_LOOP) \
550  const int n##_LOOP = details->pd_length[_LOOP]; \
551  const int p##_LOOP = details->pd_par[_LOOP]; \
552  global const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \
553  global const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \
554  int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP;
555
556// Jump into the middle of the dispersity loop
557#define PD_OPEN(_LOOP,_OUTER) \
558  while (i##_LOOP < n##_LOOP) { \
559    local_values.vector[p##_LOOP] = v##_LOOP[i##_LOOP]; \
560    const double weight##_LOOP = w##_LOOP[i##_LOOP] * weight##_OUTER;
561
562// create the variable "weight#=1.0" where # is the outermost level+1 (=MAX_PD).
563#define _PD_OUTERMOST_WEIGHT(_n) const double weight##_n = 1.0;
564#define PD_OUTERMOST_WEIGHT(_n) _PD_OUTERMOST_WEIGHT(_n)
565
566// Close out the loop
567#define PD_CLOSE(_LOOP) \
568    if (step >= pd_stop) break; \
569    ++i##_LOOP; \
570  } \
571  i##_LOOP = 0;
572
573// ====== construct the loops =======
574
575// Pointers to the start of the dispersity and weight vectors, if needed.
576#if MAX_PD>0
577  global const double *pd_value = values + NUM_VALUES;
578  global const double *pd_weight = pd_value + details->num_weights;
579#endif
580
581// The variable "step" is the current position in the dispersity loop.
582// It will be incremented each time a new point in the mesh is accumulated,
583// and used to test whether we have reached pd_stop.
584int step = pd_start;
585
586// *** define loops for each of 0, 1, 2, ..., modelinfo.MAX_PD-1 ***
587
588// define looping variables
589#if MAX_PD>4
590  PD_INIT(4)
591#endif
592#if MAX_PD>3
593  PD_INIT(3)
594#endif
595#if MAX_PD>2
596  PD_INIT(2)
597#endif
598#if MAX_PD>1
599  PD_INIT(1)
600#endif
601#if MAX_PD>0
602  PD_INIT(0)
603#endif
604
605// open nested loops
606PD_OUTERMOST_WEIGHT(MAX_PD)
607#if MAX_PD>4
608  PD_OPEN(4,5)
609#endif
610#if MAX_PD>3
611  PD_OPEN(3,4)
612#endif
613#if MAX_PD>2
614  PD_OPEN(2,3)
615#endif
616#if MAX_PD>1
617  PD_OPEN(1,2)
618#endif
619#if MAX_PD>0
620  PD_OPEN(0,1)
621#endif
622
623//if (q_index==0) {printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n");}
624
625  // ====== loop body =======
626  #ifdef INVALID
627  if (!INVALID(local_values.table))
628  #endif
629  {
630     APPLY_PROJECTION();
631
632    // Accumulate I(q)
633    // Note: weight==0 must always be excluded
634    if (weight > cutoff) {
635      pd_norm += weight * CALL_VOLUME(local_values.table);
636      BUILD_ROTATION();
637
638#ifndef USE_OPENCL
639      // DLL needs to explicitly loop over the q values.
640      #ifdef USE_OPENMP
641      #pragma omp parallel for
642      #endif
643      for (q_index=0; q_index<nq; q_index++)
644#endif // !USE_OPENCL
645      {
646
647        FETCH_Q();
648        APPLY_ROTATION();
649
650        // ======= COMPUTE SCATTERING ==========
651        #if defined(MAGNETIC) && NUM_MAGNETIC > 0
652          // Compute the scattering from the magnetic cross sections.
653          double scattering = 0.0;
654          const double qsq = qx*qx + qy*qy;
655          if (qsq > 1.e-16) {
656            // TODO: what is the magnetic scattering at q=0
657            const double px = (qy*cos_mspin + qx*sin_mspin)/qsq;
658            const double py = (qy*sin_mspin - qx*cos_mspin)/qsq;
659
660            // loop over uu, ud real, du real, dd, ud imag, du imag
661            for (int xs=0; xs<6; xs++) {
662              const double xs_weight = spins[xs];
663              if (xs_weight > 1.e-8) {
664                // Since the cross section weight is significant, set the slds
665                // to the effective slds for this cross section, call the
666                // kernel, and add according to weight.
667                for (int sk=0; sk<NUM_MAGNETIC; sk++) {
668                  const int32_t mag_index = NUM_PARS+5 + 3*sk;
669                  const int32_t sld_index = slds[sk];
670                  const double mx = values[mag_index];
671                  const double my = values[mag_index+1];
672                  const double mz = values[mag_index+2];
673                  local_values.vector[sld_index] =
674                    mag_sld(xs, qx, qy, px, py, values[sld_index+2], mx, my, mz);
675                }
676                scattering += xs_weight * CALL_KERNEL();
677              }
678            }
679          }
680        #else  // !MAGNETIC
681          const double scattering = CALL_KERNEL();
682        #endif // !MAGNETIC
683//printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight0);
684
685        #ifdef USE_OPENCL
686          this_result += weight * scattering;
687        #else // !USE_OPENCL
688          result[q_index] += weight * scattering;
689        #endif // !USE_OPENCL
690      }
691    }
692  }
693
694// close nested loops
695++step;
696#if MAX_PD>0
697  PD_CLOSE(0)
698#endif
699#if MAX_PD>1
700  PD_CLOSE(1)
701#endif
702#if MAX_PD>2
703  PD_CLOSE(2)
704#endif
705#if MAX_PD>3
706  PD_CLOSE(3)
707#endif
708#if MAX_PD>4
709  PD_CLOSE(4)
710#endif
711
712// Remember the current result and the updated norm.
713#ifdef USE_OPENCL
714  result[q_index] = this_result;
715  if (q_index == 0) result[nq] = pd_norm;
716//if (q_index == 0) printf("res: %g/%g\n", result[0], pd_norm);
717#else // !USE_OPENCL
718  result[nq] = pd_norm;
719//printf("res: %g/%g\n", result[0], pd_norm);
720#endif // !USE_OPENCL
721
722// ** clear the macros in preparation for the next kernel **
723#undef PD_INIT
724#undef PD_OPEN
725#undef PD_CLOSE
726#undef FETCH_Q
727#undef APPLY_PROJECTION
728#undef BUILD_ROTATION
729#undef APPLY_ROTATION
730#undef CALL_KERNEL
731}
Note: See TracBrowser for help on using the repository browser.