source: sasmodels/sasmodels/kernel_iq.c @ 96153e4

core_shell_microgelsmagnetic_modelticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since 96153e4 was dc6f601, checked in by Paul Kienzle <pkienzle@…>, 6 years ago

restore up_frac_i/f behaviour of sasview 3.1.2

  • Property mode set to 100644
File size: 25.8 KB
Line 
1/*
2    ##########################################################
3    #                                                        #
4    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
5    #   !!                                              !!   #
6    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
7    #   !!                                              !!   #
8    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
9    #                                                        #
10    ##########################################################
11*/
12
13// NOTE: the following macros are defined in generate.py:
14//
15//  MAX_PD : the maximum number of dispersity loops allowed for this model,
16//      which will be at most modelinfo.MAX_PD.
17//  NUM_PARS : the number of parameters in the parameter table
18//  NUM_VALUES : the number of values to skip at the start of the
19//      values array before you get to the dispersity values.
20//  PARAMETER_TABLE : list of parameter declarations used to create the
21//      ParameterTable type.
22//  KERNEL_NAME : model_Iq, model_Iqxy or model_Imagnetic.  This code is
23//      included three times, once for each kernel type.
24//  MAGNETIC : defined when the magnetic kernel is being instantiated
25//  NUM_MAGNETIC : the number of magnetic parameters
26//  MAGNETIC_PARS : a comma-separated list of indices to the sld
27//      parameters in the parameter table.
28//  CALL_VOLUME(table) : call the form volume function
29//  CALL_IQ(q, table) : call the Iq function for 1D calcs.
30//  CALL_IQ_A(q, table) : call the Iq function with |q| for 2D data.
31//  CALL_IQ_AC(qa, qc, table) : call the Iqxy function for symmetric shapes
32//  CALL_IQ_ABC(qa, qc, table) : call the Iqxy function for asymmetric shapes
33//  CALL_IQ_XY(qx, qy, table) : call the Iqxy function for arbitrary models
34//  INVALID(table) : test if the current point is feesible to calculate.  This
35//      will be defined in the kernel definition file.
36//  PROJECTION : equirectangular=1, sinusoidal=2
37//      see explore/jitter.py for definitions.
38
39#ifndef _PAR_BLOCK_ // protected block so we can include this code twice.
40#define _PAR_BLOCK_
41
42typedef struct {
43#if MAX_PD > 0
44    int32_t pd_par[MAX_PD];     // id of the nth dispersity variable
45    int32_t pd_length[MAX_PD];  // length of the nth dispersity weight vector
46    int32_t pd_offset[MAX_PD];  // offset of pd weights in the value & weight vector
47    int32_t pd_stride[MAX_PD];  // stride to move to the next index at this level
48#endif // MAX_PD > 0
49    int32_t num_eval;           // total number of voxels in hypercube
50    int32_t num_weights;        // total length of the weights vector
51    int32_t num_active;         // number of non-trivial pd loops
52    int32_t theta_par;          // id of first orientation variable
53} ProblemDetails;
54
55// Intel HD 4000 needs private arrays to be a multiple of 4 long
56typedef struct {
57    PARAMETER_TABLE
58} ParameterTable;
59typedef union {
60    ParameterTable table;
61    double vector[4*((NUM_PARS+3)/4)];
62} ParameterBlock;
63#endif // _PAR_BLOCK_
64
65#if defined(MAGNETIC) && NUM_MAGNETIC > 0
66// ===== Helper functions for magnetism =====
67
68// Return value restricted between low and high
69static double clip(double value, double low, double high)
70{
71  return (value < low ? low : (value > high ? high : value));
72}
73
74// Compute spin cross sections given in_spin and out_spin
75// To convert spin cross sections to sld b:
76//     uu * (sld - m_sigma_x);
77//     dd * (sld + m_sigma_x);
78//     ud * (m_sigma_y - 1j*m_sigma_z);
79//     du * (m_sigma_y + 1j*m_sigma_z);
80// weights for spin crosssections: dd du real, ud real, uu, du imag, ud imag
81static void set_spin_weights(double in_spin, double out_spin, double weight[6])
82{
83  in_spin = clip(in_spin, 0.0, 1.0);
84  out_spin = clip(out_spin, 0.0, 1.0);
85  // Note: sasview 3.1 scaled all slds by sqrt(weight) and assumed that
86  //     w*I(q, rho1, rho2, ...) = I(q, sqrt(w)*rho1, sqrt(w)*rho2, ...)
87  // which is likely to be the case for simple models.
88  weight[0] = sqrt((1.0-in_spin) * (1.0-out_spin)); // dd
89  weight[1] = sqrt((1.0-in_spin) * out_spin);       // du.real
90  weight[2] = sqrt(in_spin * (1.0-out_spin));       // ud.real
91  weight[3] = sqrt(in_spin * out_spin);             // uu
92  weight[4] = weight[1]; // du.imag
93  weight[5] = weight[2]; // ud.imag
94}
95
96// Compute the magnetic sld
97static double mag_sld(
98  const unsigned int xs, // 0=dd, 1=du.real, 2=ud.real, 3=uu, 4=du.imag, 5=ud.imag
99  const double qx, const double qy,
100  const double px, const double py,
101  const double sld,
102  const double mx, const double my, const double mz
103)
104{
105  if (xs < 4) {
106    const double perp = qy*mx - qx*my;
107    switch (xs) {
108      default: // keep compiler happy; condition ensures xs in [0,1,2,3]
109      case 0: // uu => sld - D M_perpx
110          return sld - px*perp;
111      case 1: // ud.real => -D M_perpy
112          return py*perp;
113      case 2: // du.real => -D M_perpy
114          return py*perp;
115      case 3: // dd => sld + D M_perpx
116          return sld + px*perp;
117    }
118  } else {
119    if (xs== 4) {
120      return -mz;  // du.imag => +D M_perpz
121    } else { // index == 5
122      return +mz;  // ud.imag => -D M_perpz
123    }
124  }
125}
126
127
128#endif
129
130// ===== Helper functions for orientation and jitter =====
131
132// To change the definition of the angles, run explore/angles.py, which
133// uses sympy to generate the equations.
134
135#if !defined(_QAC_SECTION) && defined(CALL_IQ_AC)
136#define _QAC_SECTION
137
138typedef struct {
139    double R31, R32;
140} QACRotation;
141
142// Fill in the rotation matrix R from the view angles (theta, phi) and the
143// jitter angles (dtheta, dphi).  This matrix can be applied to all of the
144// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qc]'
145static void
146qac_rotation(
147    QACRotation *rotation,
148    double theta, double phi,
149    double dtheta, double dphi)
150{
151    double sin_theta, cos_theta;
152    double sin_phi, cos_phi;
153
154    // reverse view matrix
155    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
156    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
157    const double V11 = cos_phi*cos_theta;
158    const double V12 = sin_phi*cos_theta;
159    const double V21 = -sin_phi;
160    const double V22 = cos_phi;
161    const double V31 = sin_theta*cos_phi;
162    const double V32 = sin_phi*sin_theta;
163
164    // reverse jitter matrix
165    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
166    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
167    const double J31 = sin_theta;
168    const double J32 = -sin_phi*cos_theta;
169    const double J33 = cos_phi*cos_theta;
170
171    // reverse matrix
172    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
173    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
174}
175
176// Apply the rotation matrix returned from qac_rotation to the point (qx,qy),
177// returning R*[qx,qy]' = [qa,qc]'
178static void
179qac_apply(
180    QACRotation *rotation,
181    double qx, double qy,
182    double *qa_out, double *qc_out)
183{
184    const double dqc = rotation->R31*qx + rotation->R32*qy;
185    // Indirect calculation of qab, from qab^2 = |q|^2 - qc^2
186    const double dqa = sqrt(-dqc*dqc + qx*qx + qy*qy);
187
188    *qa_out = dqa;
189    *qc_out = dqc;
190}
191#endif // _QAC_SECTION
192
193#if !defined(_QABC_SECTION) && defined(CALL_IQ_ABC)
194#define _QABC_SECTION
195
196typedef struct {
197    double R11, R12;
198    double R21, R22;
199    double R31, R32;
200} QABCRotation;
201
202// Fill in the rotation matrix R from the view angles (theta, phi, psi) and the
203// jitter angles (dtheta, dphi, dpsi).  This matrix can be applied to all of the
204// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qb,qc]'
205static void
206qabc_rotation(
207    QABCRotation *rotation,
208    double theta, double phi, double psi,
209    double dtheta, double dphi, double dpsi)
210{
211    double sin_theta, cos_theta;
212    double sin_phi, cos_phi;
213    double sin_psi, cos_psi;
214
215    // reverse view matrix
216    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
217    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
218    SINCOS(psi*M_PI_180, sin_psi, cos_psi);
219    const double V11 = -sin_phi*sin_psi + cos_phi*cos_psi*cos_theta;
220    const double V12 = sin_phi*cos_psi*cos_theta + sin_psi*cos_phi;
221    const double V21 = -sin_phi*cos_psi - sin_psi*cos_phi*cos_theta;
222    const double V22 = -sin_phi*sin_psi*cos_theta + cos_phi*cos_psi;
223    const double V31 = sin_theta*cos_phi;
224    const double V32 = sin_phi*sin_theta;
225
226    // reverse jitter matrix
227    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
228    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
229    SINCOS(dpsi*M_PI_180, sin_psi, cos_psi);
230    const double J11 = cos_psi*cos_theta;
231    const double J12 = sin_phi*sin_theta*cos_psi + sin_psi*cos_phi;
232    const double J13 = sin_phi*sin_psi - sin_theta*cos_phi*cos_psi;
233    const double J21 = -sin_psi*cos_theta;
234    const double J22 = -sin_phi*sin_psi*sin_theta + cos_phi*cos_psi;
235    const double J23 = sin_phi*cos_psi + sin_psi*sin_theta*cos_phi;
236    const double J31 = sin_theta;
237    const double J32 = -sin_phi*cos_theta;
238    const double J33 = cos_phi*cos_theta;
239
240    // reverse matrix
241    rotation->R11 = J11*V11 + J12*V21 + J13*V31;
242    rotation->R12 = J11*V12 + J12*V22 + J13*V32;
243    rotation->R21 = J21*V11 + J22*V21 + J23*V31;
244    rotation->R22 = J21*V12 + J22*V22 + J23*V32;
245    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
246    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
247}
248
249// Apply the rotation matrix returned from qabc_rotation to the point (qx,qy),
250// returning R*[qx,qy]' = [qa,qb,qc]'
251static void
252qabc_apply(
253    QABCRotation *rotation,
254    double qx, double qy,
255    double *qa_out, double *qb_out, double *qc_out)
256{
257    *qa_out = rotation->R11*qx + rotation->R12*qy;
258    *qb_out = rotation->R21*qx + rotation->R22*qy;
259    *qc_out = rotation->R31*qx + rotation->R32*qy;
260}
261
262#endif // _QABC_SECTION
263
264
265// ==================== KERNEL CODE ========================
266
267kernel
268void KERNEL_NAME(
269    int32_t nq,                 // number of q values
270    const int32_t pd_start,     // where we are in the dispersity loop
271    const int32_t pd_stop,      // where we are stopping in the dispersity loop
272    global const ProblemDetails *details,
273    global const double *values,
274    global const double *q, // nq q values, with padding to boundary
275    global double *result,  // nq+1 return values, again with padding
276    const double cutoff     // cutoff in the dispersity weight product
277    )
278{
279#ifdef USE_OPENCL
280  // who we are and what element we are working with
281  const int q_index = get_global_id(0);
282  if (q_index >= nq) return;
283#else
284  // Define q_index here so that debugging statements can be written to work
285  // for both OpenCL and DLL using:
286  //    if (q_index == 0) {printf(...);}
287  int q_index = 0;
288#endif
289
290  // ** Fill in the local values table **
291  // Storage for the current parameter values.
292  // These will be updated as we walk the dispersity mesh.
293  ParameterBlock local_values;
294  //   values[0] is scale
295  //   values[1] is background
296  #ifdef USE_OPENMP
297  #pragma omp parallel for
298  #endif
299  for (int i=0; i < NUM_PARS; i++) {
300    local_values.vector[i] = values[2+i];
301    //if (q_index==0) printf("p%d = %g\n",i, local_values.vector[i]);
302  }
303  //if (q_index==0) printf("NUM_VALUES:%d  NUM_PARS:%d  MAX_PD:%d\n", NUM_VALUES, NUM_PARS, MAX_PD);
304  //if (q_index==0) printf("start:%d  stop:%d\n", pd_start, pd_stop);
305
306  // ** Precompute magnatism values **
307#if defined(MAGNETIC) && NUM_MAGNETIC>0
308  // Location of the sld parameters in the parameter vector.
309  // These parameters are updated with the effective sld due to magnetism.
310  const int32_t slds[] = { MAGNETIC_PARS };
311
312  // Interpret polarization cross section.
313  //     up_frac_i = values[NUM_PARS+2];
314  //     up_frac_f = values[NUM_PARS+3];
315  //     up_angle = values[NUM_PARS+4];
316  // TODO: could precompute more magnetism parameters before calling the kernel.
317  double xs_weights[8];  // uu, ud real, du real, dd, ud imag, du imag, fill, fill
318  double cos_mspin, sin_mspin;
319  set_spin_weights(values[NUM_PARS+2], values[NUM_PARS+3], xs_weights);
320  SINCOS(-values[NUM_PARS+4]*M_PI_180, sin_mspin, cos_mspin);
321#endif // MAGNETIC
322
323  // ** Fill in the initial results **
324  // If pd_start is zero that means that we are starting a new calculation,
325  // and must initialize the result to zero.  Otherwise, we are restarting
326  // the calculation from somewhere in the middle of the dispersity mesh,
327  // and we update the value rather than reset it. Similarly for the
328  // normalization factor, which is stored as the final value in the
329  // results vector (one past the number of q values).
330  //
331  // The code differs slightly between opencl and dll since opencl is only
332  // seeing one q value (stored in the variable "this_result") while the dll
333  // version must loop over all q.
334  #ifdef USE_OPENCL
335    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
336    double this_result = (pd_start == 0 ? 0.0 : result[q_index]);
337  #else // !USE_OPENCL
338    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
339    if (pd_start == 0) {
340      #ifdef USE_OPENMP
341      #pragma omp parallel for
342      #endif
343      for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0;
344    }
345    //if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]);
346#endif // !USE_OPENCL
347
348
349// ====== macros to set up the parts of the loop =======
350/*
351Based on the level of the loop, uses C preprocessor magic to construct
352level-specific looping variables, including these from loop level 3:
353
354  int n3 : length of loop for mesh level 3
355  int i3 : current position in the loop for level 3, which is calculated
356       from a combination of pd_start, pd_stride[3] and pd_length[3].
357  int p3 : is the index into the parameter table for mesh level 3
358  double v3[] : pointer into dispersity array to values for loop 3
359  double w3[] : pointer into dispersity array to weights for loop 3
360  double weight3 : the product of weights from levels 3 and up, computed
361       as weight5*weight4*w3[i3].  Note that we need an outermost
362       value weight5 set to 1.0 for this to work properly.
363
364After expansion, the loop struction will look like the following:
365
366  // --- PD_INIT(4) ---
367  const int n4 = pd_length[4];
368  const int p4 = pd_par[4];
369  global const double *v4 = pd_value + pd_offset[4];
370  global const double *w4 = pd_weight + pd_offset[4];
371  int i4 = (pd_start/pd_stride[4])%n4;  // position in level 4 at pd_start
372
373  // --- PD_INIT(3) ---
374  const int n3 = pd_length[3];
375  ...
376  int i3 = (pd_start/pd_stride[3])%n3;  // position in level 3 at pd_start
377
378  PD_INIT(2)
379  PD_INIT(1)
380  PD_INIT(0)
381
382  // --- PD_OUTERMOST_WEIGHT(5) ---
383  const double weight5 = 1.0;
384
385  // --- PD_OPEN(4,5) ---
386  while (i4 < n4) {
387    parameter[p4] = v4[i4];  // set the value for pd parameter 4 at this mesh point
388    const double weight4 = w4[i4] * weight5;
389
390    // from PD_OPEN(3,4)
391    while (i3 < n3) {
392      parameter[p3] = v3[i3];  // set the value for pd parameter 3 at this mesh point
393      const double weight3 = w3[i3] * weight4;
394
395      PD_OPEN(3,2)
396      PD_OPEN(2,1)
397      PD_OPEN(0,1)
398
399      // ... main loop body ...
400      APPLY_PROJECTION    // convert jitter values to spherical coords
401      BUILD_ROTATION      // construct the rotation matrix qxy => qabc
402      for each q
403          FETCH_Q         // set qx,qy from the q input vector
404          APPLY_ROTATION  // convert qx,qy to qa,qb,qc
405          CALL_KERNEL     // scattering = Iqxy(qa, qb, qc, p1, p2, ...)
406
407      ++step;  // increment counter representing position in dispersity mesh
408
409      PD_CLOSE(0)
410      PD_CLOSE(1)
411      PD_CLOSE(2)
412
413      // --- PD_CLOSE(3) ---
414      if (step >= pd_stop) break;
415      ++i3;
416    }
417    i3 = 0; // reset loop counter for next round through the loop
418
419    // --- PD_CLOSE(4) ---
420    if (step >= pd_stop) break;
421    ++i4;
422  }
423  i4 = 0; // reset loop counter even though no more rounds through the loop
424
425*/
426
427
428// ** prepare inner loops **
429
430// Depending on the shape type (radial, axial, triaxial), the variables
431// and calling parameters in the loop body will be slightly different.
432// Macros capture the differences in one spot so the rest of the code
433// is easier to read. The code below both declares variables for the
434// inner loop and defines the macros that use them.
435
436#if defined(CALL_IQ)
437  // unoriented 1D
438  double qk;
439  #define FETCH_Q() do { qk = q[q_index]; } while (0)
440  #define BUILD_ROTATION() do {} while(0)
441  #define APPLY_ROTATION() do {} while(0)
442  #define CALL_KERNEL() CALL_IQ(qk, local_values.table)
443
444#elif defined(CALL_IQ_A)
445  // unoriented 2D
446  double qx, qy;
447  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
448  #define BUILD_ROTATION() do {} while(0)
449  #define APPLY_ROTATION() do {} while(0)
450  #define CALL_KERNEL() CALL_IQ_A(sqrt(qx*qx+qy*qy), local_values.table)
451
452#elif defined(CALL_IQ_AC)
453  // oriented symmetric 2D
454  double qx, qy;
455  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
456  double qa, qc;
457  QACRotation rotation;
458  // theta, phi, dtheta, dphi are defined below in projection to avoid repeated code.
459  #define BUILD_ROTATION() qac_rotation(&rotation, theta, phi, dtheta, dphi);
460  #define APPLY_ROTATION() qac_apply(&rotation, qx, qy, &qa, &qc)
461  #define CALL_KERNEL() CALL_IQ_AC(qa, qc, local_values.table)
462
463#elif defined(CALL_IQ_ABC)
464  // oriented asymmetric 2D
465  double qx, qy;
466  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
467  double qa, qb, qc;
468  QABCRotation rotation;
469  // theta, phi, dtheta, dphi are defined below in projection to avoid repeated code.
470  // psi and dpsi are only for IQ_ABC, so they are processed here.
471  const double psi = values[details->theta_par+4];
472  local_values.table.psi = 0.;
473  #define BUILD_ROTATION() qabc_rotation(&rotation, theta, phi, psi, dtheta, dphi, local_values.table.psi)
474  #define APPLY_ROTATION() qabc_apply(&rotation, qx, qy, &qa, &qb, &qc)
475  #define CALL_KERNEL() CALL_IQ_ABC(qa, qb, qc, local_values.table)
476#elif defined(CALL_IQ_XY)
477  // direct call to qx,qy calculator
478  double qx, qy;
479  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
480  #define BUILD_ROTATION() do {} while(0)
481  #define APPLY_ROTATION() do {} while(0)
482  #define CALL_KERNEL() CALL_IQ_XY(qx, qy, local_values.table)
483#endif
484
485// Define APPLY_PROJECTION depending on model symmetries. We do this outside
486// the previous if block so that we don't need to repeat the identical
487// logic in the IQ_AC and IQ_ABC branches.  This will become more important
488// if we implement more projections, or more complicated projections.
489#if defined(CALL_IQ) || defined(CALL_IQ_A)  // no orientation
490  #define APPLY_PROJECTION() const double weight=weight0
491#elif defined(CALL_IQ_XY) // pass orientation to the model
492  // CRUFT: support oriented model which define Iqxy rather than Iqac or Iqabc
493  // Need to plug the values for the orientation angles back into parameter
494  // table in case they were overridden by the orientation offset.  This
495  // means that orientation dispersity will not work for these models, but
496  // it was broken anyway, so no matter.  Still want to provide Iqxy in case
497  // the user model wants full control of orientation/magnetism.
498  #if defined(HAVE_PSI)
499    const double theta = values[details->theta_par+2];
500    const double phi = values[details->theta_par+3];
501    const double psi = values[details->theta_par+4];
502    double weight;
503    #define APPLY_PROJECTION() do { \
504      local_values.table.theta = theta; \
505      local_values.table.phi = phi; \
506      local_values.table.psi = psi; \
507      weight=weight0; \
508    } while (0)
509  #elif defined(HAVE_THETA)
510    const double theta = values[details->theta_par+2];
511    const double phi = values[details->theta_par+3];
512    double weight;
513    #define APPLY_PROJECTION() do { \
514      local_values.table.theta = theta; \
515      local_values.table.phi = phi; \
516      weight=weight0; \
517    } while (0)
518  #else
519    #define APPLY_PROJECTION() const double weight=weight0
520  #endif
521#else // apply jitter and view before calling the model
522  // Grab the "view" angles (theta, phi, psi) from the initial parameter table.
523  const double theta = values[details->theta_par+2];
524  const double phi = values[details->theta_par+3];
525  // Make sure jitter angle defaults to zero if there is no jitter distribution
526  local_values.table.theta = 0.;
527  local_values.table.phi = 0.;
528  // The "jitter" angles (dtheta, dphi, dpsi) are stored with the
529  // dispersity values and copied to the local parameter table as
530  // we go through the mesh.
531  double dtheta, dphi, weight;
532  #if PROJECTION == 1 // equirectangular
533    #define APPLY_PROJECTION() do { \
534      dtheta = local_values.table.theta; \
535      dphi = local_values.table.phi; \
536      weight = fabs(cos(dtheta*M_PI_180)) * weight0; \
537    } while (0)
538  #elif PROJECTION == 2 // sinusoidal
539    #define APPLY_PROJECTION() do { \
540      dtheta = local_values.table.theta; \
541      dphi = local_values.table.phi; \
542      weight = weight0; \
543      if (dtheta != 90.0) dphi /= cos(dtheta*M_PI_180); \
544      else if (dphi != 0.0) weight = 0.; \
545      if (fabs(dphi) >= 180.) weight = 0.; \
546    } while (0)
547  #endif
548#endif // done defining APPLY_PROJECTION
549
550// ** define looping macros **
551
552// Define looping variables
553#define PD_INIT(_LOOP) \
554  const int n##_LOOP = details->pd_length[_LOOP]; \
555  const int p##_LOOP = details->pd_par[_LOOP]; \
556  global const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \
557  global const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \
558  int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP;
559
560// Jump into the middle of the dispersity loop
561#define PD_OPEN(_LOOP,_OUTER) \
562  while (i##_LOOP < n##_LOOP) { \
563    local_values.vector[p##_LOOP] = v##_LOOP[i##_LOOP]; \
564    const double weight##_LOOP = w##_LOOP[i##_LOOP] * weight##_OUTER;
565
566// create the variable "weight#=1.0" where # is the outermost level+1 (=MAX_PD).
567#define _PD_OUTERMOST_WEIGHT(_n) const double weight##_n = 1.0;
568#define PD_OUTERMOST_WEIGHT(_n) _PD_OUTERMOST_WEIGHT(_n)
569
570// Close out the loop
571#define PD_CLOSE(_LOOP) \
572    if (step >= pd_stop) break; \
573    ++i##_LOOP; \
574  } \
575  i##_LOOP = 0;
576
577// ====== construct the loops =======
578
579// Pointers to the start of the dispersity and weight vectors, if needed.
580#if MAX_PD>0
581  global const double *pd_value = values + NUM_VALUES;
582  global const double *pd_weight = pd_value + details->num_weights;
583#endif
584
585// The variable "step" is the current position in the dispersity loop.
586// It will be incremented each time a new point in the mesh is accumulated,
587// and used to test whether we have reached pd_stop.
588int step = pd_start;
589
590// *** define loops for each of 0, 1, 2, ..., modelinfo.MAX_PD-1 ***
591
592// define looping variables
593#if MAX_PD>4
594  PD_INIT(4)
595#endif
596#if MAX_PD>3
597  PD_INIT(3)
598#endif
599#if MAX_PD>2
600  PD_INIT(2)
601#endif
602#if MAX_PD>1
603  PD_INIT(1)
604#endif
605#if MAX_PD>0
606  PD_INIT(0)
607#endif
608
609// open nested loops
610PD_OUTERMOST_WEIGHT(MAX_PD)
611#if MAX_PD>4
612  PD_OPEN(4,5)
613#endif
614#if MAX_PD>3
615  PD_OPEN(3,4)
616#endif
617#if MAX_PD>2
618  PD_OPEN(2,3)
619#endif
620#if MAX_PD>1
621  PD_OPEN(1,2)
622#endif
623#if MAX_PD>0
624  PD_OPEN(0,1)
625#endif
626
627//if (q_index==0) {printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n");}
628
629  // ====== loop body =======
630  #ifdef INVALID
631  if (!INVALID(local_values.table))
632  #endif
633  {
634     APPLY_PROJECTION();
635
636    // Accumulate I(q)
637    // Note: weight==0 must always be excluded
638    if (weight > cutoff) {
639      pd_norm += weight * CALL_VOLUME(local_values.table);
640      BUILD_ROTATION();
641
642#ifndef USE_OPENCL
643      // DLL needs to explicitly loop over the q values.
644      #ifdef USE_OPENMP
645      #pragma omp parallel for
646      #endif
647      for (q_index=0; q_index<nq; q_index++)
648#endif // !USE_OPENCL
649      {
650
651        FETCH_Q();
652        APPLY_ROTATION();
653
654        // ======= COMPUTE SCATTERING ==========
655        #if defined(MAGNETIC) && NUM_MAGNETIC > 0
656          // Compute the scattering from the magnetic cross sections.
657          double scattering = 0.0;
658          const double qsq = qx*qx + qy*qy;
659          if (qsq > 1.e-16) {
660            // TODO: what is the magnetic scattering at q=0
661            const double px = (qy*cos_mspin + qx*sin_mspin)/qsq;
662            const double py = (qy*sin_mspin - qx*cos_mspin)/qsq;
663
664            // loop over uu, ud real, du real, dd, ud imag, du imag
665            for (unsigned int xs=0; xs<6; xs++) {
666              const double xs_weight = xs_weights[xs];
667              if (xs_weight > 1.e-8) {
668                // Since the cross section weight is significant, set the slds
669                // to the effective slds for this cross section, call the
670                // kernel, and add according to weight.
671                for (int sk=0; sk<NUM_MAGNETIC; sk++) {
672                  const int32_t mag_index = NUM_PARS+5 + 3*sk;
673                  const int32_t sld_index = slds[sk];
674                  const double mx = values[mag_index];
675                  const double my = values[mag_index+1];
676                  const double mz = values[mag_index+2];
677                  local_values.vector[sld_index] =
678                    mag_sld(xs, qx, qy, px, py, values[sld_index+2], mx, my, mz);
679//if (q_index==0) printf("%d: (qx,qy)=(%g,%g) xs=%d sld%d=%g p=(%g,%g) m=(%g,%g,%g)\n",
680//  q_index, qx, qy, xs, sk, local_values.vector[sld_index], px, py, mx, my, mz);
681                }
682                scattering += xs_weight * CALL_KERNEL();
683              }
684            }
685          }
686        #else  // !MAGNETIC
687          const double scattering = CALL_KERNEL();
688        #endif // !MAGNETIC
689//printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight0);
690
691        #ifdef USE_OPENCL
692          this_result += weight * scattering;
693        #else // !USE_OPENCL
694          result[q_index] += weight * scattering;
695        #endif // !USE_OPENCL
696      }
697    }
698  }
699
700// close nested loops
701++step;
702#if MAX_PD>0
703  PD_CLOSE(0)
704#endif
705#if MAX_PD>1
706  PD_CLOSE(1)
707#endif
708#if MAX_PD>2
709  PD_CLOSE(2)
710#endif
711#if MAX_PD>3
712  PD_CLOSE(3)
713#endif
714#if MAX_PD>4
715  PD_CLOSE(4)
716#endif
717
718// Remember the current result and the updated norm.
719#ifdef USE_OPENCL
720  result[q_index] = this_result;
721  if (q_index == 0) result[nq] = pd_norm;
722//if (q_index == 0) printf("res: %g/%g\n", result[0], pd_norm);
723#else // !USE_OPENCL
724  result[nq] = pd_norm;
725//printf("res: %g/%g\n", result[0], pd_norm);
726#endif // !USE_OPENCL
727
728// ** clear the macros in preparation for the next kernel **
729#undef PD_INIT
730#undef PD_OPEN
731#undef PD_CLOSE
732#undef FETCH_Q
733#undef APPLY_PROJECTION
734#undef BUILD_ROTATION
735#undef APPLY_ROTATION
736#undef CALL_KERNEL
737}
Note: See TracBrowser for help on using the repository browser.