source: sasmodels/sasmodels/kernel_iq.c @ e5cb3df

ticket-1257-vesicle-productticket_1156ticket_822_more_unit_tests
Last change on this file since e5cb3df was 12f4c19, checked in by Paul Kienzle <pkienzle@…>, 5 years ago

remove cuda variable conflict errors

  • Property mode set to 100644
File size: 29.4 KB
Line 
1/*
2    ##########################################################
3    #                                                        #
4    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
5    #   !!                                              !!   #
6    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
7    #   !!                                              !!   #
8    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
9    #                                                        #
10    ##########################################################
11*/
12
13// NOTE: the following macros are defined in generate.py:
14//
15//  MAX_PD : the maximum number of dispersity loops allowed for this model,
16//      which will be at most modelinfo.MAX_PD.
17//  NUM_PARS : the number of parameters in the parameter table
18//  NUM_VALUES : the number of values to skip at the start of the
19//      values array before you get to the dispersity values.
20//  PARAMETER_TABLE : list of parameter declarations used to create the
21//      ParameterTable type.
22//  KERNEL_NAME : model_Iq, model_Iqxy or model_Imagnetic.  This code is
23//      included three times, once for each kernel type.
24//  MAGNETIC : defined when the magnetic kernel is being instantiated
25//  NUM_MAGNETIC : the number of magnetic parameters
26//  MAGNETIC_PARS : a comma-separated list of indices to the sld
27//      parameters in the parameter table.
28//  CALL_VOLUME(form, shell, table) : assign form and shell values
29//  CALL_EFFECTIVE_RADIUS(type, table) : call the R_eff function
30//  CALL_IQ(q, table) : call the Iq function for 1D calcs.
31//  CALL_IQ_A(q, table) : call the Iq function with |q| for 2D data.
32//  CALL_FQ(q, F1, F2, table) : call the Fq function for 1D calcs.
33//  CALL_FQ_A(q, F1, F2, table) : call the Iq function with |q| for 2D data.
34//  CALL_IQ_AC(qa, qc, table) : call the Iqxy function for symmetric shapes
35//  CALL_IQ_ABC(qa, qc, table) : call the Iqxy function for asymmetric shapes
36//  CALL_IQ_XY(qx, qy, table) : call the Iqxy function for arbitrary models
37//  INVALID(table) : test if the current point is feesible to calculate.  This
38//      will be defined in the kernel definition file.
39//  PROJECTION : equirectangular=1, sinusoidal=2
40//      see explore/jitter.py for definitions.
41
42
43#ifndef _PAR_BLOCK_ // protected block so we can include this code twice.
44#define _PAR_BLOCK_
45
46typedef struct {
47#if MAX_PD > 0
48    int32_t pd_par[MAX_PD];     // id of the nth dispersity variable
49    int32_t pd_length[MAX_PD];  // length of the nth dispersity weight vector
50    int32_t pd_offset[MAX_PD];  // offset of pd weights in the value & weight vector
51    int32_t pd_stride[MAX_PD];  // stride to move to the next index at this level
52#endif // MAX_PD > 0
53    int32_t num_eval;           // total number of voxels in hypercube
54    int32_t num_weights;        // total length of the weights vector
55    int32_t num_active;         // number of non-trivial pd loops
56    int32_t theta_par;          // id of first orientation variable
57} ProblemDetails;
58
59// Intel HD 4000 needs private arrays to be a multiple of 4 long
60typedef struct {
61    PARAMETER_TABLE
62} ParameterTable;
63typedef union {
64    ParameterTable table;
65    double vector[4*((NUM_PARS+3)/4)];
66} ParameterBlock;
67#endif // _PAR_BLOCK_
68
69#if defined(MAGNETIC) && NUM_MAGNETIC > 0
70// ===== Helper functions for magnetism =====
71
72// Return value restricted between low and high
73static double clip(double value, double low, double high)
74{
75  return (value < low ? low : (value > high ? high : value));
76}
77
78// Compute spin cross sections given in_spin and out_spin
79// To convert spin cross sections to sld b:
80//     uu * (sld - m_sigma_x);
81//     dd * (sld + m_sigma_x);
82//     ud * (m_sigma_y - 1j*m_sigma_z);
83//     du * (m_sigma_y + 1j*m_sigma_z);
84// weights for spin crosssections: dd du real, ud real, uu, du imag, ud imag
85static void set_spin_weights(double in_spin, double out_spin, double weight[6])
86{
87  in_spin = clip(in_spin, 0.0, 1.0);
88  out_spin = clip(out_spin, 0.0, 1.0);
89  // Previous version of this function took the square root of the weights,
90  // under the assumption that
91  //
92  //     w*I(q, rho1, rho2, ...) = I(q, sqrt(w)*rho1, sqrt(w)*rho2, ...)
93  //
94  // However, since the weights are applied to the final intensity and
95  // are not interned inside the I(q) function, we want the full
96  // weight and not the square root.  Any function using
97  // set_spin_weights as part of calculating an amplitude will need to
98  // manually take that square root, but there is currently no such
99  // function.
100  weight[0] = (1.0-in_spin) * (1.0-out_spin); // dd
101  weight[1] = (1.0-in_spin) * out_spin;       // du
102  weight[2] = in_spin * (1.0-out_spin);       // ud
103  weight[3] = in_spin * out_spin;             // uu
104  weight[4] = weight[1]; // du.imag
105  weight[5] = weight[2]; // ud.imag
106}
107
108// Compute the magnetic sld
109static double mag_sld(
110  const unsigned int xs, // 0=dd, 1=du.real, 2=ud.real, 3=uu, 4=du.imag, 5=ud.imag
111  const double qx, const double qy,
112  const double px, const double py,
113  const double sld,
114  const double mx, const double my, const double mz
115)
116{
117  if (xs < 4) {
118    const double perp = qy*mx - qx*my;
119    switch (xs) {
120      default: // keep compiler happy; condition ensures xs in [0,1,2,3]
121      case 0: // uu => sld - D M_perpx
122          return sld - px*perp;
123      case 1: // ud.real => -D M_perpy
124          return py*perp;
125      case 2: // du.real => -D M_perpy
126          return py*perp;
127      case 3: // dd => sld + D M_perpx
128          return sld + px*perp;
129    }
130  } else {
131    if (xs== 4) {
132      return -mz;  // du.imag => +D M_perpz
133    } else { // index == 5
134      return +mz;  // ud.imag => -D M_perpz
135    }
136  }
137}
138
139
140#endif
141
142// ===== Helper functions for orientation and jitter =====
143
144// To change the definition of the angles, run explore/angles.py, which
145// uses sympy to generate the equations.
146
147#if !defined(_QAC_SECTION) && defined(CALL_IQ_AC)
148#define _QAC_SECTION
149
150typedef struct {
151    double R31, R32;
152} QACRotation;
153
154// Fill in the rotation matrix R from the view angles (theta, phi) and the
155// jitter angles (dtheta, dphi).  This matrix can be applied to all of the
156// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qc]'
157static void
158qac_rotation(
159    QACRotation *rotation,
160    double theta, double phi,
161    double dtheta, double dphi)
162{
163    double sin_theta, cos_theta;
164    double sin_phi, cos_phi;
165
166    // reverse view matrix
167    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
168    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
169    const double V11 = cos_phi*cos_theta;
170    const double V12 = sin_phi*cos_theta;
171    const double V21 = -sin_phi;
172    const double V22 = cos_phi;
173    const double V31 = sin_theta*cos_phi;
174    const double V32 = sin_phi*sin_theta;
175
176    // reverse jitter matrix
177    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
178    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
179    const double J31 = sin_theta;
180    const double J32 = -sin_phi*cos_theta;
181    const double J33 = cos_phi*cos_theta;
182
183    // reverse matrix
184    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
185    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
186}
187
188// Apply the rotation matrix returned from qac_rotation to the point (qx,qy),
189// returning R*[qx,qy]' = [qa,qc]'
190static void
191qac_apply(
192    QACRotation *rotation,
193    double qx, double qy,
194    double *qab_out, double *qc_out)
195{
196    // Indirect calculation of qab, from qab^2 = |q|^2 - qc^2
197    const double dqc = rotation->R31*qx + rotation->R32*qy;
198    const double dqab_sq = -dqc*dqc + qx*qx + qy*qy;
199    //*qab_out = sqrt(fabs(dqab_sq));
200    *qab_out = dqab_sq > 0.0 ? sqrt(dqab_sq) : 0.0;
201    *qc_out = dqc;
202}
203#endif // _QAC_SECTION
204
205#if !defined(_QABC_SECTION) && defined(CALL_IQ_ABC)
206#define _QABC_SECTION
207
208typedef struct {
209    double R11, R12;
210    double R21, R22;
211    double R31, R32;
212} QABCRotation;
213
214// Fill in the rotation matrix R from the view angles (theta, phi, psi) and the
215// jitter angles (dtheta, dphi, dpsi).  This matrix can be applied to all of the
216// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qb,qc]'
217static void
218qabc_rotation(
219    QABCRotation *rotation,
220    double theta, double phi, double psi,
221    double dtheta, double dphi, double dpsi)
222{
223    double sin_theta, cos_theta;
224    double sin_phi, cos_phi;
225    double sin_psi, cos_psi;
226
227    // reverse view matrix
228    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
229    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
230    SINCOS(psi*M_PI_180, sin_psi, cos_psi);
231    const double V11 = -sin_phi*sin_psi + cos_phi*cos_psi*cos_theta;
232    const double V12 = sin_phi*cos_psi*cos_theta + sin_psi*cos_phi;
233    const double V21 = -sin_phi*cos_psi - sin_psi*cos_phi*cos_theta;
234    const double V22 = -sin_phi*sin_psi*cos_theta + cos_phi*cos_psi;
235    const double V31 = sin_theta*cos_phi;
236    const double V32 = sin_phi*sin_theta;
237
238    // reverse jitter matrix
239    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
240    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
241    SINCOS(dpsi*M_PI_180, sin_psi, cos_psi);
242    const double J11 = cos_psi*cos_theta;
243    const double J12 = sin_phi*sin_theta*cos_psi + sin_psi*cos_phi;
244    const double J13 = sin_phi*sin_psi - sin_theta*cos_phi*cos_psi;
245    const double J21 = -sin_psi*cos_theta;
246    const double J22 = -sin_phi*sin_psi*sin_theta + cos_phi*cos_psi;
247    const double J23 = sin_phi*cos_psi + sin_psi*sin_theta*cos_phi;
248    const double J31 = sin_theta;
249    const double J32 = -sin_phi*cos_theta;
250    const double J33 = cos_phi*cos_theta;
251
252    // reverse matrix
253    rotation->R11 = J11*V11 + J12*V21 + J13*V31;
254    rotation->R12 = J11*V12 + J12*V22 + J13*V32;
255    rotation->R21 = J21*V11 + J22*V21 + J23*V31;
256    rotation->R22 = J21*V12 + J22*V22 + J23*V32;
257    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
258    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
259}
260
261// Apply the rotation matrix returned from qabc_rotation to the point (qx,qy),
262// returning R*[qx,qy]' = [qa,qb,qc]'
263static void
264qabc_apply(
265    QABCRotation *rotation,
266    double qx, double qy,
267    double *qa_out, double *qb_out, double *qc_out)
268{
269    *qa_out = rotation->R11*qx + rotation->R12*qy;
270    *qb_out = rotation->R21*qx + rotation->R22*qy;
271    *qc_out = rotation->R31*qx + rotation->R32*qy;
272}
273
274#endif // _QABC_SECTION
275
276// ==================== KERNEL CODE ========================
277kernel
278void KERNEL_NAME(
279    int32_t nq,                   // number of q values
280    const int32_t pd_start,       // where we are in the dispersity loop
281    const int32_t pd_stop,        // where we are stopping in the dispersity loop
282    pglobal const ProblemDetails *details,
283    pglobal const double *values, // parameter values and distributions
284    pglobal const double *q,      // nq q values, with padding to boundary
285    pglobal double *result,       // nq+1 return values, again with padding
286    const double cutoff,          // cutoff in the dispersity weight product
287    int32_t effective_radius_type // which effective radius to compute
288    )
289{
290#if defined(USE_GPU)
291  // who we are and what element we are working with
292  #if defined(USE_OPENCL)
293  const int q_index = get_global_id(0);
294  #else // USE_CUDA
295  const int q_index = threadIdx.x + blockIdx.x * blockDim.x;
296  #endif
297  if (q_index >= nq) return;
298#else
299  // Define q_index here so that debugging statements can be written to work
300  // for both OpenCL and DLL using:
301  //    if (q_index == 0) {printf(...);}
302  int q_index = 0;
303#endif
304
305  // ** Fill in the local values table **
306  // Storage for the current parameter values.
307  // These will be updated as we walk the dispersity mesh.
308  ParameterBlock local_values;
309  //   values[0] is scale
310  //   values[1] is background
311  #ifdef USE_OPENMP
312  #pragma omp parallel for
313  #endif
314  for (int i=0; i < NUM_PARS; i++) {
315    local_values.vector[i] = values[2+i];
316    //if (q_index==0) printf("p%d = %g\n",i, local_values.vector[i]);
317  }
318  //if (q_index==0) printf("NUM_VALUES:%d  NUM_PARS:%d  MAX_PD:%d\n", NUM_VALUES, NUM_PARS, MAX_PD);
319  //if (q_index==0) printf("start:%d  stop:%d\n", pd_start, pd_stop);
320
321  // ** Precompute magnatism values **
322#if defined(MAGNETIC) && NUM_MAGNETIC>0
323  // Location of the sld parameters in the parameter vector.
324  // These parameters are updated with the effective sld due to magnetism.
325  const int32_t slds[] = { MAGNETIC_PARS };
326
327  // Interpret polarization cross section.
328  //     up_frac_i = values[NUM_PARS+2];
329  //     up_frac_f = values[NUM_PARS+3];
330  //     up_angle = values[NUM_PARS+4];
331  // TODO: could precompute more magnetism parameters before calling the kernel.
332  double xs_weights[8];  // uu, ud real, du real, dd, ud imag, du imag, fill, fill
333  double cos_mspin, sin_mspin;
334  set_spin_weights(values[NUM_PARS+2], values[NUM_PARS+3], xs_weights);
335  SINCOS(-values[NUM_PARS+4]*M_PI_180, sin_mspin, cos_mspin);
336#endif // MAGNETIC
337
338  // ** Fill in the initial results **
339  // If pd_start is zero that means that we are starting a new calculation,
340  // and must initialize the result to zero.  Otherwise, we are restarting
341  // the calculation from somewhere in the middle of the dispersity mesh,
342  // and we update the value rather than reset it. Similarly for the
343  // normalization factor, which is stored as the final value in the
344  // results vector (one past the number of q values).
345  //
346  // The code differs slightly between opencl and dll since opencl is only
347  // seeing one q value (stored in the variable "this_F2") while the dll
348  // version must loop over all q.
349  #if defined(CALL_FQ)
350    double weight_norm = (pd_start == 0 ? 0.0 : result[2*nq]);
351    double weighted_form = (pd_start == 0 ? 0.0 : result[2*nq+1]);
352    double weighted_shell = (pd_start == 0 ? 0.0 : result[2*nq+2]);
353    double weighted_radius = (pd_start == 0 ? 0.0 : result[2*nq+3]);
354  #else
355    double weight_norm = (pd_start == 0 ? 0.0 : result[nq]);
356    double weighted_form = (pd_start == 0 ? 0.0 : result[nq+1]);
357    double weighted_shell = (pd_start == 0 ? 0.0 : result[nq+2]);
358    double weighted_radius = (pd_start == 0 ? 0.0 : result[nq+3]);
359  #endif
360  #if defined(USE_GPU)
361    #if defined(CALL_FQ)
362      double this_F2 = (pd_start == 0 ? 0.0 : result[2*q_index+0]);
363      double this_F1 = (pd_start == 0 ? 0.0 : result[2*q_index+1]);
364    #else
365      double this_F2 = (pd_start == 0 ? 0.0 : result[q_index]);
366    #endif
367  #else // !USE_GPU
368    if (pd_start == 0) {
369      #ifdef USE_OPENMP
370      #pragma omp parallel for
371      #endif
372      #if defined(CALL_FQ)
373          // 2*nq for F^2,F pairs
374          for (int q_index=0; q_index < 2*nq; q_index++) result[q_index] = 0.0;
375      #else
376          for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0;
377      #endif
378    }
379    //if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]);
380#endif // !USE_GPU
381
382
383// ====== macros to set up the parts of the loop =======
384/*
385Based on the level of the loop, uses C preprocessor magic to construct
386level-specific looping variables, including these from loop level 3:
387
388  int n3 : length of loop for mesh level 3
389  int i3 : current position in the loop for level 3, which is calculated
390       from a combination of pd_start, pd_stride[3] and pd_length[3].
391  int p3 : is the index into the parameter table for mesh level 3
392  double v3[] : pointer into dispersity array to values for loop 3
393  double w3[] : pointer into dispersity array to weights for loop 3
394  double weight3 : the product of weights from levels 3 and up, computed
395       as weight5*weight4*w3[i3].  Note that we need an outermost
396       value weight5 set to 1.0 for this to work properly.
397
398After expansion, the loop struction will look like the following:
399
400  // --- PD_INIT(4) ---
401  const int n4 = pd_length[4];
402  const int p4 = pd_par[4];
403  pglobal const double *v4 = pd_value + pd_offset[4];
404  pglobal const double *w4 = pd_weight + pd_offset[4];
405  int i4 = (pd_start/pd_stride[4])%n4;  // position in level 4 at pd_start
406
407  // --- PD_INIT(3) ---
408  const int n3 = pd_length[3];
409  ...
410  int i3 = (pd_start/pd_stride[3])%n3;  // position in level 3 at pd_start
411
412  PD_INIT(2)
413  PD_INIT(1)
414  PD_INIT(0)
415
416  // --- PD_OUTERMOST_WEIGHT(5) ---
417  const double weight5 = 1.0;
418
419  // --- PD_OPEN(4,5) ---
420  while (i4 < n4) {
421    parameter[p4] = v4[i4];  // set the value for pd parameter 4 at this mesh point
422    const double weight4 = w4[i4] * weight5;
423
424    // from PD_OPEN(3,4)
425    while (i3 < n3) {
426      parameter[p3] = v3[i3];  // set the value for pd parameter 3 at this mesh point
427      const double weight3 = w3[i3] * weight4;
428
429      PD_OPEN(3,2)
430      PD_OPEN(2,1)
431      PD_OPEN(0,1)
432
433      // ... main loop body ...
434      APPLY_PROJECTION    // convert jitter values to spherical coords
435      BUILD_ROTATION      // construct the rotation matrix qxy => qabc
436      for each q
437          FETCH_Q         // set qx,qy from the q input vector
438          APPLY_ROTATION  // convert qx,qy to qa,qb,qc
439          CALL_KERNEL     // F2 = Iqxy(qa, qb, qc, p1, p2, ...)
440
441      ++step;  // increment counter representing position in dispersity mesh
442
443      PD_CLOSE(0)
444      PD_CLOSE(1)
445      PD_CLOSE(2)
446
447      // --- PD_CLOSE(3) ---
448      if (step >= pd_stop) break;
449      ++i3;
450    }
451    i3 = 0; // reset loop counter for next round through the loop
452
453    // --- PD_CLOSE(4) ---
454    if (step >= pd_stop) break;
455    ++i4;
456  }
457  i4 = 0; // reset loop counter even though no more rounds through the loop
458
459*/
460
461
462// ** prepare inner loops **
463
464// Depending on the shape type (radial, axial, triaxial), the variables
465// and calling parameters in the loop body will be slightly different.
466// Macros capture the differences in one spot so the rest of the code
467// is easier to read. The code below both declares variables for the
468// inner loop and defines the macros that use them.
469
470
471#if defined(CALL_FQ) // COMPUTE_F1_F2 is true
472  // unoriented 1D returning <F> and <F^2>
473  // Note that F1 and F2 are returned from CALL_FQ by reference, and the
474  // user of the CALL_KERNEL macro below is assuming that F1 and F2 are defined.
475  double qk;
476  double F1, F2;
477  #define FETCH_Q() do { qk = q[q_index]; } while (0)
478  #define BUILD_ROTATION() do {} while(0)
479  #define APPLY_ROTATION() do {} while(0)
480  #define CALL_KERNEL() CALL_FQ(qk,F1,F2,local_values.table)
481
482#elif defined(CALL_FQ_A)
483  // unoriented 2D return <F> and <F^2>
484  // Note that the CALL_FQ_A macro is computing _F1_slot and _F2_slot by
485  // reference then returning _F2_slot.  We are calling them _F1_slot and
486  // _F2_slot here so they don't conflict with _F1 and _F2 in the macro
487  // expansion, or with the use of F2 = CALL_KERNEL() when it is used below.
488  double qx, qy;
489  double _F1_slot, _F2_slot;
490  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
491  #define BUILD_ROTATION() do {} while(0)
492  #define APPLY_ROTATION() do {} while(0)
493  #define CALL_KERNEL() CALL_FQ_A(sqrt(qx*qx+qy*qy),_F1_slot,_F2_slot,local_values.table)
494
495#elif defined(CALL_IQ)
496  // unoriented 1D return <F^2>
497  double qk;
498  #define FETCH_Q() do { qk = q[q_index]; } while (0)
499  #define BUILD_ROTATION() do {} while(0)
500  #define APPLY_ROTATION() do {} while(0)
501  #define CALL_KERNEL() CALL_IQ(qk,local_values.table)
502
503#elif defined(CALL_IQ_A)
504  // unoriented 2D
505  double qx, qy;
506  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
507  #define BUILD_ROTATION() do {} while(0)
508  #define APPLY_ROTATION() do {} while(0)
509  #define CALL_KERNEL() CALL_IQ_A(sqrt(qx*qx+qy*qy), local_values.table)
510
511#elif defined(CALL_IQ_AC)
512  // oriented symmetric 2D
513  double qx, qy;
514  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
515  double qa, qc;
516  QACRotation rotation;
517  // theta, phi, dtheta, dphi are defined below in projection to avoid repeated code.
518  #define BUILD_ROTATION() qac_rotation(&rotation, theta, phi, dtheta, dphi);
519  #define APPLY_ROTATION() qac_apply(&rotation, qx, qy, &qa, &qc)
520  #define CALL_KERNEL() CALL_IQ_AC(qa, qc, local_values.table)
521
522#elif defined(CALL_IQ_ABC)
523  // oriented asymmetric 2D
524  double qx, qy;
525  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
526  double qa, qb, qc;
527  QABCRotation rotation;
528  // theta, phi, dtheta, dphi are defined below in projection to avoid repeated code.
529  // psi and dpsi are only for IQ_ABC, so they are processed here.
530  const double psi = values[details->theta_par+4];
531  local_values.table.psi = 0.;
532  #define BUILD_ROTATION() qabc_rotation(&rotation, theta, phi, psi, dtheta, dphi, local_values.table.psi)
533  #define APPLY_ROTATION() qabc_apply(&rotation, qx, qy, &qa, &qb, &qc)
534  #define CALL_KERNEL() CALL_IQ_ABC(qa, qb, qc, local_values.table)
535
536#elif defined(CALL_IQ_XY)
537  // direct call to qx,qy calculator
538  double qx, qy;
539  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
540  #define BUILD_ROTATION() do {} while(0)
541  #define APPLY_ROTATION() do {} while(0)
542  #define CALL_KERNEL() CALL_IQ_XY(qx, qy, local_values.table)
543#endif
544
545// Define APPLY_PROJECTION depending on model symmetries. We do this outside
546// the previous if block so that we don't need to repeat the identical
547// logic in the IQ_AC and IQ_ABC branches.  This will become more important
548// if we implement more projections, or more complicated projections.
549#if defined(CALL_IQ) || defined(CALL_IQ_A) || defined(CALL_FQ) || defined(CALL_FQ_A)
550  // no orientation
551  #define APPLY_PROJECTION() const double weight=weight0
552#elif defined(CALL_IQ_XY) // pass orientation to the model
553  // CRUFT: support oriented model which define Iqxy rather than Iqac or Iqabc
554  // Need to plug the values for the orientation angles back into parameter
555  // table in case they were overridden by the orientation offset.  This
556  // means that orientation dispersity will not work for these models, but
557  // it was broken anyway, so no matter.  Still want to provide Iqxy in case
558  // the user model wants full control of orientation/magnetism.
559  #if defined(HAVE_PSI)
560    const double theta = values[details->theta_par+2];
561    const double phi = values[details->theta_par+3];
562    const double psi = values[details->theta_par+4];
563    double weight;
564    #define APPLY_PROJECTION() do { \
565      local_values.table.theta = theta; \
566      local_values.table.phi = phi; \
567      local_values.table.psi = psi; \
568      weight=weight0; \
569    } while (0)
570  #elif defined(HAVE_THETA)
571    const double theta = values[details->theta_par+2];
572    const double phi = values[details->theta_par+3];
573    double weight;
574    #define APPLY_PROJECTION() do { \
575      local_values.table.theta = theta; \
576      local_values.table.phi = phi; \
577      weight=weight0; \
578    } while (0)
579  #else
580    #define APPLY_PROJECTION() const double weight=weight0
581  #endif
582#else // apply jitter and view before calling the model
583  // Grab the "view" angles (theta, phi, psi) from the initial parameter table.
584  const double theta = values[details->theta_par+2];
585  const double phi = values[details->theta_par+3];
586  // Make sure jitter angle defaults to zero if there is no jitter distribution
587  local_values.table.theta = 0.;
588  local_values.table.phi = 0.;
589  // The "jitter" angles (dtheta, dphi, dpsi) are stored with the
590  // dispersity values and copied to the local parameter table as
591  // we go through the mesh.
592  double dtheta, dphi, weight;
593  #if PROJECTION == 1 // equirectangular
594    #define APPLY_PROJECTION() do { \
595      dtheta = local_values.table.theta; \
596      dphi = local_values.table.phi; \
597      weight = fabs(cos(dtheta*M_PI_180)) * weight0; \
598    } while (0)
599  #elif PROJECTION == 2 // sinusoidal
600    #define APPLY_PROJECTION() do { \
601      dtheta = local_values.table.theta; \
602      dphi = local_values.table.phi; \
603      weight = weight0; \
604      if (dtheta != 90.0) dphi /= cos(dtheta*M_PI_180); \
605      else if (dphi != 0.0) weight = 0.; \
606      if (fabs(dphi) >= 180.) weight = 0.; \
607    } while (0)
608  #endif
609#endif // done defining APPLY_PROJECTION
610
611// ** define looping macros **
612
613// Define looping variables
614#define PD_INIT(_LOOP) \
615  const int n##_LOOP = details->pd_length[_LOOP]; \
616  const int p##_LOOP = details->pd_par[_LOOP]; \
617  pglobal const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \
618  pglobal const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \
619  int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP;
620
621// Jump into the middle of the dispersity loop
622#define PD_OPEN(_LOOP,_OUTER) \
623  while (i##_LOOP < n##_LOOP) { \
624    local_values.vector[p##_LOOP] = v##_LOOP[i##_LOOP]; \
625    const double weight##_LOOP = w##_LOOP[i##_LOOP] * weight##_OUTER;
626
627// create the variable "weight#=1.0" where # is the outermost level+1 (=MAX_PD).
628#define _PD_OUTERMOST_WEIGHT(_n) const double weight##_n = 1.0;
629#define PD_OUTERMOST_WEIGHT(_n) _PD_OUTERMOST_WEIGHT(_n)
630
631// Close out the loop
632#define PD_CLOSE(_LOOP) \
633    if (step >= pd_stop) break; \
634    ++i##_LOOP; \
635  } \
636  i##_LOOP = 0;
637
638// ====== construct the loops =======
639
640// Pointers to the start of the dispersity and weight vectors, if needed.
641#if MAX_PD>0
642  pglobal const double *pd_value = values + NUM_VALUES;
643  pglobal const double *pd_weight = pd_value + details->num_weights;
644#endif
645
646// The variable "step" is the current position in the dispersity loop.
647// It will be incremented each time a new point in the mesh is accumulated,
648// and used to test whether we have reached pd_stop.
649int step = pd_start;
650
651// *** define loops for each of 0, 1, 2, ..., modelinfo.MAX_PD-1 ***
652
653// define looping variables
654#if MAX_PD>4
655  PD_INIT(4)
656#endif
657#if MAX_PD>3
658  PD_INIT(3)
659#endif
660#if MAX_PD>2
661  PD_INIT(2)
662#endif
663#if MAX_PD>1
664  PD_INIT(1)
665#endif
666#if MAX_PD>0
667  PD_INIT(0)
668#endif
669
670// open nested loops
671PD_OUTERMOST_WEIGHT(MAX_PD)
672#if MAX_PD>4
673  PD_OPEN(4,5)
674#endif
675#if MAX_PD>3
676  PD_OPEN(3,4)
677#endif
678#if MAX_PD>2
679  PD_OPEN(2,3)
680#endif
681#if MAX_PD>1
682  PD_OPEN(1,2)
683#endif
684#if MAX_PD>0
685  PD_OPEN(0,1)
686#endif
687
688//if (q_index==0) {printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n");}
689
690  // ====== loop body =======
691  #ifdef INVALID
692  if (!INVALID(local_values.table))
693  #endif
694  {
695     APPLY_PROJECTION();
696
697    // Accumulate I(q)
698    // Note: weight==0 must always be excluded
699    if (weight > cutoff) {
700      double form, shell;
701      CALL_VOLUME(form, shell, local_values.table);
702      weight_norm += weight;
703      weighted_form += weight * form;
704      weighted_shell += weight * shell;
705      if (effective_radius_type != 0) {
706        weighted_radius += weight * CALL_EFFECTIVE_RADIUS(effective_radius_type, local_values.table);
707      }
708      BUILD_ROTATION();
709
710#if !defined(USE_GPU)
711      // DLL needs to explicitly loop over the q values.
712      #ifdef USE_OPENMP
713      #pragma omp parallel for
714      #endif
715      for (q_index=0; q_index<nq; q_index++)
716#endif // !USE_GPU
717      {
718
719        FETCH_Q();
720        APPLY_ROTATION();
721
722        // ======= COMPUTE SCATTERING ==========
723        #if defined(MAGNETIC) && NUM_MAGNETIC > 0
724          // Compute the scattering from the magnetic cross sections.
725          double F2 = 0.0;
726          const double qsq = qx*qx + qy*qy;
727          if (qsq > 1.e-16) {
728            // TODO: what is the magnetic scattering at q=0
729            const double px = (qy*cos_mspin + qx*sin_mspin)/qsq;
730            const double py = (qy*sin_mspin - qx*cos_mspin)/qsq;
731
732            // loop over uu, ud real, du real, dd, ud imag, du imag
733            for (unsigned int xs=0; xs<6; xs++) {
734              const double xs_weight = xs_weights[xs];
735              if (xs_weight > 1.e-8) {
736                // Since the cross section weight is significant, set the slds
737                // to the effective slds for this cross section, call the
738                // kernel, and add according to weight.
739                for (int sk=0; sk<NUM_MAGNETIC; sk++) {
740                  const int32_t mag_index = NUM_PARS+5 + 3*sk;
741                  const int32_t sld_index = slds[sk];
742                  const double mx = values[mag_index];
743                  const double my = values[mag_index+1];
744                  const double mz = values[mag_index+2];
745                  local_values.vector[sld_index] =
746                    mag_sld(xs, qx, qy, px, py, values[sld_index+2], mx, my, mz);
747//if (q_index==0) printf("%d: (qx,qy)=(%g,%g) xs=%d sld%d=%g p=(%g,%g) m=(%g,%g,%g)\n",
748//  q_index, qx, qy, xs, sk, local_values.vector[sld_index], px, py, mx, my, mz);
749                }
750                F2 += xs_weight * CALL_KERNEL();
751              }
752            }
753          }
754        #else  // !MAGNETIC
755          #if defined(CALL_FQ)
756            CALL_KERNEL(); // sets F1 and F2 by reference
757          #else
758            const double F2 = CALL_KERNEL();
759          #endif
760        #endif // !MAGNETIC
761//printf("q_index:%d %g %g %g %g\n", q_index, F2, weight0);
762
763        #if defined(USE_GPU)
764          #if defined(CALL_FQ)
765            this_F2 += weight * F2;
766            this_F1 += weight * F1;
767          #else
768            this_F2 += weight * F2;
769          #endif
770        #else // !USE_OPENCL
771          #if defined(CALL_FQ)
772            result[2*q_index+0] += weight * F2;
773            result[2*q_index+1] += weight * F1;
774          #else
775            result[q_index] += weight * F2;
776          #endif
777        #endif // !USE_OPENCL
778      }
779    }
780  }
781// close nested loops
782++step;
783#if MAX_PD>0
784  PD_CLOSE(0)
785#endif
786#if MAX_PD>1
787  PD_CLOSE(1)
788#endif
789#if MAX_PD>2
790  PD_CLOSE(2)
791#endif
792#if MAX_PD>3
793  PD_CLOSE(3)
794#endif
795#if MAX_PD>4
796  PD_CLOSE(4)
797#endif
798
799// Remember the results and the updated norm.
800#if defined(USE_GPU)
801  #if defined(CALL_FQ)
802  result[2*q_index+0] = this_F2;
803  result[2*q_index+1] = this_F1;
804  #else
805  result[q_index] = this_F2;
806  #endif
807  if (q_index == 0)
808#endif
809  {
810#if defined(CALL_FQ)
811    result[2*nq] = weight_norm;
812    result[2*nq+1] = weighted_form;
813    result[2*nq+2] = weighted_shell;
814    result[2*nq+3] = weighted_radius;
815#else
816    result[nq] = weight_norm;
817    result[nq+1] = weighted_form;
818    result[nq+2] = weighted_shell;
819    result[nq+3] = weighted_radius;
820#endif
821  }
822
823// ** clear the macros in preparation for the next kernel **
824#undef PD_INIT
825#undef PD_OPEN
826#undef PD_CLOSE
827#undef FETCH_Q
828#undef APPLY_PROJECTION
829#undef BUILD_ROTATION
830#undef APPLY_ROTATION
831#undef CALL_KERNEL
832}
Note: See TracBrowser for help on using the repository browser.