source: sasmodels/sasmodels/kernel_iq.c @ de032da

ticket-1257-vesicle-productticket_1156ticket_822_more_unit_tests
Last change on this file since de032da was de032da, checked in by Paul Kienzle <pkienzle@…>, 5 years ago

Merge branch 'master' into ticket_822_v5_unit_tests

  • Property mode set to 100644
File size: 30.4 KB
Line 
1/*
2    ##########################################################
3    #                                                        #
4    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
5    #   !!                                              !!   #
6    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
7    #   !!                                              !!   #
8    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
9    #                                                        #
10    ##########################################################
11*/
12
13// NOTE: the following macros are defined in generate.py:
14//
15//  MAX_PD : the maximum number of dispersity loops allowed for this model,
16//      which will be at most modelinfo.MAX_PD.
17//  NUM_PARS : the number of parameters in the parameter table
18//  NUM_VALUES : the number of values to skip at the start of the
19//      values array before you get to the dispersity values.
20//  PARAMETER_TABLE : list of parameter declarations used to create the
21//      ParameterTable type.
22//  KERNEL_NAME : model_Iq, model_Iqxy or model_Imagnetic.  This code is
23//      included three times, once for each kernel type.
24//  MAGNETIC : defined when the magnetic kernel is being instantiated
25//  NUM_MAGNETIC : the number of magnetic parameters
26//  MAGNETIC_PARS : a comma-separated list of indices to the sld
27//      parameters in the parameter table.
28//  CALL_VOLUME(form, shell, table) : assign form and shell values
29//  CALL_RADIUS_EFFECTIVE(mode, table) : call the R_eff function
30//  CALL_IQ(q, table) : call the Iq function for 1D calcs.
31//  CALL_IQ_A(q, table) : call the Iq function with |q| for 2D data.
32//  CALL_FQ(q, F1, F2, table) : call the Fq function for 1D calcs.
33//  CALL_FQ_A(q, F1, F2, table) : call the Iq function with |q| for 2D data.
34//  CALL_IQ_AC(qa, qc, table) : call the Iqxy function for symmetric shapes
35//  CALL_IQ_ABC(qa, qc, table) : call the Iqxy function for asymmetric shapes
36//  CALL_IQ_XY(qx, qy, table) : call the Iqxy function for arbitrary models
37//  INVALID(table) : test if the current point is feesible to calculate.  This
38//      will be defined in the kernel definition file.
39//  PROJECTION : equirectangular=1, sinusoidal=2
40//      see explore/jitter.py for definitions.
41
42
43#ifndef _PAR_BLOCK_ // protected block so we can include this code twice.
44#define _PAR_BLOCK_
45
46typedef struct {
47#if MAX_PD > 0
48    int32_t pd_par[MAX_PD];     // id of the nth dispersity variable
49    int32_t pd_length[MAX_PD];  // length of the nth dispersity weight vector
50    int32_t pd_offset[MAX_PD];  // offset of pd weights in the value & weight vector
51    int32_t pd_stride[MAX_PD];  // stride to move to the next index at this level
52#endif // MAX_PD > 0
53    int32_t num_eval;           // total number of voxels in hypercube
54    int32_t num_weights;        // total length of the weights vector
55    int32_t num_active;         // number of non-trivial pd loops
56    int32_t theta_par;          // id of first orientation variable
57} ProblemDetails;
58
59// Intel HD 4000 needs private arrays to be a multiple of 4 long
60typedef struct {
61    PARAMETER_TABLE
62} ParameterTable;
63typedef union {
64    ParameterTable table;
65    double vector[4*((NUM_PARS+3)/4)];
66} ParameterBlock;
67#endif // _PAR_BLOCK_
68
69#if defined(MAGNETIC) && NUM_MAGNETIC > 0
70// ===== Helper functions for magnetism =====
71
72// Return value restricted between low and high
73static double clip(double value, double low, double high)
74{
75  return (value < low ? low : (value > high ? high : value));
76}
77
78// Compute spin cross sections given in_spin and out_spin
79// To convert spin cross sections to sld b:
80//     uu * (sld - m_sigma_x);
81//     dd * (sld + m_sigma_x);
82//     ud * (m_sigma_y - 1j*m_sigma_z);
83//     du * (m_sigma_y + 1j*m_sigma_z);
84// weights for spin crosssections: dd du real, ud real, uu, du imag, ud imag
85static void set_spin_weights(double in_spin, double out_spin, double weight[6])
86{
87
88  double norm;
89  in_spin = clip(in_spin, 0.0, 1.0);
90  out_spin = clip(out_spin, 0.0, 1.0);
91  // Previous version of this function took the square root of the weights,
92  // under the assumption that
93  //
94  //     w*I(q, rho1, rho2, ...) = I(q, sqrt(w)*rho1, sqrt(w)*rho2, ...)
95  //
96  // However, since the weights are applied to the final intensity and
97  // are not interned inside the I(q) function, we want the full
98  // weight and not the square root.  Anyway no function will ever use
99  // set_spin_weights as part of calculating an amplitude, as the weights are
100  // related to polarisation efficiency of the instrument. The weights serve to
101  // construct various magnet scattering cross sections, which are linear combinations
102  // of the spin-resolved cross sections. The polarisation efficiency e_in and e_out
103  // are parameters ranging from 0.5 (unpolarised) beam to 1 (perfect optics).
104  // For in_spin or out_spin <0.5 one assumes a CS, where the spin is reversed/flipped
105  // with respect to the initial supermirror polariser. The actual polarisation efficiency
106  // in this case is however e_in/out = 1-in/out_spin.
107
108  if (out_spin < 0.5){norm=1-out_spin;}
109  else{norm=out_spin;}
110
111
112// The norm is needed to make sure that the scattering cross sections are
113//correctly weighted, such that the sum of spin-resolved measurements adds up to
114// the unpolarised or half-polarised scattering cross section. No intensity weighting
115// needed on the incoming polariser side (assuming that a user), has normalised
116// to the incoming flux with polariser in for SANSPOl and unpolarised beam, respectively.
117
118
119  weight[0] = (1.0-in_spin) * (1.0-out_spin) / norm; // dd
120  weight[1] = (1.0-in_spin) * out_spin / norm;       // du
121  weight[2] = in_spin * (1.0-out_spin) / norm;       // ud
122  weight[3] = in_spin * out_spin / norm;             // uu
123  weight[4] = weight[1]; // du.imag
124  weight[5] = weight[2]; // ud.imag
125}
126
127// Compute the magnetic sld
128static double mag_sld(
129  const unsigned int xs, // 0=dd, 1=du.real, 2=ud.real, 3=uu, 4=du.imag, 5=ud.imag
130  const double qx, const double qy,
131  const double px, const double py,
132  const double sld,
133  const double mx, const double my, const double mz
134)
135{
136  if (xs < 4) {
137    const double perp = qy*mx - qx*my;
138    switch (xs) {
139      default: // keep compiler happy; condition ensures xs in [0,1,2,3]
140      case 0: // dd => sld - D M_perpx
141          return sld - px*perp;
142      case 1: // du.real => -D M_perpy
143          return py*perp;
144      case 2: // ud.real => -D M_perpy
145          return py*perp;
146      case 3: // uu => sld + D M_perpx
147          return sld + px*perp;
148    }
149  } else {
150    if (xs== 4) {
151      return -mz;  // du.imag => +D M_perpz
152    } else { // index == 5
153      return +mz;  // ud.imag => -D M_perpz
154    }
155  }
156}
157
158
159#endif
160
161// ===== Helper functions for orientation and jitter =====
162
163// To change the definition of the angles, run explore/angles.py, which
164// uses sympy to generate the equations.
165
166#if !defined(_QAC_SECTION) && defined(CALL_IQ_AC)
167#define _QAC_SECTION
168
169typedef struct {
170    double R31, R32;
171} QACRotation;
172
173// Fill in the rotation matrix R from the view angles (theta, phi) and the
174// jitter angles (dtheta, dphi).  This matrix can be applied to all of the
175// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qc]'
176static void
177qac_rotation(
178    QACRotation *rotation,
179    double theta, double phi,
180    double dtheta, double dphi)
181{
182    double sin_theta, cos_theta;
183    double sin_phi, cos_phi;
184
185    // reverse view matrix
186    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
187    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
188    const double V11 = cos_phi*cos_theta;
189    const double V12 = sin_phi*cos_theta;
190    const double V21 = -sin_phi;
191    const double V22 = cos_phi;
192    const double V31 = sin_theta*cos_phi;
193    const double V32 = sin_phi*sin_theta;
194
195    // reverse jitter matrix
196    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
197    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
198    const double J31 = sin_theta;
199    const double J32 = -sin_phi*cos_theta;
200    const double J33 = cos_phi*cos_theta;
201
202    // reverse matrix
203    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
204    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
205}
206
207// Apply the rotation matrix returned from qac_rotation to the point (qx,qy),
208// returning R*[qx,qy]' = [qa,qc]'
209static void
210qac_apply(
211    QACRotation *rotation,
212    double qx, double qy,
213    double *qab_out, double *qc_out)
214{
215    // Indirect calculation of qab, from qab^2 = |q|^2 - qc^2
216    const double dqc = rotation->R31*qx + rotation->R32*qy;
217    const double dqab_sq = -dqc*dqc + qx*qx + qy*qy;
218    //*qab_out = sqrt(fabs(dqab_sq));
219    *qab_out = dqab_sq > 0.0 ? sqrt(dqab_sq) : 0.0;
220    *qc_out = dqc;
221}
222#endif // _QAC_SECTION
223
224#if !defined(_QABC_SECTION) && defined(CALL_IQ_ABC)
225#define _QABC_SECTION
226
227typedef struct {
228    double R11, R12;
229    double R21, R22;
230    double R31, R32;
231} QABCRotation;
232
233// Fill in the rotation matrix R from the view angles (theta, phi, psi) and the
234// jitter angles (dtheta, dphi, dpsi).  This matrix can be applied to all of the
235// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qb,qc]'
236static void
237qabc_rotation(
238    QABCRotation *rotation,
239    double theta, double phi, double psi,
240    double dtheta, double dphi, double dpsi)
241{
242    double sin_theta, cos_theta;
243    double sin_phi, cos_phi;
244    double sin_psi, cos_psi;
245
246    // reverse view matrix
247    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
248    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
249    SINCOS(psi*M_PI_180, sin_psi, cos_psi);
250    const double V11 = -sin_phi*sin_psi + cos_phi*cos_psi*cos_theta;
251    const double V12 = sin_phi*cos_psi*cos_theta + sin_psi*cos_phi;
252    const double V21 = -sin_phi*cos_psi - sin_psi*cos_phi*cos_theta;
253    const double V22 = -sin_phi*sin_psi*cos_theta + cos_phi*cos_psi;
254    const double V31 = sin_theta*cos_phi;
255    const double V32 = sin_phi*sin_theta;
256
257    // reverse jitter matrix
258    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
259    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
260    SINCOS(dpsi*M_PI_180, sin_psi, cos_psi);
261    const double J11 = cos_psi*cos_theta;
262    const double J12 = sin_phi*sin_theta*cos_psi + sin_psi*cos_phi;
263    const double J13 = sin_phi*sin_psi - sin_theta*cos_phi*cos_psi;
264    const double J21 = -sin_psi*cos_theta;
265    const double J22 = -sin_phi*sin_psi*sin_theta + cos_phi*cos_psi;
266    const double J23 = sin_phi*cos_psi + sin_psi*sin_theta*cos_phi;
267    const double J31 = sin_theta;
268    const double J32 = -sin_phi*cos_theta;
269    const double J33 = cos_phi*cos_theta;
270
271    // reverse matrix
272    rotation->R11 = J11*V11 + J12*V21 + J13*V31;
273    rotation->R12 = J11*V12 + J12*V22 + J13*V32;
274    rotation->R21 = J21*V11 + J22*V21 + J23*V31;
275    rotation->R22 = J21*V12 + J22*V22 + J23*V32;
276    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
277    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
278}
279
280// Apply the rotation matrix returned from qabc_rotation to the point (qx,qy),
281// returning R*[qx,qy]' = [qa,qb,qc]'
282static void
283qabc_apply(
284    QABCRotation *rotation,
285    double qx, double qy,
286    double *qa_out, double *qb_out, double *qc_out)
287{
288    *qa_out = rotation->R11*qx + rotation->R12*qy;
289    *qb_out = rotation->R21*qx + rotation->R22*qy;
290    *qc_out = rotation->R31*qx + rotation->R32*qy;
291}
292
293#endif // _QABC_SECTION
294
295// ==================== KERNEL CODE ========================
296kernel
297void KERNEL_NAME(
298    int32_t nq,                   // number of q values
299    const int32_t pd_start,       // where we are in the dispersity loop
300    const int32_t pd_stop,        // where we are stopping in the dispersity loop
301    pglobal const ProblemDetails *details,
302    pglobal const double *values, // parameter values and distributions
303    pglobal const double *q,      // nq q values, with padding to boundary
304    pglobal double *result,       // nq+1 return values, again with padding
305    const double cutoff,          // cutoff in the dispersity weight product
306    int32_t radius_effective_mode // which effective radius to compute
307    )
308{
309#if defined(USE_GPU)
310  // who we are and what element we are working with
311  #if defined(USE_OPENCL)
312  const int q_index = get_global_id(0);
313  #else // USE_CUDA
314  const int q_index = threadIdx.x + blockIdx.x * blockDim.x;
315  #endif
316  if (q_index >= nq) return;
317#else
318  // Define q_index here so that debugging statements can be written to work
319  // for both OpenCL and DLL using:
320  //    if (q_index == 0) {printf(...);}
321  int q_index = 0;
322#endif
323
324  // ** Fill in the local values table **
325  // Storage for the current parameter values.
326  // These will be updated as we walk the dispersity mesh.
327  ParameterBlock local_values;
328  //   values[0] is scale
329  //   values[1] is background
330  #ifdef USE_OPENMP
331  #pragma omp parallel for
332  #endif
333  for (int i=0; i < NUM_PARS; i++) {
334    local_values.vector[i] = values[2+i];
335    //if (q_index==0) printf("p%d = %g\n",i, local_values.vector[i]);
336  }
337  //if (q_index==0) printf("NUM_VALUES:%d  NUM_PARS:%d  MAX_PD:%d\n", NUM_VALUES, NUM_PARS, MAX_PD);
338  //if (q_index==0) printf("start:%d  stop:%d\n", pd_start, pd_stop);
339
340  // ** Precompute magnatism values **
341#if defined(MAGNETIC) && NUM_MAGNETIC>0
342  // Location of the sld parameters in the parameter vector.
343  // These parameters are updated with the effective sld due to magnetism.
344  const int32_t slds[] = { MAGNETIC_PARS };
345
346  // Interpret polarization cross section.
347  //     up_frac_i = values[NUM_PARS+2];
348  //     up_frac_f = values[NUM_PARS+3];
349  //     up_angle = values[NUM_PARS+4];
350  // TODO: could precompute more magnetism parameters before calling the kernel.
351  double xs_weights[8];  // uu, ud real, du real, dd, ud imag, du imag, fill, fill
352  double cos_mspin, sin_mspin;
353  set_spin_weights(values[NUM_PARS+2], values[NUM_PARS+3], xs_weights);
354  SINCOS(-values[NUM_PARS+4]*M_PI_180, sin_mspin, cos_mspin);
355#endif // MAGNETIC
356
357  // ** Fill in the initial results **
358  // If pd_start is zero that means that we are starting a new calculation,
359  // and must initialize the result to zero.  Otherwise, we are restarting
360  // the calculation from somewhere in the middle of the dispersity mesh,
361  // and we update the value rather than reset it. Similarly for the
362  // normalization factor, which is stored as the final value in the
363  // results vector (one past the number of q values).
364  //
365  // The code differs slightly between opencl and dll since opencl is only
366  // seeing one q value (stored in the variable "this_F2") while the dll
367  // version must loop over all q.
368  #if defined(CALL_FQ)
369    double weight_norm = (pd_start == 0 ? 0.0 : result[2*nq]);
370    double weighted_form = (pd_start == 0 ? 0.0 : result[2*nq+1]);
371    double weighted_shell = (pd_start == 0 ? 0.0 : result[2*nq+2]);
372    double weighted_radius = (pd_start == 0 ? 0.0 : result[2*nq+3]);
373  #else
374    double weight_norm = (pd_start == 0 ? 0.0 : result[nq]);
375    double weighted_form = (pd_start == 0 ? 0.0 : result[nq+1]);
376    double weighted_shell = (pd_start == 0 ? 0.0 : result[nq+2]);
377    double weighted_radius = (pd_start == 0 ? 0.0 : result[nq+3]);
378  #endif
379  #if defined(USE_GPU)
380    #if defined(CALL_FQ)
381      double this_F2 = (pd_start == 0 ? 0.0 : result[2*q_index+0]);
382      double this_F1 = (pd_start == 0 ? 0.0 : result[2*q_index+1]);
383    #else
384      double this_F2 = (pd_start == 0 ? 0.0 : result[q_index]);
385    #endif
386  #else // !USE_GPU
387    if (pd_start == 0) {
388      #ifdef USE_OPENMP
389      #pragma omp parallel for
390      #endif
391      #if defined(CALL_FQ)
392          // 2*nq for F^2,F pairs
393          for (int q_index=0; q_index < 2*nq; q_index++) result[q_index] = 0.0;
394      #else
395          for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0;
396      #endif
397    }
398    //if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]);
399#endif // !USE_GPU
400
401
402// ====== macros to set up the parts of the loop =======
403/*
404Based on the level of the loop, uses C preprocessor magic to construct
405level-specific looping variables, including these from loop level 3:
406
407  int n3 : length of loop for mesh level 3
408  int i3 : current position in the loop for level 3, which is calculated
409       from a combination of pd_start, pd_stride[3] and pd_length[3].
410  int p3 : is the index into the parameter table for mesh level 3
411  double v3[] : pointer into dispersity array to values for loop 3
412  double w3[] : pointer into dispersity array to weights for loop 3
413  double weight3 : the product of weights from levels 3 and up, computed
414       as weight5*weight4*w3[i3].  Note that we need an outermost
415       value weight5 set to 1.0 for this to work properly.
416
417After expansion, the loop struction will look like the following:
418
419  // --- PD_INIT(4) ---
420  const int n4 = pd_length[4];
421  const int p4 = pd_par[4];
422  pglobal const double *v4 = pd_value + pd_offset[4];
423  pglobal const double *w4 = pd_weight + pd_offset[4];
424  int i4 = (pd_start/pd_stride[4])%n4;  // position in level 4 at pd_start
425
426  // --- PD_INIT(3) ---
427  const int n3 = pd_length[3];
428  ...
429  int i3 = (pd_start/pd_stride[3])%n3;  // position in level 3 at pd_start
430
431  PD_INIT(2)
432  PD_INIT(1)
433  PD_INIT(0)
434
435  // --- PD_OUTERMOST_WEIGHT(5) ---
436  const double weight5 = 1.0;
437
438  // --- PD_OPEN(4,5) ---
439  while (i4 < n4) {
440    parameter[p4] = v4[i4];  // set the value for pd parameter 4 at this mesh point
441    const double weight4 = w4[i4] * weight5;
442
443    // from PD_OPEN(3,4)
444    while (i3 < n3) {
445      parameter[p3] = v3[i3];  // set the value for pd parameter 3 at this mesh point
446      const double weight3 = w3[i3] * weight4;
447
448      PD_OPEN(3,2)
449      PD_OPEN(2,1)
450      PD_OPEN(0,1)
451
452      // ... main loop body ...
453      APPLY_PROJECTION    // convert jitter values to spherical coords
454      BUILD_ROTATION      // construct the rotation matrix qxy => qabc
455      for each q
456          FETCH_Q         // set qx,qy from the q input vector
457          APPLY_ROTATION  // convert qx,qy to qa,qb,qc
458          CALL_KERNEL     // F2 = Iqxy(qa, qb, qc, p1, p2, ...)
459
460      ++step;  // increment counter representing position in dispersity mesh
461
462      PD_CLOSE(0)
463      PD_CLOSE(1)
464      PD_CLOSE(2)
465
466      // --- PD_CLOSE(3) ---
467      if (step >= pd_stop) break;
468      ++i3;
469    }
470    i3 = 0; // reset loop counter for next round through the loop
471
472    // --- PD_CLOSE(4) ---
473    if (step >= pd_stop) break;
474    ++i4;
475  }
476  i4 = 0; // reset loop counter even though no more rounds through the loop
477
478*/
479
480
481// ** prepare inner loops **
482
483// Depending on the shape type (radial, axial, triaxial), the variables
484// and calling parameters in the loop body will be slightly different.
485// Macros capture the differences in one spot so the rest of the code
486// is easier to read. The code below both declares variables for the
487// inner loop and defines the macros that use them.
488
489
490#if defined(CALL_FQ) // COMPUTE_F1_F2 is true
491  // unoriented 1D returning <F> and <F^2>
492  // Note that F1 and F2 are returned from CALL_FQ by reference, and the
493  // user of the CALL_KERNEL macro below is assuming that F1 and F2 are defined.
494  double qk;
495  double F1, F2;
496  #define FETCH_Q() do { qk = q[q_index]; } while (0)
497  #define BUILD_ROTATION() do {} while(0)
498  #define APPLY_ROTATION() do {} while(0)
499  #define CALL_KERNEL() CALL_FQ(qk,F1,F2,local_values.table)
500
501#elif defined(CALL_FQ_A)
502  // unoriented 2D return <F> and <F^2>
503  // Note that the CALL_FQ_A macro is computing _F1_slot and _F2_slot by
504  // reference then returning _F2_slot.  We are calling them _F1_slot and
505  // _F2_slot here so they don't conflict with _F1 and _F2 in the macro
506  // expansion, or with the use of F2 = CALL_KERNEL() when it is used below.
507  double qx, qy;
508  double _F1_slot, _F2_slot;
509  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
510  #define BUILD_ROTATION() do {} while(0)
511  #define APPLY_ROTATION() do {} while(0)
512  #define CALL_KERNEL() CALL_FQ_A(sqrt(qx*qx+qy*qy),_F1_slot,_F2_slot,local_values.table)
513
514#elif defined(CALL_IQ)
515  // unoriented 1D return <F^2>
516  double qk;
517  #define FETCH_Q() do { qk = q[q_index]; } while (0)
518  #define BUILD_ROTATION() do {} while(0)
519  #define APPLY_ROTATION() do {} while(0)
520  #define CALL_KERNEL() CALL_IQ(qk,local_values.table)
521
522#elif defined(CALL_IQ_A)
523  // unoriented 2D
524  double qx, qy;
525  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
526  #define BUILD_ROTATION() do {} while(0)
527  #define APPLY_ROTATION() do {} while(0)
528  #define CALL_KERNEL() CALL_IQ_A(sqrt(qx*qx+qy*qy), local_values.table)
529
530#elif defined(CALL_IQ_AC)
531  // oriented symmetric 2D
532  double qx, qy;
533  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
534  double qa, qc;
535  QACRotation rotation;
536  // theta, phi, dtheta, dphi are defined below in projection to avoid repeated code.
537  #define BUILD_ROTATION() qac_rotation(&rotation, theta, phi, dtheta, dphi);
538  #define APPLY_ROTATION() qac_apply(&rotation, qx, qy, &qa, &qc)
539  #define CALL_KERNEL() CALL_IQ_AC(qa, qc, local_values.table)
540
541#elif defined(CALL_IQ_ABC)
542  // oriented asymmetric 2D
543  double qx, qy;
544  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
545  double qa, qb, qc;
546  QABCRotation rotation;
547  // theta, phi, dtheta, dphi are defined below in projection to avoid repeated code.
548  // psi and dpsi are only for IQ_ABC, so they are processed here.
549  const double psi = values[details->theta_par+4];
550  local_values.table.psi = 0.;
551  #define BUILD_ROTATION() qabc_rotation(&rotation, theta, phi, psi, dtheta, dphi, local_values.table.psi)
552  #define APPLY_ROTATION() qabc_apply(&rotation, qx, qy, &qa, &qb, &qc)
553  #define CALL_KERNEL() CALL_IQ_ABC(qa, qb, qc, local_values.table)
554
555#elif defined(CALL_IQ_XY)
556  // direct call to qx,qy calculator
557  double qx, qy;
558  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
559  #define BUILD_ROTATION() do {} while(0)
560  #define APPLY_ROTATION() do {} while(0)
561  #define CALL_KERNEL() CALL_IQ_XY(qx, qy, local_values.table)
562#endif
563
564// Define APPLY_PROJECTION depending on model symmetries. We do this outside
565// the previous if block so that we don't need to repeat the identical
566// logic in the IQ_AC and IQ_ABC branches.  This will become more important
567// if we implement more projections, or more complicated projections.
568#if defined(CALL_IQ) || defined(CALL_IQ_A) || defined(CALL_FQ) || defined(CALL_FQ_A)
569  // no orientation
570  #define APPLY_PROJECTION() const double weight=weight0
571#elif defined(CALL_IQ_XY) // pass orientation to the model
572  // CRUFT: support oriented model which define Iqxy rather than Iqac or Iqabc
573  // Need to plug the values for the orientation angles back into parameter
574  // table in case they were overridden by the orientation offset.  This
575  // means that orientation dispersity will not work for these models, but
576  // it was broken anyway, so no matter.  Still want to provide Iqxy in case
577  // the user model wants full control of orientation/magnetism.
578  #if defined(HAVE_PSI)
579    const double theta = values[details->theta_par+2];
580    const double phi = values[details->theta_par+3];
581    const double psi = values[details->theta_par+4];
582    double weight;
583    #define APPLY_PROJECTION() do { \
584      local_values.table.theta = theta; \
585      local_values.table.phi = phi; \
586      local_values.table.psi = psi; \
587      weight=weight0; \
588    } while (0)
589  #elif defined(HAVE_THETA)
590    const double theta = values[details->theta_par+2];
591    const double phi = values[details->theta_par+3];
592    double weight;
593    #define APPLY_PROJECTION() do { \
594      local_values.table.theta = theta; \
595      local_values.table.phi = phi; \
596      weight=weight0; \
597    } while (0)
598  #else
599    #define APPLY_PROJECTION() const double weight=weight0
600  #endif
601#else // apply jitter and view before calling the model
602  // Grab the "view" angles (theta, phi, psi) from the initial parameter table.
603  const double theta = values[details->theta_par+2];
604  const double phi = values[details->theta_par+3];
605  // Make sure jitter angle defaults to zero if there is no jitter distribution
606  local_values.table.theta = 0.;
607  local_values.table.phi = 0.;
608  // The "jitter" angles (dtheta, dphi, dpsi) are stored with the
609  // dispersity values and copied to the local parameter table as
610  // we go through the mesh.
611  double dtheta, dphi, weight;
612  #if PROJECTION == 1 // equirectangular
613    #define APPLY_PROJECTION() do { \
614      dtheta = local_values.table.theta; \
615      dphi = local_values.table.phi; \
616      weight = fabs(cos(dtheta*M_PI_180)) * weight0; \
617    } while (0)
618  #elif PROJECTION == 2 // sinusoidal
619    #define APPLY_PROJECTION() do { \
620      dtheta = local_values.table.theta; \
621      dphi = local_values.table.phi; \
622      weight = weight0; \
623      if (dtheta != 90.0) dphi /= cos(dtheta*M_PI_180); \
624      else if (dphi != 0.0) weight = 0.; \
625      if (fabs(dphi) >= 180.) weight = 0.; \
626    } while (0)
627  #endif
628#endif // done defining APPLY_PROJECTION
629
630// ** define looping macros **
631
632// Define looping variables
633#define PD_INIT(_LOOP) \
634  const int n##_LOOP = details->pd_length[_LOOP]; \
635  const int p##_LOOP = details->pd_par[_LOOP]; \
636  pglobal const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \
637  pglobal const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \
638  int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP;
639
640// Jump into the middle of the dispersity loop
641#define PD_OPEN(_LOOP,_OUTER) \
642  while (i##_LOOP < n##_LOOP) { \
643    local_values.vector[p##_LOOP] = v##_LOOP[i##_LOOP]; \
644    const double weight##_LOOP = w##_LOOP[i##_LOOP] * weight##_OUTER;
645
646// create the variable "weight#=1.0" where # is the outermost level+1 (=MAX_PD).
647#define _PD_OUTERMOST_WEIGHT(_n) const double weight##_n = 1.0;
648#define PD_OUTERMOST_WEIGHT(_n) _PD_OUTERMOST_WEIGHT(_n)
649
650// Close out the loop
651#define PD_CLOSE(_LOOP) \
652    if (step >= pd_stop) break; \
653    ++i##_LOOP; \
654  } \
655  i##_LOOP = 0;
656
657// ====== construct the loops =======
658
659// Pointers to the start of the dispersity and weight vectors, if needed.
660#if MAX_PD>0
661  pglobal const double *pd_value = values + NUM_VALUES;
662  pglobal const double *pd_weight = pd_value + details->num_weights;
663#endif
664
665// The variable "step" is the current position in the dispersity loop.
666// It will be incremented each time a new point in the mesh is accumulated,
667// and used to test whether we have reached pd_stop.
668int step = pd_start;
669
670// *** define loops for each of 0, 1, 2, ..., modelinfo.MAX_PD-1 ***
671
672// define looping variables
673#if MAX_PD>4
674  PD_INIT(4)
675#endif
676#if MAX_PD>3
677  PD_INIT(3)
678#endif
679#if MAX_PD>2
680  PD_INIT(2)
681#endif
682#if MAX_PD>1
683  PD_INIT(1)
684#endif
685#if MAX_PD>0
686  PD_INIT(0)
687#endif
688
689// open nested loops
690PD_OUTERMOST_WEIGHT(MAX_PD)
691#if MAX_PD>4
692  PD_OPEN(4,5)
693#endif
694#if MAX_PD>3
695  PD_OPEN(3,4)
696#endif
697#if MAX_PD>2
698  PD_OPEN(2,3)
699#endif
700#if MAX_PD>1
701  PD_OPEN(1,2)
702#endif
703#if MAX_PD>0
704  PD_OPEN(0,1)
705#endif
706
707//if (q_index==0) {printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n");}
708
709  // ====== loop body =======
710  #ifdef INVALID
711  if (!INVALID(local_values.table))
712  #endif
713  {
714     APPLY_PROJECTION();
715
716    // Accumulate I(q)
717    // Note: weight==0 must always be excluded
718    if (weight > cutoff) {
719      double form, shell;
720      CALL_VOLUME(form, shell, local_values.table);
721      weight_norm += weight;
722      weighted_form += weight * form;
723      weighted_shell += weight * shell;
724      if (radius_effective_mode != 0) {
725        weighted_radius += weight * CALL_RADIUS_EFFECTIVE(radius_effective_mode, local_values.table);
726      }
727      BUILD_ROTATION();
728
729#if !defined(USE_GPU)
730      // DLL needs to explicitly loop over the q values.
731      #ifdef USE_OPENMP
732      #pragma omp parallel for
733      #endif
734      for (q_index=0; q_index<nq; q_index++)
735#endif // !USE_GPU
736      {
737
738        FETCH_Q();
739        APPLY_ROTATION();
740
741        // ======= COMPUTE SCATTERING ==========
742        #if defined(MAGNETIC) && NUM_MAGNETIC > 0
743          // Compute the scattering from the magnetic cross sections.
744          double F2 = 0.0;
745          const double qsq = qx*qx + qy*qy;
746          if (qsq > 1.e-16) {
747            // TODO: what is the magnetic scattering at q=0
748            const double px = (qy*cos_mspin + qx*sin_mspin)/qsq;
749            const double py = (qy*sin_mspin - qx*cos_mspin)/qsq;
750
751            // loop over uu, ud real, du real, dd, ud imag, du imag
752            for (unsigned int xs=0; xs<6; xs++) {
753              const double xs_weight = xs_weights[xs];
754              if (xs_weight > 1.e-8) {
755                // Since the cross section weight is significant, set the slds
756                // to the effective slds for this cross section, call the
757                // kernel, and add according to weight.
758                for (int sk=0; sk<NUM_MAGNETIC; sk++) {
759                  const int32_t mag_index = NUM_PARS+5 + 3*sk;
760                  const int32_t sld_index = slds[sk];
761                  const double mx = values[mag_index];
762                  const double my = values[mag_index+1];
763                  const double mz = values[mag_index+2];
764                  local_values.vector[sld_index] =
765                    mag_sld(xs, qx, qy, px, py, values[sld_index+2], mx, my, mz);
766//if (q_index==0) printf("%d: (qx,qy)=(%g,%g) xs=%d sld%d=%g p=(%g,%g) m=(%g,%g,%g)\n",
767//  q_index, qx, qy, xs, sk, local_values.vector[sld_index], px, py, mx, my, mz);
768                }
769                F2 += xs_weight * CALL_KERNEL();
770              }
771            }
772          }
773        #else  // !MAGNETIC
774          #if defined(CALL_FQ)
775            CALL_KERNEL(); // sets F1 and F2 by reference
776          #else
777            const double F2 = CALL_KERNEL();
778          #endif
779        #endif // !MAGNETIC
780//printf("q_index:%d %g %g %g %g\n", q_index, F2, weight0);
781
782        #if defined(USE_GPU)
783          #if defined(CALL_FQ)
784            this_F2 += weight * F2;
785            this_F1 += weight * F1;
786          #else
787            this_F2 += weight * F2;
788          #endif
789        #else // !USE_OPENCL
790          #if defined(CALL_FQ)
791            result[2*q_index+0] += weight * F2;
792            result[2*q_index+1] += weight * F1;
793          #else
794            result[q_index] += weight * F2;
795          #endif
796        #endif // !USE_OPENCL
797      }
798    }
799  }
800// close nested loops
801++step;
802#if MAX_PD>0
803  PD_CLOSE(0)
804#endif
805#if MAX_PD>1
806  PD_CLOSE(1)
807#endif
808#if MAX_PD>2
809  PD_CLOSE(2)
810#endif
811#if MAX_PD>3
812  PD_CLOSE(3)
813#endif
814#if MAX_PD>4
815  PD_CLOSE(4)
816#endif
817
818// Remember the results and the updated norm.
819#if defined(USE_GPU)
820  #if defined(CALL_FQ)
821  result[2*q_index+0] = this_F2;
822  result[2*q_index+1] = this_F1;
823  #else
824  result[q_index] = this_F2;
825  #endif
826  if (q_index == 0)
827#endif
828  {
829#if defined(CALL_FQ)
830    result[2*nq] = weight_norm;
831    result[2*nq+1] = weighted_form;
832    result[2*nq+2] = weighted_shell;
833    result[2*nq+3] = weighted_radius;
834#else
835    result[nq] = weight_norm;
836    result[nq+1] = weighted_form;
837    result[nq+2] = weighted_shell;
838    result[nq+3] = weighted_radius;
839#endif
840  }
841
842// ** clear the macros in preparation for the next kernel **
843#undef PD_INIT
844#undef PD_OPEN
845#undef PD_CLOSE
846#undef FETCH_Q
847#undef APPLY_PROJECTION
848#undef BUILD_ROTATION
849#undef APPLY_ROTATION
850#undef CALL_KERNEL
851}
Note: See TracBrowser for help on using the repository browser.