source: sasmodels/sasmodels/kernel_iq.c @ 71b751d

core_shell_microgelsmagnetic_modelticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since 71b751d was c036ddb, checked in by Paul Kienzle <pkienzle@…>, 6 years ago

refactor so Iq is not needed if Fq is defined

  • Property mode set to 100644
File size: 28.3 KB
Line 
1/*
2    ##########################################################
3    #                                                        #
4    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
5    #   !!                                              !!   #
6    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
7    #   !!                                              !!   #
8    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
9    #                                                        #
10    ##########################################################
11*/
12
13// NOTE: the following macros are defined in generate.py:
14//
15//  MAX_PD : the maximum number of dispersity loops allowed for this model,
16//      which will be at most modelinfo.MAX_PD.
17//  NUM_PARS : the number of parameters in the parameter table
18//  NUM_VALUES : the number of values to skip at the start of the
19//      values array before you get to the dispersity values.
20//  PARAMETER_TABLE : list of parameter declarations used to create the
21//      ParameterTable type.
22//  KERNEL_NAME : model_Iq, model_Iqxy or model_Imagnetic.  This code is
23//      included three times, once for each kernel type.
24//  MAGNETIC : defined when the magnetic kernel is being instantiated
25//  NUM_MAGNETIC : the number of magnetic parameters
26//  MAGNETIC_PARS : a comma-separated list of indices to the sld
27//      parameters in the parameter table.
28//  CALL_VOLUME(table) : call the form volume function
29//  CALL_IQ(q, table) : call the Iq function for 1D calcs.
30//  CALL_IQ_A(q, table) : call the Iq function with |q| for 2D data.
31//  CALL_FQ(q, F1, F2, table) : call the Fq function for 1D calcs.
32//  CALL_FQ_A(q, F1, F2, table) : call the Iq function with |q| for 2D data.
33//  CALL_IQ_AC(qa, qc, table) : call the Iqxy function for symmetric shapes
34//  CALL_IQ_ABC(qa, qc, table) : call the Iqxy function for asymmetric shapes
35//  CALL_IQ_XY(qx, qy, table) : call the Iqxy function for arbitrary models
36//  INVALID(table) : test if the current point is feesible to calculate.  This
37//      will be defined in the kernel definition file.
38//  PROJECTION : equirectangular=1, sinusoidal=2
39//      see explore/jitter.py for definitions.
40
41
42#ifndef _PAR_BLOCK_ // protected block so we can include this code twice.
43#define _PAR_BLOCK_
44
45typedef struct {
46#if MAX_PD > 0
47    int32_t pd_par[MAX_PD];     // id of the nth dispersity variable
48    int32_t pd_length[MAX_PD];  // length of the nth dispersity weight vector
49    int32_t pd_offset[MAX_PD];  // offset of pd weights in the value & weight vector
50    int32_t pd_stride[MAX_PD];  // stride to move to the next index at this level
51#endif // MAX_PD > 0
52    int32_t num_eval;           // total number of voxels in hypercube
53    int32_t num_weights;        // total length of the weights vector
54    int32_t num_active;         // number of non-trivial pd loops
55    int32_t theta_par;          // id of first orientation variable
56} ProblemDetails;
57
58// Intel HD 4000 needs private arrays to be a multiple of 4 long
59typedef struct {
60    PARAMETER_TABLE
61} ParameterTable;
62typedef union {
63    ParameterTable table;
64    double vector[4*((NUM_PARS+3)/4)];
65} ParameterBlock;
66#endif // _PAR_BLOCK_
67
68#if defined(MAGNETIC) && NUM_MAGNETIC > 0
69// ===== Helper functions for magnetism =====
70
71// Return value restricted between low and high
72static double clip(double value, double low, double high)
73{
74  return (value < low ? low : (value > high ? high : value));
75}
76
77// Compute spin cross sections given in_spin and out_spin
78// To convert spin cross sections to sld b:
79//     uu * (sld - m_sigma_x);
80//     dd * (sld + m_sigma_x);
81//     ud * (m_sigma_y - 1j*m_sigma_z);
82//     du * (m_sigma_y + 1j*m_sigma_z);
83// weights for spin crosssections: dd du real, ud real, uu, du imag, ud imag
84static void set_spin_weights(double in_spin, double out_spin, double weight[6])
85{
86  in_spin = clip(in_spin, 0.0, 1.0);
87  out_spin = clip(out_spin, 0.0, 1.0);
88  // Previous version of this function took the square root of the weights,
89  // under the assumption that
90  //
91  //     w*I(q, rho1, rho2, ...) = I(q, sqrt(w)*rho1, sqrt(w)*rho2, ...)
92  //
93  // However, since the weights are applied to the final intensity and
94  // are not interned inside the I(q) function, we want the full
95  // weight and not the square root.  Any function using
96  // set_spin_weights as part of calculating an amplitude will need to
97  // manually take that square root, but there is currently no such
98  // function.
99  weight[0] = (1.0-in_spin) * (1.0-out_spin); // dd
100  weight[1] = (1.0-in_spin) * out_spin;       // du
101  weight[2] = in_spin * (1.0-out_spin);       // ud
102  weight[3] = in_spin * out_spin;             // uu
103  weight[4] = weight[1]; // du.imag
104  weight[5] = weight[2]; // ud.imag
105}
106
107// Compute the magnetic sld
108static double mag_sld(
109  const unsigned int xs, // 0=dd, 1=du.real, 2=ud.real, 3=uu, 4=du.imag, 5=ud.imag
110  const double qx, const double qy,
111  const double px, const double py,
112  const double sld,
113  const double mx, const double my, const double mz
114)
115{
116  if (xs < 4) {
117    const double perp = qy*mx - qx*my;
118    switch (xs) {
119      default: // keep compiler happy; condition ensures xs in [0,1,2,3]
120      case 0: // uu => sld - D M_perpx
121          return sld - px*perp;
122      case 1: // ud.real => -D M_perpy
123          return py*perp;
124      case 2: // du.real => -D M_perpy
125          return py*perp;
126      case 3: // dd => sld + D M_perpx
127          return sld + px*perp;
128    }
129  } else {
130    if (xs== 4) {
131      return -mz;  // du.imag => +D M_perpz
132    } else { // index == 5
133      return +mz;  // ud.imag => -D M_perpz
134    }
135  }
136}
137
138
139#endif
140
141// ===== Helper functions for orientation and jitter =====
142
143// To change the definition of the angles, run explore/angles.py, which
144// uses sympy to generate the equations.
145
146#if !defined(_QAC_SECTION) && defined(CALL_IQ_AC)
147#define _QAC_SECTION
148
149typedef struct {
150    double R31, R32;
151} QACRotation;
152
153// Fill in the rotation matrix R from the view angles (theta, phi) and the
154// jitter angles (dtheta, dphi).  This matrix can be applied to all of the
155// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qc]'
156static void
157qac_rotation(
158    QACRotation *rotation,
159    double theta, double phi,
160    double dtheta, double dphi)
161{
162    double sin_theta, cos_theta;
163    double sin_phi, cos_phi;
164
165    // reverse view matrix
166    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
167    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
168    const double V11 = cos_phi*cos_theta;
169    const double V12 = sin_phi*cos_theta;
170    const double V21 = -sin_phi;
171    const double V22 = cos_phi;
172    const double V31 = sin_theta*cos_phi;
173    const double V32 = sin_phi*sin_theta;
174
175    // reverse jitter matrix
176    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
177    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
178    const double J31 = sin_theta;
179    const double J32 = -sin_phi*cos_theta;
180    const double J33 = cos_phi*cos_theta;
181
182    // reverse matrix
183    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
184    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
185}
186
187// Apply the rotation matrix returned from qac_rotation to the point (qx,qy),
188// returning R*[qx,qy]' = [qa,qc]'
189static void
190qac_apply(
191    QACRotation *rotation,
192    double qx, double qy,
193    double *qa_out, double *qc_out)
194{
195    const double dqc = rotation->R31*qx + rotation->R32*qy;
196    // Indirect calculation of qab, from qab^2 = |q|^2 - qc^2
197    const double dqa = sqrt(-dqc*dqc + qx*qx + qy*qy);
198
199    *qa_out = dqa;
200    *qc_out = dqc;
201}
202#endif // _QAC_SECTION
203
204#if !defined(_QABC_SECTION) && defined(CALL_IQ_ABC)
205#define _QABC_SECTION
206
207typedef struct {
208    double R11, R12;
209    double R21, R22;
210    double R31, R32;
211} QABCRotation;
212
213// Fill in the rotation matrix R from the view angles (theta, phi, psi) and the
214// jitter angles (dtheta, dphi, dpsi).  This matrix can be applied to all of the
215// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qb,qc]'
216static void
217qabc_rotation(
218    QABCRotation *rotation,
219    double theta, double phi, double psi,
220    double dtheta, double dphi, double dpsi)
221{
222    double sin_theta, cos_theta;
223    double sin_phi, cos_phi;
224    double sin_psi, cos_psi;
225
226    // reverse view matrix
227    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
228    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
229    SINCOS(psi*M_PI_180, sin_psi, cos_psi);
230    const double V11 = -sin_phi*sin_psi + cos_phi*cos_psi*cos_theta;
231    const double V12 = sin_phi*cos_psi*cos_theta + sin_psi*cos_phi;
232    const double V21 = -sin_phi*cos_psi - sin_psi*cos_phi*cos_theta;
233    const double V22 = -sin_phi*sin_psi*cos_theta + cos_phi*cos_psi;
234    const double V31 = sin_theta*cos_phi;
235    const double V32 = sin_phi*sin_theta;
236
237    // reverse jitter matrix
238    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
239    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
240    SINCOS(dpsi*M_PI_180, sin_psi, cos_psi);
241    const double J11 = cos_psi*cos_theta;
242    const double J12 = sin_phi*sin_theta*cos_psi + sin_psi*cos_phi;
243    const double J13 = sin_phi*sin_psi - sin_theta*cos_phi*cos_psi;
244    const double J21 = -sin_psi*cos_theta;
245    const double J22 = -sin_phi*sin_psi*sin_theta + cos_phi*cos_psi;
246    const double J23 = sin_phi*cos_psi + sin_psi*sin_theta*cos_phi;
247    const double J31 = sin_theta;
248    const double J32 = -sin_phi*cos_theta;
249    const double J33 = cos_phi*cos_theta;
250
251    // reverse matrix
252    rotation->R11 = J11*V11 + J12*V21 + J13*V31;
253    rotation->R12 = J11*V12 + J12*V22 + J13*V32;
254    rotation->R21 = J21*V11 + J22*V21 + J23*V31;
255    rotation->R22 = J21*V12 + J22*V22 + J23*V32;
256    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
257    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
258}
259
260// Apply the rotation matrix returned from qabc_rotation to the point (qx,qy),
261// returning R*[qx,qy]' = [qa,qb,qc]'
262static void
263qabc_apply(
264    QABCRotation *rotation,
265    double qx, double qy,
266    double *qa_out, double *qb_out, double *qc_out)
267{
268    *qa_out = rotation->R11*qx + rotation->R12*qy;
269    *qb_out = rotation->R21*qx + rotation->R22*qy;
270    *qc_out = rotation->R31*qx + rotation->R32*qy;
271}
272
273#endif // _QABC_SECTION
274
275// ==================== KERNEL CODE ========================
276#define COMPUTE_F1_F2 defined(CALL_FQ)
277kernel
278void KERNEL_NAME(
279    int32_t nq,                 // number of q values
280    const int32_t pd_start,     // where we are in the dispersity loop
281    const int32_t pd_stop,      // where we are stopping in the dispersity loop
282    global const ProblemDetails *details,
283    global const double *values,
284    global const double *q, // nq q values, with padding to boundary
285    global double *result,  // nq+1 return values, again with padding
286    const double cutoff     // cutoff in the dispersity weight product
287    )
288{
289#ifdef USE_OPENCL
290  // who we are and what element we are working with
291  const int q_index = get_global_id(0);
292  if (q_index >= nq) return;
293#else
294  // Define q_index here so that debugging statements can be written to work
295  // for both OpenCL and DLL using:
296  //    if (q_index == 0) {printf(...);}
297  int q_index = 0;
298#endif
299
300  // ** Fill in the local values table **
301  // Storage for the current parameter values.
302  // These will be updated as we walk the dispersity mesh.
303  ParameterBlock local_values;
304  //   values[0] is scale
305  //   values[1] is background
306  #ifdef USE_OPENMP
307  #pragma omp parallel for
308  #endif
309  for (int i=0; i < NUM_PARS; i++) {
310    local_values.vector[i] = values[2+i];
311    //if (q_index==0) printf("p%d = %g\n",i, local_values.vector[i]);
312  }
313  //if (q_index==0) printf("NUM_VALUES:%d  NUM_PARS:%d  MAX_PD:%d\n", NUM_VALUES, NUM_PARS, MAX_PD);
314  //if (q_index==0) printf("start:%d  stop:%d\n", pd_start, pd_stop);
315
316  // ** Precompute magnatism values **
317#if defined(MAGNETIC) && NUM_MAGNETIC>0
318  // Location of the sld parameters in the parameter vector.
319  // These parameters are updated with the effective sld due to magnetism.
320  const int32_t slds[] = { MAGNETIC_PARS };
321
322  // Interpret polarization cross section.
323  //     up_frac_i = values[NUM_PARS+2];
324  //     up_frac_f = values[NUM_PARS+3];
325  //     up_angle = values[NUM_PARS+4];
326  // TODO: could precompute more magnetism parameters before calling the kernel.
327  double xs_weights[8];  // uu, ud real, du real, dd, ud imag, du imag, fill, fill
328  double cos_mspin, sin_mspin;
329  set_spin_weights(values[NUM_PARS+2], values[NUM_PARS+3], xs_weights);
330  SINCOS(-values[NUM_PARS+4]*M_PI_180, sin_mspin, cos_mspin);
331#endif // MAGNETIC
332
333  // ** Fill in the initial results **
334  // If pd_start is zero that means that we are starting a new calculation,
335  // and must initialize the result to zero.  Otherwise, we are restarting
336  // the calculation from somewhere in the middle of the dispersity mesh,
337  // and we update the value rather than reset it. Similarly for the
338  // normalization factor, which is stored as the final value in the
339  // results vector (one past the number of q values).
340  //
341  // The code differs slightly between opencl and dll since opencl is only
342  // seeing one q value (stored in the variable "this_result") while the dll
343  // version must loop over all q.
344  #ifdef USE_OPENCL
345    #if COMPUTE_F1_F2
346      double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
347      double weight_norm = (pd_start == 0 ? 0.0 : result[2*nq+1]);
348      double this_F2 = (pd_start == 0 ? 0.0 : result[q_index]);
349      double this_F1 = (pd_start == 0 ? 0.0 : result[q_index+nq+1]);
350    #else
351      double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
352      double this_result = (pd_start == 0 ? 0.0 : result[q_index]);
353    #endif
354  #else // !USE_OPENCL
355    #if COMPUTE_F1_F2
356      double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
357      double weight_norm = (pd_start == 0 ? 0.0 : result[2*nq+1]);
358    #else
359      double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
360    #endif
361    if (pd_start == 0) {
362      #ifdef USE_OPENMP
363      #pragma omp parallel for
364      #endif
365      #if COMPUTE_F1_F2
366          for (int q_index=0; q_index < 2*nq+2; q_index++) result[q_index] = 0.0;
367      #else
368          for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0;
369      #endif
370    }
371    //if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]);
372#endif // !USE_OPENCL
373
374
375// ====== macros to set up the parts of the loop =======
376/*
377Based on the level of the loop, uses C preprocessor magic to construct
378level-specific looping variables, including these from loop level 3:
379
380  int n3 : length of loop for mesh level 3
381  int i3 : current position in the loop for level 3, which is calculated
382       from a combination of pd_start, pd_stride[3] and pd_length[3].
383  int p3 : is the index into the parameter table for mesh level 3
384  double v3[] : pointer into dispersity array to values for loop 3
385  double w3[] : pointer into dispersity array to weights for loop 3
386  double weight3 : the product of weights from levels 3 and up, computed
387       as weight5*weight4*w3[i3].  Note that we need an outermost
388       value weight5 set to 1.0 for this to work properly.
389
390After expansion, the loop struction will look like the following:
391
392  // --- PD_INIT(4) ---
393  const int n4 = pd_length[4];
394  const int p4 = pd_par[4];
395  global const double *v4 = pd_value + pd_offset[4];
396  global const double *w4 = pd_weight + pd_offset[4];
397  int i4 = (pd_start/pd_stride[4])%n4;  // position in level 4 at pd_start
398
399  // --- PD_INIT(3) ---
400  const int n3 = pd_length[3];
401  ...
402  int i3 = (pd_start/pd_stride[3])%n3;  // position in level 3 at pd_start
403
404  PD_INIT(2)
405  PD_INIT(1)
406  PD_INIT(0)
407
408  // --- PD_OUTERMOST_WEIGHT(5) ---
409  const double weight5 = 1.0;
410
411  // --- PD_OPEN(4,5) ---
412  while (i4 < n4) {
413    parameter[p4] = v4[i4];  // set the value for pd parameter 4 at this mesh point
414    const double weight4 = w4[i4] * weight5;
415
416    // from PD_OPEN(3,4)
417    while (i3 < n3) {
418      parameter[p3] = v3[i3];  // set the value for pd parameter 3 at this mesh point
419      const double weight3 = w3[i3] * weight4;
420
421      PD_OPEN(3,2)
422      PD_OPEN(2,1)
423      PD_OPEN(0,1)
424
425      // ... main loop body ...
426      APPLY_PROJECTION    // convert jitter values to spherical coords
427      BUILD_ROTATION      // construct the rotation matrix qxy => qabc
428      for each q
429          FETCH_Q         // set qx,qy from the q input vector
430          APPLY_ROTATION  // convert qx,qy to qa,qb,qc
431          CALL_KERNEL     // scattering = Iqxy(qa, qb, qc, p1, p2, ...)
432
433      ++step;  // increment counter representing position in dispersity mesh
434
435      PD_CLOSE(0)
436      PD_CLOSE(1)
437      PD_CLOSE(2)
438
439      // --- PD_CLOSE(3) ---
440      if (step >= pd_stop) break;
441      ++i3;
442    }
443    i3 = 0; // reset loop counter for next round through the loop
444
445    // --- PD_CLOSE(4) ---
446    if (step >= pd_stop) break;
447    ++i4;
448  }
449  i4 = 0; // reset loop counter even though no more rounds through the loop
450
451*/
452
453
454// ** prepare inner loops **
455
456// Depending on the shape type (radial, axial, triaxial), the variables
457// and calling parameters in the loop body will be slightly different.
458// Macros capture the differences in one spot so the rest of the code
459// is easier to read. The code below both declares variables for the
460// inner loop and defines the macros that use them.
461
462
463#if defined(CALL_FQ) // COMPUTE_F1_F2 is true
464  // unoriented 1D returning <F> and <F^2>
465  double qk;
466  double F1, F2;
467  #define FETCH_Q() do { qk = q[q_index]; } while (0)
468  #define BUILD_ROTATION() do {} while(0)
469  #define APPLY_ROTATION() do {} while(0)
470  #define CALL_KERNEL() CALL_FQ(qk,F1,F2,local_values.table)
471
472#elif defined(CALL_FQ_A)
473  // unoriented 2D return <F> and <F^2>
474  double qx, qy;
475  double F1, F2;
476  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
477  #define BUILD_ROTATION() do {} while(0)
478  #define APPLY_ROTATION() do {} while(0)
479  #define CALL_KERNEL() CALL_FQ_A(sqrt(qx*qx+qy*qy),F1,F2,local_values.table)
480
481#elif defined(CALL_IQ)
482  // unoriented 1D return <F^2>
483  double qk;
484  #define FETCH_Q() do { qk = q[q_index]; } while (0)
485  #define BUILD_ROTATION() do {} while(0)
486  #define APPLY_ROTATION() do {} while(0)
487  #define CALL_KERNEL() CALL_IQ(qk,local_values.table)
488
489#elif defined(CALL_IQ_A)
490  // unoriented 2D
491  double qx, qy;
492  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
493  #define BUILD_ROTATION() do {} while(0)
494  #define APPLY_ROTATION() do {} while(0)
495  #define CALL_KERNEL() CALL_IQ_A(sqrt(qx*qx+qy*qy), local_values.table)
496
497#elif defined(CALL_IQ_AC)
498  // oriented symmetric 2D
499  double qx, qy;
500  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
501  double qa, qc;
502  QACRotation rotation;
503  // theta, phi, dtheta, dphi are defined below in projection to avoid repeated code.
504  #define BUILD_ROTATION() qac_rotation(&rotation, theta, phi, dtheta, dphi);
505  #define APPLY_ROTATION() qac_apply(&rotation, qx, qy, &qa, &qc)
506  #define CALL_KERNEL() CALL_IQ_AC(qa, qc, local_values.table)
507
508#elif defined(CALL_IQ_ABC)
509  // oriented asymmetric 2D
510  double qx, qy;
511  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
512  double qa, qb, qc;
513  QABCRotation rotation;
514  // theta, phi, dtheta, dphi are defined below in projection to avoid repeated code.
515  // psi and dpsi are only for IQ_ABC, so they are processed here.
516  const double psi = values[details->theta_par+4];
517  local_values.table.psi = 0.;
518  #define BUILD_ROTATION() qabc_rotation(&rotation, theta, phi, psi, dtheta, dphi, local_values.table.psi)
519  #define APPLY_ROTATION() qabc_apply(&rotation, qx, qy, &qa, &qb, &qc)
520  #define CALL_KERNEL() CALL_IQ_ABC(qa, qb, qc, local_values.table)
521
522#elif defined(CALL_IQ_XY)
523  // direct call to qx,qy calculator
524  double qx, qy;
525  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
526  #define BUILD_ROTATION() do {} while(0)
527  #define APPLY_ROTATION() do {} while(0)
528  #define CALL_KERNEL() CALL_IQ_XY(qx, qy, local_values.table)
529#endif
530
531// Define APPLY_PROJECTION depending on model symmetries. We do this outside
532// the previous if block so that we don't need to repeat the identical
533// logic in the IQ_AC and IQ_ABC branches.  This will become more important
534// if we implement more projections, or more complicated projections.
535#if defined(CALL_IQ) || defined(CALL_IQ_A) || defined(CALL_FQ) || defined(CALL_FQ_A)
536  // no orientation
537  #define APPLY_PROJECTION() const double weight=weight0
538#elif defined(CALL_IQ_XY) // pass orientation to the model
539  // CRUFT: support oriented model which define Iqxy rather than Iqac or Iqabc
540  // Need to plug the values for the orientation angles back into parameter
541  // table in case they were overridden by the orientation offset.  This
542  // means that orientation dispersity will not work for these models, but
543  // it was broken anyway, so no matter.  Still want to provide Iqxy in case
544  // the user model wants full control of orientation/magnetism.
545  #if defined(HAVE_PSI)
546    const double theta = values[details->theta_par+2];
547    const double phi = values[details->theta_par+3];
548    const double psi = values[details->theta_par+4];
549    double weight;
550    #define APPLY_PROJECTION() do { \
551      local_values.table.theta = theta; \
552      local_values.table.phi = phi; \
553      local_values.table.psi = psi; \
554      weight=weight0; \
555    } while (0)
556  #elif defined(HAVE_THETA)
557    const double theta = values[details->theta_par+2];
558    const double phi = values[details->theta_par+3];
559    double weight;
560    #define APPLY_PROJECTION() do { \
561      local_values.table.theta = theta; \
562      local_values.table.phi = phi; \
563      weight=weight0; \
564    } while (0)
565  #else
566    #define APPLY_PROJECTION() const double weight=weight0
567  #endif
568#else // apply jitter and view before calling the model
569  // Grab the "view" angles (theta, phi, psi) from the initial parameter table.
570  const double theta = values[details->theta_par+2];
571  const double phi = values[details->theta_par+3];
572  // Make sure jitter angle defaults to zero if there is no jitter distribution
573  local_values.table.theta = 0.;
574  local_values.table.phi = 0.;
575  // The "jitter" angles (dtheta, dphi, dpsi) are stored with the
576  // dispersity values and copied to the local parameter table as
577  // we go through the mesh.
578  double dtheta, dphi, weight;
579  #if PROJECTION == 1 // equirectangular
580    #define APPLY_PROJECTION() do { \
581      dtheta = local_values.table.theta; \
582      dphi = local_values.table.phi; \
583      weight = fabs(cos(dtheta*M_PI_180)) * weight0; \
584    } while (0)
585  #elif PROJECTION == 2 // sinusoidal
586    #define APPLY_PROJECTION() do { \
587      dtheta = local_values.table.theta; \
588      dphi = local_values.table.phi; \
589      weight = weight0; \
590      if (dtheta != 90.0) dphi /= cos(dtheta*M_PI_180); \
591      else if (dphi != 0.0) weight = 0.; \
592      if (fabs(dphi) >= 180.) weight = 0.; \
593    } while (0)
594  #endif
595#endif // done defining APPLY_PROJECTION
596
597// ** define looping macros **
598
599// Define looping variables
600#define PD_INIT(_LOOP) \
601  const int n##_LOOP = details->pd_length[_LOOP]; \
602  const int p##_LOOP = details->pd_par[_LOOP]; \
603  global const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \
604  global const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \
605  int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP;
606
607// Jump into the middle of the dispersity loop
608#define PD_OPEN(_LOOP,_OUTER) \
609  while (i##_LOOP < n##_LOOP) { \
610    local_values.vector[p##_LOOP] = v##_LOOP[i##_LOOP]; \
611    const double weight##_LOOP = w##_LOOP[i##_LOOP] * weight##_OUTER;
612
613// create the variable "weight#=1.0" where # is the outermost level+1 (=MAX_PD).
614#define _PD_OUTERMOST_WEIGHT(_n) const double weight##_n = 1.0;
615#define PD_OUTERMOST_WEIGHT(_n) _PD_OUTERMOST_WEIGHT(_n)
616
617// Close out the loop
618#define PD_CLOSE(_LOOP) \
619    if (step >= pd_stop) break; \
620    ++i##_LOOP; \
621  } \
622  i##_LOOP = 0;
623
624// ====== construct the loops =======
625
626// Pointers to the start of the dispersity and weight vectors, if needed.
627#if MAX_PD>0
628  global const double *pd_value = values + NUM_VALUES;
629  global const double *pd_weight = pd_value + details->num_weights;
630#endif
631
632// The variable "step" is the current position in the dispersity loop.
633// It will be incremented each time a new point in the mesh is accumulated,
634// and used to test whether we have reached pd_stop.
635int step = pd_start;
636
637// *** define loops for each of 0, 1, 2, ..., modelinfo.MAX_PD-1 ***
638
639// define looping variables
640#if MAX_PD>4
641  PD_INIT(4)
642#endif
643#if MAX_PD>3
644  PD_INIT(3)
645#endif
646#if MAX_PD>2
647  PD_INIT(2)
648#endif
649#if MAX_PD>1
650  PD_INIT(1)
651#endif
652#if MAX_PD>0
653  PD_INIT(0)
654#endif
655
656// open nested loops
657PD_OUTERMOST_WEIGHT(MAX_PD)
658#if MAX_PD>4
659  PD_OPEN(4,5)
660#endif
661#if MAX_PD>3
662  PD_OPEN(3,4)
663#endif
664#if MAX_PD>2
665  PD_OPEN(2,3)
666#endif
667#if MAX_PD>1
668  PD_OPEN(1,2)
669#endif
670#if MAX_PD>0
671  PD_OPEN(0,1)
672#endif
673
674//if (q_index==0) {printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n");}
675
676  // ====== loop body =======
677  #ifdef INVALID
678  if (!INVALID(local_values.table))
679  #endif
680  {
681     APPLY_PROJECTION();
682
683    // Accumulate I(q)
684    // Note: weight==0 must always be excluded
685    if (weight > cutoff) {
686      pd_norm += weight * CALL_VOLUME(local_values.table);
687      #if COMPUTE_F1_F2
688      weight_norm += weight;
689      #endif
690      BUILD_ROTATION();
691
692#ifndef USE_OPENCL
693      // DLL needs to explicitly loop over the q values.
694      #ifdef USE_OPENMP
695      #pragma omp parallel for
696      #endif
697      for (q_index=0; q_index<nq; q_index++)
698#endif // !USE_OPENCL
699      {
700
701        FETCH_Q();
702        APPLY_ROTATION();
703
704        // ======= COMPUTE SCATTERING ==========
705        #if defined(MAGNETIC) && NUM_MAGNETIC > 0
706          // Compute the scattering from the magnetic cross sections.
707          double scattering = 0.0;
708          const double qsq = qx*qx + qy*qy;
709          if (qsq > 1.e-16) {
710            // TODO: what is the magnetic scattering at q=0
711            const double px = (qy*cos_mspin + qx*sin_mspin)/qsq;
712            const double py = (qy*sin_mspin - qx*cos_mspin)/qsq;
713
714            // loop over uu, ud real, du real, dd, ud imag, du imag
715            for (unsigned int xs=0; xs<6; xs++) {
716              const double xs_weight = xs_weights[xs];
717              if (xs_weight > 1.e-8) {
718                // Since the cross section weight is significant, set the slds
719                // to the effective slds for this cross section, call the
720                // kernel, and add according to weight.
721                for (int sk=0; sk<NUM_MAGNETIC; sk++) {
722                  const int32_t mag_index = NUM_PARS+5 + 3*sk;
723                  const int32_t sld_index = slds[sk];
724                  const double mx = values[mag_index];
725                  const double my = values[mag_index+1];
726                  const double mz = values[mag_index+2];
727                  local_values.vector[sld_index] =
728                    mag_sld(xs, qx, qy, px, py, values[sld_index+2], mx, my, mz);
729//if (q_index==0) printf("%d: (qx,qy)=(%g,%g) xs=%d sld%d=%g p=(%g,%g) m=(%g,%g,%g)\n",
730//  q_index, qx, qy, xs, sk, local_values.vector[sld_index], px, py, mx, my, mz);
731                }
732                scattering += xs_weight * CALL_KERNEL();
733              }
734            }
735          }
736        #else  // !MAGNETIC
737          #if COMPUTE_F1_F2
738            CALL_KERNEL(); // sets F1 and F2 by reference
739          #else
740            const double scattering = CALL_KERNEL();
741          #endif
742        #endif // !MAGNETIC
743//printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight0);
744
745        #ifdef USE_OPENCL
746          #if COMPUTE_F1_F2
747            this_F1 += weight * F1;
748            this_F2 += weight * F2;
749          #else
750            this_result += weight * scattering;
751          #endif
752        #else // !USE_OPENCL
753          #if COMPUTE_F1_F2
754            result[q_index] += weight * F2;
755            result[q_index+nq+1] += weight * F1;
756          #else
757            result[q_index] += weight * scattering;
758          #endif
759        #endif // !USE_OPENCL
760      }
761    }
762  }
763// close nested loops
764++step;
765#if MAX_PD>0
766  PD_CLOSE(0)
767#endif
768#if MAX_PD>1
769  PD_CLOSE(1)
770#endif
771#if MAX_PD>2
772  PD_CLOSE(2)
773#endif
774#if MAX_PD>3
775  PD_CLOSE(3)
776#endif
777#if MAX_PD>4
778  PD_CLOSE(4)
779#endif
780
781// Remember the current result and the updated norm.
782#ifdef USE_OPENCL
783  #if COMPUTE_F1_F2
784    result[q_index] = this_F2;
785    result[q_index+nq+1] = this_F1;
786    if (q_index == 0) {
787      result[nq] = pd_norm;
788      result[2*nq+1] = weight_norm;
789    }
790  #else
791    result[q_index] = this_result;
792    if (q_index == 0) result[nq] = pd_norm;
793  #endif
794
795//if (q_index == 0) printf("res: %g/%g\n", result[0], pd_norm);
796#else // !USE_OPENCL
797  #if COMPUTE_F1_F2
798    result[nq] = pd_norm;
799    result[2*nq+1] = weight_norm;
800  #else
801    result[nq] = pd_norm;
802  #endif
803//printf("res: %g/%g\n", result[0], pd_norm);
804#endif // !USE_OPENCL
805
806// ** clear the macros in preparation for the next kernel **
807#undef COMPUTE_F1_F2
808#undef PD_INIT
809#undef PD_OPEN
810#undef PD_CLOSE
811#undef FETCH_Q
812#undef APPLY_PROJECTION
813#undef BUILD_ROTATION
814#undef APPLY_ROTATION
815#undef CALL_KERNEL
816}
Note: See TracBrowser for help on using the repository browser.