source: sasmodels/sasmodels/kernel_iq.c @ 2773c66

core_shell_microgelsmagnetic_modelticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since 2773c66 was 2773c66, checked in by Torin Cooper-Bennun <torin.cooper-bennun@…>, 18 months ago

Merge branch 'beta_approx' into beta_approx_new_R_eff

  • Property mode set to 100644
File size: 29.5 KB
Line 
1/*
2    ##########################################################
3    #                                                        #
4    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
5    #   !!                                              !!   #
6    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
7    #   !!                                              !!   #
8    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
9    #                                                        #
10    ##########################################################
11*/
12
13// NOTE: the following macros are defined in generate.py:
14//
15//  MAX_PD : the maximum number of dispersity loops allowed for this model,
16//      which will be at most modelinfo.MAX_PD.
17//  NUM_PARS : the number of parameters in the parameter table
18//  NUM_VALUES : the number of values to skip at the start of the
19//      values array before you get to the dispersity values.
20//  PARAMETER_TABLE : list of parameter declarations used to create the
21//      ParameterTable type.
22//  KERNEL_NAME : model_Iq, model_Iqxy or model_Imagnetic.  This code is
23//      included three times, once for each kernel type.
24//  MAGNETIC : defined when the magnetic kernel is being instantiated
25//  NUM_MAGNETIC : the number of magnetic parameters
26//  MAGNETIC_PARS : a comma-separated list of indices to the sld
27//      parameters in the parameter table.
28//  CALL_VOLUME(table) : call the form volume function
29//  CALL_EFFECTIVE_RADIUS(type, table) : call the R_eff function
30//  CALL_IQ(q, table) : call the Iq function for 1D calcs.
31//  CALL_IQ_A(q, table) : call the Iq function with |q| for 2D data.
32//  CALL_FQ(q, F1, F2, table) : call the Fq function for 1D calcs.
33//  CALL_FQ_A(q, F1, F2, table) : call the Iq function with |q| for 2D data.
34//  CALL_IQ_AC(qa, qc, table) : call the Iqxy function for symmetric shapes
35//  CALL_IQ_ABC(qa, qc, table) : call the Iqxy function for asymmetric shapes
36//  CALL_IQ_XY(qx, qy, table) : call the Iqxy function for arbitrary models
37//  INVALID(table) : test if the current point is feesible to calculate.  This
38//      will be defined in the kernel definition file.
39//  PROJECTION : equirectangular=1, sinusoidal=2
40//      see explore/jitter.py for definitions.
41
42
43#ifndef _PAR_BLOCK_ // protected block so we can include this code twice.
44#define _PAR_BLOCK_
45
46typedef struct {
47#if MAX_PD > 0
48    int32_t pd_par[MAX_PD];     // id of the nth dispersity variable
49    int32_t pd_length[MAX_PD];  // length of the nth dispersity weight vector
50    int32_t pd_offset[MAX_PD];  // offset of pd weights in the value & weight vector
51    int32_t pd_stride[MAX_PD];  // stride to move to the next index at this level
52#endif // MAX_PD > 0
53    int32_t num_eval;           // total number of voxels in hypercube
54    int32_t num_weights;        // total length of the weights vector
55    int32_t num_active;         // number of non-trivial pd loops
56    int32_t theta_par;          // id of first orientation variable
57} ProblemDetails;
58
59// Intel HD 4000 needs private arrays to be a multiple of 4 long
60typedef struct {
61    PARAMETER_TABLE
62} ParameterTable;
63typedef union {
64    ParameterTable table;
65    double vector[4*((NUM_PARS+3)/4)];
66} ParameterBlock;
67#endif // _PAR_BLOCK_
68
69#if defined(MAGNETIC) && NUM_MAGNETIC > 0
70// ===== Helper functions for magnetism =====
71
72// Return value restricted between low and high
73static double clip(double value, double low, double high)
74{
75  return (value < low ? low : (value > high ? high : value));
76}
77
78// Compute spin cross sections given in_spin and out_spin
79// To convert spin cross sections to sld b:
80//     uu * (sld - m_sigma_x);
81//     dd * (sld + m_sigma_x);
82//     ud * (m_sigma_y - 1j*m_sigma_z);
83//     du * (m_sigma_y + 1j*m_sigma_z);
84// weights for spin crosssections: dd du real, ud real, uu, du imag, ud imag
85static void set_spin_weights(double in_spin, double out_spin, double weight[6])
86{
87  in_spin = clip(in_spin, 0.0, 1.0);
88  out_spin = clip(out_spin, 0.0, 1.0);
89  // Previous version of this function took the square root of the weights,
90  // under the assumption that
91  //
92  //     w*I(q, rho1, rho2, ...) = I(q, sqrt(w)*rho1, sqrt(w)*rho2, ...)
93  //
94  // However, since the weights are applied to the final intensity and
95  // are not interned inside the I(q) function, we want the full
96  // weight and not the square root.  Any function using
97  // set_spin_weights as part of calculating an amplitude will need to
98  // manually take that square root, but there is currently no such
99  // function.
100  weight[0] = (1.0-in_spin) * (1.0-out_spin); // dd
101  weight[1] = (1.0-in_spin) * out_spin;       // du
102  weight[2] = in_spin * (1.0-out_spin);       // ud
103  weight[3] = in_spin * out_spin;             // uu
104  weight[4] = weight[1]; // du.imag
105  weight[5] = weight[2]; // ud.imag
106}
107
108// Compute the magnetic sld
109static double mag_sld(
110  const unsigned int xs, // 0=dd, 1=du.real, 2=ud.real, 3=uu, 4=du.imag, 5=ud.imag
111  const double qx, const double qy,
112  const double px, const double py,
113  const double sld,
114  const double mx, const double my, const double mz
115)
116{
117  if (xs < 4) {
118    const double perp = qy*mx - qx*my;
119    switch (xs) {
120      default: // keep compiler happy; condition ensures xs in [0,1,2,3]
121      case 0: // uu => sld - D M_perpx
122          return sld - px*perp;
123      case 1: // ud.real => -D M_perpy
124          return py*perp;
125      case 2: // du.real => -D M_perpy
126          return py*perp;
127      case 3: // dd => sld + D M_perpx
128          return sld + px*perp;
129    }
130  } else {
131    if (xs== 4) {
132      return -mz;  // du.imag => +D M_perpz
133    } else { // index == 5
134      return +mz;  // ud.imag => -D M_perpz
135    }
136  }
137}
138
139
140#endif
141
142// ===== Helper functions for orientation and jitter =====
143
144// To change the definition of the angles, run explore/angles.py, which
145// uses sympy to generate the equations.
146
147#if !defined(_QAC_SECTION) && defined(CALL_IQ_AC)
148#define _QAC_SECTION
149
150typedef struct {
151    double R31, R32;
152} QACRotation;
153
154// Fill in the rotation matrix R from the view angles (theta, phi) and the
155// jitter angles (dtheta, dphi).  This matrix can be applied to all of the
156// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qc]'
157static void
158qac_rotation(
159    QACRotation *rotation,
160    double theta, double phi,
161    double dtheta, double dphi)
162{
163    double sin_theta, cos_theta;
164    double sin_phi, cos_phi;
165
166    // reverse view matrix
167    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
168    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
169    const double V11 = cos_phi*cos_theta;
170    const double V12 = sin_phi*cos_theta;
171    const double V21 = -sin_phi;
172    const double V22 = cos_phi;
173    const double V31 = sin_theta*cos_phi;
174    const double V32 = sin_phi*sin_theta;
175
176    // reverse jitter matrix
177    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
178    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
179    const double J31 = sin_theta;
180    const double J32 = -sin_phi*cos_theta;
181    const double J33 = cos_phi*cos_theta;
182
183    // reverse matrix
184    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
185    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
186}
187
188// Apply the rotation matrix returned from qac_rotation to the point (qx,qy),
189// returning R*[qx,qy]' = [qa,qc]'
190static void
191qac_apply(
192    QACRotation *rotation,
193    double qx, double qy,
194    double *qab_out, double *qc_out)
195{
196    // Indirect calculation of qab, from qab^2 = |q|^2 - qc^2
197    const double dqc = rotation->R31*qx + rotation->R32*qy;
198    const double dqab_sq = -dqc*dqc + qx*qx + qy*qy;
199    //*qab_out = sqrt(fabs(dqab_sq));
200    *qab_out = dqab_sq > 0.0 ? sqrt(dqab_sq) : 0.0;
201    *qc_out = dqc;
202}
203#endif // _QAC_SECTION
204
205#if !defined(_QABC_SECTION) && defined(CALL_IQ_ABC)
206#define _QABC_SECTION
207
208typedef struct {
209    double R11, R12;
210    double R21, R22;
211    double R31, R32;
212} QABCRotation;
213
214// Fill in the rotation matrix R from the view angles (theta, phi, psi) and the
215// jitter angles (dtheta, dphi, dpsi).  This matrix can be applied to all of the
216// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qb,qc]'
217static void
218qabc_rotation(
219    QABCRotation *rotation,
220    double theta, double phi, double psi,
221    double dtheta, double dphi, double dpsi)
222{
223    double sin_theta, cos_theta;
224    double sin_phi, cos_phi;
225    double sin_psi, cos_psi;
226
227    // reverse view matrix
228    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
229    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
230    SINCOS(psi*M_PI_180, sin_psi, cos_psi);
231    const double V11 = -sin_phi*sin_psi + cos_phi*cos_psi*cos_theta;
232    const double V12 = sin_phi*cos_psi*cos_theta + sin_psi*cos_phi;
233    const double V21 = -sin_phi*cos_psi - sin_psi*cos_phi*cos_theta;
234    const double V22 = -sin_phi*sin_psi*cos_theta + cos_phi*cos_psi;
235    const double V31 = sin_theta*cos_phi;
236    const double V32 = sin_phi*sin_theta;
237
238    // reverse jitter matrix
239    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
240    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
241    SINCOS(dpsi*M_PI_180, sin_psi, cos_psi);
242    const double J11 = cos_psi*cos_theta;
243    const double J12 = sin_phi*sin_theta*cos_psi + sin_psi*cos_phi;
244    const double J13 = sin_phi*sin_psi - sin_theta*cos_phi*cos_psi;
245    const double J21 = -sin_psi*cos_theta;
246    const double J22 = -sin_phi*sin_psi*sin_theta + cos_phi*cos_psi;
247    const double J23 = sin_phi*cos_psi + sin_psi*sin_theta*cos_phi;
248    const double J31 = sin_theta;
249    const double J32 = -sin_phi*cos_theta;
250    const double J33 = cos_phi*cos_theta;
251
252    // reverse matrix
253    rotation->R11 = J11*V11 + J12*V21 + J13*V31;
254    rotation->R12 = J11*V12 + J12*V22 + J13*V32;
255    rotation->R21 = J21*V11 + J22*V21 + J23*V31;
256    rotation->R22 = J21*V12 + J22*V22 + J23*V32;
257    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
258    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
259}
260
261// Apply the rotation matrix returned from qabc_rotation to the point (qx,qy),
262// returning R*[qx,qy]' = [qa,qb,qc]'
263static void
264qabc_apply(
265    QABCRotation *rotation,
266    double qx, double qy,
267    double *qa_out, double *qb_out, double *qc_out)
268{
269    *qa_out = rotation->R11*qx + rotation->R12*qy;
270    *qb_out = rotation->R21*qx + rotation->R22*qy;
271    *qc_out = rotation->R31*qx + rotation->R32*qy;
272}
273
274#endif // _QABC_SECTION
275
276// ==================== KERNEL CODE ========================
277#define COMPUTE_F1_F2 defined(CALL_FQ)
278kernel
279void KERNEL_NAME(
280    int32_t nq,                 // number of q values
281    const int32_t pd_start,     // where we are in the dispersity loop
282    const int32_t pd_stop,      // where we are stopping in the dispersity loop
283    global const ProblemDetails *details,
284    global const double *values,
285    global const double *q, // nq q values, with padding to boundary
286    global double *result,  // nq+1 return values, again with padding
287    const double cutoff,     // cutoff in the dispersity weight product
288    int32_t effective_radius_type // which effective radius to compute
289    )
290{
291#ifdef USE_OPENCL
292  // who we are and what element we are working with
293  const int q_index = get_global_id(0);
294  if (q_index >= nq) return;
295#else
296  // Define q_index here so that debugging statements can be written to work
297  // for both OpenCL and DLL using:
298  //    if (q_index == 0) {printf(...);}
299  int q_index = 0;
300#endif
301
302  // ** Fill in the local values table **
303  // Storage for the current parameter values.
304  // These will be updated as we walk the dispersity mesh.
305  ParameterBlock local_values;
306  //   values[0] is scale
307  //   values[1] is background
308  #ifdef USE_OPENMP
309  #pragma omp parallel for
310  #endif
311  for (int i=0; i < NUM_PARS; i++) {
312    local_values.vector[i] = values[2+i];
313    //if (q_index==0) printf("p%d = %g\n",i, local_values.vector[i]);
314  }
315  //if (q_index==0) printf("NUM_VALUES:%d  NUM_PARS:%d  MAX_PD:%d\n", NUM_VALUES, NUM_PARS, MAX_PD);
316  //if (q_index==0) printf("start:%d  stop:%d\n", pd_start, pd_stop);
317
318  // ** Precompute magnatism values **
319#if defined(MAGNETIC) && NUM_MAGNETIC>0
320  // Location of the sld parameters in the parameter vector.
321  // These parameters are updated with the effective sld due to magnetism.
322  const int32_t slds[] = { MAGNETIC_PARS };
323
324  // Interpret polarization cross section.
325  //     up_frac_i = values[NUM_PARS+2];
326  //     up_frac_f = values[NUM_PARS+3];
327  //     up_angle = values[NUM_PARS+4];
328  // TODO: could precompute more magnetism parameters before calling the kernel.
329  double xs_weights[8];  // uu, ud real, du real, dd, ud imag, du imag, fill, fill
330  double cos_mspin, sin_mspin;
331  set_spin_weights(values[NUM_PARS+2], values[NUM_PARS+3], xs_weights);
332  SINCOS(-values[NUM_PARS+4]*M_PI_180, sin_mspin, cos_mspin);
333#endif // MAGNETIC
334
335  // ** Fill in the initial results **
336  // If pd_start is zero that means that we are starting a new calculation,
337  // and must initialize the result to zero.  Otherwise, we are restarting
338  // the calculation from somewhere in the middle of the dispersity mesh,
339  // and we update the value rather than reset it. Similarly for the
340  // normalization factor, which is stored as the final value in the
341  // results vector (one past the number of q values).
342  //
343  // The code differs slightly between opencl and dll since opencl is only
344  // seeing one q value (stored in the variable "this_result") while the dll
345  // version must loop over all q.
346  #ifdef USE_OPENCL
347    #if COMPUTE_F1_F2
348      double weight_norm = (pd_start == 0 ? 0.0 : result[2*nq]);
349      double weighted_volume = (pd_start == 0 ? 0.0 : result[2*nq+1]);
350      double weighted_radius = (pd_start == 0 ? 0.0 : result[2*nq+2]);
351      double this_F2 = (pd_start == 0 ? 0.0 : result[2*q_index+0]);
352      double this_F1 = (pd_start == 0 ? 0.0 : result[2*q_index+1]);
353    #else
354      double weight_norm = (pd_start == 0 ? 0.0 : result[nq]);
355      double weighted_volume = (pd_start == 0 ? 0.0 : result[nq+1]);
356      double weighted_radius = (pd_start == 0 ? 0.0 : result[nq+2]);
357      double this_result = (pd_start == 0 ? 0.0 : result[q_index]);
358    #endif
359  #else // !USE_OPENCL
360    #if COMPUTE_F1_F2
361      double weight_norm = (pd_start == 0 ? 0.0 : result[2*nq]);
362      double weighted_volume = (pd_start == 0 ? 0.0 : result[2*nq+1]);
363      double weighted_radius = (pd_start == 0 ? 0.0 : result[2*nq+2]);
364    #else
365      double weight_norm = (pd_start == 0 ? 0.0 : result[nq]);
366      double weighted_volume = (pd_start == 0 ? 0.0 : result[nq+1]);
367      double weighted_radius = (pd_start == 0 ? 0.0 : result[nq+2]);
368    #endif
369    if (pd_start == 0) {
370      #ifdef USE_OPENMP
371      #pragma omp parallel for
372      #endif
373      #if COMPUTE_F1_F2
374          // 2*nq for F^2,F pairs
375          for (int q_index=0; q_index < 2*nq; q_index++) result[q_index] = 0.0;
376      #else
377          for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0;
378      #endif
379    }
380    //if (q_index==0) printf("start %d %g %g\n", pd_start, weighted_volume, result[0]);
381#endif // !USE_OPENCL
382
383
384// ====== macros to set up the parts of the loop =======
385/*
386Based on the level of the loop, uses C preprocessor magic to construct
387level-specific looping variables, including these from loop level 3:
388
389  int n3 : length of loop for mesh level 3
390  int i3 : current position in the loop for level 3, which is calculated
391       from a combination of pd_start, pd_stride[3] and pd_length[3].
392  int p3 : is the index into the parameter table for mesh level 3
393  double v3[] : pointer into dispersity array to values for loop 3
394  double w3[] : pointer into dispersity array to weights for loop 3
395  double weight3 : the product of weights from levels 3 and up, computed
396       as weight5*weight4*w3[i3].  Note that we need an outermost
397       value weight5 set to 1.0 for this to work properly.
398
399After expansion, the loop struction will look like the following:
400
401  // --- PD_INIT(4) ---
402  const int n4 = pd_length[4];
403  const int p4 = pd_par[4];
404  global const double *v4 = pd_value + pd_offset[4];
405  global const double *w4 = pd_weight + pd_offset[4];
406  int i4 = (pd_start/pd_stride[4])%n4;  // position in level 4 at pd_start
407
408  // --- PD_INIT(3) ---
409  const int n3 = pd_length[3];
410  ...
411  int i3 = (pd_start/pd_stride[3])%n3;  // position in level 3 at pd_start
412
413  PD_INIT(2)
414  PD_INIT(1)
415  PD_INIT(0)
416
417  // --- PD_OUTERMOST_WEIGHT(5) ---
418  const double weight5 = 1.0;
419
420  // --- PD_OPEN(4,5) ---
421  while (i4 < n4) {
422    parameter[p4] = v4[i4];  // set the value for pd parameter 4 at this mesh point
423    const double weight4 = w4[i4] * weight5;
424
425    // from PD_OPEN(3,4)
426    while (i3 < n3) {
427      parameter[p3] = v3[i3];  // set the value for pd parameter 3 at this mesh point
428      const double weight3 = w3[i3] * weight4;
429
430      PD_OPEN(3,2)
431      PD_OPEN(2,1)
432      PD_OPEN(0,1)
433
434      // ... main loop body ...
435      APPLY_PROJECTION    // convert jitter values to spherical coords
436      BUILD_ROTATION      // construct the rotation matrix qxy => qabc
437      for each q
438          FETCH_Q         // set qx,qy from the q input vector
439          APPLY_ROTATION  // convert qx,qy to qa,qb,qc
440          CALL_KERNEL     // scattering = Iqxy(qa, qb, qc, p1, p2, ...)
441
442      ++step;  // increment counter representing position in dispersity mesh
443
444      PD_CLOSE(0)
445      PD_CLOSE(1)
446      PD_CLOSE(2)
447
448      // --- PD_CLOSE(3) ---
449      if (step >= pd_stop) break;
450      ++i3;
451    }
452    i3 = 0; // reset loop counter for next round through the loop
453
454    // --- PD_CLOSE(4) ---
455    if (step >= pd_stop) break;
456    ++i4;
457  }
458  i4 = 0; // reset loop counter even though no more rounds through the loop
459
460*/
461
462
463// ** prepare inner loops **
464
465// Depending on the shape type (radial, axial, triaxial), the variables
466// and calling parameters in the loop body will be slightly different.
467// Macros capture the differences in one spot so the rest of the code
468// is easier to read. The code below both declares variables for the
469// inner loop and defines the macros that use them.
470
471
472#if defined(CALL_FQ) // COMPUTE_F1_F2 is true
473  // unoriented 1D returning <F> and <F^2>
474  double qk;
475  double F1, F2;
476  #define FETCH_Q() do { qk = q[q_index]; } while (0)
477  #define BUILD_ROTATION() do {} while(0)
478  #define APPLY_ROTATION() do {} while(0)
479  #define CALL_KERNEL() CALL_FQ(qk,F1,F2,local_values.table)
480
481#elif defined(CALL_FQ_A)
482  // unoriented 2D return <F> and <F^2>
483  double qx, qy;
484  double F1, F2;
485  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
486  #define BUILD_ROTATION() do {} while(0)
487  #define APPLY_ROTATION() do {} while(0)
488  #define CALL_KERNEL() CALL_FQ_A(sqrt(qx*qx+qy*qy),F1,F2,local_values.table)
489
490#elif defined(CALL_IQ)
491  // unoriented 1D return <F^2>
492  double qk;
493  #define FETCH_Q() do { qk = q[q_index]; } while (0)
494  #define BUILD_ROTATION() do {} while(0)
495  #define APPLY_ROTATION() do {} while(0)
496  #define CALL_KERNEL() CALL_IQ(qk,local_values.table)
497
498#elif defined(CALL_IQ_A)
499  // unoriented 2D
500  double qx, qy;
501  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
502  #define BUILD_ROTATION() do {} while(0)
503  #define APPLY_ROTATION() do {} while(0)
504  #define CALL_KERNEL() CALL_IQ_A(sqrt(qx*qx+qy*qy), local_values.table)
505
506#elif defined(CALL_IQ_AC)
507  // oriented symmetric 2D
508  double qx, qy;
509  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
510  double qa, qc;
511  QACRotation rotation;
512  // theta, phi, dtheta, dphi are defined below in projection to avoid repeated code.
513  #define BUILD_ROTATION() qac_rotation(&rotation, theta, phi, dtheta, dphi);
514  #define APPLY_ROTATION() qac_apply(&rotation, qx, qy, &qa, &qc)
515  #define CALL_KERNEL() CALL_IQ_AC(qa, qc, local_values.table)
516
517#elif defined(CALL_IQ_ABC)
518  // oriented asymmetric 2D
519  double qx, qy;
520  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
521  double qa, qb, qc;
522  QABCRotation rotation;
523  // theta, phi, dtheta, dphi are defined below in projection to avoid repeated code.
524  // psi and dpsi are only for IQ_ABC, so they are processed here.
525  const double psi = values[details->theta_par+4];
526  local_values.table.psi = 0.;
527  #define BUILD_ROTATION() qabc_rotation(&rotation, theta, phi, psi, dtheta, dphi, local_values.table.psi)
528  #define APPLY_ROTATION() qabc_apply(&rotation, qx, qy, &qa, &qb, &qc)
529  #define CALL_KERNEL() CALL_IQ_ABC(qa, qb, qc, local_values.table)
530
531#elif defined(CALL_IQ_XY)
532  // direct call to qx,qy calculator
533  double qx, qy;
534  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
535  #define BUILD_ROTATION() do {} while(0)
536  #define APPLY_ROTATION() do {} while(0)
537  #define CALL_KERNEL() CALL_IQ_XY(qx, qy, local_values.table)
538#endif
539
540// Define APPLY_PROJECTION depending on model symmetries. We do this outside
541// the previous if block so that we don't need to repeat the identical
542// logic in the IQ_AC and IQ_ABC branches.  This will become more important
543// if we implement more projections, or more complicated projections.
544#if defined(CALL_IQ) || defined(CALL_IQ_A) || defined(CALL_FQ) || defined(CALL_FQ_A)
545  // no orientation
546  #define APPLY_PROJECTION() const double weight=weight0
547#elif defined(CALL_IQ_XY) // pass orientation to the model
548  // CRUFT: support oriented model which define Iqxy rather than Iqac or Iqabc
549  // Need to plug the values for the orientation angles back into parameter
550  // table in case they were overridden by the orientation offset.  This
551  // means that orientation dispersity will not work for these models, but
552  // it was broken anyway, so no matter.  Still want to provide Iqxy in case
553  // the user model wants full control of orientation/magnetism.
554  #if defined(HAVE_PSI)
555    const double theta = values[details->theta_par+2];
556    const double phi = values[details->theta_par+3];
557    const double psi = values[details->theta_par+4];
558    double weight;
559    #define APPLY_PROJECTION() do { \
560      local_values.table.theta = theta; \
561      local_values.table.phi = phi; \
562      local_values.table.psi = psi; \
563      weight=weight0; \
564    } while (0)
565  #elif defined(HAVE_THETA)
566    const double theta = values[details->theta_par+2];
567    const double phi = values[details->theta_par+3];
568    double weight;
569    #define APPLY_PROJECTION() do { \
570      local_values.table.theta = theta; \
571      local_values.table.phi = phi; \
572      weight=weight0; \
573    } while (0)
574  #else
575    #define APPLY_PROJECTION() const double weight=weight0
576  #endif
577#else // apply jitter and view before calling the model
578  // Grab the "view" angles (theta, phi, psi) from the initial parameter table.
579  const double theta = values[details->theta_par+2];
580  const double phi = values[details->theta_par+3];
581  // Make sure jitter angle defaults to zero if there is no jitter distribution
582  local_values.table.theta = 0.;
583  local_values.table.phi = 0.;
584  // The "jitter" angles (dtheta, dphi, dpsi) are stored with the
585  // dispersity values and copied to the local parameter table as
586  // we go through the mesh.
587  double dtheta, dphi, weight;
588  #if PROJECTION == 1 // equirectangular
589    #define APPLY_PROJECTION() do { \
590      dtheta = local_values.table.theta; \
591      dphi = local_values.table.phi; \
592      weight = fabs(cos(dtheta*M_PI_180)) * weight0; \
593    } while (0)
594  #elif PROJECTION == 2 // sinusoidal
595    #define APPLY_PROJECTION() do { \
596      dtheta = local_values.table.theta; \
597      dphi = local_values.table.phi; \
598      weight = weight0; \
599      if (dtheta != 90.0) dphi /= cos(dtheta*M_PI_180); \
600      else if (dphi != 0.0) weight = 0.; \
601      if (fabs(dphi) >= 180.) weight = 0.; \
602    } while (0)
603  #endif
604#endif // done defining APPLY_PROJECTION
605
606// ** define looping macros **
607
608// Define looping variables
609#define PD_INIT(_LOOP) \
610  const int n##_LOOP = details->pd_length[_LOOP]; \
611  const int p##_LOOP = details->pd_par[_LOOP]; \
612  global const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \
613  global const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \
614  int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP;
615
616// Jump into the middle of the dispersity loop
617#define PD_OPEN(_LOOP,_OUTER) \
618  while (i##_LOOP < n##_LOOP) { \
619    local_values.vector[p##_LOOP] = v##_LOOP[i##_LOOP]; \
620    const double weight##_LOOP = w##_LOOP[i##_LOOP] * weight##_OUTER;
621
622// create the variable "weight#=1.0" where # is the outermost level+1 (=MAX_PD).
623#define _PD_OUTERMOST_WEIGHT(_n) const double weight##_n = 1.0;
624#define PD_OUTERMOST_WEIGHT(_n) _PD_OUTERMOST_WEIGHT(_n)
625
626// Close out the loop
627#define PD_CLOSE(_LOOP) \
628    if (step >= pd_stop) break; \
629    ++i##_LOOP; \
630  } \
631  i##_LOOP = 0;
632
633// ====== construct the loops =======
634
635// Pointers to the start of the dispersity and weight vectors, if needed.
636#if MAX_PD>0
637  global const double *pd_value = values + NUM_VALUES;
638  global const double *pd_weight = pd_value + details->num_weights;
639#endif
640
641// The variable "step" is the current position in the dispersity loop.
642// It will be incremented each time a new point in the mesh is accumulated,
643// and used to test whether we have reached pd_stop.
644int step = pd_start;
645
646// *** define loops for each of 0, 1, 2, ..., modelinfo.MAX_PD-1 ***
647
648// define looping variables
649#if MAX_PD>4
650  PD_INIT(4)
651#endif
652#if MAX_PD>3
653  PD_INIT(3)
654#endif
655#if MAX_PD>2
656  PD_INIT(2)
657#endif
658#if MAX_PD>1
659  PD_INIT(1)
660#endif
661#if MAX_PD>0
662  PD_INIT(0)
663#endif
664
665// open nested loops
666PD_OUTERMOST_WEIGHT(MAX_PD)
667#if MAX_PD>4
668  PD_OPEN(4,5)
669#endif
670#if MAX_PD>3
671  PD_OPEN(3,4)
672#endif
673#if MAX_PD>2
674  PD_OPEN(2,3)
675#endif
676#if MAX_PD>1
677  PD_OPEN(1,2)
678#endif
679#if MAX_PD>0
680  PD_OPEN(0,1)
681#endif
682
683//if (q_index==0) {printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n");}
684
685  // ====== loop body =======
686  #ifdef INVALID
687  if (!INVALID(local_values.table))
688  #endif
689  {
690     APPLY_PROJECTION();
691
692    // Accumulate I(q)
693    // Note: weight==0 must always be excluded
694    if (weight > cutoff) {
695      weighted_volume += weight * CALL_VOLUME(local_values.table);
696      #if COMPUTE_F1_F2
697      weight_norm += weight;
698      #endif
699      if (effective_radius_type != 0) {
700        weighted_radius += weight * CALL_EFFECTIVE_RADIUS(effective_radius_type, local_values.table);
701      }
702      BUILD_ROTATION();
703
704#ifndef USE_OPENCL
705      // DLL needs to explicitly loop over the q values.
706      #ifdef USE_OPENMP
707      #pragma omp parallel for
708      #endif
709      for (q_index=0; q_index<nq; q_index++)
710#endif // !USE_OPENCL
711      {
712
713        FETCH_Q();
714        APPLY_ROTATION();
715
716        // ======= COMPUTE SCATTERING ==========
717        #if defined(MAGNETIC) && NUM_MAGNETIC > 0
718          // Compute the scattering from the magnetic cross sections.
719          double scattering = 0.0;
720          const double qsq = qx*qx + qy*qy;
721          if (qsq > 1.e-16) {
722            // TODO: what is the magnetic scattering at q=0
723            const double px = (qy*cos_mspin + qx*sin_mspin)/qsq;
724            const double py = (qy*sin_mspin - qx*cos_mspin)/qsq;
725
726            // loop over uu, ud real, du real, dd, ud imag, du imag
727            for (unsigned int xs=0; xs<6; xs++) {
728              const double xs_weight = xs_weights[xs];
729              if (xs_weight > 1.e-8) {
730                // Since the cross section weight is significant, set the slds
731                // to the effective slds for this cross section, call the
732                // kernel, and add according to weight.
733                for (int sk=0; sk<NUM_MAGNETIC; sk++) {
734                  const int32_t mag_index = NUM_PARS+5 + 3*sk;
735                  const int32_t sld_index = slds[sk];
736                  const double mx = values[mag_index];
737                  const double my = values[mag_index+1];
738                  const double mz = values[mag_index+2];
739                  local_values.vector[sld_index] =
740                    mag_sld(xs, qx, qy, px, py, values[sld_index+2], mx, my, mz);
741//if (q_index==0) printf("%d: (qx,qy)=(%g,%g) xs=%d sld%d=%g p=(%g,%g) m=(%g,%g,%g)\n",
742//  q_index, qx, qy, xs, sk, local_values.vector[sld_index], px, py, mx, my, mz);
743                }
744                scattering += xs_weight * CALL_KERNEL();
745              }
746            }
747          }
748        #else  // !MAGNETIC
749          #if COMPUTE_F1_F2
750            CALL_KERNEL(); // sets F1 and F2 by reference
751          #else
752            const double scattering = CALL_KERNEL();
753          #endif
754        #endif // !MAGNETIC
755//printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight0);
756
757        #ifdef USE_OPENCL
758          #if COMPUTE_F1_F2
759            this_F2 += weight * F2;
760            this_F1 += weight * F1;
761          #else
762            this_result += weight * scattering;
763          #endif
764        #else // !USE_OPENCL
765          #if COMPUTE_F1_F2
766            result[2*q_index+0] += weight * F2;
767            result[2*q_index+1] += weight * F1;
768          #else
769            result[q_index] += weight * scattering;
770          #endif
771        #endif // !USE_OPENCL
772      }
773    }
774  }
775// close nested loops
776++step;
777#if MAX_PD>0
778  PD_CLOSE(0)
779#endif
780#if MAX_PD>1
781  PD_CLOSE(1)
782#endif
783#if MAX_PD>2
784  PD_CLOSE(2)
785#endif
786#if MAX_PD>3
787  PD_CLOSE(3)
788#endif
789#if MAX_PD>4
790  PD_CLOSE(4)
791#endif
792
793// Remember the current result and the updated norm.
794#ifdef USE_OPENCL
795  #if COMPUTE_F1_F2
796    result[2*q_index+0] = this_F2;
797    result[2*q_index+1] = this_F1;
798    if (q_index == 0) {
799      result[2*nq+0] = weight_norm;
800      result[2*nq+1] = weighted_volume;
801      result[2*nq+2] = weighted_radius;
802    }
803  #else
804    result[q_index] = this_result;
805    if (q_index == 0) {
806      result[nq+0] = weight_norm;
807      result[nq+1] = weighted_volume;
808      result[nq+2] = weighted_radius;
809    }
810  #endif
811
812//if (q_index == 0) printf("res: %g/%g\n", result[0], weigthed_volume);
813#else // !USE_OPENCL
814  #if COMPUTE_F1_F2
815    result[2*nq] = weight_norm;
816    result[2*nq+1] = weighted_volume;
817    result[2*nq+2] = weighted_radius;
818  #else
819    result[nq] = weight_norm;
820    result[nq+1] = weighted_volume;
821    result[nq+2] = weighted_radius;
822  #endif
823//printf("res: %g/%g\n", result[0], weighted_volume);
824#endif // !USE_OPENCL
825
826// ** clear the macros in preparation for the next kernel **
827#undef COMPUTE_F1_F2
828#undef PD_INIT
829#undef PD_OPEN
830#undef PD_CLOSE
831#undef FETCH_Q
832#undef APPLY_PROJECTION
833#undef BUILD_ROTATION
834#undef APPLY_ROTATION
835#undef CALL_KERNEL
836}
Note: See TracBrowser for help on using the repository browser.