source: sasmodels/sasmodels/kernel_iq.c @ 01c8d9e

core_shell_microgelsmagnetic_modelticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since 01c8d9e was 01c8d9e, checked in by Suczewski <ges3@…>, 11 months ago

beta approximation, first pass

  • Property mode set to 100644
File size: 27.5 KB
Line 
1/*
2    ##########################################################
3    #                                                        #
4    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
5    #   !!                                              !!   #
6    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
7    #   !!                                              !!   #
8    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
9    #                                                        #
10    ##########################################################
11*/
12
13// NOTE: the following macros are defined in generate.py:
14//
15//  MAX_PD : the maximum number of dispersity loops allowed for this model,
16//      which will be at most modelinfo.MAX_PD.
17//  NUM_PARS : the number of parameters in the parameter table
18//  NUM_VALUES : the number of values to skip at the start of the
19//      values array before you get to the dispersity values.
20//  PARAMETER_TABLE : list of parameter declarations used to create the
21//      ParameterTable type.
22//  KERNEL_NAME : model_Iq, model_Iqxy or model_Imagnetic.  This code is
23//      included three times, once for each kernel type.
24//  MAGNETIC : defined when the magnetic kernel is being instantiated
25//  NUM_MAGNETIC : the number of magnetic parameters
26//  MAGNETIC_PARS : a comma-separated list of indices to the sld
27//      parameters in the parameter table.
28//  CALL_VOLUME(table) : call the form volume function
29//  CALL_IQ(q, table) : call the Iq function for 1D calcs.
30//  CALL_IQ_A(q, table) : call the Iq function with |q| for 2D data.
31//  CALL_IQ_AC(qa, qc, table) : call the Iqxy function for symmetric shapes
32//  CALL_IQ_ABC(qa, qc, table) : call the Iqxy function for asymmetric shapes
33//  CALL_IQ_XY(qx, qy, table) : call the Iqxy function for arbitrary models
34//  INVALID(table) : test if the current point is feesible to calculate.  This
35//      will be defined in the kernel definition file.
36//  PROJECTION : equirectangular=1, sinusoidal=2
37//      see explore/jitter.py for definitions.
38
39
40#ifndef _PAR_BLOCK_ // protected block so we can include this code twice.
41#define _PAR_BLOCK_
42
43typedef struct {
44#if MAX_PD > 0
45    int32_t pd_par[MAX_PD];     // id of the nth dispersity variable
46    int32_t pd_length[MAX_PD];  // length of the nth dispersity weight vector
47    int32_t pd_offset[MAX_PD];  // offset of pd weights in the value & weight vector
48    int32_t pd_stride[MAX_PD];  // stride to move to the next index at this level
49#endif // MAX_PD > 0
50    int32_t num_eval;           // total number of voxels in hypercube
51    int32_t num_weights;        // total length of the weights vector
52    int32_t num_active;         // number of non-trivial pd loops
53    int32_t theta_par;          // id of first orientation variable
54} ProblemDetails;
55
56// Intel HD 4000 needs private arrays to be a multiple of 4 long
57typedef struct {
58    PARAMETER_TABLE
59} ParameterTable;
60typedef union {
61    ParameterTable table;
62    double vector[4*((NUM_PARS+3)/4)];
63} ParameterBlock;
64#endif // _PAR_BLOCK_
65
66#if defined(MAGNETIC) && NUM_MAGNETIC > 0
67// ===== Helper functions for magnetism =====
68
69// Return value restricted between low and high
70static double clip(double value, double low, double high)
71{
72  return (value < low ? low : (value > high ? high : value));
73}
74
75// Compute spin cross sections given in_spin and out_spin
76// To convert spin cross sections to sld b:
77//     uu * (sld - m_sigma_x);
78//     dd * (sld + m_sigma_x);
79//     ud * (m_sigma_y - 1j*m_sigma_z);
80//     du * (m_sigma_y + 1j*m_sigma_z);
81// weights for spin crosssections: dd du real, ud real, uu, du imag, ud imag
82static void set_spin_weights(double in_spin, double out_spin, double weight[6])
83{
84  in_spin = clip(in_spin, 0.0, 1.0);
85  out_spin = clip(out_spin, 0.0, 1.0);
86  // Previous version of this function took the square root of the weights,
87  // under the assumption that
88  //
89  //     w*I(q, rho1, rho2, ...) = I(q, sqrt(w)*rho1, sqrt(w)*rho2, ...)
90  //
91  // However, since the weights are applied to the final intensity and
92  // are not interned inside the I(q) function, we want the full
93  // weight and not the square root.  Any function using
94  // set_spin_weights as part of calculating an amplitude will need to
95  // manually take that square root, but there is currently no such
96  // function.
97  weight[0] = (1.0-in_spin) * (1.0-out_spin); // dd
98  weight[1] = (1.0-in_spin) * out_spin;       // du
99  weight[2] = in_spin * (1.0-out_spin);       // ud
100  weight[3] = in_spin * out_spin;             // uu
101  weight[4] = weight[1]; // du.imag
102  weight[5] = weight[2]; // ud.imag
103}
104
105// Compute the magnetic sld
106static double mag_sld(
107  const unsigned int xs, // 0=dd, 1=du.real, 2=ud.real, 3=uu, 4=du.imag, 5=ud.imag
108  const double qx, const double qy,
109  const double px, const double py,
110  const double sld,
111  const double mx, const double my, const double mz
112)
113{
114  if (xs < 4) {
115    const double perp = qy*mx - qx*my;
116    switch (xs) {
117      default: // keep compiler happy; condition ensures xs in [0,1,2,3]
118      case 0: // uu => sld - D M_perpx
119          return sld - px*perp;
120      case 1: // ud.real => -D M_perpy
121          return py*perp;
122      case 2: // du.real => -D M_perpy
123          return py*perp;
124      case 3: // dd => sld + D M_perpx
125          return sld + px*perp;
126    }
127  } else {
128    if (xs== 4) {
129      return -mz;  // du.imag => +D M_perpz
130    } else { // index == 5
131      return +mz;  // ud.imag => -D M_perpz
132    }
133  }
134}
135
136
137#endif
138
139// ===== Helper functions for orientation and jitter =====
140
141// To change the definition of the angles, run explore/angles.py, which
142// uses sympy to generate the equations.
143
144#if !defined(_QAC_SECTION) && defined(CALL_IQ_AC)
145#define _QAC_SECTION
146
147typedef struct {
148    double R31, R32;
149} QACRotation;
150
151// Fill in the rotation matrix R from the view angles (theta, phi) and the
152// jitter angles (dtheta, dphi).  This matrix can be applied to all of the
153// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qc]'
154static void
155qac_rotation(
156    QACRotation *rotation,
157    double theta, double phi,
158    double dtheta, double dphi)
159{
160    double sin_theta, cos_theta;
161    double sin_phi, cos_phi;
162
163    // reverse view matrix
164    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
165    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
166    const double V11 = cos_phi*cos_theta;
167    const double V12 = sin_phi*cos_theta;
168    const double V21 = -sin_phi;
169    const double V22 = cos_phi;
170    const double V31 = sin_theta*cos_phi;
171    const double V32 = sin_phi*sin_theta;
172
173    // reverse jitter matrix
174    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
175    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
176    const double J31 = sin_theta;
177    const double J32 = -sin_phi*cos_theta;
178    const double J33 = cos_phi*cos_theta;
179
180    // reverse matrix
181    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
182    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
183}
184
185// Apply the rotation matrix returned from qac_rotation to the point (qx,qy),
186// returning R*[qx,qy]' = [qa,qc]'
187static void
188qac_apply(
189    QACRotation *rotation,
190    double qx, double qy,
191    double *qa_out, double *qc_out)
192{
193    const double dqc = rotation->R31*qx + rotation->R32*qy;
194    // Indirect calculation of qab, from qab^2 = |q|^2 - qc^2
195    const double dqa = sqrt(-dqc*dqc + qx*qx + qy*qy);
196
197    *qa_out = dqa;
198    *qc_out = dqc;
199}
200#endif // _QAC_SECTION
201
202#if !defined(_QABC_SECTION) && defined(CALL_IQ_ABC)
203#define _QABC_SECTION
204
205typedef struct {
206    double R11, R12;
207    double R21, R22;
208    double R31, R32;
209} QABCRotation;
210
211// Fill in the rotation matrix R from the view angles (theta, phi, psi) and the
212// jitter angles (dtheta, dphi, dpsi).  This matrix can be applied to all of the
213// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qb,qc]'
214static void
215qabc_rotation(
216    QABCRotation *rotation,
217    double theta, double phi, double psi,
218    double dtheta, double dphi, double dpsi)
219{
220    double sin_theta, cos_theta;
221    double sin_phi, cos_phi;
222    double sin_psi, cos_psi;
223
224    // reverse view matrix
225    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
226    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
227    SINCOS(psi*M_PI_180, sin_psi, cos_psi);
228    const double V11 = -sin_phi*sin_psi + cos_phi*cos_psi*cos_theta;
229    const double V12 = sin_phi*cos_psi*cos_theta + sin_psi*cos_phi;
230    const double V21 = -sin_phi*cos_psi - sin_psi*cos_phi*cos_theta;
231    const double V22 = -sin_phi*sin_psi*cos_theta + cos_phi*cos_psi;
232    const double V31 = sin_theta*cos_phi;
233    const double V32 = sin_phi*sin_theta;
234
235    // reverse jitter matrix
236    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
237    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
238    SINCOS(dpsi*M_PI_180, sin_psi, cos_psi);
239    const double J11 = cos_psi*cos_theta;
240    const double J12 = sin_phi*sin_theta*cos_psi + sin_psi*cos_phi;
241    const double J13 = sin_phi*sin_psi - sin_theta*cos_phi*cos_psi;
242    const double J21 = -sin_psi*cos_theta;
243    const double J22 = -sin_phi*sin_psi*sin_theta + cos_phi*cos_psi;
244    const double J23 = sin_phi*cos_psi + sin_psi*sin_theta*cos_phi;
245    const double J31 = sin_theta;
246    const double J32 = -sin_phi*cos_theta;
247    const double J33 = cos_phi*cos_theta;
248
249    // reverse matrix
250    rotation->R11 = J11*V11 + J12*V21 + J13*V31;
251    rotation->R12 = J11*V12 + J12*V22 + J13*V32;
252    rotation->R21 = J21*V11 + J22*V21 + J23*V31;
253    rotation->R22 = J21*V12 + J22*V22 + J23*V32;
254    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
255    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
256}
257
258// Apply the rotation matrix returned from qabc_rotation to the point (qx,qy),
259// returning R*[qx,qy]' = [qa,qb,qc]'
260static void
261qabc_apply(
262    QABCRotation *rotation,
263    double qx, double qy,
264    double *qa_out, double *qb_out, double *qc_out)
265{
266    *qa_out = rotation->R11*qx + rotation->R12*qy;
267    *qb_out = rotation->R21*qx + rotation->R22*qy;
268    *qc_out = rotation->R31*qx + rotation->R32*qy;
269}
270
271#endif // _QABC_SECTION
272
273// ==================== KERNEL CODE ========================
274kernel
275void KERNEL_NAME(
276    int32_t nq,                 // number of q values
277    const int32_t pd_start,     // where we are in the dispersity loop
278    const int32_t pd_stop,      // where we are stopping in the dispersity loop
279    global const ProblemDetails *details,
280    global const double *values,
281    global const double *q, // nq q values, with padding to boundary
282    global double *result,  // nq+1 return values, again with padding
283    const double cutoff     // cutoff in the dispersity weight product
284    )
285{
286#ifdef USE_OPENCL
287  // who we are and what element we are working with
288  const int q_index = get_global_id(0);
289  if (q_index >= nq) return;
290#else
291  // Define q_index here so that debugging statements can be written to work
292  // for both OpenCL and DLL using:
293  //    if (q_index == 0) {printf(...);}
294  int q_index = 0;
295#endif
296
297  // ** Fill in the local values table **
298  // Storage for the current parameter values.
299  // These will be updated as we walk the dispersity mesh.
300  ParameterBlock local_values;
301  //   values[0] is scale
302  //   values[1] is background
303  #ifdef USE_OPENMP
304  #pragma omp parallel for
305  #endif
306  for (int i=0; i < NUM_PARS; i++) {
307    local_values.vector[i] = values[2+i];
308    //if (q_index==0) printf("p%d = %g\n",i, local_values.vector[i]);
309  }
310  //if (q_index==0) printf("NUM_VALUES:%d  NUM_PARS:%d  MAX_PD:%d\n", NUM_VALUES, NUM_PARS, MAX_PD);
311  //if (q_index==0) printf("start:%d  stop:%d\n", pd_start, pd_stop);
312
313  // ** Precompute magnatism values **
314#if defined(MAGNETIC) && NUM_MAGNETIC>0
315  // Location of the sld parameters in the parameter vector.
316  // These parameters are updated with the effective sld due to magnetism.
317  const int32_t slds[] = { MAGNETIC_PARS };
318
319  // Interpret polarization cross section.
320  //     up_frac_i = values[NUM_PARS+2];
321  //     up_frac_f = values[NUM_PARS+3];
322  //     up_angle = values[NUM_PARS+4];
323  // TODO: could precompute more magnetism parameters before calling the kernel.
324  double xs_weights[8];  // uu, ud real, du real, dd, ud imag, du imag, fill, fill
325  double cos_mspin, sin_mspin;
326  set_spin_weights(values[NUM_PARS+2], values[NUM_PARS+3], xs_weights);
327  SINCOS(-values[NUM_PARS+4]*M_PI_180, sin_mspin, cos_mspin);
328#endif // MAGNETIC
329
330  // ** Fill in the initial results **
331  // If pd_start is zero that means that we are starting a new calculation,
332  // and must initialize the result to zero.  Otherwise, we are restarting
333  // the calculation from somewhere in the middle of the dispersity mesh,
334  // and we update the value rather than reset it. Similarly for the
335  // normalization factor, which is stored as the final value in the
336  // results vector (one past the number of q values).
337  //
338  // The code differs slightly between opencl and dll since opencl is only
339  // seeing one q value (stored in the variable "this_result") while the dll
340  // version must loop over all q
341  #if BETA
342  double *beta_result = &(result[nq+1]); // = result + nq + 1
343  double weight_norm = (pd_start == 0 ? 0.0 : result[2*nq+1]);
344  #endif
345  #ifdef USE_OPENCL
346    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
347    double this_result = (pd_start == 0 ? 0.0 : result[q_index]);
348    #if BETA
349      double this_beta_result = (pd_start == 0 ? 0.0 : result[nq + q_index];
350  #else // !USE_OPENCL
351    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 
352    if (pd_start == 0) {
353      #ifdef USE_OPENMP
354      #pragma omp parallel for
355      #endif
356      for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0;
357      #if BETA
358      for (int q_index=0; q_index < nq; q_index++) beta_result[q_index] = 0.0;
359      #endif
360    }
361    //if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]);
362#endif // !USE_OPENCL
363
364
365
366
367
368// ====== macros to set up the parts of the loop =======
369/*
370Based on the level of the loop, uses C preprocessor magic to construct
371level-specific looping variables, including these from loop level 3:
372
373  int n3 : length of loop for mesh level 3
374  int i3 : current position in the loop for level 3, which is calculated
375       from a combination of pd_start, pd_stride[3] and pd_length[3].
376  int p3 : is the index into the parameter table for mesh level 3
377  double v3[] : pointer into dispersity array to values for loop 3
378  double w3[] : pointer into dispersity array to weights for loop 3
379  double weight3 : the product of weights from levels 3 and up, computed
380       as weight5*weight4*w3[i3].  Note that we need an outermost
381       value weight5 set to 1.0 for this to work properly.
382
383After expansion, the loop struction will look like the following:
384
385  // --- PD_INIT(4) ---
386  const int n4 = pd_length[4];
387  const int p4 = pd_par[4];
388  global const double *v4 = pd_value + pd_offset[4];
389  global const double *w4 = pd_weight + pd_offset[4];
390  int i4 = (pd_start/pd_stride[4])%n4;  // position in level 4 at pd_start
391
392  // --- PD_INIT(3) ---
393  const int n3 = pd_length[3];
394  ...
395  int i3 = (pd_start/pd_stride[3])%n3;  // position in level 3 at pd_start
396
397  PD_INIT(2)
398  PD_INIT(1)
399  PD_INIT(0)
400
401  // --- PD_OUTERMOST_WEIGHT(5) ---
402  const double weight5 = 1.0;
403
404  // --- PD_OPEN(4,5) ---
405  while (i4 < n4) {
406    parameter[p4] = v4[i4];  // set the value for pd parameter 4 at this mesh point
407    const double weight4 = w4[i4] * weight5;
408
409    // from PD_OPEN(3,4)
410    while (i3 < n3) {
411      parameter[p3] = v3[i3];  // set the value for pd parameter 3 at this mesh point
412      const double weight3 = w3[i3] * weight4;
413
414      PD_OPEN(3,2)
415      PD_OPEN(2,1)
416      PD_OPEN(0,1)
417
418      // ... main loop body ...
419      APPLY_PROJECTION    // convert jitter values to spherical coords
420      BUILD_ROTATION      // construct the rotation matrix qxy => qabc
421      for each q
422          FETCH_Q         // set qx,qy from the q input vector
423          APPLY_ROTATION  // convert qx,qy to qa,qb,qc
424          CALL_KERNEL     // scattering = Iqxy(qa, qb, qc, p1, p2, ...)
425
426      ++step;  // increment counter representing position in dispersity mesh
427
428      PD_CLOSE(0)
429      PD_CLOSE(1)
430      PD_CLOSE(2)
431
432      // --- PD_CLOSE(3) ---
433      if (step >= pd_stop) break;
434      ++i3;
435    }
436    i3 = 0; // reset loop counter for next round through the loop
437
438    // --- PD_CLOSE(4) ---
439    if (step >= pd_stop) break;
440    ++i4;
441  }
442  i4 = 0; // reset loop counter even though no more rounds through the loop
443
444*/
445
446
447// ** prepare inner loops **
448
449// Depending on the shape type (radial, axial, triaxial), the variables
450// and calling parameters in the loop body will be slightly different.
451// Macros capture the differences in one spot so the rest of the code
452// is easier to read. The code below both declares variables for the
453// inner loop and defines the macros that use them.
454
455
456#if defined(CALL_IQ)
457  // unoriented 1D
458  double qk;
459  #if BETA == 0
460    #define FETCH_Q() do { qk = q[q_index]; } while (0)
461    #define BUILD_ROTATION() do {} while(0)
462    #define APPLY_ROTATION() do {} while(0)
463    #define CALL_KERNEL() CALL_IQ(qk,local_values.table)
464
465  // unoriented 1D Beta
466  #elif BETA == 1
467    double F1, F2;
468    #define FETCH_Q() do { qk = q[q_index]; } while (0)
469    #define BUILD_ROTATION() do {} while(0)
470    #define APPLY_ROTATION() do {} while(0)
471    #define CALL_KERNEL() CALL_IQ(qk,F1,F2,local_values.table)
472  #endif
473
474#elif defined(CALL_IQ_A)
475  // unoriented 2D
476  double qx, qy;
477  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
478  #define BUILD_ROTATION() do {} while(0)
479  #define APPLY_ROTATION() do {} while(0)
480  #define CALL_KERNEL() CALL_IQ_A(sqrt(qx*qx+qy*qy), local_values.table)
481
482#elif defined(CALL_IQ_AC)
483  // oriented symmetric 2D
484  double qx, qy;
485  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
486  double qa, qc;
487  QACRotation rotation;
488  // theta, phi, dtheta, dphi are defined below in projection to avoid repeated code.
489  #define BUILD_ROTATION() qac_rotation(&rotation, theta, phi, dtheta, dphi);
490  #define APPLY_ROTATION() qac_apply(&rotation, qx, qy, &qa, &qc)
491  #define CALL_KERNEL() CALL_IQ_AC(qa, qc, local_values.table)
492
493#elif defined(CALL_IQ_ABC)
494  // oriented asymmetric 2D
495  double qx, qy;
496  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
497  double qa, qb, qc;
498  QABCRotation rotation;
499  // theta, phi, dtheta, dphi are defined below in projection to avoid repeated code.
500  // psi and dpsi are only for IQ_ABC, so they are processed here.
501  const double psi = values[details->theta_par+4];
502  local_values.table.psi = 0.;
503  #define BUILD_ROTATION() qabc_rotation(&rotation, theta, phi, psi, dtheta, dphi, local_values.table.psi)
504  #define APPLY_ROTATION() qabc_apply(&rotation, qx, qy, &qa, &qb, &qc)
505  #define CALL_KERNEL() CALL_IQ_ABC(qa, qb, qc, local_values.table)
506#elif defined(CALL_IQ_XY)
507  // direct call to qx,qy calculator
508  double qx, qy;
509  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
510  #define BUILD_ROTATION() do {} while(0)
511  #define APPLY_ROTATION() do {} while(0)
512  #define CALL_KERNEL() CALL_IQ_XY(qx, qy, local_values.table)
513#endif
514
515// Define APPLY_PROJECTION depending on model symmetries. We do this outside
516// the previous if block so that we don't need to repeat the identical
517// logic in the IQ_AC and IQ_ABC branches.  This will become more important
518// if we implement more projections, or more complicated projections.
519#if defined(CALL_IQ) || defined(CALL_IQ_A)  // no orientation
520  #define APPLY_PROJECTION() const double weight=weight0
521#elif defined(CALL_IQ_XY) // pass orientation to the model
522  // CRUFT: support oriented model which define Iqxy rather than Iqac or Iqabc
523  // Need to plug the values for the orientation angles back into parameter
524  // table in case they were overridden by the orientation offset.  This
525  // means that orientation dispersity will not work for these models, but
526  // it was broken anyway, so no matter.  Still want to provide Iqxy in case
527  // the user model wants full control of orientation/magnetism.
528  #if defined(HAVE_PSI)
529    const double theta = values[details->theta_par+2];
530    const double phi = values[details->theta_par+3];
531    const double psi = values[details->theta_par+4];
532    double weight;
533    #define APPLY_PROJECTION() do { \
534      local_values.table.theta = theta; \
535      local_values.table.phi = phi; \
536      local_values.table.psi = psi; \
537      weight=weight0; \
538    } while (0)
539  #elif defined(HAVE_THETA)
540    const double theta = values[details->theta_par+2];
541    const double phi = values[details->theta_par+3];
542    double weight;
543    #define APPLY_PROJECTION() do { \
544      local_values.table.theta = theta; \
545      local_values.table.phi = phi; \
546      weight=weight0; \
547    } while (0)
548  #else
549    #define APPLY_PROJECTION() const double weight=weight0
550  #endif
551#else // apply jitter and view before calling the model
552  // Grab the "view" angles (theta, phi, psi) from the initial parameter table.
553  const double theta = values[details->theta_par+2];
554  const double phi = values[details->theta_par+3];
555  // Make sure jitter angle defaults to zero if there is no jitter distribution
556  local_values.table.theta = 0.;
557  local_values.table.phi = 0.;
558  // The "jitter" angles (dtheta, dphi, dpsi) are stored with the
559  // dispersity values and copied to the local parameter table as
560  // we go through the mesh.
561  double dtheta, dphi, weight;
562  #if PROJECTION == 1 // equirectangular
563    #define APPLY_PROJECTION() do { \
564      dtheta = local_values.table.theta; \
565      dphi = local_values.table.phi; \
566      weight = fabs(cos(dtheta*M_PI_180)) * weight0; \
567    } while (0)
568  #elif PROJECTION == 2 // sinusoidal
569    #define APPLY_PROJECTION() do { \
570      dtheta = local_values.table.theta; \
571      dphi = local_values.table.phi; \
572      weight = weight0; \
573      if (dtheta != 90.0) dphi /= cos(dtheta*M_PI_180); \
574      else if (dphi != 0.0) weight = 0.; \
575      if (fabs(dphi) >= 180.) weight = 0.; \
576    } while (0)
577  #endif
578#endif // done defining APPLY_PROJECTION
579
580// ** define looping macros **
581
582// Define looping variables
583#define PD_INIT(_LOOP) \
584  const int n##_LOOP = details->pd_length[_LOOP]; \
585  const int p##_LOOP = details->pd_par[_LOOP]; \
586  global const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \
587  global const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \
588  int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP;
589
590// Jump into the middle of the dispersity loop
591#define PD_OPEN(_LOOP,_OUTER) \
592  while (i##_LOOP < n##_LOOP) { \
593    local_values.vector[p##_LOOP] = v##_LOOP[i##_LOOP]; \
594    const double weight##_LOOP = w##_LOOP[i##_LOOP] * weight##_OUTER;
595
596// create the variable "weight#=1.0" where # is the outermost level+1 (=MAX_PD).
597#define _PD_OUTERMOST_WEIGHT(_n) const double weight##_n = 1.0;
598#define PD_OUTERMOST_WEIGHT(_n) _PD_OUTERMOST_WEIGHT(_n)
599
600// Close out the loop
601#define PD_CLOSE(_LOOP) \
602    if (step >= pd_stop) break; \
603    ++i##_LOOP; \
604  } \
605  i##_LOOP = 0;
606
607// ====== construct the loops =======
608
609// Pointers to the start of the dispersity and weight vectors, if needed.
610#if MAX_PD>0
611  global const double *pd_value = values + NUM_VALUES;
612  global const double *pd_weight = pd_value + details->num_weights;
613#endif
614
615// The variable "step" is the current position in the dispersity loop.
616// It will be incremented each time a new point in the mesh is accumulated,
617// and used to test whether we have reached pd_stop.
618int step = pd_start;
619
620// *** define loops for each of 0, 1, 2, ..., modelinfo.MAX_PD-1 ***
621
622// define looping variables
623#if MAX_PD>4
624  PD_INIT(4)
625#endif
626#if MAX_PD>3
627  PD_INIT(3)
628#endif
629#if MAX_PD>2
630  PD_INIT(2)
631#endif
632#if MAX_PD>1
633  PD_INIT(1)
634#endif
635#if MAX_PD>0
636  PD_INIT(0)
637#endif
638
639// open nested loops
640PD_OUTERMOST_WEIGHT(MAX_PD)
641#if MAX_PD>4
642  PD_OPEN(4,5)
643#endif
644#if MAX_PD>3
645  PD_OPEN(3,4)
646#endif
647#if MAX_PD>2
648  PD_OPEN(2,3)
649#endif
650#if MAX_PD>1
651  PD_OPEN(1,2)
652#endif
653#if MAX_PD>0
654  PD_OPEN(0,1)
655#endif
656
657//if (q_index==0) {printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n");}
658
659  // ====== loop body =======
660  #ifdef INVALID
661  if (!INVALID(local_values.table))
662  #endif
663  {
664     APPLY_PROJECTION();
665
666    // Accumulate I(q)
667    // Note: weight==0 must always be excluded
668    if (weight > cutoff) {
669      pd_norm += weight * CALL_VOLUME(local_values.table);
670      BUILD_ROTATION();
671#if BETA
672    if (weight > cutoff) {
673      weight_norm += weight;}
674#endif
675#ifndef USE_OPENCL
676      // DLL needs to explicitly loop over the q values.
677      #ifdef USE_OPENMP
678      #pragma omp parallel for
679      #endif
680      for (q_index=0; q_index<nq; q_index++)
681#endif // !USE_OPENCL
682      {
683
684        FETCH_Q();
685        APPLY_ROTATION();
686
687        // ======= COMPUTE SCATTERING ==========
688        #if defined(MAGNETIC) && NUM_MAGNETIC > 0
689          // Compute the scattering from the magnetic cross sections.
690          double scattering = 0.0;
691          const double qsq = qx*qx + qy*qy;
692          if (qsq > 1.e-16) {
693            // TODO: what is the magnetic scattering at q=0
694            const double px = (qy*cos_mspin + qx*sin_mspin)/qsq;
695            const double py = (qy*sin_mspin - qx*cos_mspin)/qsq;
696
697            // loop over uu, ud real, du real, dd, ud imag, du imag
698            for (unsigned int xs=0; xs<6; xs++) {
699              const double xs_weight = xs_weights[xs];
700              if (xs_weight > 1.e-8) {
701                // Since the cross section weight is significant, set the slds
702                // to the effective slds for this cross section, call the
703                // kernel, and add according to weight.
704                for (int sk=0; sk<NUM_MAGNETIC; sk++) {
705                  const int32_t mag_index = NUM_PARS+5 + 3*sk;
706                  const int32_t sld_index = slds[sk];
707                  const double mx = values[mag_index];
708                  const double my = values[mag_index+1];
709                  const double mz = values[mag_index+2];
710                  local_values.vector[sld_index] =
711                    mag_sld(xs, qx, qy, px, py, values[sld_index+2], mx, my, mz);
712//if (q_index==0) printf("%d: (qx,qy)=(%g,%g) xs=%d sld%d=%g p=(%g,%g) m=(%g,%g,%g)\n",
713//  q_index, qx, qy, xs, sk, local_values.vector[sld_index], px, py, mx, my, mz);
714                }
715                scattering += xs_weight * CALL_KERNEL();
716              }
717            }
718          }
719        #else  // !MAGNETIC
720          #if defined(CALL_IQ) && BETA == 1
721            CALL_KERNEL();
722            const double scatteringF1 = F1;
723            const double scatteringF2 = F2;
724          #else
725            const double scattering = CALL_KERNEL();
726          #endif
727        #endif // !MAGNETIC
728//printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight0);
729
730        #ifdef USE_OPENCL
731          #if defined(CALL_IQ)&& BETA == 1
732             this_result += weight * scatteringF2;
733             this_beta_result += weight * scatteringF1;
734            #else
735              this_result += weight * scattering;
736          #endif
737        #else // !USE_OPENCL
738          #if defined(CALL_IQ)&& BETA == 1
739            result[q_index] += weight * scatteringF2;
740            beta_result[q_index] += weight * scatteringF1;
741            #endif
742            #else
743            result[q_index] += weight * scattering;
744          #endif
745        #endif // !USE_OPENCL
746      }
747    }
748  }
749// close nested loops
750++step;
751#if MAX_PD>0
752  PD_CLOSE(0)
753#endif
754#if MAX_PD>1
755  PD_CLOSE(1)
756#endif
757#if MAX_PD>2
758  PD_CLOSE(2)
759#endif
760#if MAX_PD>3
761  PD_CLOSE(3)
762#endif
763#if MAX_PD>4
764  PD_CLOSE(4)
765#endif
766
767// Remember the current result and the updated norm.
768#ifdef USE_OPENCL
769  result[q_index] = this_result;
770  if (q_index == 0) result[nq] = pd_norm;
771  #if BETA
772  beta_result[q_index] = this_beta_result;
773  #endif
774  if (q_index == 0) result[nq] = pd_norm;
775
776//if (q_index == 0) printf("res: %g/%g\n", result[0], pd_norm);
777#else // !USE_OPENCL
778  result[nq] = pd_norm;
779  #if BETA
780  result[2*nq+1] = weight_norm;
781  #endif
782//printf("res: %g/%g\n", result[0], pd_norm);
783#endif // !USE_OPENCL
784
785// ** clear the macros in preparation for the next kernel **
786#undef PD_INIT
787#undef PD_OPEN
788#undef PD_CLOSE
789#undef FETCH_Q
790#undef APPLY_PROJECTION
791#undef BUILD_ROTATION
792#undef APPLY_ROTATION
793#undef CALL_KERNEL
794}
Note: See TracBrowser for help on using the repository browser.