source: sasmodels/sasmodels/kernel_iq.c @ 767dca8

core_shell_microgelsmagnetic_modelticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since 767dca8 was 767dca8, checked in by Paul Kienzle <pkienzle@…>, 6 years ago

use alternative jitter, which maintains constant arc length for the phi distribution

  • Property mode set to 100644
File size: 24.0 KB
Line 
1/*
2    ##########################################################
3    #                                                        #
4    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
5    #   !!                                              !!   #
6    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
7    #   !!                                              !!   #
8    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
9    #                                                        #
10    ##########################################################
11*/
12
13// NOTE: the following macros are defined in generate.py:
14//
15//  MAX_PD : the maximum number of dispersity loops allowed
16//  NUM_PARS : the number of parameters in the parameter table
17//  NUM_VALUES : the number of values to skip at the start of the
18//      values array before you get to the dispersity values.
19//  PARAMETER_TABLE : list of parameter declarations used to create the
20//      ParameterTable type.
21//  KERNEL_NAME : model_Iq, model_Iqxy or model_Imagnetic.  This code is
22//      included three times, once for each kernel type.
23//  MAGNETIC : defined when the magnetic kernel is being instantiated
24//  NUM_MAGNETIC : the number of magnetic parameters
25//  MAGNETIC_PARS : a comma-separated list of indices to the sld
26//      parameters in the parameter table.
27//  CALL_VOLUME(table) : call the form volume function
28//  CALL_IQ(q, table) : call the Iq function for 1D calcs.
29//  CALL_IQ_A(q, table) : call the Iq function with |q| for 2D data.
30//  CALL_IQ_AC(qa, qc, table) : call the Iqxy function for symmetric shapes
31//  CALL_IQ_ABC(qa, qc, table) : call the Iqxy function for asymmetric shapes
32//  INVALID(table) : test if the current point is feesible to calculate.  This
33//      will be defined in the kernel definition file.
34
35// Set SCALED_PHI to 1 if dphi represents distance rather than longitude.
36// That is, in order to make an equally spaced mesh on a curved surface,
37// you will need to scale longitude displacement by cos(latitude) to account
38// for the reduction in distance per degree latitude as you move away from
39// the equator.  Points on the mesh with scaled dphi values more than 180
40// degrees are not included in the dispersity calculation. This should make
41// the integral over the entire surface work by sampling fewer points near
42// the poles.
43# define USE_SCALED_DPHI 1
44
45#ifndef _PAR_BLOCK_ // protected block so we can include this code twice.
46#define _PAR_BLOCK_
47
48typedef struct {
49#if MAX_PD > 0
50    int32_t pd_par[MAX_PD];     // id of the nth dispersity variable
51    int32_t pd_length[MAX_PD];  // length of the nth dispersity weight vector
52    int32_t pd_offset[MAX_PD];  // offset of pd weights in the value & weight vector
53    int32_t pd_stride[MAX_PD];  // stride to move to the next index at this level
54#endif // MAX_PD > 0
55    int32_t num_eval;           // total number of voxels in hypercube
56    int32_t num_weights;        // total length of the weights vector
57    int32_t num_active;         // number of non-trivial pd loops
58    int32_t theta_par;          // id of first orientation variable
59} ProblemDetails;
60
61// Intel HD 4000 needs private arrays to be a multiple of 4 long
62typedef struct {
63    PARAMETER_TABLE
64} ParameterTable;
65typedef union {
66    ParameterTable table;
67    double vector[4*((NUM_PARS+3)/4)];
68} ParameterBlock;
69#endif // _PAR_BLOCK_
70
71#if defined(MAGNETIC) && NUM_MAGNETIC > 0
72// ===== Helper functions for magnetism =====
73
74// Return value restricted between low and high
75static double clip(double value, double low, double high)
76{
77  return (value < low ? low : (value > high ? high : value));
78}
79
80// Compute spin cross sections given in_spin and out_spin
81// To convert spin cross sections to sld b:
82//     uu * (sld - m_sigma_x);
83//     dd * (sld + m_sigma_x);
84//     ud * (m_sigma_y - 1j*m_sigma_z);
85//     du * (m_sigma_y + 1j*m_sigma_z);
86// weights for spin crosssections: dd du real, ud real, uu, du imag, ud imag
87static void set_spin_weights(double in_spin, double out_spin, double spins[4])
88{
89  in_spin = clip(in_spin, 0.0, 1.0);
90  out_spin = clip(out_spin, 0.0, 1.0);
91  spins[0] = sqrt(sqrt((1.0-in_spin) * (1.0-out_spin))); // dd
92  spins[1] = sqrt(sqrt((1.0-in_spin) * out_spin));       // du real
93  spins[2] = sqrt(sqrt(in_spin * (1.0-out_spin)));       // ud real
94  spins[3] = sqrt(sqrt(in_spin * out_spin));             // uu
95  spins[4] = spins[1]; // du imag
96  spins[5] = spins[2]; // ud imag
97}
98
99// Compute the magnetic sld
100static double mag_sld(
101  const int xs, // 0=dd, 1=du real, 2=ud real, 3=uu, 4=du imag, 5=up imag
102  const double qx, const double qy,
103  const double px, const double py,
104  const double sld,
105  const double mx, const double my, const double mz
106)
107{
108  if (xs < 4) {
109    const double perp = qy*mx - qx*my;
110    switch (xs) {
111      case 0: // uu => sld - D M_perpx
112          return sld - px*perp;
113      case 1: // ud real => -D M_perpy
114          return py*perp;
115      case 2: // du real => -D M_perpy
116          return py*perp;
117      case 3: // dd real => sld + D M_perpx
118          return sld + px*perp;
119    }
120  } else {
121    if (xs== 4) {
122      return -mz;  // ud imag => -D M_perpz
123    } else { // index == 5
124      return mz;   // du imag => D M_perpz
125    }
126  }
127}
128
129
130#endif
131
132// ===== Helper functions for orientation and jitter =====
133
134// To change the definition of the angles, run explore/angles.py, which
135// uses sympy to generate the equations.
136
137#if !defined(_QAC_SECTION) && defined(CALL_IQ_AC)
138#define _QAC_SECTION
139
140typedef struct {
141    double R31, R32;
142} QACRotation;
143
144// Fill in the rotation matrix R from the view angles (theta, phi) and the
145// jitter angles (dtheta, dphi).  This matrix can be applied to all of the
146// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qc]'
147static void
148qac_rotation(
149    QACRotation *rotation,
150    double theta, double phi,
151    double dtheta, double dphi)
152{
153    double sin_theta, cos_theta;
154    double sin_phi, cos_phi;
155
156    // reverse view matrix
157    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
158    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
159    const double V11 = cos_phi*cos_theta;
160    const double V12 = sin_phi*cos_theta;
161    const double V21 = -sin_phi;
162    const double V22 = cos_phi;
163    const double V31 = sin_theta*cos_phi;
164    const double V32 = sin_phi*sin_theta;
165
166    // reverse jitter matrix
167    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
168    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
169    const double J31 = sin_theta;
170    const double J32 = -sin_phi*cos_theta;
171    const double J33 = cos_phi*cos_theta;
172
173    // reverse matrix
174    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
175    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
176}
177
178// Apply the rotation matrix returned from qac_rotation to the point (qx,qy),
179// returning R*[qx,qy]' = [qa,qc]'
180static double
181qac_apply(
182    QACRotation rotation,
183    double qx, double qy,
184    double *qa_out, double *qc_out)
185{
186    const double dqc = rotation.R31*qx + rotation.R32*qy;
187    // Indirect calculation of qab, from qab^2 = |q|^2 - qc^2
188    const double dqa = sqrt(-dqc*dqc + qx*qx + qy*qy);
189
190    *qa_out = dqa;
191    *qc_out = dqc;
192}
193#endif // _QAC_SECTION
194
195#if !defined(_QABC_SECTION) && defined(CALL_IQ_ABC)
196#define _QABC_SECTION
197
198typedef struct {
199    double R11, R12;
200    double R21, R22;
201    double R31, R32;
202} QABCRotation;
203
204// Fill in the rotation matrix R from the view angles (theta, phi, psi) and the
205// jitter angles (dtheta, dphi, dpsi).  This matrix can be applied to all of the
206// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qb,qc]'
207static void
208qabc_rotation(
209    QABCRotation *rotation,
210    double theta, double phi, double psi,
211    double dtheta, double dphi, double dpsi)
212{
213    double sin_theta, cos_theta;
214    double sin_phi, cos_phi;
215    double sin_psi, cos_psi;
216
217    // reverse view matrix
218    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
219    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
220    SINCOS(psi*M_PI_180, sin_psi, cos_psi);
221    const double V11 = -sin_phi*sin_psi + cos_phi*cos_psi*cos_theta;
222    const double V12 = sin_phi*cos_psi*cos_theta + sin_psi*cos_phi;
223    const double V21 = -sin_phi*cos_psi - sin_psi*cos_phi*cos_theta;
224    const double V22 = -sin_phi*sin_psi*cos_theta + cos_phi*cos_psi;
225    const double V31 = sin_theta*cos_phi;
226    const double V32 = sin_phi*sin_theta;
227
228    // reverse jitter matrix
229    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
230    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
231    SINCOS(dpsi*M_PI_180, sin_psi, cos_psi);
232    const double J11 = cos_psi*cos_theta;
233    const double J12 = sin_phi*sin_theta*cos_psi + sin_psi*cos_phi;
234    const double J13 = sin_phi*sin_psi - sin_theta*cos_phi*cos_psi;
235    const double J21 = -sin_psi*cos_theta;
236    const double J22 = -sin_phi*sin_psi*sin_theta + cos_phi*cos_psi;
237    const double J23 = sin_phi*cos_psi + sin_psi*sin_theta*cos_phi;
238    const double J31 = sin_theta;
239    const double J32 = -sin_phi*cos_theta;
240    const double J33 = cos_phi*cos_theta;
241
242    // reverse matrix
243    rotation->R11 = J11*V11 + J12*V21 + J13*V31;
244    rotation->R12 = J11*V12 + J12*V22 + J13*V32;
245    rotation->R21 = J21*V11 + J22*V21 + J23*V31;
246    rotation->R22 = J21*V12 + J22*V22 + J23*V32;
247    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
248    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
249}
250
251// Apply the rotation matrix returned from qabc_rotation to the point (qx,qy),
252// returning R*[qx,qy]' = [qa,qb,qc]'
253static double
254qabc_apply(
255    QABCRotation rotation,
256    double qx, double qy,
257    double *qa_out, double *qb_out, double *qc_out)
258{
259    *qa_out = rotation.R11*qx + rotation.R12*qy;
260    *qb_out = rotation.R21*qx + rotation.R22*qy;
261    *qc_out = rotation.R31*qx + rotation.R32*qy;
262}
263
264#endif // _QABC_SECTION
265
266
267// ==================== KERNEL CODE ========================
268
269kernel
270void KERNEL_NAME(
271    int32_t nq,                 // number of q values
272    const int32_t pd_start,     // where we are in the dispersity loop
273    const int32_t pd_stop,      // where we are stopping in the dispersity loop
274    global const ProblemDetails *details,
275    global const double *values,
276    global const double *q, // nq q values, with padding to boundary
277    global double *result,  // nq+1 return values, again with padding
278    const double cutoff     // cutoff in the dispersity weight product
279    )
280{
281#ifdef USE_OPENCL
282  // who we are and what element we are working with
283  const int q_index = get_global_id(0);
284  if (q_index >= nq) return;
285#else
286  // Define q_index here so that debugging statements can be written to work
287  // for both OpenCL and DLL using:
288  //    if (q_index == 0) {printf(...);}
289  int q_index = 0;
290#endif
291
292  // Storage for the current parameter values.  These will be updated as we
293  // walk the dispersity mesh.
294  ParameterBlock local_values;
295
296#if defined(MAGNETIC) && NUM_MAGNETIC>0
297  // Location of the sld parameters in the parameter vector.
298  // These parameters are updated with the effective sld due to magnetism.
299  const int32_t slds[] = { MAGNETIC_PARS };
300
301  // Interpret polarization cross section.
302  //     up_frac_i = values[NUM_PARS+2];
303  //     up_frac_f = values[NUM_PARS+3];
304  //     up_angle = values[NUM_PARS+4];
305  // TODO: could precompute more magnetism parameters before calling the kernel.
306  double spins[8];  // uu, ud real, du real, dd, ud imag, du imag, fill, fill
307  double cos_mspin, sin_mspin;
308  set_spin_weights(values[NUM_PARS+2], values[NUM_PARS+3], spins);
309  SINCOS(-values[NUM_PARS+4]*M_PI_180, sin_mspin, cos_mspin);
310#endif // MAGNETIC
311
312  // Fill in the initial variables
313  //   values[0] is scale
314  //   values[1] is background
315  #ifdef USE_OPENMP
316  #pragma omp parallel for
317  #endif
318  for (int i=0; i < NUM_PARS; i++) {
319    local_values.vector[i] = values[2+i];
320//if (q_index==0) printf("p%d = %g\n",i, local_values.vector[i]);
321  }
322//if (q_index==0) printf("NUM_VALUES:%d  NUM_PARS:%d  MAX_PD:%d\n", NUM_VALUES, NUM_PARS, MAX_PD);
323//if (q_index==0) printf("start:%d  stop:%d\n", pd_start, pd_stop);
324
325  // If pd_start is zero that means that we are starting a new calculation,
326  // and must initialize the result to zero.  Otherwise, we are restarting
327  // the calculation from somewhere in the middle of the dispersity mesh,
328  // and we update the value rather than reset it. Similarly for the
329  // normalization factor, which is stored as the final value in the
330  // results vector (one past the number of q values).
331  //
332  // The code differs slightly between opencl and dll since opencl is only
333  // seeing one q value (stored in the variable "this_result") while the dll
334  // version must loop over all q.
335  #ifdef USE_OPENCL
336    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
337    double this_result = (pd_start == 0 ? 0.0 : result[q_index]);
338  #else // !USE_OPENCL
339    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
340    if (pd_start == 0) {
341      #ifdef USE_OPENMP
342      #pragma omp parallel for
343      #endif
344      for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0;
345    }
346//if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]);
347#endif // !USE_OPENCL
348
349
350// ====== macros to set up the parts of the loop =======
351/*
352Based on the level of the loop, uses C preprocessor magic to construct
353level-specific looping variables, including these from loop level 3:
354
355  int n3 : length of loop for mesh level 3
356  int i3 : current position in the loop for level 3, which is calculated
357       from a combination of pd_start, pd_stride[3] and pd_length[3].
358  int p3 : is the index into the parameter table for mesh level 3
359  double v3[] : pointer into dispersity array to values for loop 3
360  double w3[] : pointer into dispersity array to weights for loop 3
361  double weight3 : the product of weights from levels 3 and up, computed
362       as weight5*weight4*w3[i3].  Note that we need an outermost
363       value weight5 set to 1.0 for this to work properly.
364
365After expansion, the loop struction will look like the following:
366
367  // --- PD_INIT(4) ---
368  const int n4 = pd_length[4];
369  const int p4 = pd_par[4];
370  global const double *v4 = pd_value + pd_offset[4];
371  global const double *w4 = pd_weight + pd_offset[4];
372  int i4 = (pd_start/pd_stride[4])%n4;  // position in level 4 at pd_start
373
374  // --- PD_INIT(3) ---
375  const int n3 = pd_length[3];
376  ...
377  int i3 = (pd_start/pd_stride[3])%n3;  // position in level 3 at pd_start
378
379  PD_INIT(2)
380  PD_INIT(1)
381  PD_INIT(0)
382
383  // --- PD_OUTERMOST_WEIGHT(5) ---
384  const double weight5 = 1.0;
385
386  // --- PD_OPEN(4,5) ---
387  while (i4 < n4) {
388    parameter[p4] = v4[i4];  // set the value for pd parameter 4 at this mesh point
389    const double weight4 = w4[i4] * weight5;
390
391    // from PD_OPEN(3,4)
392    while (i3 < n3) {
393      parameter[p3] = v3[i3];  // set the value for pd parameter 3 at this mesh point
394      const double weight3 = w3[i3] * weight4;
395
396      PD_OPEN(3,2)
397      PD_OPEN(2,1)
398      PD_OPEN(0,1)
399
400      ... main loop body ...
401
402      ++step;  // increment counter representing position in dispersity mesh
403
404      PD_CLOSE(0)
405      PD_CLOSE(1)
406      PD_CLOSE(2)
407
408      // --- PD_CLOSE(3) ---
409      if (step >= pd_stop) break;
410      ++i3;
411    }
412    i3 = 0; // reset loop counter for next round through the loop
413
414    // --- PD_CLOSE(4) ---
415    if (step >= pd_stop) break;
416    ++i4;
417  }
418  i4 = 0; // reset loop counter even though no more rounds through the loop
419
420*/
421
422// Define looping variables
423#define PD_INIT(_LOOP) \
424  const int n##_LOOP = details->pd_length[_LOOP]; \
425  const int p##_LOOP = details->pd_par[_LOOP]; \
426  global const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \
427  global const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \
428  int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP;
429
430// Jump into the middle of the dispersity loop
431#define PD_OPEN(_LOOP,_OUTER) \
432  while (i##_LOOP < n##_LOOP) { \
433    local_values.vector[p##_LOOP] = v##_LOOP[i##_LOOP]; \
434    const double weight##_LOOP = w##_LOOP[i##_LOOP] * weight##_OUTER;
435
436// create the variable "weight#=1.0" where # is the outermost level+1 (=MAX_PD).
437#define _PD_OUTERMOST_WEIGHT(_n) const double weight##_n = 1.0;
438#define PD_OUTERMOST_WEIGHT(_n) _PD_OUTERMOST_WEIGHT(_n)
439
440// Close out the loop
441#define PD_CLOSE(_LOOP) \
442    if (step >= pd_stop) break; \
443    ++i##_LOOP; \
444  } \
445  i##_LOOP = 0;
446
447// The main loop body is essentially:
448//
449//    BUILD_ROTATION      // construct the rotation matrix qxy => qabc
450//    for each q
451//        FETCH_Q         // set qx,qy from the q input vector
452//        APPLY_ROTATION  // convert qx,qy to qa,qb,qc
453//        CALL_KERNEL     // scattering = Iqxy(qa, qb, qc, p1, p2, ...)
454//
455// Depending on the shape type (radial, axial, triaxial), the variables
456// and calling parameters will be slightly different.  These macros
457// capture the differences in one spot so the rest of the code is easier
458// to read.
459#if defined(CALL_IQ)
460  // unoriented 1D
461  double qk;
462  #define SCALE_DPHI_AND_SET_WEIGHT() const double weight=weight0
463  #define FETCH_Q() do { qk = q[q_index]; } while (0)
464  #define BUILD_ROTATION() do {} while(0)
465  #define APPLY_ROTATION() do {} while(0)
466  #define CALL_KERNEL() CALL_IQ(qk, local_values.table)
467
468#elif defined(CALL_IQ_A)
469  // unoriented 2D
470  double qx, qy;
471  #define SCALE_DPHI_AND_SET_WEIGHT() const double weight=weight0
472  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
473  #define BUILD_ROTATION() do {} while(0)
474  #define APPLY_ROTATION() do {} while(0)
475  #define CALL_KERNEL() CALL_IQ_A(sqrt(qx*qx+qy*qy), local_values.table)
476
477#elif defined(CALL_IQ_AC)
478  // oriented symmetric 2D
479  double qx, qy;
480  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
481  double qa, qc;
482  QACRotation rotation;
483  // Grab the "view" angles (theta, phi) from the initial parameter table.
484  // These are separate from the "jitter" angles (dtheta, dphi) which are
485  // stored with the dispersity values and copied to the local parameter
486  // table as we go through the mesh.
487  const double theta = values[details->theta_par+2];
488  const double phi = values[details->theta_par+3];
489  double dtheta, dphi, weight;
490  #if USE_SCALED_DPHI
491    #define SCALE_DPHI_AND_SET_WEIGHT() do { \
492      dtheta = local_values.table.theta; \
493      dphi = local_values.table.phi; \
494      weight = weight0; \
495      if (dtheta != 90.0) dphi /= fabs(cos(dtheta*M_PI_180)); \
496      else if (dphi != 0.0) weight = 0.; \
497      if (fabs(dphi) >= 180.) weight = 0.; \
498    } while (0)
499  #else
500    #define SCALE_DPHI_AND_SET_WEIGHT() do { \
501      dtheta = local_values.table.theta; \
502      dphi = local_values.table.phi; \
503      weight = fabs(cos(dtheta*M_PI_180)) * weight0; \
504    } while (0)
505  #endif
506  #define BUILD_ROTATION() qac_rotation(&rotation, theta, phi, dtheta, dphi);
507  #define APPLY_ROTATION() qac_apply(rotation, qx, qy, &qa, &qc)
508  #define CALL_KERNEL() CALL_IQ_AC(qa, qc, local_values.table)
509
510#elif defined(CALL_IQ_ABC)
511  // oriented asymmetric 2D
512  double qx, qy;
513  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
514  double qa, qb, qc;
515  QABCRotation rotation;
516  // Grab the "view" angles (theta, phi, psi) from the initial parameter table.
517  // These are separate from the "jitter" angles (dtheta, dphi, dpsi) which are
518  // stored with the dispersity values and copied to the local parameter
519  // table as we go through the mesh.
520  const double theta = values[details->theta_par+2];
521  const double phi = values[details->theta_par+3];
522  const double psi = values[details->theta_par+4];
523  double dtheta, dphi, dpsi, weight;
524  #if USE_SCALED_DPHI
525    #define SCALE_DPHI_AND_SET_WEIGHT() do { \
526      dtheta = local_values.table.theta; \
527      dphi = local_values.table.phi; \
528      dpsi = local_values.table.psi; \
529      weight = weight0; \
530      if (dtheta != 90.0) dphi /= fabs(cos(dtheta*M_PI_180)); \
531      else if (dphi != 0.0) weight = 0.; \
532      if (fabs(dphi) >= 180.) weight = 0.; \
533    } while (0)
534  #else
535    #define SCALE_DPHI_AND_SET_WEIGHT() do { \
536      dtheta = local_values.table.theta; \
537      dphi = local_values.table.phi; \
538      dpsi = local_values.table.psi; \
539      weight = fabs(cos(dtheta*M_PI_180)) * weight0; \
540    } while (0)
541  #endif
542  #define BUILD_ROTATION() qabc_rotation(&rotation, theta, phi, psi, dtheta, dphi, dpsi)
543  #define APPLY_ROTATION() qabc_apply(rotation, qx, qy, &qa, &qb, &qc)
544  #define CALL_KERNEL() CALL_IQ_ABC(qa, qb, qc, local_values.table)
545
546#endif
547
548// ====== construct the loops =======
549
550// Pointers to the start of the dispersity and weight vectors, if needed.
551#if MAX_PD>0
552  global const double *pd_value = values + NUM_VALUES;
553  global const double *pd_weight = pd_value + details->num_weights;
554#endif
555
556// The variable "step" is the current position in the dispersity loop.
557// It will be incremented each time a new point in the mesh is accumulated,
558// and used to test whether we have reached pd_stop.
559int step = pd_start;
560
561// define looping variables
562#if MAX_PD>4
563  PD_INIT(4)
564#endif
565#if MAX_PD>3
566  PD_INIT(3)
567#endif
568#if MAX_PD>2
569  PD_INIT(2)
570#endif
571#if MAX_PD>1
572  PD_INIT(1)
573#endif
574#if MAX_PD>0
575  PD_INIT(0)
576#endif
577
578// open nested loops
579PD_OUTERMOST_WEIGHT(MAX_PD)
580#if MAX_PD>4
581  PD_OPEN(4,5)
582#endif
583#if MAX_PD>3
584  PD_OPEN(3,4)
585#endif
586#if MAX_PD>2
587  PD_OPEN(2,3)
588#endif
589#if MAX_PD>1
590  PD_OPEN(1,2)
591#endif
592#if MAX_PD>0
593  PD_OPEN(0,1)
594#endif
595
596
597//if (q_index==0) {printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n");}
598  SCALE_DPHI_AND_SET_WEIGHT();
599
600  // ====== loop body =======
601  #ifdef INVALID
602  if (!INVALID(local_values.table))
603  #endif
604  {
605    // Accumulate I(q)
606    // Note: weight==0 must always be excluded
607    if (weight > cutoff) {
608      pd_norm += weight * CALL_VOLUME(local_values.table);
609      BUILD_ROTATION();
610
611#ifndef USE_OPENCL
612      // DLL needs to explicitly loop over the q values.
613      #ifdef USE_OPENMP
614      #pragma omp parallel for
615      #endif
616      for (q_index=0; q_index<nq; q_index++)
617#endif // !USE_OPENCL
618      {
619
620        FETCH_Q();
621        APPLY_ROTATION();
622
623        // ======= COMPUTE SCATTERING ==========
624        #if defined(MAGNETIC) && NUM_MAGNETIC > 0
625          // Compute the scattering from the magnetic cross sections.
626          double scattering = 0.0;
627          const double qsq = qx*qx + qy*qy;
628          if (qsq > 1.e-16) {
629            // TODO: what is the magnetic scattering at q=0
630            const double px = (qy*cos_mspin + qx*sin_mspin)/qsq;
631            const double py = (qy*sin_mspin - qx*cos_mspin)/qsq;
632
633            // loop over uu, ud real, du real, dd, ud imag, du imag
634            for (int xs=0; xs<6; xs++) {
635              const double xs_weight = spins[xs];
636              if (xs_weight > 1.e-8) {
637                // Since the cross section weight is significant, set the slds
638                // to the effective slds for this cross section, call the
639                // kernel, and add according to weight.
640                for (int sk=0; sk<NUM_MAGNETIC; sk++) {
641                  const int32_t mag_index = NUM_PARS+5 + 3*sk;
642                  const int32_t sld_index = slds[sk];
643                  const double mx = values[mag_index];
644                  const double my = values[mag_index+1];
645                  const double mz = values[mag_index+2];
646                  local_values.vector[sld_index] =
647                    mag_sld(xs, qx, qy, px, py, values[sld_index+2], mx, my, mz);
648                }
649                scattering += xs_weight * CALL_KERNEL();
650              }
651            }
652          }
653        #else  // !MAGNETIC
654          const double scattering = CALL_KERNEL();
655        #endif // !MAGNETIC
656//printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight0);
657
658        #ifdef USE_OPENCL
659          this_result += weight * scattering;
660        #else // !USE_OPENCL
661          result[q_index] += weight * scattering;
662        #endif // !USE_OPENCL
663      }
664    }
665  }
666
667// close nested loops
668++step;
669#if MAX_PD>0
670  PD_CLOSE(0)
671#endif
672#if MAX_PD>1
673  PD_CLOSE(1)
674#endif
675#if MAX_PD>2
676  PD_CLOSE(2)
677#endif
678#if MAX_PD>3
679  PD_CLOSE(3)
680#endif
681#if MAX_PD>4
682  PD_CLOSE(4)
683#endif
684
685// Remember the current result and the updated norm.
686#ifdef USE_OPENCL
687  result[q_index] = this_result;
688  if (q_index == 0) result[nq] = pd_norm;
689//if (q_index == 0) printf("res: %g/%g\n", result[0], pd_norm);
690#else // !USE_OPENCL
691  result[nq] = pd_norm;
692//printf("res: %g/%g\n", result[0], pd_norm);
693#endif // !USE_OPENCL
694
695// clear the macros in preparation for the next kernel
696#undef PD_INIT
697#undef PD_OPEN
698#undef PD_CLOSE
699#undef SCALE_DPHI_AND_SET_WEIGHT
700#undef FETCH_Q
701#undef BUILD_ROTATION
702#undef APPLY_ROTATION
703#undef CALL_KERNEL
704}
Note: See TracBrowser for help on using the repository browser.