source: sasmodels/sasmodels/kernel_iq.c @ 2c108a3

core_shell_microgelsmagnetic_modelticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since 2c108a3 was 2c108a3, checked in by Paul Kienzle <pkienzle@…>, 7 years ago

simplify magnetism code

  • Property mode set to 100644
File size: 22.1 KB
Line 
1/*
2    ##########################################################
3    #                                                        #
4    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
5    #   !!                                              !!   #
6    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
7    #   !!                                              !!   #
8    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
9    #                                                        #
10    ##########################################################
11*/
12
13// NOTE: the following macros are defined in generate.py:
14//
15//  MAX_PD : the maximum number of dispersity loops allowed
16//  NUM_PARS : the number of parameters in the parameter table
17//  NUM_VALUES : the number of values to skip at the start of the
18//      values array before you get to the dispersity values.
19//  PARAMETER_TABLE : list of parameter declarations used to create the
20//      ParameterTable type.
21//  KERNEL_NAME : model_Iq, model_Iqxy or model_Imagnetic.  This code is
22//      included three times, once for each kernel type.
23//  MAGNETIC : defined when the magnetic kernel is being instantiated
24//  NUM_MAGNETIC : the number of magnetic parameters
25//  MAGNETIC_PARS : a comma-separated list of indices to the sld
26//      parameters in the parameter table.
27//  CALL_VOLUME(table) : call the form volume function
28//  CALL_IQ(q, table) : call the Iq function for 1D calcs.
29//  CALL_IQ_A(q, table) : call the Iq function with |q| for 2D data.
30//  CALL_IQ_AC(qa, qc, table) : call the Iqxy function for symmetric shapes
31//  CALL_IQ_ABC(qa, qc, table) : call the Iqxy function for asymmetric shapes
32//  INVALID(table) : test if the current point is feesible to calculate.  This
33//      will be defined in the kernel definition file.
34
35#ifndef _PAR_BLOCK_ // protected block so we can include this code twice.
36#define _PAR_BLOCK_
37
38typedef struct {
39#if MAX_PD > 0
40    int32_t pd_par[MAX_PD];     // id of the nth dispersity variable
41    int32_t pd_length[MAX_PD];  // length of the nth dispersity weight vector
42    int32_t pd_offset[MAX_PD];  // offset of pd weights in the value & weight vector
43    int32_t pd_stride[MAX_PD];  // stride to move to the next index at this level
44#endif // MAX_PD > 0
45    int32_t num_eval;           // total number of voxels in hypercube
46    int32_t num_weights;        // total length of the weights vector
47    int32_t num_active;         // number of non-trivial pd loops
48    int32_t theta_par;          // id of first orientation variable
49} ProblemDetails;
50
51// Intel HD 4000 needs private arrays to be a multiple of 4 long
52typedef struct {
53    PARAMETER_TABLE
54} ParameterTable;
55typedef union {
56    ParameterTable table;
57    double vector[4*((NUM_PARS+3)/4)];
58} ParameterBlock;
59#endif // _PAR_BLOCK_
60
61#if defined(MAGNETIC) && NUM_MAGNETIC > 0
62// ===== Helper functions for magnetism =====
63
64// Return value restricted between low and high
65static double clip(double value, double low, double high)
66{
67  return (value < low ? low : (value > high ? high : value));
68}
69
70// Compute spin cross sections given in_spin and out_spin
71// To convert spin cross sections to sld b:
72//     uu * (sld - m_sigma_x);
73//     dd * (sld + m_sigma_x);
74//     ud * (m_sigma_y - 1j*m_sigma_z);
75//     du * (m_sigma_y + 1j*m_sigma_z);
76// weights for spin crosssections: dd du real, ud real, uu, du imag, ud imag
77static void set_spin_weights(double in_spin, double out_spin, double spins[4])
78{
79  in_spin = clip(in_spin, 0.0, 1.0);
80  out_spin = clip(out_spin, 0.0, 1.0);
81  spins[0] = sqrt(sqrt((1.0-in_spin) * (1.0-out_spin))); // dd
82  spins[1] = sqrt(sqrt((1.0-in_spin) * out_spin));       // du real
83  spins[2] = sqrt(sqrt(in_spin * (1.0-out_spin)));       // ud real
84  spins[3] = sqrt(sqrt(in_spin * out_spin));             // uu
85  spins[4] = spins[1]; // du imag
86  spins[5] = spins[2]; // ud imag
87}
88
89// Compute the magnetic sld
90static double mag_sld(
91  const int xs, // 0=dd, 1=du real, 2=ud real, 3=uu, 4=du imag, 5=up imag
92  const double qx, const double qy,
93  const double px, const double py,
94  const double sld,
95  const double mx, const double my, const double mz
96)
97{
98  if (xs < 4) {
99    const double perp = qy*mx - qx*my;
100    switch (xs) {
101      case 0: // uu => sld - D M_perpx
102          return sld - px*perp;
103      case 1: // ud real => -D M_perpy
104          return py*perp;
105      case 2: // du real => -D M_perpy
106          return py*perp;
107      case 3: // dd real => sld + D M_perpx
108          return sld + px*perp;
109    }
110  } else {
111    if (xs== 4) {
112      return -mz;  // ud imag => -D M_perpz
113    } else { // index == 5
114      return mz;   // du imag => D M_perpz
115    }
116  }
117}
118
119
120#endif
121
122// ===== Helper functions for orientation and jitter =====
123
124// To change the definition of the angles, run explore/angles.py, which
125// uses sympy to generate the equations.
126
127#if !defined(_QAC_SECTION) && defined(CALL_IQ_AC)
128#define _QAC_SECTION
129
130typedef struct {
131    double R31, R32;
132} QACRotation;
133
134// Fill in the rotation matrix R from the view angles (theta, phi) and the
135// jitter angles (dtheta, dphi).  This matrix can be applied to all of the
136// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qc]'
137static void
138qac_rotation(
139    QACRotation *rotation,
140    double theta, double phi,
141    double dtheta, double dphi)
142{
143    double sin_theta, cos_theta;
144    double sin_phi, cos_phi;
145
146    // reverse view matrix
147    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
148    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
149    const double V11 = cos_phi*cos_theta;
150    const double V12 = sin_phi*cos_theta;
151    const double V21 = -sin_phi;
152    const double V22 = cos_phi;
153    const double V31 = sin_theta*cos_phi;
154    const double V32 = sin_phi*sin_theta;
155
156    // reverse jitter matrix
157    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
158    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
159    const double J31 = sin_theta;
160    const double J32 = -sin_phi*cos_theta;
161    const double J33 = cos_phi*cos_theta;
162
163    // reverse matrix
164    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
165    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
166}
167
168// Apply the rotation matrix returned from qac_rotation to the point (qx,qy),
169// returning R*[qx,qy]' = [qa,qc]'
170static double
171qac_apply(
172    QACRotation rotation,
173    double qx, double qy,
174    double *qa_out, double *qc_out)
175{
176    const double dqc = rotation.R31*qx + rotation.R32*qy;
177    // Indirect calculation of qab, from qab^2 = |q|^2 - qc^2
178    const double dqa = sqrt(-dqc*dqc + qx*qx + qy*qy);
179
180    *qa_out = dqa;
181    *qc_out = dqc;
182}
183#endif // _QAC_SECTION
184
185#if !defined(_QABC_SECTION) && defined(CALL_IQ_ABC)
186#define _QABC_SECTION
187
188typedef struct {
189    double R11, R12;
190    double R21, R22;
191    double R31, R32;
192} QABCRotation;
193
194// Fill in the rotation matrix R from the view angles (theta, phi, psi) and the
195// jitter angles (dtheta, dphi, dpsi).  This matrix can be applied to all of the
196// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qb,qc]'
197static void
198qabc_rotation(
199    QABCRotation *rotation,
200    double theta, double phi, double psi,
201    double dtheta, double dphi, double dpsi)
202{
203    double sin_theta, cos_theta;
204    double sin_phi, cos_phi;
205    double sin_psi, cos_psi;
206
207    // reverse view matrix
208    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
209    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
210    SINCOS(psi*M_PI_180, sin_psi, cos_psi);
211    const double V11 = -sin_phi*sin_psi + cos_phi*cos_psi*cos_theta;
212    const double V12 = sin_phi*cos_psi*cos_theta + sin_psi*cos_phi;
213    const double V21 = -sin_phi*cos_psi - sin_psi*cos_phi*cos_theta;
214    const double V22 = -sin_phi*sin_psi*cos_theta + cos_phi*cos_psi;
215    const double V31 = sin_theta*cos_phi;
216    const double V32 = sin_phi*sin_theta;
217
218    // reverse jitter matrix
219    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
220    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
221    SINCOS(dpsi*M_PI_180, sin_psi, cos_psi);
222    const double J11 = cos_psi*cos_theta;
223    const double J12 = sin_phi*sin_theta*cos_psi + sin_psi*cos_phi;
224    const double J13 = sin_phi*sin_psi - sin_theta*cos_phi*cos_psi;
225    const double J21 = -sin_psi*cos_theta;
226    const double J22 = -sin_phi*sin_psi*sin_theta + cos_phi*cos_psi;
227    const double J23 = sin_phi*cos_psi + sin_psi*sin_theta*cos_phi;
228    const double J31 = sin_theta;
229    const double J32 = -sin_phi*cos_theta;
230    const double J33 = cos_phi*cos_theta;
231
232    // reverse matrix
233    rotation->R11 = J11*V11 + J12*V21 + J13*V31;
234    rotation->R12 = J11*V12 + J12*V22 + J13*V32;
235    rotation->R21 = J21*V11 + J22*V21 + J23*V31;
236    rotation->R22 = J21*V12 + J22*V22 + J23*V32;
237    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
238    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
239}
240
241// Apply the rotation matrix returned from qabc_rotation to the point (qx,qy),
242// returning R*[qx,qy]' = [qa,qb,qc]'
243static double
244qabc_apply(
245    QABCRotation rotation,
246    double qx, double qy,
247    double *qa_out, double *qb_out, double *qc_out)
248{
249    *qa_out = rotation.R11*qx + rotation.R12*qy;
250    *qb_out = rotation.R21*qx + rotation.R22*qy;
251    *qc_out = rotation.R31*qx + rotation.R32*qy;
252}
253
254#endif // _QABC_SECTION
255
256
257// ==================== KERNEL CODE ========================
258
259kernel
260void KERNEL_NAME(
261    int32_t nq,                 // number of q values
262    const int32_t pd_start,     // where we are in the dispersity loop
263    const int32_t pd_stop,      // where we are stopping in the dispersity loop
264    global const ProblemDetails *details,
265    global const double *values,
266    global const double *q, // nq q values, with padding to boundary
267    global double *result,  // nq+1 return values, again with padding
268    const double cutoff     // cutoff in the dispersity weight product
269    )
270{
271#ifdef USE_OPENCL
272  // who we are and what element we are working with
273  const int q_index = get_global_id(0);
274  if (q_index >= nq) return;
275#else
276  // Define q_index here so that debugging statements can be written to work
277  // for both OpenCL and DLL using:
278  //    if (q_index == 0) {printf(...);}
279  int q_index = 0;
280#endif
281
282  // Storage for the current parameter values.  These will be updated as we
283  // walk the dispersity mesh.
284  ParameterBlock local_values;
285
286#if defined(MAGNETIC) && NUM_MAGNETIC>0
287  // Location of the sld parameters in the parameter vector.
288  // These parameters are updated with the effective sld due to magnetism.
289  const int32_t slds[] = { MAGNETIC_PARS };
290
291  // Interpret polarization cross section.
292  //     up_frac_i = values[NUM_PARS+2];
293  //     up_frac_f = values[NUM_PARS+3];
294  //     up_angle = values[NUM_PARS+4];
295  // TODO: could precompute more magnetism parameters before calling the kernel.
296  double spins[8];  // uu, ud real, du real, dd, ud imag, du imag, fill, fill
297  double cos_mspin, sin_mspin;
298  set_spin_weights(values[NUM_PARS+2], values[NUM_PARS+3], spins);
299  SINCOS(-values[NUM_PARS+4]*M_PI_180, sin_mspin, cos_mspin);
300#endif // MAGNETIC
301
302  // Fill in the initial variables
303  //   values[0] is scale
304  //   values[1] is background
305  #ifdef USE_OPENMP
306  #pragma omp parallel for
307  #endif
308  for (int i=0; i < NUM_PARS; i++) {
309    local_values.vector[i] = values[2+i];
310//if (q_index==0) printf("p%d = %g\n",i, local_values.vector[i]);
311  }
312//if (q_index==0) printf("NUM_VALUES:%d  NUM_PARS:%d  MAX_PD:%d\n", NUM_VALUES, NUM_PARS, MAX_PD);
313//if (q_index==0) printf("start:%d  stop:%d\n", pd_start, pd_stop);
314
315  // If pd_start is zero that means that we are starting a new calculation,
316  // and must initialize the result to zero.  Otherwise, we are restarting
317  // the calculation from somewhere in the middle of the dispersity mesh,
318  // and we update the value rather than reset it. Similarly for the
319  // normalization factor, which is stored as the final value in the
320  // results vector (one past the number of q values).
321  //
322  // The code differs slightly between opencl and dll since opencl is only
323  // seeing one q value (stored in the variable "this_result") while the dll
324  // version must loop over all q.
325  #ifdef USE_OPENCL
326    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
327    double this_result = (pd_start == 0 ? 0.0 : result[q_index]);
328  #else // !USE_OPENCL
329    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
330    if (pd_start == 0) {
331      #ifdef USE_OPENMP
332      #pragma omp parallel for
333      #endif
334      for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0;
335    }
336//if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]);
337#endif // !USE_OPENCL
338
339
340// ====== macros to set up the parts of the loop =======
341/*
342Based on the level of the loop, uses C preprocessor magic to construct
343level-specific looping variables, including these from loop level 3:
344
345  int n3 : length of loop for mesh level 3
346  int i3 : current position in the loop for level 3, which is calculated
347       from a combination of pd_start, pd_stride[3] and pd_length[3].
348  int p3 : is the index into the parameter table for mesh level 3
349  double v3[] : pointer into dispersity array to values for loop 3
350  double w3[] : pointer into dispersity array to weights for loop 3
351  double weight3 : the product of weights from levels 3 and up, computed
352       as weight5*weight4*w3[i3].  Note that we need an outermost
353       value weight5 set to 1.0 for this to work properly.
354
355After expansion, the loop struction will look like the following:
356
357  // --- PD_INIT(4) ---
358  const int n4 = pd_length[4];
359  const int p4 = pd_par[4];
360  global const double *v4 = pd_value + pd_offset[4];
361  global const double *w4 = pd_weight + pd_offset[4];
362  int i4 = (pd_start/pd_stride[4])%n4;  // position in level 4 at pd_start
363
364  // --- PD_INIT(3) ---
365  const int n3 = pd_length[3];
366  ...
367  int i3 = (pd_start/pd_stride[3])%n3;  // position in level 3 at pd_start
368
369  PD_INIT(2)
370  PD_INIT(1)
371  PD_INIT(0)
372
373  // --- PD_OUTERMOST_WEIGHT(5) ---
374  const double weight5 = 1.0;
375
376  // --- PD_OPEN(4,5) ---
377  while (i4 < n4) {
378    parameter[p4] = v4[i4];  // set the value for pd parameter 4 at this mesh point
379    const double weight4 = w4[i4] * weight5;
380
381    // from PD_OPEN(3,4)
382    while (i3 < n3) {
383      parameter[p3] = v3[i3];  // set the value for pd parameter 3 at this mesh point
384      const double weight3 = w3[i3] * weight4;
385
386      PD_OPEN(3,2)
387      PD_OPEN(2,1)
388      PD_OPEN(0,1)
389
390      ... main loop body ...
391
392      ++step;  // increment counter representing position in dispersity mesh
393
394      PD_CLOSE(0)
395      PD_CLOSE(1)
396      PD_CLOSE(2)
397
398      // --- PD_CLOSE(3) ---
399      if (step >= pd_stop) break;
400      ++i3;
401    }
402    i3 = 0; // reset loop counter for next round through the loop
403
404    // --- PD_CLOSE(4) ---
405    if (step >= pd_stop) break;
406    ++i4;
407  }
408  i4 = 0; // reset loop counter even though no more rounds through the loop
409
410*/
411
412// Define looping variables
413#define PD_INIT(_LOOP) \
414  const int n##_LOOP = details->pd_length[_LOOP]; \
415  const int p##_LOOP = details->pd_par[_LOOP]; \
416  global const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \
417  global const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \
418  int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP;
419
420// Jump into the middle of the dispersity loop
421#define PD_OPEN(_LOOP,_OUTER) \
422  while (i##_LOOP < n##_LOOP) { \
423    local_values.vector[p##_LOOP] = v##_LOOP[i##_LOOP]; \
424    const double weight##_LOOP = w##_LOOP[i##_LOOP] * weight##_OUTER;
425
426// create the variable "weight#=1.0" where # is the outermost level+1 (=MAX_PD).
427#define _PD_OUTERMOST_WEIGHT(_n) const double weight##_n = 1.0;
428#define PD_OUTERMOST_WEIGHT(_n) _PD_OUTERMOST_WEIGHT(_n)
429
430// Close out the loop
431#define PD_CLOSE(_LOOP) \
432    if (step >= pd_stop) break; \
433    ++i##_LOOP; \
434  } \
435  i##_LOOP = 0;
436
437// The main loop body is essentially:
438//
439//    BUILD_ROTATION      // construct the rotation matrix qxy => qabc
440//    for each q
441//        FETCH_Q         // set qx,qy from the q input vector
442//        APPLY_ROTATION  // convert qx,qy to qa,qb,qc
443//        CALL_KERNEL     // scattering = Iqxy(qa, qb, qc, p1, p2, ...)
444//
445// Depending on the shape type (radial, axial, triaxial), the variables
446// and calling parameters will be slightly different.  These macros
447// capture the differences in one spot so the rest of the code is easier
448// to read.
449#if defined(CALL_IQ)
450  // unoriented 1D
451  double qk;
452  #define FETCH_Q() do { qk = q[q_index]; } while (0)
453  #define BUILD_ROTATION() do {} while(0)
454  #define APPLY_ROTATION() do {} while(0)
455  #define CALL_KERNEL() CALL_IQ(qk, local_values.table)
456
457#elif defined(CALL_IQ_A)
458  // unoriented 2D
459  double qx, qy;
460  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
461  #define BUILD_ROTATION() do {} while(0)
462  #define APPLY_ROTATION() do {} while(0)
463  #define CALL_KERNEL() CALL_IQ_A(sqrt(qx*qx+qy*qy), local_values.table)
464
465#elif defined(CALL_IQ_AC)
466  // oriented symmetric 2D
467  double qx, qy;
468  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
469  double qa, qc;
470  QACRotation rotation;
471  // Grab the "view" angles (theta, phi) from the initial parameter table.
472  // These are separate from the "jitter" angles (dtheta, dphi) which are
473  // stored with the dispersity values and copied to the local parameter
474  // table as we go through the mesh.
475  const double theta = values[details->theta_par+2];
476  const double phi = values[details->theta_par+3];
477  #define BUILD_ROTATION() qac_rotation(&rotation, \
478    theta, phi, local_values.table.theta, local_values.table.phi)
479  #define APPLY_ROTATION() qac_apply(rotation, qx, qy, &qa, &qc)
480  #define CALL_KERNEL() CALL_IQ_AC(qa, qc, local_values.table)
481
482#elif defined(CALL_IQ_ABC)
483  // oriented asymmetric 2D
484  double qx, qy;
485  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
486  double qa, qb, qc;
487  QABCRotation rotation;
488  // Grab the "view" angles (theta, phi, psi) from the initial parameter table.
489  // These are separate from the "jitter" angles (dtheta, dphi, dpsi) which are
490  // stored with the dispersity values and copied to the local parameter
491  // table as we go through the mesh.
492  const double theta = values[details->theta_par+2];
493  const double phi = values[details->theta_par+3];
494  const double psi = values[details->theta_par+4];
495  #define BUILD_ROTATION() qabc_rotation(&rotation, \
496    theta, phi, psi, local_values.table.theta, \
497    local_values.table.phi, local_values.table.psi)
498  #define APPLY_ROTATION() qabc_apply(rotation, qx, qy, &qa, &qb, &qc)
499  #define CALL_KERNEL() CALL_IQ_ABC(qa, qb, qc, local_values.table)
500
501#endif
502
503// ====== construct the loops =======
504
505// Pointers to the start of the dispersity and weight vectors, if needed.
506#if MAX_PD>0
507  global const double *pd_value = values + NUM_VALUES;
508  global const double *pd_weight = pd_value + details->num_weights;
509#endif
510
511// The variable "step" is the current position in the dispersity loop.
512// It will be incremented each time a new point in the mesh is accumulated,
513// and used to test whether we have reached pd_stop.
514int step = pd_start;
515
516// define looping variables
517#if MAX_PD>4
518  PD_INIT(4)
519#endif
520#if MAX_PD>3
521  PD_INIT(3)
522#endif
523#if MAX_PD>2
524  PD_INIT(2)
525#endif
526#if MAX_PD>1
527  PD_INIT(1)
528#endif
529#if MAX_PD>0
530  PD_INIT(0)
531#endif
532
533// open nested loops
534PD_OUTERMOST_WEIGHT(MAX_PD)
535#if MAX_PD>4
536  PD_OPEN(4,5)
537#endif
538#if MAX_PD>3
539  PD_OPEN(3,4)
540#endif
541#if MAX_PD>2
542  PD_OPEN(2,3)
543#endif
544#if MAX_PD>1
545  PD_OPEN(1,2)
546#endif
547#if MAX_PD>0
548  PD_OPEN(0,1)
549#endif
550
551
552//if (q_index==0) {printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n");}
553
554  // ====== loop body =======
555  #ifdef INVALID
556  if (!INVALID(local_values.table))
557  #endif
558  {
559    // Accumulate I(q)
560    // Note: weight==0 must always be excluded
561    if (weight0 > cutoff) {
562      pd_norm += weight0 * CALL_VOLUME(local_values.table);
563      BUILD_ROTATION();
564
565#ifndef USE_OPENCL
566      // DLL needs to explicitly loop over the q values.
567      #ifdef USE_OPENMP
568      #pragma omp parallel for
569      #endif
570      for (q_index=0; q_index<nq; q_index++)
571#endif // !USE_OPENCL
572      {
573
574        FETCH_Q();
575        APPLY_ROTATION();
576
577        // ======= COMPUTE SCATTERING ==========
578        #if defined(MAGNETIC) && NUM_MAGNETIC > 0
579          // Compute the scattering from the magnetic cross sections.
580          double scattering = 0.0;
581          const double qsq = qx*qx + qy*qy;
582          if (qsq > 1.e-16) {
583            // TODO: what is the magnetic scattering at q=0
584            const double px = (qy*cos_mspin + qx*sin_mspin)/qsq;
585            const double py = (qy*sin_mspin - qx*cos_mspin)/qsq;
586
587            // loop over uu, ud real, du real, dd, ud imag, du imag
588            for (int xs=0; xs<6; xs++) {
589              const double xs_weight = spins[xs];
590              if (xs_weight > 1.e-8) {
591                // Since the cross section weight is significant, set the slds
592                // to the effective slds for this cross section, call the
593                // kernel, and add according to weight.
594                for (int sk=0; sk<NUM_MAGNETIC; sk++) {
595                  const int32_t mag_index = NUM_PARS+5 + 3*sk;
596                  const int32_t sld_index = slds[sk];
597                  const double mx = values[mag_index];
598                  const double my = values[mag_index+1];
599                  const double mz = values[mag_index+2];
600                  local_values.vector[sld_index] =
601                    mag_sld(xs, qx, qy, px, py, values[sld_index+2], mx, my, mz);
602                }
603                scattering += xs_weight * CALL_KERNEL();
604              }
605            }
606          }
607        #else  // !MAGNETIC
608          const double scattering = CALL_KERNEL();
609        #endif // !MAGNETIC
610//printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight, weight0);
611
612        #ifdef USE_OPENCL
613          this_result += weight0 * scattering;
614        #else // !USE_OPENCL
615          result[q_index] += weight0 * scattering;
616        #endif // !USE_OPENCL
617      }
618    }
619  }
620
621// close nested loops
622++step;
623#if MAX_PD>0
624  PD_CLOSE(0)
625#endif
626#if MAX_PD>1
627  PD_CLOSE(1)
628#endif
629#if MAX_PD>2
630  PD_CLOSE(2)
631#endif
632#if MAX_PD>3
633  PD_CLOSE(3)
634#endif
635#if MAX_PD>4
636  PD_CLOSE(4)
637#endif
638
639// Remember the current result and the updated norm.
640#ifdef USE_OPENCL
641  result[q_index] = this_result;
642  if (q_index == 0) result[nq] = pd_norm;
643//if (q_index == 0) printf("res: %g/%g\n", result[0], pd_norm);
644#else // !USE_OPENCL
645  result[nq] = pd_norm;
646//printf("res: %g/%g\n", result[0], pd_norm);
647#endif // !USE_OPENCL
648
649// clear the macros in preparation for the next kernel
650#undef PD_INIT
651#undef PD_OPEN
652#undef PD_CLOSE
653#undef FETCH_Q
654#undef BUILD_ROTATION
655#undef APPLY_ROTATION
656#undef CALL_KERNEL
657}
Note: See TracBrowser for help on using the repository browser.