source: sasmodels/sasmodels/kernel_iq.c @ ff10479

core_shell_microgelsmagnetic_modelticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since ff10479 was ff10479, checked in by Paul Kienzle <pkienzle@…>, 6 years ago

set projection used in theory to that selected in jitter.py

  • Property mode set to 100644
File size: 23.0 KB
Line 
1/*
2    ##########################################################
3    #                                                        #
4    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
5    #   !!                                              !!   #
6    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
7    #   !!                                              !!   #
8    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
9    #                                                        #
10    ##########################################################
11*/
12
13// NOTE: the following macros are defined in generate.py:
14//
15//  MAX_PD : the maximum number of dispersity loops allowed
16//  NUM_PARS : the number of parameters in the parameter table
17//  NUM_VALUES : the number of values to skip at the start of the
18//      values array before you get to the dispersity values.
19//  PARAMETER_TABLE : list of parameter declarations used to create the
20//      ParameterTable type.
21//  KERNEL_NAME : model_Iq, model_Iqxy or model_Imagnetic.  This code is
22//      included three times, once for each kernel type.
23//  MAGNETIC : defined when the magnetic kernel is being instantiated
24//  NUM_MAGNETIC : the number of magnetic parameters
25//  MAGNETIC_PARS : a comma-separated list of indices to the sld
26//      parameters in the parameter table.
27//  CALL_VOLUME(table) : call the form volume function
28//  CALL_IQ(q, table) : call the Iq function for 1D calcs.
29//  CALL_IQ_A(q, table) : call the Iq function with |q| for 2D data.
30//  CALL_IQ_AC(qa, qc, table) : call the Iqxy function for symmetric shapes
31//  CALL_IQ_ABC(qa, qc, table) : call the Iqxy function for asymmetric shapes
32//  INVALID(table) : test if the current point is feesible to calculate.  This
33//      will be defined in the kernel definition file.
34//  PROJECTION : equirectangular=1, sinusoidal=2
35//      see explore/jitter.py for definitions.
36
37#ifndef _PAR_BLOCK_ // protected block so we can include this code twice.
38#define _PAR_BLOCK_
39
40typedef struct {
41#if MAX_PD > 0
42    int32_t pd_par[MAX_PD];     // id of the nth dispersity variable
43    int32_t pd_length[MAX_PD];  // length of the nth dispersity weight vector
44    int32_t pd_offset[MAX_PD];  // offset of pd weights in the value & weight vector
45    int32_t pd_stride[MAX_PD];  // stride to move to the next index at this level
46#endif // MAX_PD > 0
47    int32_t num_eval;           // total number of voxels in hypercube
48    int32_t num_weights;        // total length of the weights vector
49    int32_t num_active;         // number of non-trivial pd loops
50    int32_t theta_par;          // id of first orientation variable
51} ProblemDetails;
52
53// Intel HD 4000 needs private arrays to be a multiple of 4 long
54typedef struct {
55    PARAMETER_TABLE
56} ParameterTable;
57typedef union {
58    ParameterTable table;
59    double vector[4*((NUM_PARS+3)/4)];
60} ParameterBlock;
61#endif // _PAR_BLOCK_
62
63#if defined(MAGNETIC) && NUM_MAGNETIC > 0
64// ===== Helper functions for magnetism =====
65
66// Return value restricted between low and high
67static double clip(double value, double low, double high)
68{
69  return (value < low ? low : (value > high ? high : value));
70}
71
72// Compute spin cross sections given in_spin and out_spin
73// To convert spin cross sections to sld b:
74//     uu * (sld - m_sigma_x);
75//     dd * (sld + m_sigma_x);
76//     ud * (m_sigma_y - 1j*m_sigma_z);
77//     du * (m_sigma_y + 1j*m_sigma_z);
78// weights for spin crosssections: dd du real, ud real, uu, du imag, ud imag
79static void set_spin_weights(double in_spin, double out_spin, double spins[4])
80{
81  in_spin = clip(in_spin, 0.0, 1.0);
82  out_spin = clip(out_spin, 0.0, 1.0);
83  spins[0] = sqrt(sqrt((1.0-in_spin) * (1.0-out_spin))); // dd
84  spins[1] = sqrt(sqrt((1.0-in_spin) * out_spin));       // du real
85  spins[2] = sqrt(sqrt(in_spin * (1.0-out_spin)));       // ud real
86  spins[3] = sqrt(sqrt(in_spin * out_spin));             // uu
87  spins[4] = spins[1]; // du imag
88  spins[5] = spins[2]; // ud imag
89}
90
91// Compute the magnetic sld
92static double mag_sld(
93  const int xs, // 0=dd, 1=du real, 2=ud real, 3=uu, 4=du imag, 5=up imag
94  const double qx, const double qy,
95  const double px, const double py,
96  const double sld,
97  const double mx, const double my, const double mz
98)
99{
100  if (xs < 4) {
101    const double perp = qy*mx - qx*my;
102    switch (xs) {
103      case 0: // uu => sld - D M_perpx
104          return sld - px*perp;
105      case 1: // ud real => -D M_perpy
106          return py*perp;
107      case 2: // du real => -D M_perpy
108          return py*perp;
109      case 3: // dd real => sld + D M_perpx
110          return sld + px*perp;
111    }
112  } else {
113    if (xs== 4) {
114      return -mz;  // ud imag => -D M_perpz
115    } else { // index == 5
116      return mz;   // du imag => D M_perpz
117    }
118  }
119}
120
121
122#endif
123
124// ===== Helper functions for orientation and jitter =====
125
126// To change the definition of the angles, run explore/angles.py, which
127// uses sympy to generate the equations.
128
129#if !defined(_QAC_SECTION) && defined(CALL_IQ_AC)
130#define _QAC_SECTION
131
132typedef struct {
133    double R31, R32;
134} QACRotation;
135
136// Fill in the rotation matrix R from the view angles (theta, phi) and the
137// jitter angles (dtheta, dphi).  This matrix can be applied to all of the
138// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qc]'
139static void
140qac_rotation(
141    QACRotation *rotation,
142    double theta, double phi,
143    double dtheta, double dphi)
144{
145    double sin_theta, cos_theta;
146    double sin_phi, cos_phi;
147
148    // reverse view matrix
149    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
150    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
151    const double V11 = cos_phi*cos_theta;
152    const double V12 = sin_phi*cos_theta;
153    const double V21 = -sin_phi;
154    const double V22 = cos_phi;
155    const double V31 = sin_theta*cos_phi;
156    const double V32 = sin_phi*sin_theta;
157
158    // reverse jitter matrix
159    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
160    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
161    const double J31 = sin_theta;
162    const double J32 = -sin_phi*cos_theta;
163    const double J33 = cos_phi*cos_theta;
164
165    // reverse matrix
166    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
167    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
168}
169
170// Apply the rotation matrix returned from qac_rotation to the point (qx,qy),
171// returning R*[qx,qy]' = [qa,qc]'
172static double
173qac_apply(
174    QACRotation rotation,
175    double qx, double qy,
176    double *qa_out, double *qc_out)
177{
178    const double dqc = rotation.R31*qx + rotation.R32*qy;
179    // Indirect calculation of qab, from qab^2 = |q|^2 - qc^2
180    const double dqa = sqrt(-dqc*dqc + qx*qx + qy*qy);
181
182    *qa_out = dqa;
183    *qc_out = dqc;
184}
185#endif // _QAC_SECTION
186
187#if !defined(_QABC_SECTION) && defined(CALL_IQ_ABC)
188#define _QABC_SECTION
189
190typedef struct {
191    double R11, R12;
192    double R21, R22;
193    double R31, R32;
194} QABCRotation;
195
196// Fill in the rotation matrix R from the view angles (theta, phi, psi) and the
197// jitter angles (dtheta, dphi, dpsi).  This matrix can be applied to all of the
198// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qb,qc]'
199static void
200qabc_rotation(
201    QABCRotation *rotation,
202    double theta, double phi, double psi,
203    double dtheta, double dphi, double dpsi)
204{
205    double sin_theta, cos_theta;
206    double sin_phi, cos_phi;
207    double sin_psi, cos_psi;
208
209    // reverse view matrix
210    SINCOS(theta*M_PI_180, sin_theta, cos_theta);
211    SINCOS(phi*M_PI_180, sin_phi, cos_phi);
212    SINCOS(psi*M_PI_180, sin_psi, cos_psi);
213    const double V11 = -sin_phi*sin_psi + cos_phi*cos_psi*cos_theta;
214    const double V12 = sin_phi*cos_psi*cos_theta + sin_psi*cos_phi;
215    const double V21 = -sin_phi*cos_psi - sin_psi*cos_phi*cos_theta;
216    const double V22 = -sin_phi*sin_psi*cos_theta + cos_phi*cos_psi;
217    const double V31 = sin_theta*cos_phi;
218    const double V32 = sin_phi*sin_theta;
219
220    // reverse jitter matrix
221    SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
222    SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
223    SINCOS(dpsi*M_PI_180, sin_psi, cos_psi);
224    const double J11 = cos_psi*cos_theta;
225    const double J12 = sin_phi*sin_theta*cos_psi + sin_psi*cos_phi;
226    const double J13 = sin_phi*sin_psi - sin_theta*cos_phi*cos_psi;
227    const double J21 = -sin_psi*cos_theta;
228    const double J22 = -sin_phi*sin_psi*sin_theta + cos_phi*cos_psi;
229    const double J23 = sin_phi*cos_psi + sin_psi*sin_theta*cos_phi;
230    const double J31 = sin_theta;
231    const double J32 = -sin_phi*cos_theta;
232    const double J33 = cos_phi*cos_theta;
233
234    // reverse matrix
235    rotation->R11 = J11*V11 + J12*V21 + J13*V31;
236    rotation->R12 = J11*V12 + J12*V22 + J13*V32;
237    rotation->R21 = J21*V11 + J22*V21 + J23*V31;
238    rotation->R22 = J21*V12 + J22*V22 + J23*V32;
239    rotation->R31 = J31*V11 + J32*V21 + J33*V31;
240    rotation->R32 = J31*V12 + J32*V22 + J33*V32;
241}
242
243// Apply the rotation matrix returned from qabc_rotation to the point (qx,qy),
244// returning R*[qx,qy]' = [qa,qb,qc]'
245static double
246qabc_apply(
247    QABCRotation rotation,
248    double qx, double qy,
249    double *qa_out, double *qb_out, double *qc_out)
250{
251    *qa_out = rotation.R11*qx + rotation.R12*qy;
252    *qb_out = rotation.R21*qx + rotation.R22*qy;
253    *qc_out = rotation.R31*qx + rotation.R32*qy;
254}
255
256#endif // _QABC_SECTION
257
258
259// ==================== KERNEL CODE ========================
260
261kernel
262void KERNEL_NAME(
263    int32_t nq,                 // number of q values
264    const int32_t pd_start,     // where we are in the dispersity loop
265    const int32_t pd_stop,      // where we are stopping in the dispersity loop
266    global const ProblemDetails *details,
267    global const double *values,
268    global const double *q, // nq q values, with padding to boundary
269    global double *result,  // nq+1 return values, again with padding
270    const double cutoff     // cutoff in the dispersity weight product
271    )
272{
273#ifdef USE_OPENCL
274  // who we are and what element we are working with
275  const int q_index = get_global_id(0);
276  if (q_index >= nq) return;
277#else
278  // Define q_index here so that debugging statements can be written to work
279  // for both OpenCL and DLL using:
280  //    if (q_index == 0) {printf(...);}
281  int q_index = 0;
282#endif
283
284  // Storage for the current parameter values.  These will be updated as we
285  // walk the dispersity mesh.
286  ParameterBlock local_values;
287
288#if defined(MAGNETIC) && NUM_MAGNETIC>0
289  // Location of the sld parameters in the parameter vector.
290  // These parameters are updated with the effective sld due to magnetism.
291  const int32_t slds[] = { MAGNETIC_PARS };
292
293  // Interpret polarization cross section.
294  //     up_frac_i = values[NUM_PARS+2];
295  //     up_frac_f = values[NUM_PARS+3];
296  //     up_angle = values[NUM_PARS+4];
297  // TODO: could precompute more magnetism parameters before calling the kernel.
298  double spins[8];  // uu, ud real, du real, dd, ud imag, du imag, fill, fill
299  double cos_mspin, sin_mspin;
300  set_spin_weights(values[NUM_PARS+2], values[NUM_PARS+3], spins);
301  SINCOS(-values[NUM_PARS+4]*M_PI_180, sin_mspin, cos_mspin);
302#endif // MAGNETIC
303
304  // Fill in the initial variables
305  //   values[0] is scale
306  //   values[1] is background
307  #ifdef USE_OPENMP
308  #pragma omp parallel for
309  #endif
310  for (int i=0; i < NUM_PARS; i++) {
311    local_values.vector[i] = values[2+i];
312//if (q_index==0) printf("p%d = %g\n",i, local_values.vector[i]);
313  }
314//if (q_index==0) printf("NUM_VALUES:%d  NUM_PARS:%d  MAX_PD:%d\n", NUM_VALUES, NUM_PARS, MAX_PD);
315//if (q_index==0) printf("start:%d  stop:%d\n", pd_start, pd_stop);
316
317  // If pd_start is zero that means that we are starting a new calculation,
318  // and must initialize the result to zero.  Otherwise, we are restarting
319  // the calculation from somewhere in the middle of the dispersity mesh,
320  // and we update the value rather than reset it. Similarly for the
321  // normalization factor, which is stored as the final value in the
322  // results vector (one past the number of q values).
323  //
324  // The code differs slightly between opencl and dll since opencl is only
325  // seeing one q value (stored in the variable "this_result") while the dll
326  // version must loop over all q.
327  #ifdef USE_OPENCL
328    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
329    double this_result = (pd_start == 0 ? 0.0 : result[q_index]);
330  #else // !USE_OPENCL
331    double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
332    if (pd_start == 0) {
333      #ifdef USE_OPENMP
334      #pragma omp parallel for
335      #endif
336      for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0;
337    }
338//if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]);
339#endif // !USE_OPENCL
340
341
342// ====== macros to set up the parts of the loop =======
343/*
344Based on the level of the loop, uses C preprocessor magic to construct
345level-specific looping variables, including these from loop level 3:
346
347  int n3 : length of loop for mesh level 3
348  int i3 : current position in the loop for level 3, which is calculated
349       from a combination of pd_start, pd_stride[3] and pd_length[3].
350  int p3 : is the index into the parameter table for mesh level 3
351  double v3[] : pointer into dispersity array to values for loop 3
352  double w3[] : pointer into dispersity array to weights for loop 3
353  double weight3 : the product of weights from levels 3 and up, computed
354       as weight5*weight4*w3[i3].  Note that we need an outermost
355       value weight5 set to 1.0 for this to work properly.
356
357After expansion, the loop struction will look like the following:
358
359  // --- PD_INIT(4) ---
360  const int n4 = pd_length[4];
361  const int p4 = pd_par[4];
362  global const double *v4 = pd_value + pd_offset[4];
363  global const double *w4 = pd_weight + pd_offset[4];
364  int i4 = (pd_start/pd_stride[4])%n4;  // position in level 4 at pd_start
365
366  // --- PD_INIT(3) ---
367  const int n3 = pd_length[3];
368  ...
369  int i3 = (pd_start/pd_stride[3])%n3;  // position in level 3 at pd_start
370
371  PD_INIT(2)
372  PD_INIT(1)
373  PD_INIT(0)
374
375  // --- PD_OUTERMOST_WEIGHT(5) ---
376  const double weight5 = 1.0;
377
378  // --- PD_OPEN(4,5) ---
379  while (i4 < n4) {
380    parameter[p4] = v4[i4];  // set the value for pd parameter 4 at this mesh point
381    const double weight4 = w4[i4] * weight5;
382
383    // from PD_OPEN(3,4)
384    while (i3 < n3) {
385      parameter[p3] = v3[i3];  // set the value for pd parameter 3 at this mesh point
386      const double weight3 = w3[i3] * weight4;
387
388      PD_OPEN(3,2)
389      PD_OPEN(2,1)
390      PD_OPEN(0,1)
391
392      ... main loop body ...
393
394      ++step;  // increment counter representing position in dispersity mesh
395
396      PD_CLOSE(0)
397      PD_CLOSE(1)
398      PD_CLOSE(2)
399
400      // --- PD_CLOSE(3) ---
401      if (step >= pd_stop) break;
402      ++i3;
403    }
404    i3 = 0; // reset loop counter for next round through the loop
405
406    // --- PD_CLOSE(4) ---
407    if (step >= pd_stop) break;
408    ++i4;
409  }
410  i4 = 0; // reset loop counter even though no more rounds through the loop
411
412*/
413
414// Define looping variables
415#define PD_INIT(_LOOP) \
416  const int n##_LOOP = details->pd_length[_LOOP]; \
417  const int p##_LOOP = details->pd_par[_LOOP]; \
418  global const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \
419  global const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \
420  int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP;
421
422// Jump into the middle of the dispersity loop
423#define PD_OPEN(_LOOP,_OUTER) \
424  while (i##_LOOP < n##_LOOP) { \
425    local_values.vector[p##_LOOP] = v##_LOOP[i##_LOOP]; \
426    const double weight##_LOOP = w##_LOOP[i##_LOOP] * weight##_OUTER;
427
428// create the variable "weight#=1.0" where # is the outermost level+1 (=MAX_PD).
429#define _PD_OUTERMOST_WEIGHT(_n) const double weight##_n = 1.0;
430#define PD_OUTERMOST_WEIGHT(_n) _PD_OUTERMOST_WEIGHT(_n)
431
432// Close out the loop
433#define PD_CLOSE(_LOOP) \
434    if (step >= pd_stop) break; \
435    ++i##_LOOP; \
436  } \
437  i##_LOOP = 0;
438
439// The main loop body is essentially:
440//
441//    BUILD_ROTATION      // construct the rotation matrix qxy => qabc
442//    for each q
443//        FETCH_Q         // set qx,qy from the q input vector
444//        APPLY_ROTATION  // convert qx,qy to qa,qb,qc
445//        CALL_KERNEL     // scattering = Iqxy(qa, qb, qc, p1, p2, ...)
446//
447// Depending on the shape type (radial, axial, triaxial), the variables
448// and calling parameters will be slightly different.  These macros
449// capture the differences in one spot so the rest of the code is easier
450// to read.
451
452#if defined(CALL_IQ)
453  // unoriented 1D
454  double qk;
455  #define FETCH_Q() do { qk = q[q_index]; } while (0)
456  #define BUILD_ROTATION() do {} while(0)
457  #define APPLY_ROTATION() do {} while(0)
458  #define CALL_KERNEL() CALL_IQ(qk, local_values.table)
459
460#elif defined(CALL_IQ_A)
461  // unoriented 2D
462  double qx, qy;
463  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
464  #define BUILD_ROTATION() do {} while(0)
465  #define APPLY_ROTATION() do {} while(0)
466  #define CALL_KERNEL() CALL_IQ_A(sqrt(qx*qx+qy*qy), local_values.table)
467
468#elif defined(CALL_IQ_AC)
469  // oriented symmetric 2D
470  double qx, qy;
471  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
472  double qa, qc;
473  QACRotation rotation;
474  // theta, phi, dtheta, dphi are defined below in projection to avoid repeated code.
475  #define BUILD_ROTATION() qac_rotation(&rotation, theta, phi, dtheta, dphi);
476  #define APPLY_ROTATION() qac_apply(rotation, qx, qy, &qa, &qc)
477  #define CALL_KERNEL() CALL_IQ_AC(qa, qc, local_values.table)
478
479#elif defined(CALL_IQ_ABC)
480  // oriented asymmetric 2D
481  double qx, qy;
482  #define FETCH_Q() do { qx = q[2*q_index]; qy = q[2*q_index+1]; } while (0)
483  double qa, qb, qc;
484  QABCRotation rotation;
485  // theta, phi, dtheta, dphi are defined below in projection to avoid repeated code.
486  // psi and dpsi are only for IQ_ABC, so they are processed here.
487  const double psi = values[details->theta_par+4];
488  #define BUILD_ROTATION() qabc_rotation(&rotation, theta, phi, psi, dtheta, dphi, local_values.table.psi)
489  #define APPLY_ROTATION() qabc_apply(rotation, qx, qy, &qa, &qb, &qc)
490  #define CALL_KERNEL() CALL_IQ_ABC(qa, qb, qc, local_values.table)
491#endif
492
493// Doing jitter projection code outside the previous if block so that we don't
494// need to repeat the identical logic in the IQ_AC and IQ_ABC branches.  This
495// will become more important if we implement more projections, or more
496// complicated projections.
497#if defined(CALL_IQ) || defined(CALL_IQ_A)
498  #define APPLY_PROJECTION() const double weight=weight0
499#else // !spherosymmetric projection
500  // Grab the "view" angles (theta, phi, psi) from the initial parameter table.
501  const double theta = values[details->theta_par+2];
502  const double phi = values[details->theta_par+3];
503  // The "jitter" angles (dtheta, dphi, dpsi) are stored with the
504  // dispersity values and copied to the local parameter table as
505  // we go through the mesh.
506  double dtheta, dphi, weight;
507  #if PROJECTION == 1
508    #define APPLY_PROJECTION() do { \
509      dtheta = local_values.table.theta; \
510      dphi = local_values.table.phi; \
511      weight = fabs(cos(dtheta*M_PI_180)) * weight0; \
512    } while (0)
513  #elif PROJECTION == 2
514    #define APPLY_PROJECTION() do { \
515      dtheta = local_values.table.theta; \
516      dphi = local_values.table.phi; \
517      weight = weight0; \
518      if (dtheta != 90.0) dphi /= cos(dtheta*M_PI_180); \
519      else if (dphi != 0.0) weight = 0.; \
520      if (fabs(dphi) >= 180.) weight = 0.; \
521    } while (0)
522  #endif
523#endif // !spherosymmetric projection
524
525
526// ====== construct the loops =======
527
528// Pointers to the start of the dispersity and weight vectors, if needed.
529#if MAX_PD>0
530  global const double *pd_value = values + NUM_VALUES;
531  global const double *pd_weight = pd_value + details->num_weights;
532#endif
533
534// The variable "step" is the current position in the dispersity loop.
535// It will be incremented each time a new point in the mesh is accumulated,
536// and used to test whether we have reached pd_stop.
537int step = pd_start;
538
539// define looping variables
540#if MAX_PD>4
541  PD_INIT(4)
542#endif
543#if MAX_PD>3
544  PD_INIT(3)
545#endif
546#if MAX_PD>2
547  PD_INIT(2)
548#endif
549#if MAX_PD>1
550  PD_INIT(1)
551#endif
552#if MAX_PD>0
553  PD_INIT(0)
554#endif
555
556// open nested loops
557PD_OUTERMOST_WEIGHT(MAX_PD)
558#if MAX_PD>4
559  PD_OPEN(4,5)
560#endif
561#if MAX_PD>3
562  PD_OPEN(3,4)
563#endif
564#if MAX_PD>2
565  PD_OPEN(2,3)
566#endif
567#if MAX_PD>1
568  PD_OPEN(1,2)
569#endif
570#if MAX_PD>0
571  PD_OPEN(0,1)
572#endif
573
574
575//if (q_index==0) {printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n");}
576
577  // ====== loop body =======
578  #ifdef INVALID
579  if (!INVALID(local_values.table))
580  #endif
581  {
582     APPLY_PROJECTION();
583
584    // Accumulate I(q)
585    // Note: weight==0 must always be excluded
586    if (weight > cutoff) {
587      pd_norm += weight * CALL_VOLUME(local_values.table);
588      BUILD_ROTATION();
589
590#ifndef USE_OPENCL
591      // DLL needs to explicitly loop over the q values.
592      #ifdef USE_OPENMP
593      #pragma omp parallel for
594      #endif
595      for (q_index=0; q_index<nq; q_index++)
596#endif // !USE_OPENCL
597      {
598
599        FETCH_Q();
600        APPLY_ROTATION();
601
602        // ======= COMPUTE SCATTERING ==========
603        #if defined(MAGNETIC) && NUM_MAGNETIC > 0
604          // Compute the scattering from the magnetic cross sections.
605          double scattering = 0.0;
606          const double qsq = qx*qx + qy*qy;
607          if (qsq > 1.e-16) {
608            // TODO: what is the magnetic scattering at q=0
609            const double px = (qy*cos_mspin + qx*sin_mspin)/qsq;
610            const double py = (qy*sin_mspin - qx*cos_mspin)/qsq;
611
612            // loop over uu, ud real, du real, dd, ud imag, du imag
613            for (int xs=0; xs<6; xs++) {
614              const double xs_weight = spins[xs];
615              if (xs_weight > 1.e-8) {
616                // Since the cross section weight is significant, set the slds
617                // to the effective slds for this cross section, call the
618                // kernel, and add according to weight.
619                for (int sk=0; sk<NUM_MAGNETIC; sk++) {
620                  const int32_t mag_index = NUM_PARS+5 + 3*sk;
621                  const int32_t sld_index = slds[sk];
622                  const double mx = values[mag_index];
623                  const double my = values[mag_index+1];
624                  const double mz = values[mag_index+2];
625                  local_values.vector[sld_index] =
626                    mag_sld(xs, qx, qy, px, py, values[sld_index+2], mx, my, mz);
627                }
628                scattering += xs_weight * CALL_KERNEL();
629              }
630            }
631          }
632        #else  // !MAGNETIC
633          const double scattering = CALL_KERNEL();
634        #endif // !MAGNETIC
635//printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight0);
636
637        #ifdef USE_OPENCL
638          this_result += weight * scattering;
639        #else // !USE_OPENCL
640          result[q_index] += weight * scattering;
641        #endif // !USE_OPENCL
642      }
643    }
644  }
645
646// close nested loops
647++step;
648#if MAX_PD>0
649  PD_CLOSE(0)
650#endif
651#if MAX_PD>1
652  PD_CLOSE(1)
653#endif
654#if MAX_PD>2
655  PD_CLOSE(2)
656#endif
657#if MAX_PD>3
658  PD_CLOSE(3)
659#endif
660#if MAX_PD>4
661  PD_CLOSE(4)
662#endif
663
664// Remember the current result and the updated norm.
665#ifdef USE_OPENCL
666  result[q_index] = this_result;
667  if (q_index == 0) result[nq] = pd_norm;
668//if (q_index == 0) printf("res: %g/%g\n", result[0], pd_norm);
669#else // !USE_OPENCL
670  result[nq] = pd_norm;
671//printf("res: %g/%g\n", result[0], pd_norm);
672#endif // !USE_OPENCL
673
674// clear the macros in preparation for the next kernel
675#undef PD_INIT
676#undef PD_OPEN
677#undef PD_CLOSE
678#undef FETCH_Q
679#undef APPLY_PROJECTION
680#undef BUILD_ROTATION
681#undef APPLY_ROTATION
682#undef CALL_KERNEL
683}
Note: See TracBrowser for help on using the repository browser.