source: sasmodels/sasmodels/kernel_iq.c @ b32caab

core_shell_microgelscostrafo411magnetic_modelticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since b32caab was bde38b5, checked in by Paul Kienzle <pkienzle@…>, 8 years ago

simplify kernel calling

  • Property mode set to 100644
File size: 11.7 KB
RevLine 
[2e44ac7]1
2/*
3    ##########################################################
4    #                                                        #
5    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
6    #   !!                                              !!   #
7    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
8    #   !!                                              !!   #
9    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
10    #                                                        #
11    ##########################################################
12*/
13
[03cac08]14#ifndef _PAR_BLOCK_ // protected block so we can include this code twice.
15#define _PAR_BLOCK_
[2e44ac7]16
17typedef struct {
[60eab2a]18#if MAX_PD > 0
[a6f9577]19    int32_t pd_par[MAX_PD];     // id of the nth polydispersity variable
[5cf3c33]20    int32_t pd_length[MAX_PD];  // length of the nth polydispersity weight vector
[0a7e5eb4]21    int32_t pd_offset[MAX_PD];  // offset of pd weights in the value & weight vector
[5cf3c33]22    int32_t pd_stride[MAX_PD];  // stride to move to the next index at this level
[60eab2a]23#endif // MAX_PD > 0
[bde38b5]24    int32_t num_eval;           // total number of voxels in hypercube
25    int32_t num_weights;        // total length of the weights vector
[5ff1b03]26    int32_t num_active;         // number of non-trivial pd loops
[0a7e5eb4]27    int32_t theta_par;          // id of spherical correction variable
[2e44ac7]28} ProblemDetails;
29
[bde38b5]30// Intel HD 4000 needs private arrays to be a multiple of 4 long
[2e44ac7]31typedef struct {
[56547a8]32    PARAMETER_TABLE
[bde38b5]33} ParameterTable;
34typedef union {
35    ParameterTable table;
36    double vector[4*((NUM_PARS+3)/4)];
[2e44ac7]37} ParameterBlock;
[9eb3632]38#endif // _PAR_BLOCK_
[03cac08]39
[32e3c9b]40
[a4280bd]41#if defined(MAGNETIC) && NUM_MAGNETIC>0
42
[32e3c9b]43// Return value restricted between low and high
44static double clip(double value, double low, double high)
45{
[b966a96]46  return (value < low ? low : (value > high ? high : value));
[32e3c9b]47}
48
49// Compute spin cross sections given in_spin and out_spin
50// To convert spin cross sections to sld b:
51//     uu * (sld - m_sigma_x);
52//     dd * (sld + m_sigma_x);
53//     ud * (m_sigma_y + 1j*m_sigma_z);
54//     du * (m_sigma_y - 1j*m_sigma_z);
[a4280bd]55static void set_spins(double in_spin, double out_spin, double spins[4])
[32e3c9b]56{
[b966a96]57  in_spin = clip(in_spin, 0.0, 1.0);
58  out_spin = clip(out_spin, 0.0, 1.0);
[a4280bd]59  spins[0] = sqrt(sqrt((1.0-in_spin) * (1.0-out_spin))); // dd
60  spins[1] = sqrt(sqrt((1.0-in_spin) * out_spin));       // du
61  spins[2] = sqrt(sqrt(in_spin * (1.0-out_spin)));       // ud
62  spins[3] = sqrt(sqrt(in_spin * out_spin));             // uu
63}
64
65static double mag_sld(double qx, double qy, double p,
66                       double mx, double my, double sld)
67{
68    const double perp = qy*mx - qx*my;
69    return sld + perp*p;
[32e3c9b]70}
71
[9eb3632]72#endif // MAGNETIC
[2e44ac7]73
[03cac08]74kernel
75void KERNEL_NAME(
[5cf3c33]76    int32_t nq,                 // number of q values
77    const int32_t pd_start,     // where we are in the polydispersity loop
78    const int32_t pd_stop,      // where we are stopping in the polydispersity loop
[6e7ff6d]79    global const ProblemDetails *details,
[9eb3632]80    global const double *values,
[2e44ac7]81    global const double *q, // nq q values, with padding to boundary
[9eb3632]82    global double *result,  // nq+1 return values, again with padding
[303d8d6]83    const double cutoff     // cutoff in the polydispersity weight product
[2e44ac7]84    )
85{
[10ddb64]86  // Storage for the current parameter values.  These will be updated as we
[bde38b5]87  // walk the polydispersity cube.
[9eb3632]88  ParameterBlock local_values;
[2e44ac7]89
[a4280bd]90#if defined(MAGNETIC) && NUM_MAGNETIC>0
[bde38b5]91  // Location of the sld parameters in the parameter vector.
[9eb3632]92  // These parameters are updated with the effective sld due to magnetism.
[a4280bd]93  #if NUM_MAGNETIC > 3
[9eb3632]94  const int32_t slds[] = { MAGNETIC_PARS };
[a4280bd]95  #endif
[32e3c9b]96
[9eb3632]97  // TODO: could precompute these outside of the kernel.
[32e3c9b]98  // Interpret polarization cross section.
[a4280bd]99  //     up_frac_i = values[NUM_PARS+2];
100  //     up_frac_f = values[NUM_PARS+3];
101  //     up_angle = values[NUM_PARS+4];
102  double spins[4];
[32e3c9b]103  double cos_mspin, sin_mspin;
[a4280bd]104  set_spins(values[NUM_PARS+2], values[NUM_PARS+3], spins);
105  SINCOS(-values[NUM_PARS+4]*M_PI_180, sin_mspin, cos_mspin);
[9eb3632]106#endif // MAGNETIC
[3044216]107
[9eb3632]108  // Fill in the initial variables
109  //   values[0] is scale
110  //   values[1] is background
111  #ifdef USE_OPENMP
112  #pragma omp parallel for
113  #endif
[a4280bd]114  for (int i=0; i < NUM_PARS; i++) {
[bde38b5]115    local_values.vector[i] = values[2+i];
116//printf("p%d = %g\n",i, local_values.vector[i]);
[9eb3632]117  }
[bde38b5]118//printf("NUM_VALUES:%d  NUM_PARS:%d  MAX_PD:%d\n", NUM_VALUES, NUM_PARS, MAX_PD);
119//printf("start:%d  stop:%d\n", pd_start, pd_stop);
[f2f67a6]120
[bde38b5]121  double pd_norm = (pd_start == 0 ? 0.0 : result[nq]);
[9eb3632]122  if (pd_start == 0) {
[2e44ac7]123    #ifdef USE_OPENMP
124    #pragma omp parallel for
125    #endif
[9eb3632]126    for (int q_index=0; q_index < nq; q_index++) result[q_index] = 0.0;
[2e44ac7]127  }
[9eb3632]128//printf("start %d %g %g\n", pd_start, pd_norm, result[0]);
129
[7b7da6b]130#if MAX_PD>0
[bde38b5]131  global const double *pd_value = values + NUM_VALUES;
132  global const double *pd_weight = pd_value + details->num_weights;
[7b7da6b]133#endif
[9eb3632]134
135  // Jump into the middle of the polydispersity loop
136#if MAX_PD>4
137  int n4=details->pd_length[4];
138  int i4=(pd_start/details->pd_stride[4])%n4;
139  const int p4=details->pd_par[4];
140  global const double *v4 = pd_value + details->pd_offset[4];
141  global const double *w4 = pd_weight + details->pd_offset[4];
142#endif
143#if MAX_PD>3
144  int n3=details->pd_length[3];
145  int i3=(pd_start/details->pd_stride[3])%n3;
146  const int p3=details->pd_par[3];
147  global const double *v3 = pd_value + details->pd_offset[3];
148  global const double *w3 = pd_weight + details->pd_offset[3];
149//printf("offset %d: %d %d\n", 3, details->pd_offset[3], NUM_VALUES);
150#endif
151#if MAX_PD>2
152  int n2=details->pd_length[2];
153  int i2=(pd_start/details->pd_stride[2])%n2;
154  const int p2=details->pd_par[2];
155  global const double *v2 = pd_value + details->pd_offset[2];
156  global const double *w2 = pd_weight + details->pd_offset[2];
157#endif
158#if MAX_PD>1
159  int n1=details->pd_length[1];
160  int i1=(pd_start/details->pd_stride[1])%n1;
161  const int p1=details->pd_par[1];
162  global const double *v1 = pd_value + details->pd_offset[1];
163  global const double *w1 = pd_weight + details->pd_offset[1];
164#endif
165#if MAX_PD>0
166  int n0=details->pd_length[0];
167  int i0=(pd_start/details->pd_stride[0])%n0;
168  const int p0=details->pd_par[0];
169  global const double *v0 = pd_value + details->pd_offset[0];
170  global const double *w0 = pd_weight + details->pd_offset[0];
[bde38b5]171//printf("w0:%p, values:%p, diff:%ld, %d\n",w0,values,(w0-values), NUM_VALUES);
[9eb3632]172#endif
[2e44ac7]173
[5ff1b03]174
[9eb3632]175#if MAX_PD>0
[0f00d95]176  const int theta_par = details->theta_par;
[9eb3632]177  const int fast_theta = (theta_par == p0);
178  const int slow_theta = (theta_par >= 0 && !fast_theta);
[0f00d95]179  double spherical_correction = 1.0;
[32e3c9b]180#else
[0f00d95]181  // Note: if not polydisperse the weights cancel and we don't need the
182  // spherical correction.
183  const double spherical_correction = 1.0;
[32e3c9b]184#endif
[3044216]185
[9eb3632]186  int step = pd_start;
[ae2b6b5]187
[9eb3632]188#if MAX_PD>4
189  const double weight5 = 1.0;
190  while (i4 < n4) {
[bde38b5]191    local_values.vector[p4] = v4[i4];
[9eb3632]192    double weight4 = w4[i4] * weight5;
[bde38b5]193//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 4, p4, i4, n4, local_values.vector[p4], weight4);
[9eb3632]194#elif MAX_PD>3
195    const double weight4 = 1.0;
196#endif
197#if MAX_PD>3
198  while (i3 < n3) {
[bde38b5]199    local_values.vector[p3] = v3[i3];
[9eb3632]200    double weight3 = w3[i3] * weight4;
[bde38b5]201//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 3, p3, i3, n3, local_values.vector[p3], weight3);
[9eb3632]202#elif MAX_PD>2
203    const double weight3 = 1.0;
204#endif
205#if MAX_PD>2
206  while (i2 < n2) {
[bde38b5]207    local_values.vector[p2] = v2[i2];
[9eb3632]208    double weight2 = w2[i2] * weight3;
[bde38b5]209//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 2, p2, i2, n2, local_values.vector[p2], weight2);
[9eb3632]210#elif MAX_PD>1
211    const double weight2 = 1.0;
212#endif
213#if MAX_PD>1
214  while (i1 < n1) {
[bde38b5]215    local_values.vector[p1] = v1[i1];
[9eb3632]216    double weight1 = w1[i1] * weight2;
[bde38b5]217//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 1, p1, i1, n1, local_values.vector[p1], weight1);
[9eb3632]218#elif MAX_PD>0
219    const double weight1 = 1.0;
220#endif
221#if MAX_PD>0
[0f00d95]222  if (slow_theta) { // Theta is not in inner loop
[bde38b5]223    spherical_correction = fmax(fabs(cos(M_PI_180*local_values.vector[theta_par])), 1.e-6);
[0f00d95]224  }
[9eb3632]225  while(i0 < n0) {
[bde38b5]226    local_values.vector[p0] = v0[i0];
[9eb3632]227    double weight0 = w0[i0] * weight1;
[bde38b5]228//printf("step:%d level %d: p:%d i:%d n:%d value:%g weight:%g\n", step, 0, p0, i0, n0, local_values.vector[p0], weight0);
[9eb3632]229    if (fast_theta) { // Theta is in inner loop
[bde38b5]230      spherical_correction = fmax(fabs(cos(M_PI_180*local_values.vector[p0])), 1.e-6);
[5ff1b03]231    }
[9eb3632]232#else
233    const double weight0 = 1.0;
234#endif
[5ff1b03]235
[bde38b5]236//printf("step:%d of %d, pars:",step,pd_stop); for (int i=0; i < NUM_PARS; i++) printf("p%d=%g ",i, local_values.vector[i]); printf("\n");
[9eb3632]237//printf("sphcor: %g\n", spherical_correction);
[ae2b6b5]238
[3044216]239    #ifdef INVALID
[bde38b5]240    if (!INVALID(local_values.table))
[3044216]241    #endif
[9eb3632]242    {
243      // Accumulate I(q)
244      // Note: weight==0 must always be excluded
245      if (weight0 > cutoff) {
[a3a0c5c]246        // spherical correction is set at a minimum of 1e-6, otherwise there
247        // would be problems looking at models with theta=90.
[9eb3632]248        const double weight = weight0 * spherical_correction;
[bde38b5]249        pd_norm += weight * CALL_VOLUME(local_values.table);
[9eb3632]250
251        #ifdef USE_OPENMP
252        #pragma omp parallel for
253        #endif
254        for (int q_index=0; q_index<nq; q_index++) {
[a4280bd]255#if defined(MAGNETIC) && NUM_MAGNETIC > 0
[9eb3632]256          const double qx = q[2*q_index];
257          const double qy = q[2*q_index+1];
258          const double qsq = qx*qx + qy*qy;
259
260          // Constant across orientation, polydispersity for given qx, qy
[a4280bd]261          double scattering = 0.0;
262          // TODO: what is the magnetic scattering at q=0
[9eb3632]263          if (qsq > 1.e-16) {
[bde38b5]264            double p[4];  // dd, du, ud, uu
[a4280bd]265            p[0] = (qy*cos_mspin + qx*sin_mspin)/qsq;
266            p[3] = -p[0];
267            p[1] = p[2] = (qy*sin_mspin - qx*cos_mspin)/qsq;
[9eb3632]268
[a4280bd]269            for (int index=0; index<4; index++) {
270              const double xs = spins[index];
271              if (xs > 1.e-8) {
272                const int spin_flip = (index==1) || (index==2);
273                const double pk = p[index];
274                for (int axis=0; axis<=spin_flip; axis++) {
275                  #define M1 NUM_PARS+5
276                  #define M2 NUM_PARS+8
277                  #define M3 NUM_PARS+13
278                  #define SLD(_M_offset, _sld_offset) \
[bde38b5]279                      local_values.vector[_sld_offset] = xs * (axis \
[a4280bd]280                      ? (index==1 ? -values[_M_offset+2] : values[_M_offset+2]) \
281                      : mag_sld(qx, qy, pk, values[_M_offset], values[_M_offset+1], \
282                                (spin_flip ? 0.0 : values[_sld_offset+2])))
283                  #if NUM_MAGNETIC==1
284                      SLD(M1, MAGNETIC_PAR1);
285                  #elif NUM_MAGNETIC==2
286                      SLD(M1, MAGNETIC_PAR1);
287                      SLD(M2, MAGNETIC_PAR2);
288                  #elif NUM_MAGNETIC==3
289                      SLD(M1, MAGNETIC_PAR1);
290                      SLD(M2, MAGNETIC_PAR2);
291                      SLD(M3, MAGNETIC_PAR3);
292                  #else
293                  for (int sk=0; sk<NUM_MAGNETIC; sk++) {
294                      SLD(M1+3*sk, slds[sk]);
295                  }
296                  #endif
[bde38b5]297                  scattering += CALL_IQ(q, q_index, local_values.table);
[a4280bd]298                }
299              }
[9eb3632]300            }
[32e3c9b]301          }
[9eb3632]302#else  // !MAGNETIC
[bde38b5]303          const double scattering = CALL_IQ(q, q_index, local_values.table);
[9eb3632]304#endif // !MAGNETIC
305//printf("q_index:%d %g %g %g %g\n",q_index, scattering, weight, spherical_correction, weight0);
306          result[q_index] += weight * scattering;
[32e3c9b]307        }
[3044216]308      }
[03cac08]309    }
[9eb3632]310    ++step;
311#if MAX_PD>0
312    if (step >= pd_stop) break;
313    ++i0;
[2e44ac7]314  }
[9eb3632]315  i0 = 0;
316#endif
317#if MAX_PD>1
318    if (step >= pd_stop) break;
319    ++i1;
320  }
321  i1 = 0;
322#endif
323#if MAX_PD>2
324    if (step >= pd_stop) break;
325    ++i2;
[2e44ac7]326  }
[9eb3632]327  i2 = 0;
328#endif
329#if MAX_PD>3
330    if (step >= pd_stop) break;
331    ++i3;
332  }
333  i3 = 0;
334#endif
335#if MAX_PD>4
336    if (step >= pd_stop) break;
337    ++i4;
338  }
339  i4 = 0;
340#endif
[f2f67a6]341
[9eb3632]342//printf("res: %g/%g\n", result[0], pd_norm);
[f2f67a6]343  // Remember the updated norm.
[a738209]344  result[nq] = pd_norm;
[2e44ac7]345}
Note: See TracBrowser for help on using the repository browser.