source: sasmodels/sasmodels/kernel_iq.c @ 9c490bc

core_shell_microgelscostrafo411magnetic_modelrelease_v0.94release_v0.95ticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since 9c490bc was 9c490bc, checked in by wojciech, 8 years ago

Some intermediate changes

  • Property mode set to 100644
File size: 15.2 KB
Line 
1
2/*
3    ##########################################################
4    #                                                        #
5    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
6    #   !!                                              !!   #
7    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
8    #   !!                                              !!   #
9    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
10    #                                                        #
11    ##########################################################
12*/
13
14/*
15The environment needs to provide the following #defines:
16
17   USE_OPENCL is defined if running in opencl
18   KERNEL declares a function to be available externally
19   KERNEL_NAME is the name of the function being declared
20   NPARS is the number of parameters in the kernel
21   PARAMETER_DECL is the declaration of the parameters to the kernel.
22
23       Cylinder:
24
25           #define PARAMETER_DECL \
26           double length; \
27           double radius; \
28           double sld; \
29           double sld_solvent
30
31       Note: scale and background are not included
32
33       Multi-shell cylinder (10 shell max):
34
35           #define PARAMETER_DECL \
36           double num_shells; \
37           double length; \
38           double radius[10]; \
39           double sld[10]; \
40           double sld_solvent
41
42   PARAMETER_CALL(var) is the declaration of a call to the kernel.
43
44       Cylinder:
45
46           #define PARAMETER_CALL(var) \
47           var.length, \
48           var.radius, \
49           var.sld, \
50           var.sld_solvent
51
52       Multi-shell cylinder:
53           #define PARAMETER_CALL(var) \
54           var.num_shells, \
55           var.length, \
56           var.radius, \
57           var.sld, \
58           var.sld_solvent
59
60   INVALID is a test for model parameters in the correct range
61
62       Cylinder:
63
64           #define INVALID(var) 0
65
66       BarBell:
67
68           #define INVALID(var) (var.bell_radius > var.radius)
69
70       Model with complicated constraints:
71
72           inline bool constrained(p1, p2, p3) { return expression; }
73           #define INVALID(var) constrained(var.p1, var.p2, var.p3)
74
75   IQ_FUNC could be Iq or Iqxy
76   IQ_PARS could be q[i] or q[2*i],q[2*i+1]
77
78Our design supports a limited number of polydispersity loops, wherein
79we need to cycle through the values of the polydispersity, calculate
80the I(q, p) for each combination of parameters, and perform a normalized
81weighted sum across all the weights.  Parameters may be passed to the
82underlying calculation engine as scalars or vectors, but the polydispersity
83calculator treats the parameter set as one long vector.
84
85Let's assume we have 6 parameters in the model, with two polydisperse::
86
87    0: scale        {scl = constant}
88    1: background   {bkg = constant}
89    5: length       {l = vector of 30pts}
90    4: radius       {r = vector of 10pts}
91    3: sld          {s = constant/(radius**2*length)}
92    2: sld_solvent  {s2 = constant}
93
94This generates the following call to the kernel (where x stands for an
95arbitrary value that is not used by the kernel evaluator):
96
97    NPARS = 4  // scale and background are in all models
98    problem {
99        pd_par = {5, 4, x, x}         // parameters *radius* and *length* vary
100        pd_length = {30, 10, 0, 0}    // *length* has more, so it is first
101        pd_offset = {10, 0, x, x}     // *length* starts at index 10 in weights
102        pd_stride = {1, 30, 300, 300} // cumulative product of pd length
103        pd_isvol = {1, 1, x, x}       // true if weight is a volume weight
104        par_offset = {2, 3, 303, 313}  // parameter offsets
105        par_coord = {0, 3, 2, 1} // bitmap of parameter dependencies
106        fast_coord_count = 2  // two parameters vary with *length* distribution
107        fast_coord_index = {5, 3, x, x, x, x}
108    }
109
110    weight = { l0, .., l29, r0, .., r9}
111    pars = { scl, bkg, l0, ..., l29, r0, r1, ..., r9,
112             s[l0,r0], ... s[l0,r9], s[l1,r0], ... s[l29,r9] , s2}
113
114    nq = 130
115    q = { q0, q1, ..., q130, x, x }  # pad to 8 element boundary
116    result = {r1, ..., r130, norm, vol, vol_norm, x, x, x, x, x, x, x}
117
118
119The polydisperse parameters are stored in as an array of parameter
120indices, one for each polydisperse parameter, stored in pd_par[n].
121Non-polydisperse parameters do not appear in this array. Each polydisperse
122parameter has a weight vector whose length is stored in pd_length[n],
123The weights are stored in a contiguous vector of weights for all
124parameters, with the starting position for the each parameter stored
125in pd_offset[n].  The values corresponding to the weights are stored
126together in a separate weights[] vector, with offset stored in
127par_offset[pd_par[n]]. Polydisperse parameters should be stored in
128decreasing order of length for highest efficiency.
129
130We limit the number of polydisperse dimensions to MAX_PD (currently 4).
131This cuts the size of the structure in half compared to allowing a
132separate polydispersity for each parameter.  This will help a little
133bit for models with large numbers of parameters, such as the onion model.
134
135Parameters may be coordinated.  That is, we may have the value of one
136parameter depend on a set of other parameters, some of which may be
137polydisperse.  For example, if sld is inversely proportional to the
138volume of a cylinder, and the length and radius are independently
139polydisperse, then for each combination of length and radius we need a
140separate value for the sld.  The caller must provide a coordination table
141for each parameter containing the value for each parameter given the
142value of the polydisperse parameters v1, v2, etc.  The tables for each
143parameter are arranged contiguously in a vector, with offset[k] giving the
144starting location of parameter k in the vector.  Each parameter defines
145coord[k] as a bit mask indicating which polydispersity parameters the
146parameter depends upon. Usually this is zero, indicating that the parameter
147is independent, but for the cylinder example given, the bits for the
148radius and length polydispersity parameters would both be set, the result
149being a (#radius x #length) table, or maybe a (#length x #radius) table
150if length comes first in the polydispersity table.
151
152NB: If we can guarantee that a compiler and OpenCL driver are available,
153we could instead create the coordination function on the fly for each
154parameter, saving memory and transfer time, but requiring a C compiler
155as part of the environment.
156
157In ordering the polydisperse parameters by decreasing length we can
158iterate over the longest dispersion weight vector first.  All parameters
159coordinated with this weight vector (the 'fast' parameters), can be
160updated with a simple increment to the next position in the parameter
161value table.  The indices of these parameters is stored in fast_coord_index[],
162with fast_coord_count being the number of fast parameters.  A total
163of NPARS slots is allocated to allow for the case that all parameters
164are coordinated with the fast index, though this will likely be mostly
165empty.  When the fast increment count reaches the end of the weight
166vector, then the index of the second polydisperse parameter must be
167incremented, and all of its coordinated parameters updated.  Because this
168operation is not in the inner loop, a slower algorithm can be used.
169
170If there is no polydispersity we pretend that it is polydisperisty with one
171parameter, pd_start=0 and pd_stop=1.  We may or may not short circuit the
172calculation in this case, depending on how much time it saves.
173
174The problem details structure can be allocated and sent in as an integer
175array using the read-only flag.  This allows us to copy it once per fit
176along with the weights vector, since features such as the number of
177polydisperity elements per pd parameter or the coordinated won't change
178between function evaluations.  A new parameter vector is sent for
179each I(q) evaluation.
180
181To protect against expensive evaluations taking all the GPU resource
182on large fits, the entire polydispersity will not be computed at once.
183Instead, a start and stop location will be sent, indicating where in the
184polydispersity loop the calculation should start and where it should
185stop.  We can do this for arbitrary start/stop points since we have
186unwound the nested loop.  Instead, we use the same technique as array
187index translation, using div and mod to figure out the i,j,k,...
188indices in the virtual nested loop.
189
190The results array will be initialized to zero for polydispersity loop
191entry zero, and preserved between calls to [start, stop] so that the
192results accumulate by the time the loop has completed.  Background and
193scale will be applied when the loop reaches the end.  This does require
194that the results array be allocated read-write, which is less efficient
195for the GPU, but it makes the calling sequence much more manageable.
196
197Scale and background cannot be coordinated with other polydisperse parameters
198
199Cutoff paramater is basically used to restrict the region where integration
200is peformed i.e. polydispersity hypercude is limitted spheres.
201
202*/
203
204#define MAX_PD 4  // MAX_PD is the max number of polydisperse parameters
205#define PD_2N 16  // PD_2N is the size of the coordination step table
206
207typedef struct {
208    int pd_par[MAX_PD];     // index of the nth polydispersity variable
209    int pd_length[MAX_PD];  // length of the nth polydispersity weight vector
210    int pd_offset[MAX_PD];  // offset of pd weights in the par & weight vector
211    int pd_stride[MAX_PD];  // stride to move to the next index at this level
212    int pd_isvol[MAX_PD];   // True if parameter is a volume weighting parameter
213    int par_offset[NPARS];  // offset of par values in the par & weight vector
214    int par_coord[NPARS];   // polydispersity coordination bitvector
215    int fast_coord_count;   // number of parameters coordinated with pd 1
216    int fast_coord_index[NPARS]; // index of the fast coordination parameters
217} ProblemDetails;
218
219typedef struct {
220    PARAMETER_DECL;
221} ParameterBlock;
222
223#define KERNEL_NAME test_Iq
224#define FULL_KERNEL_NAME test_Iq
225#define IQ_FUNC Iq
226
227#define IQ_PARAMETERS ignored
228#define IQ_FIXED_PARAMETER_DECLARATIONS const double scale, \
229    const double background, \
230    const double ignored
231#define IQ_PARAMETER_DECLARATIONS double ignored
232#define IQXY_KERNEL_NAME bessel_Iqxy
233#define IQXY_PARAMETERS ignored
234#define IQXY_FIXED_PARAMETER_DECLARATIONS const double scale, \
235    const double background, \
236    const double ignored
237#define IQXY_PARAMETER_DECLARATIONS double ignored
238
239
240void FULL_KERNEL_NAME(
241    int nq,                 // number of q values
242    global const ProblemDetails *problem,
243    global const double *weights,
244    global const double *pars,
245    global const double *q, // nq q values, with padding to boundary
246    global double *result,  // nq return values, again with padding
247    const double cutoff,    // cutoff in the polydispersity weight product
248    const int pd_start,     // where we are in the polydispersity loop
249    const int pd_stop,      // where we are stopping in the polydispersity loop
250    )
251{
252
253  // Storage for the current parameter values.  These will be updated as we
254  // walk the polydispersity cube.
255  local ParameterBlock local_pars;  // current parameter values
256  const double *parvec = &local_pars;  // Alias named parameters with a vector
257
258  local int offset[NPARS-2];
259
260#if defined(USE_SHORTCUT_OPTIMIZATION)
261  if (pd_length[0] == 1) {
262    // Shouldn't need to copy!!
263    for (int k=0; k < NPARS; k++) {
264      parvec[k] = pars[k+2];  // skip scale and background
265    }
266
267    #ifdef USE_OPENMP
268    #pragma omp parallel for
269    #endif
270    for (int i=0; i < nq; i++) {
271    {
272      const double scattering = IQ_FUNC(IQ_PARS, IQ_PARAMETERS);
273      result[i] += pars[0]*scattering + pars[1];
274    }
275    return;
276  }
277#endif
278
279
280  // Since we are no longer looping over the entire polydispersity hypercube
281  // for each q, we need to track the normalization values for each q in a
282  // separate work vector.
283  double norm;   // contains sum over weights
284  double vol; // contains sum over volume
285  double norm_vol; // contains weights over volume
286
287  // Initialize the results to zero
288  if (pd_start == 0) {
289    norm_vol = 0.0;
290    norm = 0.0;
291    vol = 0.0;
292
293    #ifdef USE_OPENMP
294    #pragma omp parallel for
295    #endif
296    for (int i=0; i < nq; i++) {
297      result[i] = 0.0;
298    }
299  } else {
300    //Pulling values from previous segment
301    norm = result[nq];
302    vol = result[nq+1];
303    norm_vol = results[nq+2];
304  }
305
306  // Location in the polydispersity hypercube, one index per dimension.
307  local int pd_index[PD_MAX];
308
309  // Trigger the reset behaviour that happens at the end the fast loop
310  // by setting the initial index >= weight vector length.
311  pd_index[0] = pd_length[0];
312
313  // need product of weights at every Iq calc, so keep product of
314  // weights from the outer loops so that weight = partial_weight * fast_weight
315  double partial_weight = NAN; // product of weight w4*w3*w2 but not w1
316  double partial_volweight = NAN;
317  double weight = 1.0;        // set to 1 in case there are no weights
318  double vol_weight = 1.0;    // set to 1 in case there are no vol weights
319
320  // Loop over the weights then loop over q, accumulating values
321  for (int loop_index=pd_start; loop_index < pd_stop; loop_index++) {
322    // check if indices need to be updated
323    if (pd_index[0] >= pd_length[0]) {
324      pd_index[0] = loop_index%pd_length[0];
325      partial_weight = 1.0;
326      partial_volweight = 1.0;
327      for (int k=1; k < MAX_PD; k++) {
328        pd_index[k] = (loop_index%pd_length[k])/pd_stride[k];
329        const double wi = weights[pd_offset[0]+pd_index[0]];
330        partial_weight *= wi;
331        if (pd_isvol[k]) partial_volweight *= wi;
332      }
333      weight = partial_weight * weights[pd_offset[0]+pd_index[0]]
334      for (int k=0; k < NPARS; k++) {
335        int coord = par_coord[k];
336        int this_offset = par_offset[k];
337        int block_size = 1;
338        for (int bit=0; bit < MAX_PD && coord != 0; bit++) {
339          if (coord&1) {
340              this_offset += block_size * pd_index[bit];
341              block_size *= pd_length[bit];
342          }
343          coord /= 2;
344        }
345        offset[k] = this_offset;
346        parvec[k] = pars[this_offset];
347      }
348    } else {
349      pd_index[0] += 1;
350      const double wi = weights[pd_offset[0]+pd_index[0]];
351      weight = partial_weight*wi;
352      if (pd_isvol[0]) vol_weight *= wi;
353      for (int k=0; k < problem->fast_coord_count; k++) {
354        parvec[ fast_coord_index[k]]
355            = pars[offset[fast_coord_index[k]] + pd_index[0]];
356      }
357    }
358    #ifdef INVALID
359    if (INVALID(local_pars)) continue;
360    #endif
361    if (weight > cutoff) {
362      norm += weight;
363      vol += vol_weight * form_volume(VOLUME_PARAMETERS);
364      norm_vol += vol_weight;
365
366      #ifdef USE_OPENMP
367      #pragma omp parallel for
368      #endif
369      for (int i=0; i < nq; i++) {
370      {
371        const double scattering = IQ_FUNC(IQ_PARS, IQ_PARAMETERS);
372        //const double scattering = Iq(q[i], IQ_PARAMETERS);
373        result[i] += weight*scattering;
374      }
375  }
376  //Makes a normalization avialable for the next round
377  result[nq] = norm;
378  result[nq+1] = vol;
379  result[nq+2] = norm_vol;
380
381  //End of the PD loop we can normalize
382  if (pd_stop == pd_stride[MAX_PD-1]) {}
383    #ifdef USE_OPENMP
384    #pragma omp parallel for
385    #endif
386    for (int i=0; i < nq; i++) {
387      if (vol*norm_vol != 0.0) {
388        result[i] *= norm_vol/vol;
389      }
390        result[i] = pars[0]*result[i]/norm + pars[1];
391    }
392  }
393}
394}
395}
Note: See TracBrowser for help on using the repository browser.