source: sasmodels/sasmodels/kernel_iq.c @ 3044216

core_shell_microgelscostrafo411magnetic_modelrelease_v0.94release_v0.95ticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since 3044216 was 3044216, checked in by wojciech, 8 years ago

Polydispersity loop proototype

  • Property mode set to 100644
File size: 12.8 KB
Line 
1
2/*
3    ##########################################################
4    #                                                        #
5    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
6    #   !!                                              !!   #
7    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
8    #   !!                                              !!   #
9    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
10    #                                                        #
11    ##########################################################
12*/
13
14/*
15The environment needs to provide the following #defines
16   USE_OPENCL is defined if running in opencl
17   KERNEL declares a function to be available externally
18   KERNEL_NAME is the name of the function being declared
19   NPARS is the number of parameters in the kernel
20   PARAMETER_TABLE is the declaration of the parameters to the kernel.
21
22       Cylinder:
23
24           #define PARAMETER_TABLE \
25           double length; \
26           double radius; \
27           double sld; \
28           double sld_solvent
29
30       Multi-shell cylinder (10 shell max):
31
32           #define PARAMETER_DECL \
33           double num_shells; \
34           double length; \
35           double radius[10]; \
36           double sld[10]; \
37           double sld_solvent
38
39   PARAMETER_CALL(var) is the declaration of a call to the kernel.
40
41       Cylinder:
42
43           #define PARAMETER_CALL(var) \
44           var.length, \
45           var.radius, \
46           var.sld, \
47           var.sld_solvent
48
49       Multi-shell cylinder:
50           #define PARAMETER_CALL(var) \
51           var.num_shells, \
52           var.length, \
53           var.radius, \
54           var.sld, \
55           var.sld_solvent
56
57   INVALID is a test for model parameters in the correct range
58
59       Cylinder:
60
61           #define INVALID(var) 0
62
63       BarBell:
64
65           #define INVALID(var) (var.bell_radius > var.radius)
66
67Our design supports a limited number of polydispersity loops, wherein
68we need to cycle through the values of the polydispersity, calculate
69the I(q, p) for each combination of parameters, and perform a normalized
70weighted sum across all the weights.  Parameters may be passed to the
71underlying calculation engine as scalars or vectors, but the polydispersity
72calculator treats the parameter set as one long vector.
73
74Let's assume we have 6 polydisperse parameters
750 : sld_solvent {s1 = constant}
761: sld_material {s2 = constant}
772: radius   {r = vector of 10pts}
783: lenghth {l = vector of 30pts}
794: background {b = constant}
805: scale {s = constant}
81
82The polydisperse parameters are stored in as an array of parameter
83indices, one for each polydisperse parameter, stored in pd_par[n].
84For the above mentioned expample that will give pd_par = {3, 2, x, x},
85where x stands for abitrary number and 3 corresponds to longest parameter (lenght).
86Non-polydisperse parameters do not appear in this array. Each polydisperse
87parameter has a weight vector whose length is stored in pd_length[n],
88In our case pd_length = {30,10,0,0}. The weights are stored in
89a contiguous vector of weights for all parameters, with the starting position
90for the each parameter stored in pd_offset[n] (pd_offset = {10,0,x,x}.
91The values corresponding to the weights are stored together in a
92separate weights[] vector, with offset stored in par_offset[pd_par[n]].
93In the above mentioned example weight = {r0, .., r9, l0, .., l29}.
94Polydisperse parameters should be stored in decreasing order of length
95for highest efficiency.
96
97We limit the number of polydisperse dimensions to MAX_PD (currently 4).
98This cuts the size of the structure in half compared to allowing a
99separate polydispersity for each parameter.  This will help a little
100bit for models with large numbers of parameters, such as the onion model.
101
102Parameters may be coordinated.  That is, we may have the value of one
103parameter depend on a set of other parameters, some of which may be
104polydisperse.  For example, if sld is inversely proportional to the
105volume of a cylinder, and the length and radius are independently
106polydisperse, then for each combination of length and radius we need a
107separate value for the sld.  The caller must provide a coordination table
108for each parameter containing the value for each parameter given the
109value of the polydisperse parameters v1, v2, etc.  The tables for each
110parameter are arranged contiguously in a vector, with offset[k] giving the
111starting location of parameter k in the vector.  Each parameter defines
112coord[k] as a bit mask indicating which polydispersity parameters the
113parameter depends upon. Usually this is zero, indicating that the parameter
114is independent, but for the cylinder example given, the bits for the
115radius and length polydispersity parameters would both be set, the result
116being a (#radius x #length) table, or maybe a (#length x #radius) table
117if length comes first in the polydispersity table.
118
119NB: If we can guarantee that a compiler and OpenCL driver are available,
120we could instead create the coordination function on the fly for each
121parameter, saving memory and transfer time, but requiring a C compiler
122as part of the environment.
123
124In ordering the polydisperse parameters by decreasing length we can
125iterate over the longest dispersion weight vector first.  All parameters
126coordinated with this weight vector (the 'fast' parameters), can be
127updated with a simple increment to the next position in the parameter
128value table.  The indices of these parameters is stored in fast_coord_index[],
129with fast_coord_count being the number of fast parameters.  A total
130of NPARS slots is allocated to allow for the case that all parameters
131are coordinated with the fast index, though this will likely be mostly
132empty.  When the fast increment count reaches the end of the weight
133vector, then the index of the second polydisperse parameter must be
134incremented, and all of its coordinated parameters updated.  Because this
135operation is not in the inner loop, a slower algorithm can be used.
136
137If there is no polydispersity we pretend that it is polydisperisty with one
138parameter.
139
140The problem details structure can be allocated and sent in as an integer
141array using the read-only flag.  This allows us to copy it once per fit
142along with the weights vector, since features such as the number of
143polydisperity elements per pd parameter or the coordinated won't change
144between function evaluations.  A new parameter vector is sent for
145each I(q) evaluation.
146
147To protect against expensive evaluations taking all the GPU resource
148on large fits, the entire polydispersity will not be computed at once.
149Instead, a start and stop location will be sent, indicating where in the
150polydispersity loop the calculation should start and where it should
151stop.  We can do this for arbitrary start/stop points since we have
152unwound the nested loop.  Instead, we use the same technique as array
153index translation, using div and mod to figure out the i,j,k,...
154indices in the virtual nested loop.
155
156The results array will be initialized to zero for polydispersity loop
157entry zero, and preserved between calls to [start, stop] so that the
158results accumulate by the time the loop has completed.  Background and
159scale will be applied when the loop reaches the end.  This does require
160that the results array be allocated read-write, which is less efficient
161for the GPU, but it makes the calling sequence much more manageable.
162
163TODO: cutoff
164*/
165
166#define MAX_PD 4  // MAX_PD is the max number of polydisperse parameters
167#define PD_2N 16  // PD_2N is the size of the coordination step table
168
169typedef struct {
170    int pd_par[MAX_PD];     // index of the nth polydispersity variable
171    int pd_length[MAX_PD];  // length of the nth polydispersity weight vector
172    int pd_offset[MAX_PD];  // offset of pd weights in the par & weight vector
173    int pd_stride[MAX_PD];  // stride to move to the next index at this level
174    int par_offset[NPARS];  // offset of par values in the par & weight vector
175    int par_coord[NPARS];   // polydispersity coordination bitvector
176    int fast_coord_count;   // number of parameters coordinated with pd 1
177    int fast_coord_index[NPARS]; // index of the fast coordination parameters
178} ProblemDetails;
179
180typedef struct {
181    PARAMETER_DECL;
182} ParameterBlock;
183
184
185KERNEL
186void KERNEL_NAME(
187    int nq,                 // number of q values
188    global const ProblemDetails *problem,
189    global const double *weights,
190    global const double *pars,
191    global const double *q, // nq q values, with padding to boundary
192    global double *result,  // nq return values, again with padding
193    global int *offset,     // nq+padding space for current parameter index
194    const double cutoff,    // cutoff in the polydispersity weight product
195    const int pd_start,     // where we are in the polydispersity loop
196    const int pd_stop,      // where we are stopping in the polydispersity loop
197    )
198{
199
200  // Storage for the current parameter values.  These will be updated as we
201  // walk the polydispersity cube.
202  local ParameterBlock local_pars;
203  const double *parvec = &local_pars;  // Alias named parameters with a vector
204
205#if defined(USE_SHORTCUT_OPTIMIZATION)
206  if (pd_length[0] == 1) {
207    // Shouldn't need to copy!!
208    for (int k=0; k < NPARS; k++) {
209      parvec[k] = pars[k];
210    }
211
212    #ifdef USE_OPENMP
213    #pragma omp parallel for
214    #endif
215    for (int i=0; i < nq; i++) {
216    {
217      const double scattering = IQ_FUNC(IQ_PARS, IQ_PARAMETERS);
218      result[i] += local_pars.scale*scattering + local_pars.background;
219    }
220    return;
221  }
222#endif
223
224
225  // Since we are no longer looping over the entire polydispersity hypercube
226  // for each q, we need to track the normalization values for each q in a
227  // separate work vector.
228  double norm;   // contains sum over weights
229  double vol; // contains sum over volume
230  double norm_vol; // contains weights over volume
231
232  // Initialize the results to zero
233  if (pd_start == 0) {
234    norm_vol = 0.0;
235    norm = 0.0;
236    vol = 0.0;
237
238    #ifdef USE_OPENMP
239    #pragma omp parallel for
240    #endif
241    for (int i=0; i < nq; i++) {
242      result[i] = 0.0;
243    }
244  } else {
245    //Pulling values from previous segment
246    norm = result[nq];
247    vol = result[nq+1];
248    norm_vol = results[nq+2];
249  }
250
251  // Location in the polydispersity hypercube, one index per dimension.
252  local int pd_index[PD_MAX];
253
254  // Set the initial index greater than its vector length in order to
255  // trigger the reset behaviour that happens at the end the fast loop.
256  pd_index[0] = pd_length[0];
257
258  // need product of weights at every Iq calc, so keep product of
259  // weights from the outer loops so that weight = partial_weight * fast_weight
260  double partial_weight = NAN; // product of weight w4*w3*w2 but not w1
261
262  // Loop over the weights then loop over q, accumulating values
263  for (int loop_index=pd_start; loop_index < pd_stop; loop_index++) {
264    // check if indices need to be updated
265    if (pd_index[0] >= pd_length[0]) {
266      pd_index[0] = loop_index%pd_length[0];
267      partial_weight = 1.0;
268      for (int k=1; k < MAX_PD; k++) {
269        pd_index[k] = (loop_index%pd_length[k])/pd_stride[k];
270        partial_weight *= weights[pd_offset[k]+pd_index[k]];
271      }
272      weight = partial_weight * weights[pd_offset[0]+pd_index[0]]
273      for (int k=0; k < NPARS; k++) {
274        int coord = par_coord[k];
275        int this_offset = par_offset[k];
276        int block_size = 1;
277        for (int bit=0; bit < MAX_PD && coord != 0; bit++) {
278          if (coord&1) {
279              this_offset += block_size * pd_index[bit];
280              block_size *= pd_length[bit];
281          }
282          coord /= 2;
283        }
284        offset[k] = this_offset;
285        parvec[k] = pars[this_offset];
286      }
287    } else {
288      pd_index[0] += 1;
289      weight = partial_weight*weights[pd_offset[0]+pd_index[0]];
290      for (int k=0; k < problem->fast_coord_count; k++) {
291        parvec[ fast_coord_index[k]]
292            = pars[offset[fast_coord_index[k]] + pd_index[0]];
293      }
294    }
295    #ifdef INVALID
296    if (INVALID(local_pars)) continue;
297    #endif
298    if (weight > cutoff) {
299      const double vol_weight = VOLUME_WEIGHT_PRODUCT;
300      const double weighted_vol = vol_weight*form_volume(VOLUME_PARAMETERS);
301      norm += weight;
302      vol += weighted_vol;
303      norm_vol += vol_weight;
304
305      #ifdef USE_OPENMP
306      #pragma omp parallel for
307      #endif
308      for (int i=0; i < nq; i++) {
309      {
310        const double scattering = IQ_FUNC(IQ_PARS, IQ_PARAMETERS);
311        //const double scattering = Iq(q[i], IQ_PARAMETERS);
312        result[i] += weight*scattering;
313      }
314  }
315  //Makes a normalization avialable for the next round
316  result[nq] = norm;
317  result[nq+1] = vol;
318  results[nq+2] = norm_vol;
319
320  //End of the PD loop we can normalize
321  if (pd_stop == pd_stride[MAX_PD-1]) {}
322    #ifdef USE_OPENMP
323    #pragma omp parallel for
324    #endif
325    for (int i=0; i < nq; i++) {
326      if (vol*norm_vol != 0.0) {
327        result[i] *= norm_vol/vol;
328      }
329        result[i] = local_pars.scale*result[i]/norm+local_pars.background;
330    }
331  }
332}
333}
Note: See TracBrowser for help on using the repository browser.