source: sasmodels/sasmodels/kernel_iq.c @ 2e44ac7

core_shell_microgelscostrafo411magnetic_modelrelease_v0.94release_v0.95ticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since 2e44ac7 was 2e44ac7, checked in by Paul Kienzle <pkienzle@…>, 8 years ago

design doc for revised calculator interface, with support for tied parameters

  • Property mode set to 100644
File size: 10.5 KB
Line 
1
2/*
3    ##########################################################
4    #                                                        #
5    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
6    #   !!                                              !!   #
7    #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
8    #   !!                                              !!   #
9    #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
10    #                                                        #
11    ##########################################################
12*/
13
14/*
15The environment needs to provide the following #defines
16   USE_OPENCL is defined if running in opencl
17   KERNEL declares a function to be available externally
18   KERNEL_NAME is the name of the function being declared
19   NPARS is the number of parameters in the kernel
20   PARAMETER_TABLE is the declaration of the parameters to the kernel.
21
22       Cylinder:
23
24           #define PARAMETER_TABLE \
25           double length; \
26           double radius; \
27           double sld; \
28           double sld_solvent
29
30       Multi-shell cylinder (10 shell max):
31
32           #define PARAMETER_DECL \
33           double num_shells; \
34           double length; \
35           double radius[10]; \
36           double sld[10]; \
37           double sld_solvent
38
39   PARAMETER_CALL(var) is the declaration of a call to the kernel.
40
41       Cylinder:
42
43           #define PARAMETER_CALL(var) \
44           var.length, \
45           var.radius, \
46           var.sld, \
47           var.sld_solvent
48
49       Multi-shell cylinder:
50           #define PARAMETER_CALL(var) \
51           var.num_shells, \
52           var.length, \
53           var.radius, \
54           var.sld, \
55           var.sld_solvent
56
57Our design supports a limited number of polydispersity loops, wherein
58we need to cycle through the values of the polydispersity, calculate
59the I(q, p) for each combination of parameters, and perform a normalized
60weighted sum across all the weights.  Parameters may be passed to the
61underlying calculation engine as scalars or vectors, but the polydispersity
62calculator treats the parameter set as one long vector.
63
64The polydisperse parameters are stored in as an array of parameter
65indices, one for each polydisperse parameter, stored in pd_par[n].
66Non-polydisperse parameters do not appear in this array. Each polydisperse
67parameter has a weight vector whose length is stored in pd_length[n].
68The weights are stored in a contiguous vector of weights for all
69parameters, with the starting position for the each parameter stored
70in pd_offset[n].  The values corresponding to the weights are stored
71together in a separate weights[] vector, with offset stored in
72par_offset[pd_par[n]]. Polydisperse parameters should be stored in
73decreasing order of length for highest efficiency.
74
75We limit the number of polydisperse dimensions to MAX_PD (currently 4).
76This cuts the size of the structure in half compared to allowing a
77separate polydispersity for each parameter.  This will help a little
78bit for models with large numbers of parameters, such as the onion model.
79
80Parameters may be coordinated.  That is, we may have the value of one
81parameter depend on a set of other parameters, some of which may be
82polydisperse.  For example, if sld is inversely proportional to the
83volume of a cylinder, and the length and radius are independently
84polydisperse, then for each combination of length and radius we need a
85separate value for the sld.  The caller must provide a coordination table
86for each parameter containing the value for each parameter given the
87value of the polydisperse parameters v1, v2, etc.   The tables for each
88parameter are arranged contiguously in a vector, with offset[k] giving the
89starting location of parameter k in the vector.  Each parameter defines
90coord[k] as a bit mask indicating which polydispersity parameters the
91parameter depends upon.  Usually this is zero, indicating that the parameter
92is independent, but for the cylinder example given, the bits for the
93radius and length polydispersity parameters would both be set, the result
94being a (#radius x #length) table, or maybe a (#length x #radius) table
95if length comes first in the polydispersity table.
96
97NB: If we can guarantee that a compiler and OpenCL driver are available,
98we could instead create the coordination function on the fly for each
99parameter, saving memory and transfer time, but requiring a C compiler
100as part of the environment.
101
102In ordering the polydisperse parameters by decreasing length we can
103iterate over the longest dispersion weight vector first.  All parameters
104coordinated with this weight vector (the 'fast' parameters), can be
105updated with a simple increment to the next position in the parameter
106value table.  The indices of these parameters is stored in fast_coord_index[],
107with fast_coord_count being the number of fast parameters.  A total
108of NPARS slots is allocated to allow for the case that all parameters
109are coordinated with the fast index, though this will likely be mostly
110empty.  When the fast increment count reaches the end of the weight
111vector, then the index of the second polydisperse parameter must be
112incremented, and all of its coordinated parameters updated.  Because this
113operation is not in the inner loop, a slower algorithm can be used.
114
115The problem details structure can be allocated and sent in as an integer
116array using the read-only flag.  This allows us to copy it once per fit
117along with the weights vector, since features such as the number of
118polydisperity elements per pd parameter or the coordinated won't change
119between function evaluations.  A new parameter vector is sent for
120each I(q) evaluation.
121
122To protect against expensive evaluations taking all the GPU resource
123on large fits, the entire polydispersity will not be computed at once.
124Instead, a start and stop location will be sent, indicating where in the
125polydispersity loop the calculation should start and where it should
126stop.  We can do this for arbitrary start/stop points since we have
127unwound the nested loop.  Instead, we use the same technique as array
128index translation, using div and mod to figure out the i,j,k,...
129indices in the virtual nested loop.
130
131The results array will be initialized to zero for polydispersity loop
132entry zero, and preserved between calls to [start, stop] so that the
133results accumulate by the time the loop has completed.  Background and
134scale will be applied when the loop reaches the end.  This does require
135that the results array be allocated read-write, which is less efficient
136for the GPU, but it makes the calling sequence much more manageable.
137*/
138
139#define MAX_PD 4  // MAX_PD is the max number of polydisperse parameters
140#define PD_2N 16  // PD_2N is the size of the coordination step table
141
142typedef struct {
143    int pd_par[MAX_PD];     // index of the nth polydispersity variable
144    int pd_length[MAX_PD];  // length of the nth polydispersity weight vector
145    int pd_offset[MAX_PD];  // offset of pd weights in the par & weight vector
146    int pd_stride[MAX_PD];  // stride to move to the next index at this level
147    int par_offset[NPARS];  // offset of par values in the par & weight vector
148    int par_coord[NPARS];   // polydispersity coordination bitvector
149    int fast_coord_count;   // number of parameters coordinated with pd 1
150    int fast_coord_index[NPARS]; // index of the fast coordination parameters
151} ProblemDetails;
152
153typedef struct {
154    PARAMETER_DECL;
155} ParameterBlock;
156
157
158KERNEL
159void KERNEL_NAME(
160    int nq,                 // number of q values
161    global const ProblemDetails *problem,
162    global const double *weights,
163    global const double *pars,
164    global const double *q, // nq q values, with padding to boundary
165    global double *result,  // nq return values, again with padding
166    global double *work,    // 3*(nq+padding) space for normalization
167    global int *int_work,   // nq+padding space for current position
168    const double cutoff,    // cutoff in the polydispersity weight product
169    const int pd_start,     // where we are in the polydispersity loop
170    const int pd_stop,      // where we are stopping in the polydispersity loop
171    )
172{
173  // the location in the polydispersity calculation
174  local ParameterBlock local_pars;
175  const double *parvec = &local_pars;  // Alias named parameters with a vector
176
177  // Initialize the results to zero
178  if (pd_start == 0) {
179    #ifdef USE_OPENMP
180    #pragma omp parallel for
181    #endif
182    for (int i=0; i < Nq; i++) {
183      norm_vol[i] = 0.0;
184      norm[i] = 0.0;
185      vol[i] = 0.0;
186      result[i] = 0.0;
187    }
188  }
189
190  // Force the index into a new state
191  local int pd_index[4];
192  pd_index[0] = pd_length[0]
193
194  // Loop over the weights then loop over q, accumulating values
195  double partial_weight = NaN;
196  for (int loop_index=pd_start; loop_index < pd_stop; loop_index++) {
197    // check if indices need to be updated
198    if (pd_index[0] >= pd_length[0]) {
199      pd_index[0] = loop_index%pd_length[0];
200      partial_weight = 1.0;
201      for (int k=0; k < MAX_PD; k++) {
202        pd_index[k] = (loop_index%pd_length[k])/pd_stride[k];
203        partial_weight *= weights[pd_offset[k]+pd_index[k]];
204      }
205      weight = partial_weight * weights[pd_offset[0]+pd_index[0]]
206      for (int k=0; k < NPARS; k++) {
207        int coord = par_coord[k];
208        int this_offset = 0;
209        int block_size = 1;
210        for (int bit=0; bit < MAX_PD && coord != 0; bit++) {
211          if (coord&1) {
212              this_offset += block_size * pd_index[bit];
213              block_size *= pd_length[bit];
214          }
215        }
216        offset[k] = this_offset;
217      }
218    } else {
219      pd_index[0] += 1;
220      weight = partial_weight*weights[pd_offset[0]+pd_index[0]];
221      for (int k=0; k < problem->fast_coord_count; k++) {
222        parvec[ fast_coord_index[k]]
223            = pars[offset[fast_coord_index[k]] + pd_index[0]];
224      }
225    }
226    #ifdef USE_OPENMP
227    #pragma omp parallel for
228    #endif
229    for (int i=0; i < Nq; i++) {
230    {
231      if (weight > cutoff) {
232          const double scattering = Iq(qi, IQ_PARAMETERS);
233          // allow kernels to exclude invalid regions by returning NaN
234          if (!isnan(scattering)) {
235            result[i] += weight*scattering;
236            norm[i] += weight;
237            const double vol_weight = VOLUME_WEIGHT_PRODUCT;
238            vol[i] += vol_weight*form_volume(VOLUME_PARAMETERS);
239            norm_vol[i] += vol_weight;
240      }
241    }
242  }
243
244  if (pd_stop == pd_stride[MAX_PD-1]) {}
245    #ifdef USE_OPENMP
246    #pragma omp parallel for
247    #endif
248    for (int i=0; i < Nq; i++) {
249      if (vol[i]*norm_vol[i] != 0.0) {
250        result[i] *= norm_vol[i]/vol[i];
251      }
252      result[i] = scale*result[i]/norm[i]+background;
253    }
254  }
255}
256
Note: See TracBrowser for help on using the repository browser.