← Previous Changeset
Next Changeset →

Changeset ee72c70 in sasmodels

Timestamp:

Apr 18, 2016 7:43:26 PM (9 years ago)

Author:

Paul Kienzle <pkienzle@…>

Branches:

master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests

Children:

Parents:

ae2b6b5 (diff), 1bf66d9 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.

Message:

Merge branch 'polydisp' of github.com:sasview/sasmodels into polydisp

Location:

Files:

1 added
9 edited

model_test.py (modified) (1 diff)
models/__init__.py (modified) (1 diff)
models/lib/librefl.c (added)
models/porod.py (modified) (3 diffs)
models/spherical_sld.c (modified) (8 diffs)
models/spherical_sld.py (modified) (1 diff)
generate.py (modified) (1 diff)
kernel_iq.c (modified) (6 diffs)
kernel_iq.cl (modified) (10 diffs)
kernelcl.py (modified) (1 diff)

Legend:

: Unmodified
: Added
: Removed

sasmodels/model_test.py

-                      r0ff62d4
+                      r38a9b07
                     self.assertTrue(np.isfinite(actual_yi),
                                     'invalid f(%s): %s' % (xi, actual_yi))
+                elif np.isnan(yi):
+                    self.assertTrue(np.isnan(actual_yi),
+                                    'f(%s): expected:%s; actual:%s'
+                                    % (xi, yi, actual_yi))
                 else:
+                    self.assertTrue(is_near(yi, actual_yi, 5),
+                    # is_near does not work for infinite values, so also test
+                    # for exact values.  Note that this will not
+                    self.assertTrue(yi==actual_yi or is_near(yi, actual_yi, 5),
                                     'f(%s); expected:%s; actual:%s'
                                     % (xi, yi, actual_yi))

sasmodels/models/__init__.py

-                      r32c160a
+                      r1ca1fd9
+"""
+D Modeling for SAS
+"""
+#from sas.models import *
+import os
+from distutils.filelist import findall
+__version__ = "2.1.0"
+def get_data_path(media):
+    """
+    """
+    # Check for data path in the package
+    path = os.path.join(os.path.dirname(__file__), media)
+    if os.path.isdir(path):
+        return path
+    # Check for data path next to exe/zip file.
+    # If we are inside a py2exe zip file, we need to go up
+    # to get to the directory containing
+    # the media for this module
+    path = os.path.dirname(__file__)
+    #Look for maximum n_dir up of the current dir to find media
+    n_dir = 12
+    for i in range(n_dir):
+        path, _ = os.path.split(path)
+        media_path = os.path.join(path, media)
+        if os.path.isdir(media_path):
+            module_media_path = os.path.join(media_path, 'models_media')
+            if os.path.isdir(module_media_path):
+                return module_media_path
+            return media_path
+    raise RuntimeError('Could not find models media files')
+def data_files():
+    """
+    Return the data files associated with media.
+    The format is a list of (directory, [files...]) pairs which can be
+    used directly in setup(...,data_files=...) for setup.py.
+    """
+    data_files = []
+    path_img = get_data_path(media=os.path.join("sasmodels","sasmodels","models","img"))
+    #path_img = get_data_path(media="img")
+    im_list = findall(path_img)
+    #for f in findall(path):
+    #    if os.path.isfile(f) and f not in im_list:
+    #        data_files.append(('media/models_media', [f]))
+    for f in im_list:
+        data_files.append(('media/models_media/img', [f]))
+    return data_files

sasmodels/models/porod.py

-                      rec45c4f
+                      r82923a6
 """
 from numpy import sqrt, power
+from numpy import sqrt, power, inf, errstate
 name = "porod"
 …
     @param q: Input q-value
     """
+    return 1.0/power(q, 4)
+    with errstate(divide='ignore'):
+        return power(q, -4)
 Iq.vectorized = True  # Iq accepts an array of q values
 …
 demo = dict(scale=1.5, background=0.5)
+tests = [[{'scale': 0.00001, 'background':0.01}, 0.04, 3.916250]]
+tests = [
+    [{'scale': 0.00001, 'background':0.01}, 0.04, 3.916250],
+    [{}, 0.0, inf],
+]

sasmodels/models/spherical_sld.c

-                      r299dcce
+                      r1bf66d9
+//Headers
+static double form_volume(double thick_inter[],
+    double thick_flat_[],
+    double core_radius,
+    int n_shells);
+double Iq(double q,
+static double form_volume(
     int n_shells,
+    double thick_inter[],
+    double func_inter[],
+    double sld_core,
+    double sld_solvent,
+    double sld_flat[],
+    double radius_core,
     double thick_flat[],
+    double nu_inter[],
+    int npts_inter,
+    double core_radius);
+double Iqxy(double qx, double qy,
+    int n_shells,
+    double thick_inter[],
+    double func_inter[],
+    double sld_core,
+    double sld_solvent,
+    double sld_flat[],
+    double thick_flat[],
+    double nu_inter[],
+    int npts_inter,
+    double core_radius);
+//Main code
+static double form_volume(double thick_inter[],
+    double thick_flat_[],
+    double core_radius,
+    int n)
+    double thick_inter[])
+{
-    double radius = 0.0;
     int i;
     double r = core_radius;
     for (i=0; i < n; i++) {
+    double r = radius_core;
+    for (i=0; i < n_shells; i++) {
         r += thick_inter[i];
         r += thick_flat[i];
 …
   double background = dp[6];
   double npts = dp[57]; //number of sub_layers in each interface
   double nsl=npts;//21.0; //nsl = Num_sub_layer:  MUST ODD number in double //no other number works now
+  double nsl=npts;//21.0; //nsl = Num_sub_layer:  must be ODD double number
   int n_s;
 …
   double pi;
-  //int* fun_type;
-  //double* sld;
-  //double* thick_inter;
-  //double* thick;
-  //double* fun_coef;
   double total_thick=0.0;
-  //fun_type = (int*)malloc((n+2)*sizeof(int));
-  //sld = (double*)malloc((n+2)*sizeof(double));
-  //thick_inter = (double*)malloc((n+2)*sizeof(double));
-  //thick = (double*)malloc((n+2)*sizeof(double));
-  //fun_coef = (double*)malloc((n+2)*sizeof(double));
-  //TODO: Solution to avoid mallocs but probablyu can be done better
   int fun_type[12];
   double sld[12];
 …
             if (fabs(slope) > 0.0 ){
               //fun = sign * 3.0 * r * (2.0*qr*sin(qr)-((qr*qr)-2.0)*cos(qr))/(qr * qr * qr * qr);
+              fun = sign * r * sph_j1c(qr)  +  sign * 3.0 * sin(qr)/(qr * qr * q ) + sign * 6.0 * cos(qr)/(qr * qr * qr * q);
+              fun = sign * r * sph_j1c(qr) + sign * 3.0 * sin(qr)/(qr * qr * q )
+                + sign * 6.0 * cos(qr)/(qr * qr * qr * q);
+            }
+          }
 …
+    }
+  }
-  //vol += vol_sub;
   f2 = f * f / vol;
-  //f2 *= scale;
-  //f2 += background;
-  //free(fun_type);
-  //free(sld);
-  //free(thick_inter);
-  //free(thick);
-  //free(fun_coef);
   return (f2);
 …
  * @return: function value
  */
 double Iq(double q,
+static double Iq(double q,
     int n_shells,
     double thick_inter[],
     double func_inter[],
+    int npts_inter,
+    double radius_core,
     double sld_core,
     double sld_solvent,
     double sld_flat[],
     double thick_flat[],
+    double nu_inter[],
+    int npts_inter,
+    double core_radius
+    ) {
+    double func_inter[],
+    double thick_inter[],
+    double nu_inter[] ) {
     //printf("Number of points %d\n",npts_inter);
     double intensity;
     //TODO: Remove this container at later stage. It is only kept to minimize stupid errors now
+    //TODO: Remove this container at later stage.
     double dp[60];
     dp[0] = n_shells;
     //This is scale will also have to be removed at some stage
     dp[1] = 1.0;
     dp[2] = thick_inter_0;
     dp[3] = func_inter_0;
+    dp[2] = thick_inter[0];
+    dp[3] = func_inter[0];
     dp[4] = sld_core;
     dp[5] = sld_solvent;
     dp[6] = 0.0;
+    for (i=0; i<n; i++){
+    dp[7] = sld_flat[0];
+    //TODO: Something is messed up with this data strcucture!
+    dp[17] = thick_inter[0];
+    dp[27] = thick_flat[0];
+    dp[37] = func_inter[0];
+    dp[47] = nu_inter[0];
+    for (int i=1; i<=n_shells; i++){
         dp[i+7] = sld_flat[i];
         dp[i+17] = thick_inter[i];
 …
     dp[57] = npts_inter;
     dp[58] = nu_inter_0;
     dp[59] = rad_core_0;
+    dp[58] = nu_inter[0];
+    dp[59] = radius_core;
     intensity = 1.0e-4*sphere_sld_kernel(dp,q);
 …
  * @return: function value
  */
+double Iqxy(double qx, double qy,
+/*static double Iqxy(double qx, double qy,
     int n_shells,
     double thick_inter[],
     double func_inter[],
+    int npts_inter,
+    double radius_core
     double sld_core,
     double sld_solvent,
     double sld_flat[],
     double thick_flat[],
+    double func_inter[],
+    double thick_inter[],
     double nu_inter[],
-    int npts_inter,
-    double core_radius
     ) {
     double q = sqrt(qx*qx + qy*qy);
+    return Iq(q, n_shells, thick_inter_0, func_inter_0, core0_sld, solvent_sld,
+    flat1_sld, flat2_sld, flat3_sld, flat4_sld, flat5_sld,
+    flat6_sld, flat7_sld, flat8_sld, flat9_sld, flat10_sld,
+    thick_inter_1, thick_inter_2, thick_inter_3, thick_inter_4, thick_inter_5,
+    thick_inter_6, thick_inter_7, thick_inter_8, thick_inter_9, thick_inter_10,
+    thick_flat_1, thick_flat_2, thick_flat_3, thick_flat_4, thick_flat_5,
+    thick_flat_6, thick_flat_7, thick_flat_8, thick_flat_9, thick_flat_10,
+    func_inter_1, func_inter_2, func_inter_3, func_inter_4, func_inter_5,
+    func_inter_6, func_inter_7, func_inter_8, func_inter_9, func_inter_10,
+    nu_inter_1, nu_inter_2, nu_inter_3, nu_inter_4, nu_inter_5,
+    nu_inter_6, nu_inter_7, nu_inter_8, nu_inter_9, nu_inter_10,
+    npts_inter, nu_inter_0, rad_core_0);
+    //TODO: Check if evalute rphi is not needed?
+}
+    return Iq(q, n_shells, npts_inter, radius_core, sld_core, sld_solvent,
+    sld_flat[], thick_flat[], func_inter[], thick_inter[], nu_inter[])
+}*/

sasmodels/models/spherical_sld.py

-                      rd2bb604
+                      r1bf66d9
 # pylint: disable=bad-whitespace, line-too-long
 #            ["name", "units", default, [lower, upper], "type", "description"],
+parameters = [["n",                "",               1,      [0, 9],         "", "number of shells"],
+parameters = [["n_shells",                "",               1,      [0, 9],         "", "number of shells"],
+              ["npts_inter",       "",               35,     [0, 35],        "", "number of points in each sublayer Must be odd number"],
               ["radius_core",      "Ang",            50.0,   [0, inf],       "", "intern layer thickness"],
               ["sld_core",         "1e-6/Ang^2",     2.07,   [-inf, inf],    "", "sld function flat"],
-              ["sld_flat[n]",      "1e-6/Ang^2",     4.06,   [-inf, inf],    "", "sld function flat"],
-              ["thick_flat[n]",    "Ang",            100.0,  [0, inf],       "", "flat layer_thickness"],
-              ["func_inter[n]",    "",               0,      [0, 4],         "", "Erf:0, RPower:1, LPower:2, RExp:3, LExp:4"],
-              ["thick_inter[n]",   "Ang",            50.0,   [0, inf],       "", "intern layer thickness"],
-              ["inter_nu[n]",      "",               2.5,    [-inf, inf],    "", "steepness parameter"],
-              ["npts_inter",       "",               35,     [0, 35],        "", "number of points in each sublayer Must be odd number"],
               ["sld_solvent",      "1e-6/Ang^2",     1.0,    [-inf, inf],    "", "sld function solvent"],
+              ["sld_flat[n_shells]",      "1e-6/Ang^2",     4.06,   [-inf, inf],    "", "sld function flat"],
+              ["thick_flat[n_shells]",    "Ang",            100.0,  [0, inf],       "", "flat layer_thickness"],
+              ["func_inter[n_shells]",    "",               0,      [0, 4],         "", "Erf:0, RPower:1, LPower:2, RExp:3, LExp:4"],
+              ["thick_inter[n_shells]",   "Ang",            50.0,   [0, inf],       "", "intern layer thickness"],
+              ["nu_inter[n_shells]",      "",               2.5,    [-inf, inf],    "", "steepness parameter"],
+              ]
 # pylint: enable=bad-whitespace, line-too-long
+#source = ["lib/librefl.c",  "lib/sph_j1c.c", "spherical_sld.c"]
+def Iq(q, *args, **kw):
+    return q
+def Iqxy(qx, *args, **kw):
+    return qx
+demo = dict(
+    n=4,
+    scale=1.0,
+    solvent_sld=1.0,
+    background=0.0,
+    npts_inter=35.0,
+    )
+source = ["lib/librefl.c",  "lib/sph_j1c.c", "spherical_sld.c"]
+#def Iq(q, *args, **kw):
+#    return q
+#def Iqxy(qx, *args, **kw):
+#    return qx
+def profile(n_shells, radius_core,  sld_core,  sld_solvent, sld_flat,
+            thick_flat, func_inter, thick_inter, nu_inter, npts_inter):
+    """
+    Returns shape profile with x=radius, y=SLD.
+    """
+    z = []
+    beta = []
+    z0 = 0
+    # two sld points for core
+    z.append(0)
+    beta.append(sld_core)
+    z.append(radius_core)
+    beta.append(sld_core)
+    z0 += radius_core
+    for i in range(1, n_shells+2):
+        dz = thick_inter[i-1]/npts_inter
+        # j=0 for interface, j=1 for flat layer
+        for j in range(0, 2):
+            # interation for sub-layers
+            for n_s in range(0, npts_inter+1):
+                if j == 1:
+                    if i == n_shells+1:
+                        break
+                    # shift half sub thickness for the first point
+                    z0 -= dz#/2.0
+                    z.append(z0)
+                    #z0 -= dz/2.0
+                    z0 += thick_flat[i]
+                    sld_i = sld_flat[i]
+                    beta.append(sld_flat[i])
+                    dz = 0
+                else:
+                    nu = nu_inter[i-1]
+                    # decide which sld is which, sld_r or sld_l
+                    if i == 1:
+                        sld_l = sld_core
+                    else:
+                        sld_l = sld_flat[i-1]
+                    if i == n_shells+1:
+                        sld_r = sld_solvent
+                    else:
+                        sld_r = sld_flat[i]
+                    # get function type
+                    func_idx = func_inter[i-1]
+                    # calculate the sld
+                    sld_i = intersldfunc(func_idx, npts_inter, n_s, nu,
+                                            sld_l, sld_r)
+                # append to the list
+                z.append(z0)
+                beta.append(sld_i)
+                z0 += dz
+                if j == 1:
+                    break
+    z.append(z0)
+    beta.append(sld_solvent)
+    z_ext = z0/5.0
+    z.append(z0+z_ext)
+    beta.append(sld_solvent)
+    # return sld profile (r, beta)
+    return np.asarray(z), np.asarray(beta)*1e-6
+def ER(core_radius, n, thickness):
+    return np.sum(thickness[:n[0]], axis=0) + core_radius
+def VR(core_radius, n, thickness):
+    return 1.0, 1.0
+demo = {
+    "n_shells":4,
+    "npts_inter":35.0,
+    "radius_core":50.0,
+    "sld_core":2.07,
+    "sld_solvent": 1.0,
+    "sld_flat":[4.0,3.5,4.0,3.5,4.0],
+    "thick_flat":[100.0,100.0,100.0,100.0,100.0],
+    "func_inter":[0,0,0,0,0],
+    "thick_inter":[50.0,50.0,50.0,50.0,50.0],
+    "nu_inter":[2.5,2.5,2.5,2.5,2.5]
+    }
 #TODO: Not working yet

sasmodels/generate.py

rf2f67a6	rae2b6b5
473	473	dll_code = load_template('kernel_iq.c')
474	474	ocl_code = load_template('kernel_iq.cl')
	475	#ocl_code = load_template('kernel_iq_local.cl')
475	476	user_code = [open(f).read() for f in model_sources(model_info)]
476	477

sasmodels/kernel_iq.c

-                      rf2f67a6
+                      rae2b6b5
   // Storage for the current parameter values.  These will be updated as we
   // walk the polydispersity cube.
   local ParameterBlock local_values;  // current parameter values
+  ParameterBlock local_values;  // current parameter values
   double *pvec = (double *)(&local_values);  // Alias named parameters with a vector
   double norm;
 …
     norm = CALL_VOLUME(local_values);
     const double scale = values[0];
     const double background = values[1];
     // result[nq] = norm; // Total volume normalization
+    double scale, background;
+    scale = values[0];
+    background = values[1];
     #ifdef USE_OPENMP
     #pragma omp parallel for
     #endif
     for (int i=0; i < nq; i++) {
       double scattering = CALL_IQ(q, i, local_values);
       result[i] = (norm>0. ? scale*scattering/norm + background : background);
+    for (int q_index=0; q_index < nq; q_index++) {
+      double scattering = CALL_IQ(q, q_index, local_values);
+      result[q_index] = (norm>0. ? scale*scattering/norm + background : background);
+    }
     return;
 …
 #if MAX_PD > 0
+  // If it is the first round initialize the result to zero, otherwise
+  // assume that the previous result has been passed back.
+  // Note: doing this even in the monodisperse case in order to handle the
+  // rare case where the model parameters are invalid and zero is returned.
+  // So slightly increased cost for slightly smaller code size.
+  // need product of weights at every Iq calc, so keep product of
+  // weights from the outer loops so that weight = partial_weight * fast_weight
+  double partial_weight; // product of weight w4*w3*w2 but not w1
+  double spherical_correction; // cosine correction for latitude variation
+  double weight; // product of partial_weight*w1*spherical_correction
+  // Location in the polydispersity hypercube, one index per dimension.
+  int pd_index[MAX_PD];
+  // Location of the coordinated parameters in their own sub-cubes.
+  int offset[NPARS];
+  // Number of coordinated indices
+  const int num_coord = details->num_coord;
+  // Number of elements in the longest polydispersity loop
+  const int fast_length = details->pd_length[0];
+  // Trigger the reset behaviour that happens at the end the fast loop
+  // by setting the initial index >= weight vector length.
+  pd_index[0] = fast_length;
+  // Default the spherical correction to 1.0 in case it is not otherwise set
+  spherical_correction = 1.0;
+  // Since we are no longer looping over the entire polydispersity hypercube
+  // for each q, we need to track the result and normalization values between
+  // calls.  This means initializing them to 0 at the start and accumulating
+  // them between calls.
+  norm = pd_start == 0 ? 0.0 : result[nq];
   if (pd_start == 0) {
     #ifdef USE_OPENMP
     #pragma omp parallel for
     #endif
+    for (int i=0; i < nq+1; i++) {
+      result[i] = 0.0;
+    }
+    norm = 0.0;
+  } else {
+    norm = result[nq];
+  }
+  // need product of weights at every Iq calc, so keep product of
+  // weights from the outer loops so that weight = partial_weight * fast_weight
+  double partial_weight = NAN; // product of weight w4*w3*w2 but not w1
+  double spherical_correction = 1.0;  // cosine correction for latitude variation
+  // Location in the polydispersity hypercube, one index per dimension.
+  local int pd_index[MAX_PD];
+  // Location of the coordinated parameters in their own sub-cubes.
+  local int offset[NPARS];
+  // Trigger the reset behaviour that happens at the end the fast loop
+  // by setting the initial index >= weight vector length.
+  const int fast_length = details->pd_length[0];
+  pd_index[0] = fast_length;
+  // Number of coordinated indices
+  const int num_coord = details->num_coord;
+    for (int q_index=0; q_index < nq; q_index++) {
+      result[q_index] = 0.0;
+    }
+  }
   // Loop over the weights then loop over q, accumulating values
 …
+    }
+    // Increment fast index
+    const double wi = weights[details->pd_offset[0] + pd_index[0]++];
+    double weight = partial_weight*wi;
+    // Update fast parameters
     //printf("fast %d: ", loop_index);
     for (int k=0; k < num_coord; k++) {
 …
     //printf("\n");
+    // Increment fast index
+    const double wi = weights[details->pd_offset[0] + pd_index[0]];
+    weight = partial_weight*wi;
+    pd_index[0]++;
     #ifdef INVALID
     if (INVALID(local_values)) continue;
 …
       #pragma omp parallel for
       #endif
+      for (int i=0; i < nq; i++) {
+        const double scattering = CALL_IQ(q, i, local_values);
+        result[i] += weight*scattering;
+      }
+    }
+  }
+  // End of the PD loop we can normalize
+      for (int q_index=0; q_index < nq; q_index++) {
+        const double scattering = CALL_IQ(q, q_index, local_values);
+        result[q_index] += weight*scattering;
+      }
+    }
+  }
   if (pd_stop >= details->total_pd) {
+    const double scale = values[0];
+    const double background = values[1];
+    // End of the PD loop we can normalize
+    double scale, background;
+    scale = values[0];
+    background = values[1];
     #ifdef USE_OPENMP
     #pragma omp parallel for
     #endif
     for (int i=0; i < nq; i++) {
       result[i] = (norm>0. ? scale*result[i]/norm + background : background);
+    for (int q_index=0; q_index < nq; q_index++) {
+      result[q_index] = (norm>0. ? scale*result[q_index]/norm + background : background);
+    }
+  }

sasmodels/kernel_iq.cl

-                      rf2f67a6
+                      rae2b6b5
+    )
+{
-  double norm;
-  // who we are and what element we are working with
-  const int q_index = get_global_id(0);
-  // number of active loops
-  const int num_active = details->num_active;
   // Storage for the current parameter values.  These will be updated as we
   // walk the polydispersity cube.
   ParameterBlock local_values;  // current parameter values
   double *pvec = (double *)(&local_values);  // Alias named parameters with a vector
+  double norm;
+  // who we are and what element we are working with
+  const int q_index = get_global_id(0);
+  // number of active loops
+  const int num_active = details->num_active;
   // Fill in the initial variables
   for (int k = 0; k < NPARS; k++) {
+  for (int k=0; k < NPARS; k++) {
     pvec[k] = values[details->par_offset[k]];
+  }
 …
     if (INVALID(local_values)) { return; }
     #endif
+    norm = CALL_VOLUME(local_values);
     double scale, background;
-    norm = CALL_VOLUME(local_values);
     scale = values[0];
     background = values[1];
-    // if (i==0) result[nq] = norm; // Total volume normalization
     if (q_index < nq) {
 …
 #if MAX_PD > 0
-  // If it is the first round initialize the result to zero, otherwise
-  // assume that the previous result has been passed back.
-  // Note: doing this even in the monodisperse case in order to handle the
-  // rare case where the model parameters are invalid and zero is returned.
-  // So slightly increased cost for slightly smaller code size.
   double this_result;
 …
   // weights from the outer loops so that weight = partial_weight * fast_weight
   double partial_weight; // product of weight w4*w3*w2 but not w1
+  double spherical_correction;  // cosine correction for latitude variation
+  double spherical_correction; // cosine correction for latitude variation
+  double weight; // product of partial_weight*w1*spherical_correction
   // Location in the polydispersity hypercube, one index per dimension.
 …
   int offset[NPARS];
+  // Number of coordinated indices
+  const int num_coord = details->num_coord;
   // Number of elements in the longest polydispersity loop
   const int fast_length = details->pd_length[0];
-  // Number of coordinated indices
-  const int num_coord = details->num_coord;
-  // We could in theory spread this work across different threads, but
-  // lets keep it simple;
-  norm = pd_start == 0 ? 0.0 : result[nq];
-  spherical_correction = 1.0;  // the usual case.
-  // partial_weight = NAN;
   // Trigger the reset behaviour that happens at the end the fast loop
   // by setting the initial index >= weight vector length.
   pd_index[0] = fast_length;
+  // Default the spherical correction to 1.0 in case it is not otherwise set
+  spherical_correction = 1.0;
   // Since we are no longer looping over the entire polydispersity hypercube
 …
   // calls.  This means initializing them to 0 at the start and accumulating
   // them between calls.
+  norm = pd_start == 0 ? 0.0 : result[nq];
   if (q_index < nq) {
     this_result = pd_start == 0 ? 0.0 : result[q_index];
 …
       // Compute position in polydispersity hypercube
       for (int k=0; k < num_active; k++) {
           pd_index[k] = (loop_index/details->pd_stride[k])%details->pd_length[k];
           //printf("pd_index[%d] = %d\n",k,pd_index[k]);
+        pd_index[k] = (loop_index/details->pd_stride[k])%details->pd_length[k];
+        //printf("pd_index[%d] = %d\n",k,pd_index[k]);
+      }
 …
       //printf("slow %d: ", loop_index);
       for (int k=0; k < num_coord; k++) {
+        if (k < num_coord) {
+          int par = details->par_coord[k];
+          int coord = details->pd_coord[k];
+          int this_offset = details->par_offset[par];
+          int block_size = 1;
+          for (int bit=0; coord != 0; bit++) {
+            if (coord&1) {
+                this_offset += block_size * pd_index[bit];
+                block_size *= details->pd_length[bit];
+            }
+            coord >>= 1;
+        int par = details->par_coord[k];
+        int coord = details->pd_coord[k];
+        int this_offset = details->par_offset[par];
+        int block_size = 1;
+        for (int bit=0; coord != 0; bit++) {
+          if (coord&1) {
+              this_offset += block_size * pd_index[bit];
+              block_size *= details->pd_length[bit];
+          }
+          offset[par] = this_offset;
+          pvec[par] = values[this_offset];
+          //printf("par[%d]=v[%d]=%g \n", k, offset[k], pvec[k]);
+          // if theta is not coordinated with fast index, precompute spherical correction
+          if (par == details->theta_par && !(details->par_coord[k]&1)) {
+            spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1.e-6);
+          }
+          coord >>= 1;
+        }
+        offset[par] = this_offset;
+        pvec[par] = values[this_offset];
+        //printf("par[%d]=v[%d]=%g \n", k, offset[k], pvec[k]);
+        // if theta is not coordinated with fast index, precompute spherical correction
+        if (par == details->theta_par && !(details->par_coord[k]&1)) {
+          spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1.e-6);
+        }
+      }
       //printf("\n");
+    }
+    double weight;
+    // Update fast parameters
+    //printf("fast %d: ", loop_index);
+    for (int k=0; k < num_coord; k++) {
+      if (details->pd_coord[k]&1) {
+        const int par = details->par_coord[k];
+        pvec[par] = values[offset[par]++];
+        //printf("p[%d]=v[%d]=%g ", par, offset[par]-1, pvec[par]);
+        // if theta is coordinated with fast index, compute spherical correction each time
+        if (par == details->theta_par) {
+          spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1.e-6);
+        }
+      }
+    }
+    //printf("\n");
+    // Increment fast index
     const double wi = weights[details->pd_offset[0] + pd_index[0]];
     weight = partial_weight*wi;
     pd_index[0]++;
-    // Increment fast index
-    //printf("fast %d: ", loop_index);
-    for (int k=0; k < num_coord; k++) {
-      if (k < num_coord) {
-        if (details->pd_coord[k]&1) {
-          const int par = details->par_coord[k];
-          pvec[par] = values[offset[par]++];
-          //printf("p[%d]=v[%d]=%g ", par, offset[par]-1, pvec[par]);
-          // if theta is coordinated with fast index, compute spherical correction each time
-          if (par == details->theta_par) {
-            spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1.e-6);
+          }
+        }
+      }
+    }
-    //printf("\n");
     #ifdef INVALID
 …
     if (pd_stop >= details->total_pd) {
       // End of the PD loop we can normalize
+      const double scale = values[0];
+      const double background = values[1];
+      double scale, background;
+      scale = values[0];
+      background = values[1];
       result[q_index] = (norm>0. ? scale*this_result/norm + background : background);
     } else {
 …
       result[q_index] = this_result;
+    }
+    // Accumulate norm.
+    // Remember the updated norm.
     if (q_index == 0) result[nq] = norm;
+  }

sasmodels/kernelcl.py

-                      rf2f67a6
+                      rae2b6b5
                              hostbuf=values)
+        start, stop = 0, call_details.total_pd
+        args = [
+            np.uint32(self.q_input.nq), np.int32(start), np.int32(stop),
+            details_b, weights_b, values_b, self.q_input.q_b, self.result_b,
+            self.real(cutoff),
+        ]
+        self.kernel(self.queue, self.q_input.global_size, None, *args)
+        # Call kernel and retrieve results
+        step = 100
+        for start in range(0, call_details.total_pd, step):
+            stop = min(start+step, call_details.total_pd)
+            args = [
+                np.uint32(self.q_input.nq), np.int32(start), np.int32(stop),
+                details_b, weights_b, values_b, self.q_input.q_b, self.result_b,
+                self.real(cutoff),
+            ]
+            self.kernel(self.queue, self.q_input.global_size, None, *args)
         cl.enqueue_copy(self.queue, self.result, self.result_b)
+        # Free buffers
         for v in (details_b, weights_b, values_b):
             if v is not None: v.release()

Note: See TracChangeset for help on using the changeset viewer.

Download in other formats: