← Previous Changeset
Next Changeset →

Changeset 5ff1b03 in sasmodels

Timestamp:

Mar 25, 2016 11:44:37 AM (9 years ago)

Author:

Paul Kienzle <pkienzle@…>

Branches:

master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests

Children:

Parents:

Message:

working kerneldll

Location:

Files:

4 edited

core.py (modified) (2 diffs)
generate.py (modified) (2 diffs)
kernel_iq.c (modified) (5 diffs)
kerneldll.py (modified) (1 diff)

Legend:

Unmodified
Added
Removed

sasmodels/core.py

-                      r380e8c9
+                      r5ff1b03
     value = values.get(parameter.name, parameter.default)
     if parameter.type not in ('volume', 'orientation'):
         return [value], [1.0]
+        return [value], []
     relative = parameter.type == 'volume'
     limits = parameter.limits
 …
         active = lambda name: True
     vw_pairs = [(get_weights(p, pars) if active(p.name) else ([p.default], [1.0]))
+    vw_pairs = [(get_weights(p, pars) if active(p.name) else ([p.default], []))
                 for p in kernel.info['parameters']]
     values, weights = zip(*vw_pairs)

sasmodels/generate.py

-                      r380e8c9
+                      r5ff1b03
         # faster by not using/transferring the volume normalizations, but
         # the ifdef's reduce readability more than is worthwhile.
         call_volume = "#define CALL_VOLUME(v) 0.0"
+        call_volume = "#define CALL_VOLUME(v) 1.0"
     source.append(call_volume)
 …
     model_info['max_pd'] = min(partable.num_pd, MAX_PD)
+class CoordinationDetails(object):
+    def __init__(self, model_info):
+        max_pd = model_info['max_pd']
+        npars = len(model_info['parameters'].kernel_pars())
+        par_offset = 4*max_pd
+        self.details = np.zeros(par_offset + 3*npars + 4, 'i4')
+        # generate views on different parts of the array
+        self._pd_par     = self.details[0*max_pd:1*max_pd]
+        self._pd_length  = self.details[1*max_pd:2*max_pd]
+        self._pd_offset  = self.details[2*max_pd:3*max_pd]
+        self._pd_stride  = self.details[3*max_pd:4*max_pd]
+        self._par_offset = self.details[par_offset+0*npars:par_offset+1*npars]
+        self._par_coord  = self.details[par_offset+1*npars:par_offset+2*npars]
+        self._pd_coord   = self.details[par_offset+2*npars:par_offset+3*npars]
+        # theta_par is fixed
+        self.details[-1] = model_info['parameters'].theta_par
+    @property
+    def ctypes(self): return self.details.ctypes
+    @property
+    def pd_par(self): return self._pd_par
+    @property
+    def pd_length(self): return self._pd_length
+    @property
+    def pd_offset(self): return self._pd_offset
+    @property
+    def pd_stride(self): return self._pd_stride
+    @property
+    def pd_coord(self): return self._pd_coord
+    @property
+    def par_coord(self): return self._par_coord
+    @property
+    def par_offset(self): return self._par_offset
+    @property
+    def num_coord(self): return self.details[-2]
+    @num_coord.setter
+    def num_coord(self, v): self.details[-2] = v
+    @property
+    def total_pd(self): return self.details[-3]
+    @total_pd.setter
+    def total_pd(self, v): self.details[-3] = v
+    @property
+    def num_active(self): return self.details[-4]
+    @num_active.setter
+    def num_active(self, v): self.details[-4] = v
+    def show(self):
+        print("total_pd", self.total_pd)
+        print("num_active", self.num_active)
+        print("pd_par", self.pd_par)
+        print("pd_length", self.pd_length)
+        print("pd_offset", self.pd_offset)
+        print("pd_stride", self.pd_stride)
+        print("par_offsets", self.par_offset)
+        print("num_coord", self.num_coord)
+        print("par_coord", self.par_coord)
+        print("pd_coord", self.pd_coord)
+        print("theta par", self.details[-1])
 def mono_details(model_info):
+    # TODO: move max_pd into ParameterTable?
+    max_pd = model_info['max_pd']
+    pars = model_info['parameters'].kernel_pars()
+    npars = len(pars)
+    par_offset = 5*max_pd
+    constants_offset = par_offset + 3*npars
+    details = np.zeros(constants_offset + 2, 'int32')
+    details[0*max_pd:1*max_pd] = range(max_pd)       # pd_par: arbitrary order; use first
+    details[1*max_pd:2*max_pd] = [1]*max_pd          # pd_length: only one element
+    details[2*max_pd:3*max_pd] = range(max_pd)       # pd_offset: consecutive 1.0 weights
+    details[3*max_pd:4*max_pd] = [1]*max_pd          # pd_stride: vectors of length 1
+    details[4*max_pd:5*max_pd] = [0]*max_pd          # pd_isvol: doesn't matter if no norm
+    details[par_offset+0*npars:par_offset+1*npars] = range(2, npars+2) # par_offset: skip scale and background
+    details[par_offset+1*npars:par_offset+2*npars] = [0]*npars         # no coordination
+    #details[p+npars] = 1 # par_coord[0] is coordinated with the first par?
+    details[par_offset+2*npars:par_offset+3*npars] = 0 # fast coord with 0
+    details[constants_offset]   = 1     # fast_coord_count: one fast index
+    details[constants_offset+1] = -1    # theta_par: None
+    details = CoordinationDetails(model_info)
+    # The zero defaults for monodisperse systems are mostly fine
+    details.par_offset[:] = np.arange(2, len(details.par_offset)+2)
     return details
 def poly_details(model_info, weights):
     weights = weights[2:]
-    # TODO: move max_pd into ParameterTable?
     max_pd = model_info['max_pd']
-    pars = model_info['parameters'].kernel_pars()
-    npars = len(pars)
-    par_offset = 5*max_pd
-    constants_offset = par_offset + 3*npars
     # Decreasing list of polydispersity lengths
     # Note: the reversing view, x[::-1], does not require a copy
     pd_length = np.array([len(w) for w in weights])
+    num_active = np.sum(pd_length>1)
+    if num_active > max_pd:
+        raise ValueError("Too many polydisperse parameters")
     pd_offset = np.cumsum(np.hstack((0, pd_length)))
+    pd_isvol = np.array([p.type=='volume' for p in pars])
+    idx = np.argsort(pd_length)[::-1][:max_pd]
+    pd_stride = np.cumprod(np.hstack((1, pd_length[idx][:-1])))
+    par_offsets = np.cumsum(np.hstack((2, pd_length)))[:-1]
+    coord_offset = par_offset+npars
+    fast_coord_offset = par_offset+2*npars
+    theta_par = -1
+    if 'theta_par' in model_info:
+        theta_par = model_info['theta_par']
+        if theta_par >= 0 and pd_length[theta_par] <= 1:
+            theta_par = -1
+    details = np.empty(constants_offset + 2, 'int32')
+    details[0*max_pd:1*max_pd] = idx             # pd_par
+    details[1*max_pd:2*max_pd] = pd_length[idx]
+    details[2*max_pd:3*max_pd] = pd_offset[idx]
+    details[3*max_pd:4*max_pd] = pd_stride
+    details[4*max_pd:5*max_pd] = pd_isvol[idx]
+    details[par_offset+0*npars:par_offset+1*npars] = par_offsets
+    details[par_offset+1*npars:par_offset+2*npars] = 0  # no coordination for most
+    for k,parameter_num in enumerate(idx):
+        details[coord_offset+parameter_num] = 2**k
+    details[fast_coord_offset] = idx[0]
+    details[fast_coord_offset+1:fast_coord_offset+npars] = 0  # no fast coord with 0
+    details[constants_offset] = 1   # fast_coord_count: one fast index
+    details[constants_offset+1] = theta_par
+    print("polydispersity details")
+    print_details(model_info, details)
+    idx = np.argsort(pd_length)[::-1][:num_active]
+    par_length = np.array([max(len(w),1) for w in weights])
+    pd_stride = np.cumprod(np.hstack((1, par_length[idx])))
+    par_offsets = np.cumsum(np.hstack((2, par_length)))
+    details = CoordinationDetails(model_info)
+    details.pd_par[:num_active] = idx
+    details.pd_length[:num_active] = pd_length[idx]
+    details.pd_offset[:num_active] = pd_offset[idx]
+    details.pd_stride[:num_active] = pd_stride[:-1]
+    details.par_offset[:] = par_offsets[:-1]
+    details.total_pd = pd_stride[-1]
+    details.num_active = num_active
+    # Without constraints coordinated parameters are just the pd parameters
+    details.par_coord[:num_active] = idx
+    details.pd_coord[:num_active] = 2**np.arange(num_active)
+    details.num_coord = num_active
+    #details.show()
     return details
-def print_details(model_info, details):
-    max_pd = model_info['max_pd']
-    pars = model_info['parameters'].kernel_pars()
-    npars = len(pars)
-    par_offset = 5*max_pd
-    constants_offset = par_offset + 3*npars
-    print("pd_par", details[0*max_pd:1*max_pd])
-    print("pd_length", details[1*max_pd:2*max_pd])
-    print("pd_offset", details[2*max_pd:3*max_pd])
-    print("pd_stride", details[3*max_pd:4*max_pd])
-    print("pd_isvol", details[4*max_pd:5*max_pd])
-    print("par_offsets", details[par_offset+0*npars:par_offset+1*npars])
-    print("par_coord", details[par_offset+1*npars:par_offset+2*npars])
-    print("fast_coord_pars", details[par_offset+2*npars:par_offset+3*npars])
-    print("fast_coord_count", details[constants_offset])
-    print("theta par", details[constants_offset+1])
 def constrained_poly_details(model_info, weights, constraints):

sasmodels/kernel_iq.c

-                      rba32cdd
+                      r5ff1b03
     int32_t pd_offset[MAX_PD];  // offset of pd weights in the value & weight vector
     int32_t pd_stride[MAX_PD];  // stride to move to the next index at this level
-    int32_t pd_isvol[MAX_PD];   // True if parameter is a volume weighting parameter
 #endif // MAX_PD > 0
+    int32_t par_offset[NPARS];  // offset of par values in the value & weight vector
+    int32_t par_coord[NPARS];   // polydispersity coordination bitvector
+    int32_t fast_coord_pars[NPARS]; // ids of the fast coordination parameters
+    int32_t fast_coord_count;   // number of parameters coordinated with pd 1
+    int32_t par_offset[NPARS];  // offset of par value blocks in the value & weight vector
+    int32_t par_coord[NPARS];   // ids of the coordination parameters
+    int32_t pd_coord[NPARS];    // polydispersity coordination bitvector
+    int32_t num_active;         // number of non-trivial pd loops
+    int32_t total_pd;           // total number of voxels in hypercube
+    int32_t num_coord;          // number of coordinated parameters
     int32_t theta_par;          // id of spherical correction variable
 } ProblemDetails;
 …
   double *pvec = (double *)(&local_values);  // Alias named parameters with a vector
+  // Monodisperse computation
+  if (pd_stop == 1) {
+    // Shouldn't need to copy!!
+    for (int k=0; k < NPARS; k++) {
+      pvec[k] = values[k+2];  // skip scale and background
+    }
+    const double volume = CALL_VOLUME(local_values);
+  // Fill in the initial variables
+  #ifdef USE_OPENMP
+  #pragma omp parallel for
+  #endif
+  for (int k=0; k < NPARS; k++) {
+    pvec[k] = values[problem->par_offset[k]];
+  }
+  // If it is the first round initialize the result to zero, otherwise
+  // assume that the previous result has been passed back.
+  // Note: doing this even in the monodisperse case in order to handle the
+  // rare case where the model parameters are invalid and zero is returned.
+  // So slightly increased cost for slightly smaller code size.
+  if (pd_start == 0) {
     #ifdef USE_OPENMP
     #pragma omp parallel for
     #endif
+    for (int i=0; i < nq+1; i++) {
+      result[i] = 0.0;
+    }
+  }
+  // Monodisperse computation
+  if (problem->num_active == 0) {
+    #ifdef INVALID
+    if (INVALID(local_values)) { return; }
+    #endif
+    const double norm = CALL_VOLUME(local_values);
+    #ifdef USE_OPENMP
+    #pragma omp parallel for
+    #endif
+    result[nq] = norm; // Total volume normalization
     for (int i=0; i < nq; i++) {
       double scattering = CALL_IQ(q, i, local_values);
+      if (volume != 0.0) scattering /= volume;
+      result[i] = values[0]*scattering + values[1];
+      result[i] = values[0]*scattering/norm + values[1];
+    }
     return;
 …
 #if MAX_PD > 0
+  //printf("Entering polydispersity\n");
+  //printf("Entering polydispersity from %d to %d\n", pd_start, pd_stop);
   // Since we are no longer looping over the entire polydispersity hypercube
+  // for each q, we need to track the normalization values for each q in a
+  // separate work vector.
+  double norm;   // contains sum over weights
+  double vol; // contains sum over volume
+  double norm_vol; // contains weights over volume
+  // Initialize the results to zero
+  if (pd_start == 0) {
+    norm_vol = 0.0;
+    norm = 0.0;
+    vol = 0.0;
+    #ifdef USE_OPENMP
+    #pragma omp parallel for
+    #endif
+    for (int i=0; i < nq; i++) {
+      result[i] = 0.0;
+    }
+  } else {
+    //Pulling values from previous segment
+    norm = result[nq];
+    vol = result[nq+1];
+    norm_vol = result[nq+2];
+  }
+  // Location in the polydispersity hypercube, one index per dimension.
+  local int pd_index[MAX_PD];
+  // polydispersity loop index positions
+  local int offset[NPARS];  // NPARS excludes scale/background
+  // Trigger the reset behaviour that happens at the end of the fast loop
+  // by setting the initial index >= weight vector length.
+  pd_index[0] = problem->pd_length[0];
+  // for each q, we need to track the normalization values between calls.
+  double norm = 0.0;
   // need product of weights at every Iq calc, so keep product of
   // weights from the outer loops so that weight = partial_weight * fast_weight
   double partial_weight = NAN; // product of weight w4*w3*w2 but not w1
+  double partial_volweight = NAN;
+  double weight = 1.0;        // set to 1 in case there are no weights
+  double vol_weight = 1.0;    // set to 1 in case there are no vol weights
+  double spherical_correction = 1.0;  // correction for latitude variation
+  double spherical_correction = 1.0;  // cosine correction for latitude variation
+  // Location in the polydispersity hypercube, one index per dimension.
+  local int pd_index[MAX_PD];
+  // Location of the coordinated parameters in their own sub-cubes.
+  local int offset[NPARS];
+  // Trigger the reset behaviour that happens at the end of the fast loop
+  // by setting the initial index >= weight vector length.
+  const int fast_length = problem->pd_length[0];
+  pd_index[0] = fast_length;
   // Loop over the weights then loop over q, accumulating values
   for (int loop_index=pd_start; loop_index < pd_stop; loop_index++) {
     // check if indices need to be updated
+    if (pd_index[0] >= problem->pd_length[0]) {
+      // RESET INDICES
+      pd_index[0] = loop_index%problem->pd_length[0];
+    if (pd_index[0] == fast_length) {
+      //printf("should be here with %d active\n", problem->num_active);
+      // Compute position in polydispersity hypercube
+      for (int k=0; k < problem->num_active; k++) {
+        pd_index[k] = (loop_index/problem->pd_stride[k])%problem->pd_length[k];
+        //printf("pd_index[%d] = %d\n",k,pd_index[k]);
+      }
+      // Compute partial weights
       partial_weight = 1.0;
       partial_volweight = 1.0;
       for (int k=1; k < MAX_PD; k++) {
         pd_index[k] = (loop_index%problem->pd_length[k])/problem->pd_stride[k];
         const double wi = weights[problem->pd_offset[k]+pd_index[k]];
+      //printf("partial weight %d: ", loop_index);
+      for (int k=1; k < problem->num_active; k++) {
+        double wi = weights[problem->pd_offset[k] + pd_index[k]];
+        //printf("pd[%d]=par[%d]=%g ", k, problem->pd_par[k], wi);
         partial_weight *= wi;
+        if (problem->pd_isvol[k]) partial_volweight *= wi;
+      }
+      }
+      //printf("\n");
+      // Update parameter offsets in weight vector
       //printf("slow %d: ", loop_index);
+      for (int k=0; k < NPARS; k++) {
+        int coord = problem->par_coord[k];
+        int this_offset = problem->par_offset[k];
+      for (int k=0; k < problem->num_coord; k++) {
+        int par = problem->par_coord[k];
+        int coord = problem->pd_coord[k];
+        int this_offset = problem->par_offset[par];
         int block_size = 1;
         for (int bit=0; bit < MAX_PD && coord != 0; bit++) {
+        for (int bit=0; coord != 0; bit++) {
           if (coord&1) {
               this_offset += block_size * pd_index[bit];
               block_size *= problem->pd_length[bit];
+          }
           coord /= 2;
+          coord >>= 1;
+        }
+        offset[k] = this_offset;
+        pvec[k] = values[this_offset];
+        //printf("p[%d]=v[%d]=%g ", k, offset[k], pvec[k]);
+        offset[par] = this_offset;
+        pvec[par] = values[this_offset];
+        //printf("par[%d]=v[%d]=%g \n", k, offset[k], pvec[k]);
+        // if theta is not coordinated with fast index, precompute spherical correction
+        if (par == problem->theta_par && !(problem->par_coord[k]&1)) {
+          spherical_correction = fmax(fabs(cos(M_PI_180*pvec[problem->theta_par])), 1e-6);
+        }
+      }
       //printf("\n");
+      weight = partial_weight * weights[problem->pd_offset[0]+pd_index[0]];
+      if (problem->theta_par >= 0) {
+        spherical_correction = fabs(cos(M_PI_180*pvec[problem->theta_par]));
+      }
+      if (problem->theta_par == problem->pd_par[0]) {
+        weight *= spherical_correction;
+      }
+      pd_index[0] += 1;
+    } else {
+      // INCREMENT INDICES
+      const double wi = weights[problem->pd_offset[0]+pd_index[0]];
+      weight = partial_weight*wi;
+      if (problem->pd_isvol[0]) vol_weight *= wi;
+      //printf("fast %d: ", loop_index);
+      for (int k=0; k < problem->fast_coord_count; k++) {
+        const int pindex = problem->fast_coord_pars[k];
+        pvec[pindex] = values[++offset[pindex]];
+        //printf("p[%d]=v[%d]=%g ", pindex, offset[pindex], pvec[pindex]);
+      }
+      //printf("\n");
+      if (problem->theta_par == problem->pd_par[0]) {
+        weight *= fabs(cos(M_PI_180*pvec[problem->theta_par]));
+      }
+      pd_index[0] += 1;
+    }
+    }
+    // Increment fast index
+    const double wi = weights[problem->pd_offset[0] + pd_index[0]++];
+    double weight = partial_weight*wi;
+    //printf("fast %d: ", loop_index);
+    for (int k=0; k < problem->num_coord; k++) {
+      if (problem->pd_coord[k]&1) {
+        const int par = problem->par_coord[k];
+        pvec[par] = values[offset[par]++];
+        //printf("p[%d]=v[%d]=%g ", par, offset[par]-1, pvec[par]);
+        // if theta is coordinated with fast index, compute spherical correction each time
+        if (par == problem->theta_par) {
+          spherical_correction = fmax(fabs(cos(M_PI_180*pvec[problem->theta_par])), 1e-6);
+        }
+      }
+    }
+    //printf("\n");
     #ifdef INVALID
     if (INVALID(local_values)) continue;
 …
     // Note: weight==0 must always be excluded
     if (weight > cutoff) {
+      norm += weight;
+      vol += vol_weight * CALL_VOLUME(local_values);
+      norm_vol += vol_weight;
+      // spherical correction has some nasty effects when theta is +90 or -90
+      // where it becomes zero.  If the entirety of the correction
+      weight *= spherical_correction;
+      norm += weight * CALL_VOLUME(local_values);
       #ifdef USE_OPENMP
 …
   // Make normalization available for the next round
+  result[nq] = norm;
+  result[nq+1] = vol;
+  result[nq+2] = norm_vol;
+  result[nq] += norm;
   // End of the PD loop we can normalize
   if (pd_stop >= problem->pd_stride[MAX_PD-1]) {
+  if (pd_stop >= problem->total_pd) {
     #ifdef USE_OPENMP
     #pragma omp parallel for
     #endif
     for (int i=0; i < nq; i++) {
-      if (vol*norm_vol != 0.0) {
-        result[i] *= norm_vol/vol;
+      }
       result[i] = values[0]*result[i]/norm + values[1];
+    }

sasmodels/kerneldll.py

-                      r151f3bc
+                      r5ff1b03
                 else np.float64 if self.q_input.dtype == generate.F64
                 else np.float128)
         assert details.dtype == np.int32
+        assert isinstance(details, generate.CoordinationDetails)
         assert weights.dtype == real and values.dtype == real
         max_pd = self.info['max_pd']
+        start, stop = 0, details[4*max_pd-1]
+        print("in kerneldll")
+        print("details", details)
+        print("weights", weights)
+        print("values", values)
+        start, stop = 0, details.total_pd
+        #print("in kerneldll")
+        #print("weights", weights)
+        #print("values", values)
         args = [
             self.q_input.nq, # nq

Note: See TracChangeset for help on using the changeset viewer.

Download in other formats: