Changeset a738209 in sasmodels


Ignore:
Timestamp:
Jul 15, 2016 7:33:33 AM (8 years ago)
Author:
Paul Kienzle <pkienzle@…>
Branches:
master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
Children:
def2c1b
Parents:
98ba1fc
Message:

simplify kernels by remove coordination parameter logic

Location:
sasmodels
Files:
11 edited

Legend:

Unmodified
Added
Removed
  • sasmodels/compare.py

    rf2f67a6 ra738209  
    460460    Return a model calculator using the OpenCL calculation engine. 
    461461    """ 
    462     try: 
    463         model = core.build_model(model_info, dtype=dtype, platform="ocl") 
    464     except Exception as exc: 
    465         print(exc) 
    466         print("... trying again with single precision") 
    467         model = core.build_model(model_info, dtype='single', platform="ocl") 
     462    if not core.HAVE_OPENCL: 
     463        raise RuntimeError("OpenCL not available") 
     464    model = core.build_model(model_info, dtype=dtype, platform="ocl") 
    468465    calculator = DirectModel(data, model, cutoff=cutoff) 
    469466    calculator.engine = "OCL%s"%DTYPE_MAP[dtype] 
  • sasmodels/details.py

    r0ff62d4 ra738209  
     1from __future__ import print_function 
     2 
    13import numpy as np  # type: ignore 
    24 
     
    1517        parameters = model_info.parameters 
    1618        max_pd = parameters.max_pd 
    17         npars = parameters.npars 
    18         par_offset = 4*max_pd 
    19         self.buffer = np.zeros(par_offset + 3 * npars + 4, 'i4') 
     19 
     20        # Structure of the call details buffer: 
     21        #   pd_par[max_pd]     pd params in order of length 
     22        #   pd_length[max_pd]  length of each pd param 
     23        #   pd_offset[max_pd]  offset of pd values in parameter array 
     24        #   pd_stride[max_pd]  index of pd value in loop = n//stride[k] 
     25        #   pd_prod            total length of pd loop 
     26        #   pd_sum             total length of the weight vector 
     27        #   num_active         number of pd params 
     28        #   theta_par          parameter number for theta parameter 
     29        self.buffer = np.zeros(4*max_pd + 4, 'i4') 
    2030 
    2131        # generate views on different parts of the array 
     
    2434        self._pd_offset  = self.buffer[2 * max_pd:3 * max_pd] 
    2535        self._pd_stride  = self.buffer[3 * max_pd:4 * max_pd] 
    26         self._par_offset = self.buffer[par_offset + 0 * npars:par_offset + 1 * npars] 
    27         self._par_coord  = self.buffer[par_offset + 1 * npars:par_offset + 2 * npars] 
    28         self._pd_coord   = self.buffer[par_offset + 2 * npars:par_offset + 3 * npars] 
    2936 
    3037        # theta_par is fixed 
    31         self.buffer[-1] = parameters.theta_offset 
     38        self.theta_par = parameters.theta_offset 
    3239 
    3340    @property 
     
    4451 
    4552    @property 
    46     def pd_coord(self): return self._pd_coord 
     53    def pd_prod(self): return self.buffer[-4] 
     54    @pd_prod.setter 
     55    def pd_prod(self, v): self.buffer[-4] = v 
    4756 
    4857    @property 
    49     def par_coord(self): return self._par_coord 
     58    def pd_sum(self): return self.buffer[-3] 
     59    @pd_sum.setter 
     60    def pd_sum(self, v): self.buffer[-3] = v 
    5061 
    5162    @property 
    52     def par_offset(self): return self._par_offset 
    53  
    54     @property 
    55     def num_active(self): return self.buffer[-4] 
     63    def num_active(self): return self.buffer[-2] 
    5664    @num_active.setter 
    57     def num_active(self, v): self.buffer[-4] = v 
    58  
    59     @property 
    60     def total_pd(self): return self.buffer[-3] 
    61     @total_pd.setter 
    62     def total_pd(self, v): self.buffer[-3] = v 
    63  
    64     @property 
    65     def num_coord(self): return self.buffer[-2] 
    66     @num_coord.setter 
    67     def num_coord(self, v): self.buffer[-2] = v 
     65    def num_active(self, v): self.buffer[-2] = v 
    6866 
    6967    @property 
    7068    def theta_par(self): return self.buffer[-1] 
     69    @theta_par.setter 
     70    def theta_par(self, v): self.buffer[-1] = v 
    7171 
    7272    def show(self): 
    73         print("total_pd", self.total_pd) 
    7473        print("num_active", self.num_active) 
     74        print("pd_prod", self.pd_prod) 
     75        print("pd_sum", self.pd_sum) 
     76        print("theta par", self.theta_par) 
    7577        print("pd_par", self.pd_par) 
    7678        print("pd_length", self.pd_length) 
    7779        print("pd_offset", self.pd_offset) 
    7880        print("pd_stride", self.pd_stride) 
    79         print("par_offsets", self.par_offset) 
    80         print("num_coord", self.num_coord) 
    81         print("par_coord", self.par_coord) 
    82         print("pd_coord", self.pd_coord) 
    83         print("theta par", self.buffer[-1]) 
    8481 
    8582def mono_details(model_info): 
    8683    call_details = CallDetails(model_info) 
    87     # The zero defaults for monodisperse systems are mostly fine 
    88     call_details.par_offset[:] = np.arange(2, len(call_details.par_offset)+2) 
     84    call_details.pd_prod = 1 
    8985    return call_details 
    9086 
    9187def poly_details(model_info, weights): 
    9288    #print("weights",weights) 
    93     weights = weights[2:] # Skip scale and background 
     89    #weights = weights[2:] # Skip scale and background 
    9490 
    9591    # Decreasing list of polydispersity lengths 
    96     # Note: the reversing view, x[::-1], does not require a copy 
    9792    pd_length = np.array([len(w) for w in weights]) 
    9893    num_active = np.sum(pd_length>1) 
     
    10196 
    10297    pd_offset = np.cumsum(np.hstack((0, pd_length))) 
     98    # Note: the reversing view, x[::-1], does not require a copy 
    10399    idx = np.argsort(pd_length)[::-1][:num_active] 
    104     par_length = np.array([max(len(w),1) for w in weights]) 
     100    par_length = np.array([len(w) for w in weights]) 
    105101    pd_stride = np.cumprod(np.hstack((1, par_length[idx]))) 
    106     par_offsets = np.cumsum(np.hstack((2, par_length))) 
    107102 
    108103    call_details = CallDetails(model_info) 
    109     call_details.pd_par[:num_active] = idx 
     104    call_details.pd_par[:num_active] = idx - 2  # skip background & scale 
    110105    call_details.pd_length[:num_active] = pd_length[idx] 
    111106    call_details.pd_offset[:num_active] = pd_offset[idx] 
    112107    call_details.pd_stride[:num_active] = pd_stride[:-1] 
    113     call_details.par_offset[:] = par_offsets[:-1] 
    114     call_details.total_pd = pd_stride[-1] 
     108    call_details.pd_prod = pd_stride[-1] 
     109    call_details.pd_sum = np.sum(par_length) 
    115110    call_details.num_active = num_active 
    116     # Without constraints coordinated parameters are just the pd parameters 
    117     call_details.par_coord[:num_active] = idx 
    118     call_details.pd_coord[:num_active] = 2**np.arange(num_active) 
    119     call_details.num_coord = num_active 
    120111    #call_details.show() 
    121112    return call_details 
    122  
    123 def constrained_poly_details(model_info, weights, constraints): 
    124     # Need to find the independently varying pars and sort them 
    125     # Need to build a coordination list for the dependent variables 
    126     # Need to generate a constraints function which takes values 
    127     # and weights, returning par blocks 
    128     raise NotImplementedError("Can't handle constraints yet") 
    129  
  • sasmodels/direct_model.py

    r56b2687 ra738209  
    6666 
    6767    vw_pairs = [(get_weights(p, pars) if active(p.name) 
    68                  else ([pars.get(p.name, p.default)], [])) 
     68                 else ([pars.get(p.name, p.default)], [1.0])) 
    6969                for p in parameters.call_parameters] 
    7070 
    71     call_details, weights, values = kernel.build_details(calculator, vw_pairs) 
    72     return calculator(call_details, weights, values, cutoff) 
     71    call_details, values = kernel.build_details(calculator, vw_pairs) 
     72    return calculator(call_details, values, cutoff) 
    7373 
    7474def get_weights(parameter, values): 
     
    8989    nsigma = values.get(parameter.name+'_pd_nsigma', 3.0) 
    9090    if npts == 0 or width == 0: 
    91         return [value], [] 
     91        return [value], [1.0] 
    9292    value, weight = weights.get_weights( 
    9393        disperser, npts, width, nsigma, value, limits, relative) 
  • sasmodels/generate.py

    r56b2687 ra738209  
    451451 
    452452 
    453 _IQXY_PATTERN = re.compile("^((inline|static) )? *(double )? *Iqxy *([(]|$)", 
     453# type in IQXY pattern could be single, float, double, long double, ... 
     454_IQXY_PATTERN = re.compile("^((inline|static) )? *([a-z ]+ )? *Iqxy *([(]|$)", 
    454455                           flags=re.MULTILINE) 
    455456def _have_Iqxy(sources): 
     
    469470    line instead. 
    470471    """ 
    471     for code, path in sources: 
     472    for path, code in sources: 
    472473        if _IQXY_PATTERN.search(code): 
    473474            return True 
  • sasmodels/kernel.py

    r0ff62d4 ra738209  
    99the kernel should be released, which also releases the inputs. 
    1010""" 
     11 
     12from __future__ import division, print_function 
    1113 
    1214import numpy as np 
     
    9092    """ 
    9193    values, weights = zip(*pairs) 
    92     if max([len(w) for w in weights]) > 1: 
     94    scalars = [v[0] for v in values] 
     95    if all(len(w)==1 for w in weights): 
     96        call_details = mono_details(kernel.info) 
     97        data = np.array(scalars, dtype=kernel.dtype) 
     98    else: 
    9399        call_details = poly_details(kernel.info, weights) 
    94     else: 
    95         call_details = mono_details(kernel.info) 
    96     weights, values = [np.hstack(v) for v in (weights, values)] 
    97     weights = weights.astype(dtype=kernel.dtype) 
    98     values = values.astype(dtype=kernel.dtype) 
    99     return call_details, weights, values 
    100  
     100        data = np.hstack(scalars+list(values)+list(weights)).astype(kernel.dtype) 
     101    return call_details, data 
  • sasmodels/kernel_iq.c

    rae2b6b5 ra738209  
    2222    int32_t pd_stride[MAX_PD];  // stride to move to the next index at this level 
    2323#endif // MAX_PD > 0 
    24     int32_t par_offset[NPARS];  // offset of par value blocks in the value & weight vector 
    25     int32_t par_coord[NPARS];   // ids of the coordination parameters 
    26     int32_t pd_coord[NPARS];    // polydispersity coordination bitvector 
     24    int32_t pd_prod;            // total number of voxels in hypercube 
     25    int32_t pd_sum;             // total length of the weights vector 
    2726    int32_t num_active;         // number of non-trivial pd loops 
    28     int32_t total_pd;           // total number of voxels in hypercube 
    29     int32_t num_coord;          // number of coordinated parameters 
    3027    int32_t theta_par;          // id of spherical correction variable 
    3128} ProblemDetails; 
     
    4340    const int32_t pd_stop,      // where we are stopping in the polydispersity loop 
    4441    global const ProblemDetails *details, 
    45     global const double *weights, 
    4642    global const double *values, 
    4743    global const double *q, // nq q values, with padding to boundary 
     
    5450  ParameterBlock local_values;  // current parameter values 
    5551  double *pvec = (double *)(&local_values);  // Alias named parameters with a vector 
    56   double norm; 
    57  
    58   // number of active loops 
    59   const int num_active = details->num_active; 
    6052 
    6153  // Fill in the initial variables 
     
    6456  #endif 
    6557  for (int k=0; k < NPARS; k++) { 
    66     pvec[k] = values[details->par_offset[k]]; 
     58    pvec[k] = values[k+2]; 
    6759  } 
    6860 
    6961  // Monodisperse computation 
    70   if (num_active == 0) { 
     62  if (details->num_active == 0) { 
     63    double norm, scale, background; 
    7164    #ifdef INVALID 
    7265    if (INVALID(local_values)) { return; } 
    7366    #endif 
     67 
    7468    norm = CALL_VOLUME(local_values); 
    75  
    76     double scale, background; 
    7769    scale = values[0]; 
    7870    background = values[1]; 
     
    9082#if MAX_PD > 0 
    9183 
     84  const double *pd_value = values+2+NPARS; 
     85  const double *pd_weight = pd_value+details->pd_sum; 
     86 
    9287  // need product of weights at every Iq calc, so keep product of 
    9388  // weights from the outer loops so that weight = partial_weight * fast_weight 
     89  double pd_norm; 
    9490  double partial_weight; // product of weight w4*w3*w2 but not w1 
    9591  double spherical_correction; // cosine correction for latitude variation 
    9692  double weight; // product of partial_weight*w1*spherical_correction 
    9793 
    98   // Location in the polydispersity hypercube, one index per dimension. 
    99   int pd_index[MAX_PD]; 
    100  
    101   // Location of the coordinated parameters in their own sub-cubes. 
    102   int offset[NPARS]; 
    103  
    104   // Number of coordinated indices 
    105   const int num_coord = details->num_coord; 
    106  
    10794  // Number of elements in the longest polydispersity loop 
    108   const int fast_length = details->pd_length[0]; 
     95  const int p0_par = details->pd_par[0]; 
     96  const int p0_length = details->pd_length[0]; 
     97  const int p0_offset = details->pd_offset[0]; 
     98  const int p0_is_theta = (p0_par == details->theta_par); 
     99  int p0_index; 
    109100 
    110101  // Trigger the reset behaviour that happens at the end the fast loop 
    111102  // by setting the initial index >= weight vector length. 
    112   pd_index[0] = fast_length; 
     103  p0_index = p0_length; 
    113104 
    114105  // Default the spherical correction to 1.0 in case it is not otherwise set 
     
    119110  // calls.  This means initializing them to 0 at the start and accumulating 
    120111  // them between calls. 
    121   norm = pd_start == 0 ? 0.0 : result[nq]; 
     112  pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 
     113 
    122114  if (pd_start == 0) { 
    123115    #ifdef USE_OPENMP 
     
    132124  for (int loop_index=pd_start; loop_index < pd_stop; loop_index++) { 
    133125    // check if fast loop needs to be reset 
    134     if (pd_index[0] == fast_length) { 
    135       //printf("should be here with %d active\n", num_active); 
     126    if (p0_index == p0_length) { 
    136127 
    137       // Compute position in polydispersity hypercube 
    138       for (int k=0; k < num_active; k++) { 
    139         pd_index[k] = (loop_index/details->pd_stride[k])%details->pd_length[k]; 
    140         //printf("pd_index[%d] = %d\n",k,pd_index[k]); 
    141       } 
    142  
    143       // Compute partial weights 
     128      // Compute position in polydispersity hypercube and partial weight 
    144129      partial_weight = 1.0; 
    145       //printf("partial weight %d: ", loop_index); 
    146       for (int k=1; k < num_active; k++) { 
    147         double wi = weights[details->pd_offset[k] + pd_index[k]]; 
    148         //printf("pd[%d]=par[%d]=%g ", k, details->pd_par[k], wi); 
    149         partial_weight *= wi; 
    150       } 
    151       //printf("\n"); 
    152  
    153       // Update parameter offsets in weight vector 
    154       //printf("slow %d: ", loop_index); 
    155       for (int k=0; k < num_coord; k++) { 
    156         int par = details->par_coord[k]; 
    157         int coord = details->pd_coord[k]; 
    158         int this_offset = details->par_offset[par]; 
    159         int block_size = 1; 
    160         for (int bit=0; coord != 0; bit++) { 
    161           if (coord&1) { 
    162               this_offset += block_size * pd_index[bit]; 
    163               block_size *= details->pd_length[bit]; 
    164           } 
    165           coord >>= 1; 
    166         } 
    167         offset[par] = this_offset; 
    168         pvec[par] = values[this_offset]; 
    169         //printf("par[%d]=v[%d]=%g \n", k, offset[k], pvec[k]); 
    170         // if theta is not coordinated with fast index, precompute spherical correction 
    171         if (par == details->theta_par && !(details->par_coord[k]&1)) { 
    172           spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1.e-6); 
     130      for (int k=1; k < details->num_active; k++) { 
     131        int pk = details->pd_par[k]; 
     132        int index = details->pd_offset[k] + (loop_index/details->pd_stride[k])%details->pd_length[k]; 
     133        pvec[pk] = pd_value[index]; 
     134        partial_weight *= pd_weight[index]; 
     135        if (pk == details->theta_par) { 
     136          spherical_correction = fmax(fabs(cos(M_PI_180*pvec[pk])), 1.e-6); 
    173137        } 
    174138      } 
    175       //printf("\n"); 
     139      p0_index = loop_index%p0_length; 
    176140    } 
    177141 
    178     // Update fast parameters 
    179     //printf("fast %d: ", loop_index); 
    180     for (int k=0; k < num_coord; k++) { 
    181       if (details->pd_coord[k]&1) { 
    182         const int par = details->par_coord[k]; 
    183         pvec[par] = values[offset[par]++]; 
    184         //printf("p[%d]=v[%d]=%g ", par, offset[par]-1, pvec[par]); 
    185         // if theta is coordinated with fast index, compute spherical correction each time 
    186         if (par == details->theta_par) { 
    187           spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1.e-6); 
    188         } 
    189       } 
     142    // Update parameter p0 
     143    weight = partial_weight*pd_weight[p0_offset + p0_index]; 
     144    pvec[p0_par] = pd_value[p0_offset + p0_index]; 
     145    if (p0_is_theta) { 
     146      spherical_correction = fmax(fabs(cos(M_PI_180*pvec[p0_par])), 1.e-6); 
    190147    } 
    191     //printf("\n"); 
    192  
    193     // Increment fast index 
    194     const double wi = weights[details->pd_offset[0] + pd_index[0]]; 
    195     weight = partial_weight*wi; 
    196     pd_index[0]++; 
     148    p0_index++; 
    197149 
    198150    #ifdef INVALID 
     
    206158      // where it becomes zero.  If the entirety of the correction 
    207159      weight *= spherical_correction; 
    208       norm += weight * CALL_VOLUME(local_values); 
     160      pd_norm += weight * CALL_VOLUME(local_values); 
    209161 
    210162      #ifdef USE_OPENMP 
     
    218170  } 
    219171 
    220   if (pd_stop >= details->total_pd) { 
     172  if (pd_stop >= details->pd_prod) { 
    221173    // End of the PD loop we can normalize 
    222174    double scale, background; 
     
    227179    #endif 
    228180    for (int q_index=0; q_index < nq; q_index++) { 
    229       result[q_index] = (norm>0. ? scale*result[q_index]/norm + background : background); 
     181      result[q_index] = (pd_norm>0. ? scale*result[q_index]/pd_norm + background : background); 
    230182    } 
    231183  } 
    232184 
    233185  // Remember the updated norm. 
    234   result[nq] = norm; 
     186  result[nq] = pd_norm; 
    235187#endif // MAX_PD > 0 
    236188} 
  • sasmodels/kernel_iq.cl

    rae2b6b5 ra738209  
    2222    int32_t pd_stride[MAX_PD];  // stride to move to the next index at this level 
    2323#endif // MAX_PD > 0 
    24     int32_t par_offset[NPARS];  // offset of par value blocks in the value & weight vector 
    25     int32_t par_coord[NPARS];   // ids of the coordination parameters 
    26     int32_t pd_coord[NPARS];    // polydispersity coordination bitvector 
     24    int32_t pd_prod;            // total number of voxels in hypercube 
     25    int32_t pd_sum;             // total length of the weights vector 
    2726    int32_t num_active;         // number of non-trivial pd loops 
    28     int32_t total_pd;           // total number of voxels in hypercube 
    29     int32_t num_coord;          // number of coordinated parameters 
    3027    int32_t theta_par;          // id of spherical correction variable 
    3128} ProblemDetails; 
     
    4340    const int32_t pd_stop,      // where we are stopping in the polydispersity loop 
    4441    global const ProblemDetails *details, 
    45     global const double *weights, 
    4642    global const double *values, 
    4743    global const double *q, // nq q values, with padding to boundary 
    48     global double *result,  // nq+3 return values, again with padding 
     44    global double *result,  // nq+1 return values, again with padding 
    4945    const double cutoff     // cutoff in the polydispersity weight product 
    5046    ) 
    5147{ 
    5248  // Storage for the current parameter values.  These will be updated as we 
    53   // walk the polydispersity cube. 
    54   ParameterBlock local_values;  // current parameter values 
    55   double *pvec = (double *)(&local_values);  // Alias named parameters with a vector 
    56   double norm; 
     49  // walk the polydispersity cube.  local_values will be aliased to pvec. 
     50  local ParameterBlock local_values; 
    5751 
    5852  // who we are and what element we are working with 
    5953  const int q_index = get_global_id(0); 
    60  
    61   // number of active loops 
    62   const int num_active = details->num_active; 
     54  const int thread = get_local_id(0); 
    6355 
    6456  // Fill in the initial variables 
    65   for (int k=0; k < NPARS; k++) { 
    66     pvec[k] = values[details->par_offset[k]]; 
    67   } 
     57  event_t e = async_work_group_copy((local double *)&local_values, values+2, NPARS, 0); 
     58  wait_group_events(1, &e); 
    6859 
    6960  // Monodisperse computation 
    70   if (num_active == 0) { 
     61  if (details->num_active == 0) { 
     62    double norm, scale, background; 
     63    // TODO: only needs to be done by one process... 
    7164    #ifdef INVALID 
    7265    if (INVALID(local_values)) { return; } 
    7366    #endif 
     67 
    7468    norm = CALL_VOLUME(local_values); 
    75  
    76     double scale, background; 
    7769    scale = values[0]; 
    7870    background = values[1]; 
     
    9284  // norm will be shared across all threads. 
    9385 
     86  // "values" is global and can't be assigned to a local, so even though only 
     87  // the alias is only needed for thread 0 it is allocated in all threads. 
     88  global const double *pd_value = values+2+NPARS; 
     89  global const double *pd_weight = pd_value+details->pd_sum; 
     90 
    9491  // need product of weights at every Iq calc, so keep product of 
    9592  // weights from the outer loops so that weight = partial_weight * fast_weight 
    96   double partial_weight; // product of weight w4*w3*w2 but not w1 
    97   double spherical_correction; // cosine correction for latitude variation 
    98   double weight; // product of partial_weight*w1*spherical_correction 
    99  
    100   // Location in the polydispersity hypercube, one index per dimension. 
    101   int pd_index[MAX_PD]; 
    102  
    103   // Location of the coordinated parameters in their own sub-cubes. 
    104   int offset[NPARS]; 
    105  
    106   // Number of coordinated indices 
    107   const int num_coord = details->num_coord; 
     93  local double pd_norm; 
     94  local double partial_weight; // product of weight w4*w3*w2 but not w1 
     95  local double spherical_correction; // cosine correction for latitude variation 
     96  local double weight; // product of partial_weight*w1*spherical_correction 
     97  local double *pvec; 
     98  local int p0_par; 
     99  local int p0_length; 
     100  local int p0_offset; 
     101  local int p0_is_theta; 
     102  local int p0_index; 
    108103 
    109104  // Number of elements in the longest polydispersity loop 
    110   const int fast_length = details->pd_length[0]; 
    111  
    112   // Trigger the reset behaviour that happens at the end the fast loop 
    113   // by setting the initial index >= weight vector length. 
    114   pd_index[0] = fast_length; 
    115  
    116   // Default the spherical correction to 1.0 in case it is not otherwise set 
    117   spherical_correction = 1.0; 
    118  
    119   // Since we are no longer looping over the entire polydispersity hypercube 
    120   // for each q, we need to track the result and normalization values between 
    121   // calls.  This means initializing them to 0 at the start and accumulating 
    122   // them between calls. 
    123   norm = pd_start == 0 ? 0.0 : result[nq]; 
     105  barrier(CLK_LOCAL_MEM_FENCE); 
     106  if (thread == 0) { 
     107    pvec = (local double *)(&local_values); 
     108 
     109    // Number of elements in the longest polydispersity loop 
     110    p0_par = details->pd_par[0]; 
     111    p0_length = details->pd_length[0]; 
     112    p0_offset = details->pd_offset[0]; 
     113    p0_is_theta = (p0_par == details->theta_par); 
     114 
     115    // Trigger the reset behaviour that happens at the end the fast loop 
     116    // by setting the initial index >= weight vector length. 
     117    p0_index = p0_length; 
     118 
     119    // Default the spherical correction to 1.0 in case it is not otherwise set 
     120    spherical_correction = 1.0; 
     121 
     122    // Since we are no longer looping over the entire polydispersity hypercube 
     123    // for each q, we need to track the result and normalization values between 
     124    // calls.  This means initializing them to 0 at the start and accumulating 
     125    // them between calls. 
     126    pd_norm = pd_start == 0 ? 0.0 : result[nq]; 
     127  } 
     128  barrier(CLK_LOCAL_MEM_FENCE); 
     129 
    124130  if (q_index < nq) { 
    125131    this_result = pd_start == 0 ? 0.0 : result[q_index]; 
     
    128134  // Loop over the weights then loop over q, accumulating values 
    129135  for (int loop_index=pd_start; loop_index < pd_stop; loop_index++) { 
    130     // check if fast loop needs to be reset 
    131     if (pd_index[0] == fast_length) { 
    132       //printf("should be here with %d active\n", num_active); 
    133  
    134       // Compute position in polydispersity hypercube 
    135       for (int k=0; k < num_active; k++) { 
    136         pd_index[k] = (loop_index/details->pd_stride[k])%details->pd_length[k]; 
    137         //printf("pd_index[%d] = %d\n",k,pd_index[k]); 
     136    barrier(CLK_LOCAL_MEM_FENCE); 
     137    if (thread == 0) { 
     138      // check if fast loop needs to be reset 
     139      if (p0_index == p0_length) { 
     140        //printf("should be here with %d active\n", num_active); 
     141 
     142        // Compute position in polydispersity hypercube and partial weight 
     143        partial_weight = 1.0; 
     144        for (int k=1; k < details->num_active; k++) { 
     145          int pk = details->pd_par[k]; 
     146          int index = details->pd_offset[k] + (loop_index/details->pd_stride[k])%details->pd_length[k]; 
     147          pvec[pk] = pd_value[index]; 
     148          partial_weight *= pd_weight[index]; 
     149          //printf("index[%d] = %d\n",k,index); 
     150          if (pk == details->theta_par) { 
     151            spherical_correction = fmax(fabs(cos(M_PI_180*pvec[pk])), 1.e-6); 
     152          } 
     153        } 
     154        p0_index = loop_index%p0_length; 
     155        //printf("\n"); 
    138156      } 
    139157 
    140       // need to compute the product of the weights.  If the vector were really 
    141       // long, we could split the work into groups, with each thread taking 
    142       // every nth weight, but there really is no call for it here.  We could 
    143       // also do some clever pair-wise multiplication similar to parallel 
    144       // prefix, but again simpler is probably faster since n is likely small. 
    145       // Compute partial weights 
    146       partial_weight = 1.0; 
    147       //printf("partial weight %d: ", loop_index); 
    148       for (int k=1; k < num_active; k++) { 
    149         double wi = weights[details->pd_offset[k] + pd_index[k]]; 
    150         //printf("pd[%d]=par[%d]=%g ", k, details->pd_par[k], wi); 
    151         partial_weight *= wi; 
     158      // Update parameter p0 
     159      weight = partial_weight*pd_weight[p0_offset + p0_index]; 
     160      pvec[p0_par] = pd_value[p0_offset + p0_index]; 
     161      if (p0_is_theta) { 
     162        spherical_correction = fmax(fabs(cos(M_PI_180*pvec[p0_par])), 1.e-6); 
    152163      } 
    153       //printf("\n"); 
    154  
    155       // Update parameter offsets in weight vector 
    156       //printf("slow %d: ", loop_index); 
    157       for (int k=0; k < num_coord; k++) { 
    158         int par = details->par_coord[k]; 
    159         int coord = details->pd_coord[k]; 
    160         int this_offset = details->par_offset[par]; 
    161         int block_size = 1; 
    162         for (int bit=0; coord != 0; bit++) { 
    163           if (coord&1) { 
    164               this_offset += block_size * pd_index[bit]; 
    165               block_size *= details->pd_length[bit]; 
    166           } 
    167           coord >>= 1; 
    168         } 
    169         offset[par] = this_offset; 
    170         pvec[par] = values[this_offset]; 
    171         //printf("par[%d]=v[%d]=%g \n", k, offset[k], pvec[k]); 
    172         // if theta is not coordinated with fast index, precompute spherical correction 
    173         if (par == details->theta_par && !(details->par_coord[k]&1)) { 
    174           spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1.e-6); 
    175         } 
    176       } 
    177       //printf("\n"); 
    178     } 
    179  
    180     // Update fast parameters 
    181     //printf("fast %d: ", loop_index); 
    182     for (int k=0; k < num_coord; k++) { 
    183       if (details->pd_coord[k]&1) { 
    184         const int par = details->par_coord[k]; 
    185         pvec[par] = values[offset[par]++]; 
    186         //printf("p[%d]=v[%d]=%g ", par, offset[par]-1, pvec[par]); 
    187         // if theta is coordinated with fast index, compute spherical correction each time 
    188         if (par == details->theta_par) { 
    189           spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1.e-6); 
    190         } 
    191       } 
    192     } 
     164      p0_index++; 
     165    } 
     166    barrier(CLK_LOCAL_MEM_FENCE); 
    193167    //printf("\n"); 
    194168 
    195169    // Increment fast index 
    196     const double wi = weights[details->pd_offset[0] + pd_index[0]]; 
    197     weight = partial_weight*wi; 
    198     pd_index[0]++; 
    199170 
    200171    #ifdef INVALID 
     
    208179      // where it becomes zero.  If the entirety of the correction 
    209180      weight *= spherical_correction; 
    210       norm += weight * CALL_VOLUME(local_values); 
     181      pd_norm += weight * CALL_VOLUME(local_values); 
    211182 
    212183      const double scattering = CALL_IQ(q, q_index, local_values); 
     
    216187 
    217188  if (q_index < nq) { 
    218     if (pd_stop >= details->total_pd) { 
     189    if (pd_stop >= details->pd_prod) { 
    219190      // End of the PD loop we can normalize 
    220191      double scale, background; 
    221192      scale = values[0]; 
    222193      background = values[1]; 
    223       result[q_index] = (norm>0. ? scale*this_result/norm + background : background); 
     194      result[q_index] = (pd_norm>0. ? scale*this_result/pd_norm + background : background); 
    224195    } else { 
    225196      // Partial result, so remember it but don't normalize it. 
     
    228199 
    229200    // Remember the updated norm. 
    230     if (q_index == 0) result[nq] = norm; 
     201    if (q_index == 0) result[nq] = pd_norm; 
    231202  } 
    232203 
  • sasmodels/kernelcl.py

    r56b2687 ra738209  
    521521                     else np.float32)  # will never get here, so use np.float32 
    522522 
    523     def __call__(self, call_details, weights, values, cutoff): 
     523    def __call__(self, call_details, values, cutoff): 
    524524        # type: (CallDetails, np.ndarray, np.ndarray, float) -> np.ndarray 
    525525        context = self.queue.context 
     
    527527        details_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 
    528528                              hostbuf=call_details.buffer) 
    529         weights_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 
    530                               hostbuf=weights) if len(weights) else None 
    531529        values_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 
    532530                             hostbuf=values) 
     
    534532        # Call kernel and retrieve results 
    535533        step = 100 
    536         for start in range(0, call_details.total_pd, step): 
    537             stop = min(start+step, call_details.total_pd) 
     534        for start in range(0, call_details.pd_prod, step): 
     535            stop = min(start+step, call_details.pd_prod) 
    538536            args = [ 
    539537                np.uint32(self.q_input.nq), np.int32(start), np.int32(stop), 
    540                 details_b, weights_b, values_b, self.q_input.q_b, self.result_b, 
     538                details_b, values_b, self.q_input.q_b, self.result_b, 
    541539                self.real(cutoff), 
    542540            ] 
     
    545543 
    546544        # Free buffers 
    547         for v in (details_b, weights_b, values_b): 
     545        for v in (details_b, values_b): 
    548546            if v is not None: v.release() 
    549547 
  • sasmodels/kerneldll.py

    r56b2687 ra738209  
    165165    exist yet if it hasn't been compiled. 
    166166    """ 
    167     return os.path.join(DLL_PATH, dll_name(model_info, dtype)+".so") 
     167    return os.path.join(DLL_PATH, dll_name(model_info, dtype)) 
    168168 
    169169 
     
    206206        need_recompile = dll_time < newest_source 
    207207    if need_recompile: 
    208         basename = dll_name(model_info, dtype) + "_" 
    209         fid, filename = tempfile.mkstemp(suffix=".c", prefix=basename) 
     208        basename = os.path.splitext(os.path.basename(dll))[0] + "_" 
     209        fd, filename = tempfile.mkstemp(suffix=".c", prefix=basename) 
    210210        source = generate.convert_type(source, dtype) 
    211         fd, filename = tempfile.mkstemp(suffix=".c", prefix=tempfile_prefix) 
    212211        with os.fdopen(fd, "w") as file: 
    213212            file.write(source) 
     
    269268              else c_longdouble) 
    270269 
    271         # int, int, int, int*, double*, double*, double*, double*, double*, double 
    272         argtypes = [c_int32]*3 + [c_void_p]*5 + [fp] 
     270        # int, int, int, int*, double*, double*, double*, double*, double 
     271        argtypes = [c_int32]*3 + [c_void_p]*4 + [fp] 
    273272        self._Iq = self._dll[generate.kernel_name(self.info, is_2d=False)] 
    274273        self._Iqxy = self._dll[generate.kernel_name(self.info, is_2d=True)] 
     
    342341                     else np.float128) 
    343342 
    344     def __call__(self, call_details, weights, values, cutoff): 
     343    def __call__(self, call_details, values, cutoff): 
    345344        # type: (CallDetails, np.ndarray, np.ndarray, float) -> np.ndarray 
    346345 
    347346        #print("in kerneldll") 
    348         #print("weights", weights) 
    349347        #print("values", values) 
    350         start, stop = 0, call_details.total_pd 
     348        start, stop = 0, call_details.pd_prod 
    351349        args = [ 
    352350            self.q_input.nq, # nq 
     
    354352            stop, # pd_stop pd_stride[MAX_PD] 
    355353            call_details.buffer.ctypes.data, # problem 
    356             weights.ctypes.data,  # weights 
    357354            values.ctypes.data,  #pars 
    358355            self.q_input.q.ctypes.data, #q 
  • sasmodels/kernelpy.py

    r7ae2b7f ra738209  
    77:class:`kernelcl.GpuModel` and :class:`kerneldll.DllModel`. 
    88""" 
     9from __future__ import division, print_function 
     10 
    911import numpy as np  # type: ignore 
    1012from numpy import pi, cos  #type: ignore 
     
    153155                        else (lambda: 1.0)) 
    154156 
    155     def __call__(self, call_details, weights, values, cutoff): 
     157    def __call__(self, call_details, values, cutoff): 
    156158        assert isinstance(call_details, details.CallDetails) 
    157159        res = _loops(self._parameter_vector, self._form, self._volume, 
    158                      self.q_input.nq, call_details, weights, values, cutoff) 
     160                     self.q_input.nq, call_details, values, cutoff) 
    159161        return res 
    160162 
     
    166168        self.q_input = None 
    167169 
    168 def _loops(parameters, form, form_volume, nq, call_details, 
    169            weights, values, cutoff): 
     170def _loops(parameters, form, form_volume, nq, details, 
     171           values, cutoff): 
    170172    # type: (np.ndarray, Callable[[], np.ndarray], Callable[[], float], int, details.CallDetails, np.ndarray, np.ndarray, float) -> None 
    171173    ################################################################ 
     
    178180    #                                                              # 
    179181    ################################################################ 
    180     parameters[:] = values[call_details.par_offset] 
     182    NPARS = len(parameters) 
     183    parameters[:] = values[2:NPARS+2] 
    181184    scale, background = values[0], values[1] 
    182     if call_details.num_active == 0: 
     185    if details.num_active == 0: 
    183186        norm = float(form_volume()) 
    184187        if norm > 0.0: 
     
    187190            return np.ones(nq, 'd')*background 
    188191 
     192    pd_value = values[2+NPARS:2+NPARS+details.pd_sum] 
     193    pd_weight = values[2+NPARS+details.pd_sum:] 
     194 
     195    pd_norm = 0.0 
     196    spherical_correction = 1.0 
    189197    partial_weight = np.NaN 
    190     spherical_correction = 1.0 
    191     pd_stride = call_details.pd_stride[:call_details.num_active] 
    192     pd_length = call_details.pd_length[:call_details.num_active] 
    193     pd_offset = call_details.pd_offset[:call_details.num_active] 
    194     pd_index = np.empty_like(pd_offset) 
    195     offset = np.empty_like(call_details.par_offset) 
    196     theta = call_details.theta_par 
    197     fast_length = pd_length[0] 
    198     pd_index[0] = fast_length 
     198    weight =np.NaN 
     199 
     200    p0_par = details.pd_par[0] 
     201    p0_is_theta = (p0_par == details.theta_par) 
     202    p0_length = details.pd_length[0] 
     203    p0_index = p0_length 
     204    p0_offset = details.pd_offset[0] 
     205 
     206    pd_par = details.pd_par[:details.num_active] 
     207    pd_offset = details.pd_offset[:details.num_active] 
     208    pd_stride = details.pd_stride[:details.num_active] 
     209    pd_length = details.pd_length[:details.num_active] 
     210 
    199211    total = np.zeros(nq, 'd') 
    200     norm = 0.0 
    201     for loop_index in range(call_details.total_pd): 
     212    for loop_index in range(details.pd_prod): 
    202213        # update polydispersity parameter values 
    203         if pd_index[0] == fast_length: 
    204             pd_index[:] = (loop_index/pd_stride)%pd_length 
    205             partial_weight = np.prod(weights[pd_offset+pd_index][1:]) 
    206             for k in range(call_details.num_coord): 
    207                 par = call_details.par_coord[k] 
    208                 coord = call_details.pd_coord[k] 
    209                 this_offset = call_details.par_offset[par] 
    210                 block_size = 1 
    211                 for bit in range(len(pd_offset)): 
    212                     if coord&1: 
    213                         this_offset += block_size * pd_index[bit] 
    214                         block_size *= pd_length[bit] 
    215                     coord >>= 1 
    216                     if coord == 0: break 
    217                 offset[par] = this_offset 
    218                 parameters[par] = values[this_offset] 
    219                 if par == theta and not (call_details.par_coord[k]&1): 
    220                     spherical_correction = max(abs(cos(pi/180 * parameters[theta])), 1e-6) 
    221         for k in range(call_details.num_coord): 
    222             if call_details.pd_coord[k]&1: 
    223                 #par = call_details.par_coord[k] 
    224                 parameters[par] = values[offset[par]] 
    225                 #print "par",par,offset[par],parameters[par+2] 
    226                 offset[par] += 1 
    227                 if par == theta: 
    228                     spherical_correction = max(abs(cos(pi/180 * parameters[theta])), 1e-6) 
    229  
    230         weight = partial_weight * weights[pd_offset[0] + pd_index[0]] 
    231         pd_index[0] += 1 
     214        if p0_index == p0_length: 
     215            pd_index = (loop_index//pd_stride)%pd_length 
     216            parameters[pd_par] = pd_value[pd_offset+pd_index] 
     217            partial_weight = np.prod(pd_weight[pd_offset+pd_index][1:]) 
     218            if details.theta_par >= 0: 
     219                spherical_correction = max(abs(cos(pi/180 * parameters[details.theta_par])), 1e-6) 
     220            p0_index = loop_index%p0_length 
     221 
     222        weight = partial_weight * pd_weight[p0_offset + p0_index] 
     223        parameters[p0_par] = pd_value[p0_offset + p0_index] 
     224        if p0_is_theta: 
     225            spherical_correction = max(abs(cos(pi/180 * parameters[p0_par])), 1e-6) 
     226        p0_index += 1 
    232227        if weight > cutoff: 
    233228            # Call the scattering function 
     
    241236            weight *= spherical_correction 
    242237            total += weight * I 
    243             norm += weight * form_volume() 
    244  
    245     if norm > 0.0: 
    246         return (scale/norm)*total + background 
     238            pd_norm += weight * form_volume() 
     239 
     240    if pd_norm > 0.0: 
     241        return (scale/pd_norm)*total + background 
    247242    else: 
    248243        return np.ones(nq, 'd')*background 
  • sasmodels/sasview_model.py

    r56b2687 ra738209  
    513513        else: 
    514514            q_vectors = [np.asarray(qx)] 
    515         kernel = self._model.make_kernel(q_vectors) 
     515        calculator = self._model.make_kernel(q_vectors) 
    516516        pairs = [self._get_weights(p) 
    517517                 for p in self._model_info.parameters.call_parameters] 
    518         call_details, weight, value = kernel.build_details(kernel, pairs) 
    519         result = kernel(call_details, weight, value, cutoff=self.cutoff) 
    520         kernel.release() 
     518        call_details, value = kernel.build_details(calculator, pairs) 
     519        result = calculator(call_details, value, cutoff=self.cutoff) 
     520        calculator.release() 
    521521        return result 
    522522 
Note: See TracChangeset for help on using the changeset viewer.