Changeset f2f67a6 in sasmodels


Ignore:
Timestamp:
Apr 15, 2016 5:26:24 PM (8 years ago)
Author:
Paul Kienzle <pkienzle@…>
Branches:
master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
Children:
ae2b6b5, 38a9b07, eb97b11
Parents:
0ff62d4
Message:

reenable opencl; works on cpu but not gpu

Files:
1 added
6 edited

Legend:

Unmodified
Added
Removed
  • .gitignore

    r4a82d4d rf2f67a6  
    44/*.csv 
    55*.pyc 
    6 *.cl 
    76*.so 
    87*.obj 
  • sasmodels/compare.py

    r8d62008 rf2f67a6  
    497497        value = calculator(**pars) 
    498498    average_time = toc()*1000./Nevals 
     499    #print("I(q)",value) 
    499500    return value, average_time 
    500501 
     
    604605    if Nbase > 0 and Ncomp > 0: 
    605606        resid = (base_value - comp_value) 
    606         relerr = resid/comp_value 
     607        relerr = resid/np.where(comp_value!=0., abs(comp_value), 1.0) 
    607608        _print_stats("|%s-%s|" 
    608609                     % (base.engine, comp.engine) + (" "*(3+len(comp.engine))), 
  • sasmodels/core.py

    rdd7fc12 rf2f67a6  
    142142            or not HAVE_OPENCL 
    143143            or not kernelcl.environment().has_type(numpy_dtype)): 
     144        #print("building dll", numpy_dtype) 
    144145        return kerneldll.load_dll(source, model_info, numpy_dtype) 
    145146    else: 
     147        #print("building ocl", numpy_dtype) 
    146148        return kernelcl.GpuModel(source, model_info, numpy_dtype, fast=fast) 
    147149 
  • sasmodels/generate.py

    ra5b8477 rf2f67a6  
    173173 
    174174try: 
    175     from typing import Tuple, Sequence, Iterator 
     175    from typing import Tuple, Sequence, Iterator, Dict 
    176176    from .modelinfo import ModelInfo 
    177177except ImportError: 
     
    298298    Return a timestamp for the model corresponding to the most recently 
    299299    changed file or dependency. 
     300 
     301    Note that this does not look at the time stamps for the OpenCL header 
     302    information since that need not trigger a recompile of the DLL. 
    300303    """ 
    301304    source_files = (model_sources(model_info) 
     
    304307    newest = max(getmtime(f) for f in source_files) 
    305308    return newest 
     309 
     310def model_templates(): 
     311    # type: () -> List[str] 
     312    # TODO: fails DRY; templates appear two places. 
     313    # should instead have model_info contain a list of paths 
     314    # Note: kernel_iq.cl is not on this list because changing it need not 
     315    # trigger a recompile of the dll. 
     316    return [joinpath(TEMPLATE_ROOT, filename) 
     317            for filename in ('kernel_header.c', 'kernel_iq.c')] 
    306318 
    307319def convert_type(source, dtype): 
     
    377389    return _template_cache[filename][1] 
    378390 
    379 def model_templates(): 
    380     # type: () -> List[str] 
    381     # TODO: fails DRY; templates are listed in two places. 
    382     # should instead have model_info contain a list of paths 
    383     return [joinpath(TEMPLATE_ROOT, filename) 
    384             for filename in ('kernel_header.c', 'kernel_iq.c')] 
    385  
    386391 
    387392_FN_TEMPLATE = """\ 
     
    390395    %(body)s 
    391396} 
    392  
    393397 
    394398""" 
     
    467471    # Load templates and user code 
    468472    kernel_header = load_template('kernel_header.c') 
    469     kernel_code = load_template('kernel_iq.c') 
     473    dll_code = load_template('kernel_iq.c') 
     474    ocl_code = load_template('kernel_iq.cl') 
    470475    user_code = [open(f).read() for f in model_sources(model_info)] 
    471476 
     
    506511    if _have_Iqxy(user_code): 
    507512        # Call 2D model 
    508         refs = ["q[2*i]", "q[2*i+1]"] + _call_pars("_v.", partable.iqxy_parameters) 
     513        refs = ["q[2*_i]", "q[2*_i+1]"] + _call_pars("_v.", partable.iqxy_parameters) 
    509514        call_iqxy = "#define CALL_IQ(_q,_i,_v) Iqxy(%s)" % (",".join(refs)) 
    510515    else: 
     
    521526    # TODO: allow mixed python/opencl kernels? 
    522527 
    523     # define the Iq kernel 
    524     source.append("#define KERNEL_NAME %s_Iq"%model_info.name) 
    525     source.append(call_iq) 
    526     source.append(kernel_code) 
    527     source.append("#undef CALL_IQ") 
    528     source.append("#undef KERNEL_NAME") 
    529  
    530     # define the Iqxy kernel from the same source with different #defines 
    531     source.append("#define KERNEL_NAME %s_Iqxy"%model_info.name) 
    532     source.append(call_iqxy) 
    533     source.append(kernel_code) 
    534     source.append("#undef CALL_IQ") 
    535     source.append("#undef KERNEL_NAME") 
    536  
     528    source.append("#if defined(USE_OPENCL)") 
     529    source.extend(_add_kernels(ocl_code, call_iq, call_iqxy, model_info.name)) 
     530    source.append("#else /* !USE_OPENCL */") 
     531    source.extend(_add_kernels(dll_code, call_iq, call_iqxy, model_info.name)) 
     532    source.append("#endif /* !USE_OPENCL */") 
    537533    return '\n'.join(source) 
     534 
     535def _add_kernels(kernel_code, call_iq, call_iqxy, name): 
     536    # type: (str, str, str, str) -> List[str] 
     537    source = [ 
     538        # define the Iq kernel 
     539        "#define KERNEL_NAME %s_Iq"%name, 
     540        call_iq, 
     541        kernel_code, 
     542        "#undef CALL_IQ", 
     543        "#undef KERNEL_NAME", 
     544 
     545        # define the Iqxy kernel from the same source with different #defines 
     546        "#define KERNEL_NAME %s_Iqxy"%name, 
     547        call_iqxy, 
     548        kernel_code, 
     549        "#undef CALL_IQ", 
     550        "#undef KERNEL_NAME", 
     551    ] 
     552    return source 
    538553 
    539554def load_kernel_module(model_name): 
    540555    # type: (str) -> module 
     556    """ 
     557    Return the kernel module named in *model_name*. 
     558 
     559    If the name ends in *.py* then load it as a custom model using 
     560    :func:`sasmodels.custom.load_custom_kernel_module`, otherwise 
     561    load it from :mod:`sasmodels.models`. 
     562    """ 
    541563    if model_name.endswith('.py'): 
    542564        kernel_module = load_custom_kernel_module(model_name) 
  • sasmodels/kernel_iq.c

    r6e7ff6d rf2f67a6  
    5454  local ParameterBlock local_values;  // current parameter values 
    5555  double *pvec = (double *)(&local_values);  // Alias named parameters with a vector 
     56  double norm; 
     57 
     58  // number of active loops 
     59  const int num_active = details->num_active; 
    5660 
    5761  // Fill in the initial variables 
     
    6367  } 
    6468 
     69  // Monodisperse computation 
     70  if (num_active == 0) { 
     71    #ifdef INVALID 
     72    if (INVALID(local_values)) { return; } 
     73    #endif 
     74    norm = CALL_VOLUME(local_values); 
     75 
     76    const double scale = values[0]; 
     77    const double background = values[1]; 
     78    // result[nq] = norm; // Total volume normalization 
     79 
     80    #ifdef USE_OPENMP 
     81    #pragma omp parallel for 
     82    #endif 
     83    for (int i=0; i < nq; i++) { 
     84      double scattering = CALL_IQ(q, i, local_values); 
     85      result[i] = (norm>0. ? scale*scattering/norm + background : background); 
     86    } 
     87    return; 
     88  } 
     89 
     90#if MAX_PD > 0 
    6591  // If it is the first round initialize the result to zero, otherwise 
    6692  // assume that the previous result has been passed back. 
     
    75101      result[i] = 0.0; 
    76102    } 
    77   } 
    78  
    79   // Monodisperse computation 
    80   if (details->num_active == 0) { 
    81     #ifdef INVALID 
    82     if (INVALID(local_values)) { return; } 
    83     #endif 
    84  
    85     const double norm = CALL_VOLUME(local_values); 
    86     const double scale = values[0]; 
    87     const double background = values[1]; 
    88     #ifdef USE_OPENMP 
    89     #pragma omp parallel for 
    90     #endif 
    91     result[nq] = norm; // Total volume normalization 
    92     for (int i=0; i < nq; i++) { 
    93       double scattering = CALL_IQ(q, i, local_values); 
    94       result[i] = (norm>0. ? scale*scattering/norm + background : background); 
    95     } 
    96     return; 
    97   } 
    98  
    99 #if MAX_PD > 0 
    100   //printf("Entering polydispersity from %d to %d\n", pd_start, pd_stop); 
    101   // Since we are no longer looping over the entire polydispersity hypercube 
    102   // for each q, we need to track the normalization values between calls. 
    103   double norm = 0.0; 
     103    norm = 0.0; 
     104  } else { 
     105    norm = result[nq]; 
     106  } 
    104107 
    105108  // need product of weights at every Iq calc, so keep product of 
     
    119122  pd_index[0] = fast_length; 
    120123 
     124  // Number of coordinated indices 
     125  const int num_coord = details->num_coord; 
     126 
    121127  // Loop over the weights then loop over q, accumulating values 
    122128  for (int loop_index=pd_start; loop_index < pd_stop; loop_index++) { 
    123     // check if indices need to be updated 
     129    // check if fast loop needs to be reset 
    124130    if (pd_index[0] == fast_length) { 
    125       //printf("should be here with %d active\n", details->num_active); 
     131      //printf("should be here with %d active\n", num_active); 
    126132 
    127133      // Compute position in polydispersity hypercube 
    128       for (int k=0; k < details->num_active; k++) { 
     134      for (int k=0; k < num_active; k++) { 
    129135        pd_index[k] = (loop_index/details->pd_stride[k])%details->pd_length[k]; 
    130136        //printf("pd_index[%d] = %d\n",k,pd_index[k]); 
     
    134140      partial_weight = 1.0; 
    135141      //printf("partial weight %d: ", loop_index); 
    136       for (int k=1; k < details->num_active; k++) { 
     142      for (int k=1; k < num_active; k++) { 
    137143        double wi = weights[details->pd_offset[k] + pd_index[k]]; 
    138144        //printf("pd[%d]=par[%d]=%g ", k, details->pd_par[k], wi); 
     
    143149      // Update parameter offsets in weight vector 
    144150      //printf("slow %d: ", loop_index); 
    145       for (int k=0; k < details->num_coord; k++) { 
     151      for (int k=0; k < num_coord; k++) { 
    146152        int par = details->par_coord[k]; 
    147153        int coord = details->pd_coord[k]; 
     
    160166        // if theta is not coordinated with fast index, precompute spherical correction 
    161167        if (par == details->theta_par && !(details->par_coord[k]&1)) { 
    162           spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1e-6); 
     168          spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1.e-6); 
    163169        } 
    164170      } 
     
    170176    double weight = partial_weight*wi; 
    171177    //printf("fast %d: ", loop_index); 
    172     for (int k=0; k < details->num_coord; k++) { 
     178    for (int k=0; k < num_coord; k++) { 
    173179      if (details->pd_coord[k]&1) { 
    174180        const int par = details->par_coord[k]; 
     
    177183        // if theta is coordinated with fast index, compute spherical correction each time 
    178184        if (par == details->theta_par) { 
    179           spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1e-6); 
     185          spherical_correction = fmax(fabs(cos(M_PI_180*pvec[details->theta_par])), 1.e-6); 
    180186        } 
    181187      } 
     
    205211  } 
    206212 
    207   // Accumulate norm. 
    208   result[nq] += norm; 
    209  
    210213  // End of the PD loop we can normalize 
    211214  if (pd_stop >= details->total_pd) { 
     
    219222    } 
    220223  } 
     224 
     225  // Remember the updated norm. 
     226  result[nq] = norm; 
    221227#endif // MAX_PD > 0 
    222228} 
  • sasmodels/kernelcl.py

    r8d62008 rf2f67a6  
    5656 
    5757try: 
    58     raise NotImplementedError("OpenCL not yet implemented for new kernel template") 
     58    #raise NotImplementedError("OpenCL not yet implemented for new kernel template") 
    5959    import pyopencl as cl  # type: ignore 
    6060    # Ask OpenCL for the default context so that we know that one exists 
     
    264264        key = "%s-%s-%s"%(name, dtype, fast) 
    265265        if key not in self.compiled: 
    266             print("compiling",name) 
     266            #print("OpenCL compile",name) 
    267267            dtype = np.dtype(dtype) 
    268268            program = compile_model(self.get_context(dtype), 
     
    373373        kernel_name = generate.kernel_name(self.info, is_2d) 
    374374        kernel = getattr(self.program, kernel_name) 
    375         return GpuKernel(kernel, self.info, q_vectors) 
     375        return GpuKernel(kernel, self.dtype, self.info, q_vectors) 
    376376 
    377377    def release(self): 
     
    443443        Free the memory. 
    444444        """ 
    445         if self.q is not None: 
    446             self.q.release() 
    447             self.q = None 
     445        if self.q_b is not None: 
     446            self.q_b.release() 
     447            self.q_b = None 
    448448 
    449449    def __del__(self): 
     
    471471    Call :meth:`release` when done with the kernel instance. 
    472472    """ 
    473     def __init__(self, kernel, model_info, q_vectors): 
    474         # type: (cl.Kernel, ModelInfo, List[np.ndarray]) -> None 
     473    def __init__(self, kernel, dtype, model_info, q_vectors): 
     474        # type: (cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None 
    475475        max_pd = model_info.parameters.max_pd 
    476476        npars = len(model_info.parameters.kernel_parameters)-2 
    477         q_input = GpuInput(q_vectors, kernel.dtype) 
     477        q_input = GpuInput(q_vectors, dtype) 
    478478        self.kernel = kernel 
    479479        self.info = model_info 
    480         self.dtype = kernel.dtype 
     480        self.dtype = dtype 
    481481        self.dim = '2d' if q_input.is_2d else '1d' 
    482482        # plus three for the normalization values 
    483         self.result = np.empty(q_input.nq+3, q_input.dtype) 
     483        self.result = np.empty(q_input.nq+3, dtype) 
    484484 
    485485        # Inputs and outputs for each kernel call 
    486486        # Note: res may be shorter than res_b if global_size != nq 
    487487        env = environment() 
    488         self.queue = env.get_queue(kernel.dtype) 
     488        self.queue = env.get_queue(dtype) 
    489489 
    490490        # details is int32 data, padded to an 8 integer boundary 
    491491        size = ((max_pd*5 + npars*3 + 2 + 7)//8)*8 
    492492        self.result_b = cl.Buffer(self.queue.context, mf.READ_WRITE, 
    493                                q_input.global_size[0] * kernel.dtype.itemsize) 
     493                               q_input.global_size[0] * dtype.itemsize) 
    494494        self.q_input = q_input # allocated by GpuInput above 
    495495 
    496496        self._need_release = [ self.result_b, self.q_input ] 
    497         self.real = (np.float32 if self.q_input.dtype == generate.F32 
    498                      else np.float64 if self.q_input.dtype == generate.F64 
    499                      else np.float16 if self.q_input.dtype == generate.F16 
     497        self.real = (np.float32 if dtype == generate.F32 
     498                     else np.float64 if dtype == generate.F64 
     499                     else np.float16 if dtype == generate.F16 
    500500                     else np.float32)  # will never get here, so use np.float32 
    501501 
    502502    def __call__(self, call_details, weights, values, cutoff): 
    503503        # type: (CallDetails, np.ndarray, np.ndarray, float) -> np.ndarray 
    504  
    505504        context = self.queue.context 
    506505        # Arrange data transfer to card 
     
    508507                              hostbuf=call_details.buffer) 
    509508        weights_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 
    510                               hostbuf=weights) 
     509                              hostbuf=weights) if len(weights) else None 
    511510        values_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 
    512511                             hostbuf=values) 
     
    521520        cl.enqueue_copy(self.queue, self.result, self.result_b) 
    522521        for v in (details_b, weights_b, values_b): 
    523             v.release() 
     522            if v is not None: v.release() 
    524523 
    525524        return self.result[:self.q_input.nq] 
Note: See TracChangeset for help on using the changeset viewer.