Changeset 7126c04 in sasmodels for sasmodels/kernelcl.py


Ignore:
Timestamp:
Nov 9, 2018 2:33:23 PM (5 years ago)
Author:
Paul Kienzle <pkienzle@…>
Branches:
master, core_shell_microgels, magnetic_model, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
Children:
0be86aa
Parents:
63d4dd1
Message:

use similar code for cuda and opencl

File:
1 edited

Legend:

Unmodified
Added
Removed
  • sasmodels/kernelcl.py

    rf872fd1 r7126c04  
    265265        # Cache for compiled programs, and for items in context 
    266266        self.compiled = {} 
    267         self.cache = {} 
    268267 
    269268    def has_type(self, dtype): 
     
    297296        return program 
    298297 
    299     def free_buffer(self, key): 
    300         if key in self.cache: 
    301             self.cache[key].release() 
    302             del self.cache[key] 
    303  
    304     def __del__(self): 
    305         for v in self.cache.values(): 
    306             release = getattr(v, 'release', lambda: None) 
    307             release() 
    308         self.cache = {} 
    309  
    310298_CURRENT_ID = 0 
    311 def unique_id(): 
    312     global _CURRENT_ID 
    313     _CURRENT_ID += 1 
    314     return _CURRENT_ID 
    315  
    316299def _create_some_context(): 
    317300    # type: () -> cl.Context 
     
    413396    that the compiler is allowed to take shortcuts. 
    414397    """ 
     398    info = None # type: ModelInfo 
     399    source = "" # type: str 
     400    dtype = None # type: np.dtype 
     401    fast = False # type: bool 
     402    _program = None # type: cl.Program 
     403    _kernels = None # type: Dict[str, cl.Kernel] 
     404 
    415405    def __init__(self, source, model_info, dtype=generate.F32, fast=False): 
    416406        # type: (Dict[str,str], ModelInfo, np.dtype, bool) -> None 
     
    419409        self.dtype = dtype 
    420410        self.fast = fast 
    421         self.timestamp = generate.ocl_timestamp(self.info) 
    422         self._cache_key = unique_id() 
    423411 
    424412    def __getstate__(self): 
     
    429417        # type: (Tuple[ModelInfo, str, np.dtype, bool]) -> None 
    430418        self.info, self.source, self.dtype, self.fast = state 
     419        self._program = self._kernels = None 
    431420 
    432421    def make_kernel(self, q_vectors): 
     
    434423        return GpuKernel(self, q_vectors) 
    435424 
    436     @property 
    437     def Iq(self): 
    438         return self._fetch_kernel('Iq') 
    439  
    440     def fetch_kernel(self, name): 
     425    def get_function(self, name): 
    441426        # type: (str) -> cl.Kernel 
    442427        """ 
     
    444429        does not already exist. 
    445430        """ 
    446         gpu = environment() 
    447         key = self._cache_key 
    448         if key not in gpu.cache: 
    449             program = gpu.compile_program( 
    450                 self.info.name, 
    451                 self.source['opencl'], 
    452                 self.dtype, 
    453                 self.fast, 
    454                 self.timestamp) 
    455             variants = ['Iq', 'Iqxy', 'Imagnetic'] 
    456             names = [generate.kernel_name(self.info, k) for k in variants] 
    457             kernels = [getattr(program, k) for k in names] 
    458             data = dict((k, v) for k, v in zip(variants, kernels)) 
    459             # keep a handle to program so GC doesn't collect 
    460             data['program'] = program 
    461             gpu.cache[key] = data 
    462         else: 
    463             data = gpu.cache[key] 
    464         return data[name] 
     431        if self._program is None: 
     432            self._prepare_program() 
     433        return self._kernels[name] 
     434 
     435    def _prepare_program(self): 
     436        # type: (str) -> None 
     437        env = environment() 
     438        timestamp = generate.ocl_timestamp(self.info) 
     439        program = env.compile_program( 
     440            self.info.name, 
     441            self.source['opencl'], 
     442            self.dtype, 
     443            self.fast, 
     444            timestamp) 
     445        variants = ['Iq', 'Iqxy', 'Imagnetic'] 
     446        names = [generate.kernel_name(self.info, k) for k in variants] 
     447        handles = [getattr(program, k) for k in names] 
     448        self._kernels = {k: v for k, v in zip(variants, handles)} 
     449        # keep a handle to program so GC doesn't collect 
     450        self._program = program 
    465451 
    466452# TODO: check that we don't need a destructor for buffers which go out of scope 
     
    504490            self.q[:self.nq] = q_vectors[0] 
    505491        self.global_size = [self.q.shape[0]] 
    506         self._cache_key = unique_id() 
    507  
    508     @property 
    509     def q_b(self): 
    510         """Lazy creation of q buffer so it can survive context reset""" 
     492        #print("creating inputs of size", self.global_size) 
     493 
     494        # transfer input value to gpu 
    511495        env = environment() 
    512         key = self._cache_key 
    513         if key not in env.cache: 
    514             context = env.context[self.dtype] 
    515             #print("creating inputs of size", self.global_size) 
    516             buffer = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 
    517                                hostbuf=self.q) 
    518             env.cache[key] = buffer 
    519         return env.cache[key] 
     496        context = env.context[self.dtype] 
     497        self.q_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 
     498                             hostbuf=self.q) 
    520499 
    521500    def release(self): 
     
    524503        Free the buffer associated with the q value 
    525504        """ 
    526         environment().free_buffer(id(self)) 
     505        if self.q_b is not None: 
     506            self.q_b.release() 
     507            self.q_b = None 
    527508 
    528509    def __del__(self): 
     
    536517    *model* is the GpuModel object to call 
    537518 
    538     The following attributes are defined: 
    539  
    540     *info* is the module information 
    541  
    542     *dtype* is the kernel precision 
    543  
    544     *dim* is '1d' or '2d' 
    545  
    546     *result* is a vector to contain the results of the call 
    547  
    548     The resulting call method takes the *pars*, a list of values for 
    549     the fixed parameters to the kernel, and *pd_pars*, a list of (value,weight) 
    550     vectors for the polydisperse parameters.  *cutoff* determines the 
    551     integration limits: any points with combined weight less than *cutoff* 
    552     will not be calculated. 
     519    The kernel is derived from :class:`Kernel`, providing the 
     520    :meth:`call_kernel` method to evaluate the kernel for a given set of 
     521    parameters.  Because of the need to move the q values to the GPU before 
     522    evaluation, the kernel is instantiated for a particular set of q vectors, 
     523    and can be called many times without transferring q each time. 
    553524 
    554525    Call :meth:`release` when done with the kernel instance. 
    555526    """ 
     527    #: SAS model information structure 
     528    info = None # type: ModelInfo 
     529    #: kernel precision 
     530    dtype = None # type: np.dtype 
     531    #: kernel dimensions (1d or 2d) 
     532    dim = "" # type: str 
     533    #: calculation results, updated after each call to :meth:`_call_kernel` 
     534    result = None # type: np.ndarray 
     535 
    556536    def __init__(self, model, q_vectors): 
    557         # type: (cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None 
     537        # type: (GpuModel, List[np.ndarray]) -> None 
    558538        dtype = model.dtype 
    559539        self.q_input = GpuInput(q_vectors, dtype) 
     
    561541        # F16 isn't sufficient, so don't support it 
    562542        self._as_dtype = np.float64 if dtype == generate.F64 else np.float32 
    563         self._cache_key = unique_id() 
    564543 
    565544        # attributes accessed from the outside 
     
    573552        self.result = np.empty(self.q_input.nq*nout+extra_q, dtype) 
    574553 
    575     @property 
    576     def _result_b(self): 
    577         """Lazy creation of result buffer so it can survive context reset""" 
     554        # allocate result value on gpu 
    578555        env = environment() 
    579         key = self._cache_key 
    580         if key not in env.cache: 
    581             context = env.context[self.dtype] 
    582             width = ((self.result.size+31)//32)*32 * self.dtype.itemsize 
    583             buffer = cl.Buffer(context, mf.READ_WRITE, width) 
    584             env.cache[key] = buffer 
    585         return env.cache[key] 
     556        context = env.context[self.dtype] 
     557        width = ((self.result.size+31)//32)*32 * self.dtype.itemsize 
     558        self._result_b = cl.Buffer(context, mf.READ_WRITE, width) 
    586559 
    587560    def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): 
     
    592565 
    593566        # Arrange data transfer to/from card 
    594         q_b = self.q_input.q_b 
    595         result_b = self._result_b 
    596567        details_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 
    597568                              hostbuf=call_details.buffer) 
     
    600571 
    601572        name = 'Iq' if self.dim == '1d' else 'Imagnetic' if magnetic else 'Iqxy' 
    602         kernel = self._model.fetch_kernel(name) 
     573        kernel = self._model.get_function(name) 
    603574        kernel_args = [ 
    604575            np.uint32(self.q_input.nq), None, None, 
    605             details_b, values_b, q_b, result_b, 
     576            details_b, values_b, self.q_input.q_b, self._result_b, 
    606577            self._as_dtype(cutoff), 
    607578            np.uint32(effective_radius_type), 
     
    626597                    time.sleep(0.001) 
    627598                    last_nap = current_time 
    628         cl.enqueue_copy(queue, self.result, result_b, wait_for=wait_for) 
     599        cl.enqueue_copy(queue, self.result, self._result_b, wait_for=wait_for) 
    629600        #print("result", self.result) 
    630601 
    631602        # Free buffers 
    632         for v in (details_b, values_b): 
    633             if v is not None: 
    634                 v.release() 
     603        details_b.release() 
     604        values_b.release() 
    635605 
    636606    def release(self): 
     
    639609        Release resources associated with the kernel. 
    640610        """ 
    641         environment().free_buffer(id(self)) 
    642611        self.q_input.release() 
     612        if self._result_b is not None: 
     613            self._result_b.release() 
     614            self._result_b = None 
    643615 
    644616    def __del__(self): 
Note: See TracChangeset for help on using the changeset viewer.