Changeset 7126c04 in sasmodels for sasmodels/kernelcuda.py


Timestamp:
Nov 9, 2018 2:33:23 PM
Author:
Paul Kienzle <pkienzle@…>
Branches:
master, core_shell_microgels, magnetic_model, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
Children:
0be86aa
Parents:
63d4dd1
Message:

use similar code for cuda and opencl
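
The change replaces the eager compilation inside make_kernel with a get_function method on GpuModel that compiles the program on first use and caches the per-variant kernel handles, mirroring the structure already used in kernelcl.py. The sketch below is a minimal, self-contained illustration of that compile-on-first-use pattern; LazyProgram and the dictionary standing in for the compiled SourceModule are hypothetical stand-ins, not the sasmodels API.

    class LazyProgram:
        """Illustrative stand-in for GpuModel's delayed compilation."""
        VARIANTS = ('Iq', 'Iqxy', 'Imagnetic')

        def __init__(self, source):
            self.source = source
            self._program = None   # compiled module, created on first use
            self._kernels = None   # dict: variant name -> kernel handle

        def get_function(self, name):
            """Fetch a kernel by name, compiling the program if needed."""
            if self._program is None:
                self._prepare_program()
            return self._kernels[name]

        def _prepare_program(self):
            # Stand-in for SourceModule compilation; a "kernel handle" here
            # is just a closure that reports which variant it represents.
            program = {k: (lambda k=k: 'called ' + k) for k in self.VARIANTS}
            self._kernels = {k: program[k] for k in self.VARIANTS}
            self._program = program  # keep a reference so handles stay valid

    model = LazyProgram('// kernel source')
    print(model.get_function('Iqxy')())  # compiles once; later calls hit the cache

Caching the handles in a dict keyed by variant name lets _call_kernel pick 'Iq', 'Iqxy' or 'Imagnetic' at call time without recompiling.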

File:
1 edited

  • sasmodels/kernelcuda.py

    rf872fd1 r7126c04  
    296296    dtype = None # type: np.dtype 
    297297    fast = False # type: bool 
    298     program = None # type: SourceModule 
    299     _kernels = None # type: List[cuda.Function] 
     298    _program = None # type: SourceModule 
     299    _kernels = None # type: Dict[str, cuda.Function] 
    300300 
    301301    def __init__(self, source, model_info, dtype=generate.F32, fast=False): 
     
    305305        self.dtype = dtype 
    306306        self.fast = fast 
    307         self.program = None # delay program creation 
    308         self._kernels = None 
    309307 
    310308    def __getstate__(self): 
     
    315313        # type: (Tuple[ModelInfo, str, np.dtype, bool]) -> None 
    316314        self.info, self.source, self.dtype, self.fast = state 
    317         self.program = None 
     315        self._program = self._kernels = None 
    318316 
    319317    def make_kernel(self, q_vectors): 
    320318        # type: (List[np.ndarray]) -> "GpuKernel" 
    321         if self.program is None: 
    322             compile_program = environment().compile_program 
    323             timestamp = generate.ocl_timestamp(self.info) 
    324             self.program = compile_program( 
    325                 self.info.name, 
    326                 self.source['opencl'], 
    327                 self.dtype, 
    328                 self.fast, 
    329                 timestamp) 
    330             variants = ['Iq', 'Iqxy', 'Imagnetic'] 
    331             names = [generate.kernel_name(self.info, k) for k in variants] 
    332             kernels = [self.program.get_function(k) for k in names] 
    333             self._kernels = dict((k, v) for k, v in zip(variants, kernels)) 
    334         is_2d = len(q_vectors) == 2 
    335         if is_2d: 
    336             kernel = [self._kernels['Iqxy'], self._kernels['Imagnetic']] 
    337         else: 
    338             kernel = [self._kernels['Iq']]*2 
    339         return GpuKernel(kernel, self.dtype, self.info, q_vectors) 
     319        return GpuKernel(self, q_vectors) 
     320 
     321    def get_function(self, name): 
     322        # type: (str) -> cuda.Function 
     323        """ 
     324        Fetch the kernel from the environment by name, compiling it if it 
     325        does not already exist. 
     326        """ 
     327        if self._program is None: 
     328            self._prepare_program() 
     329        return self._kernels[name] 
     330 
     331    def _prepare_program(self): 
     332        # type: () -> None 
     333        env = environment() 
     334        timestamp = generate.ocl_timestamp(self.info) 
     335        program = env.compile_program( 
     336            self.info.name, 
     337            self.source['opencl'], 
     338            self.dtype, 
     339            self.fast, 
     340            timestamp) 
     341        variants = ['Iq', 'Iqxy', 'Imagnetic'] 
     342        names = [generate.kernel_name(self.info, k) for k in variants] 
     343        kernels = [program.get_function(k) for k in names] 
     344        self._kernels = {k: v for k, v in zip(variants, kernels)} 
     345        # keep a handle to program so GC doesn't collect 
     346        self._program = program 
    340347 
    341348    def release(self): 
     
    394401        self.global_size = [self.q.shape[0]] 
    395402        #print("creating inputs of size", self.global_size) 
     403 
     404        # transfer input value to gpu 
    396405        self.q_b = cuda.to_device(self.q) 
    397406 
     
    413422    Callable SAS kernel. 
    414423 
    415     *kernel* is the GpuKernel object to call 
    416  
    417     *model_info* is the module information 
    418  
    419     *q_vectors* is the q vectors at which the kernel should be evaluated 
    420  
    421     *dtype* is the kernel precision 
    422  
    423     The resulting call method takes the *pars*, a list of values for 
    424     the fixed parameters to the kernel, and *pd_pars*, a list of (value,weight) 
    425     vectors for the polydisperse parameters.  *cutoff* determines the 
    426     integration limits: any points with combined weight less than *cutoff* 
    427     will not be calculated. 
     424    *model* is the GpuModel object to call 
     425 
     426    The kernel is derived from :class:`Kernel`, providing the 
     427    :meth:`call_kernel` method to evaluate the kernel for a given set of 
     428    parameters.  Because of the need to move the q values to the GPU before 
     429    evaluation, the kernel is instantiated for a particular set of q vectors, 
     430    and can be called many times without transferring q each time. 
    428431 
    429432    Call :meth:`release` when done with the kernel instance. 
    430433    """ 
    431     def __init__(self, kernel, dtype, model_info, q_vectors): 
    432         # type: (cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None 
     434    #: SAS model information structure 
     435    info = None # type: ModelInfo 
     436    #: kernel precision 
     437    dtype = None # type: np.dtype 
     438    #: kernel dimensions (1d or 2d) 
     439    dim = "" # type: str 
     440    #: calculation results, updated after each call to :meth:`_call_kernel` 
     441    result = None # type: np.ndarray 
     442 
     443    def __init__(self, model, q_vectors): 
     444        # type: (GpuModel, List[np.ndarray]) -> None 
     445        dtype = model.dtype 
    433446        self.q_input = GpuInput(q_vectors, dtype) 
    434         self.kernel = kernel 
     447        self._model = model 
    435448        # F16 isn't sufficient, so don't support it 
    436449        self._as_dtype = np.float64 if dtype == generate.F64 else np.float32 
     
    438451        # attributes accessed from the outside 
    439452        self.dim = '2d' if self.q_input.is_2d else '1d' 
    440         self.info = model_info 
    441         self.dtype = dtype 
     453        self.info = model.info 
     454        self.dtype = model.dtype 
    442455 
    443456        # holding place for the returned value 
     
    446459        self.result = np.empty(self.q_input.nq*nout+extra_q, dtype) 
    447460 
    448         # Inputs and outputs for each kernel call 
    449         # Note: res may be shorter than res_b if global_size != nq 
     461        # allocate result value on gpu 
    450462        width = ((self.result.size+31)//32)*32 * self.dtype.itemsize 
    451         self.result_b = cuda.mem_alloc(width) 
    452         self._need_release = [self.result_b] 
     463        self._result_b = cuda.mem_alloc(width) 
    453464 
    454465    def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): 
     
    458469        values_b = cuda.to_device(values) 
    459470 
    460         kernel = self.kernel[1 if magnetic else 0] 
    461         args = [ 
     471        name = 'Iq' if self.dim == '1d' else 'Imagnetic' if magnetic else 'Iqxy' 
     472        kernel = self._model.get_function(name) 
     473        kernel_args = [ 
    462474            np.uint32(self.q_input.nq), None, None, 
    463             details_b, values_b, self.q_input.q_b, self.result_b, 
     475            details_b, values_b, self.q_input.q_b, self._result_b, 
     
    496508        Release resources associated with the kernel. 
    497509        """ 
    498         for p in self._need_release: 
    499             p.free() 
    500         self._need_release = [] 
     510        self.q_input.release() 
     511        if self._result_b is not None: 
     512            self._result_b.free() 
     513            self._result_b = None 
    501514 
    502515    def __del__(self): 
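
With this restructuring a GpuKernel keeps its q vectors on the GPU and can be evaluated repeatedly against different parameter sets before being released. A short usage sketch follows, assuming the usual high-level sasmodels entry points load_model and call_kernel and the stock "cylinder" model; whether the CUDA path is actually taken depends on the environment (PyCUDA installed and a device available).

    import numpy as np
    from sasmodels.core import load_model
    from sasmodels.direct_model import call_kernel

    model = load_model("cylinder", dtype="single")  # GPU model when CUDA/OpenCL is available
    q = np.logspace(-3, -1, 200)
    kernel = model.make_kernel([q])                 # q transferred to the GPU once
    Iq = call_kernel(kernel, {"radius": 200.0, "length": 300.0})
    kernel.release()                                # free the GPU result buffer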