Changeset f2f67a6 in sasmodels for sasmodels/kernelcl.py


Ignore:
Timestamp:
Apr 15, 2016 7:26:24 PM (8 years ago)
Author:
Paul Kienzle <pkienzle@…>
Branches:
master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
Children:
ae2b6b5, 38a9b07, eb97b11
Parents:
0ff62d4
Message:

re-enable OpenCL; works on CPU but not GPU

File:
1 edited

Legend:

Unmodified
Added
Removed
  • sasmodels/kernelcl.py

    r8d62008 rf2f67a6  
    5656 
    5757try: 
    58     raise NotImplementedError("OpenCL not yet implemented for new kernel template") 
     58    #raise NotImplementedError("OpenCL not yet implemented for new kernel template") 
    5959    import pyopencl as cl  # type: ignore 
    6060    # Ask OpenCL for the default context so that we know that one exists 
     
    264264        key = "%s-%s-%s"%(name, dtype, fast) 
    265265        if key not in self.compiled: 
    266             print("compiling",name) 
     266            #print("OpenCL compile",name) 
    267267            dtype = np.dtype(dtype) 
    268268            program = compile_model(self.get_context(dtype), 
     
    373373        kernel_name = generate.kernel_name(self.info, is_2d) 
    374374        kernel = getattr(self.program, kernel_name) 
    375         return GpuKernel(kernel, self.info, q_vectors) 
     375        return GpuKernel(kernel, self.dtype, self.info, q_vectors) 
    376376 
    377377    def release(self): 
     
    443443        Free the memory. 
    444444        """ 
    445         if self.q is not None: 
    446             self.q.release() 
    447             self.q = None 
     445        if self.q_b is not None: 
     446            self.q_b.release() 
     447            self.q_b = None 
    448448 
    449449    def __del__(self): 
     
    471471    Call :meth:`release` when done with the kernel instance. 
    472472    """ 
    473     def __init__(self, kernel, model_info, q_vectors): 
    474         # type: (cl.Kernel, ModelInfo, List[np.ndarray]) -> None 
     473    def __init__(self, kernel, dtype, model_info, q_vectors): 
     474        # type: (cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None 
    475475        max_pd = model_info.parameters.max_pd 
    476476        npars = len(model_info.parameters.kernel_parameters)-2 
    477         q_input = GpuInput(q_vectors, kernel.dtype) 
     477        q_input = GpuInput(q_vectors, dtype) 
    478478        self.kernel = kernel 
    479479        self.info = model_info 
    480         self.dtype = kernel.dtype 
     480        self.dtype = dtype 
    481481        self.dim = '2d' if q_input.is_2d else '1d' 
    482482        # plus three for the normalization values 
    483         self.result = np.empty(q_input.nq+3, q_input.dtype) 
     483        self.result = np.empty(q_input.nq+3, dtype) 
    484484 
    485485        # Inputs and outputs for each kernel call 
    486486        # Note: res may be shorter than res_b if global_size != nq 
    487487        env = environment() 
    488         self.queue = env.get_queue(kernel.dtype) 
     488        self.queue = env.get_queue(dtype) 
    489489 
    490490        # details is int32 data, padded to an 8 integer boundary 
    491491        size = ((max_pd*5 + npars*3 + 2 + 7)//8)*8 
    492492        self.result_b = cl.Buffer(self.queue.context, mf.READ_WRITE, 
    493                                q_input.global_size[0] * kernel.dtype.itemsize) 
     493                               q_input.global_size[0] * dtype.itemsize) 
    494494        self.q_input = q_input # allocated by GpuInput above 
    495495 
    496496        self._need_release = [ self.result_b, self.q_input ] 
    497         self.real = (np.float32 if self.q_input.dtype == generate.F32 
    498                      else np.float64 if self.q_input.dtype == generate.F64 
    499                      else np.float16 if self.q_input.dtype == generate.F16 
     497        self.real = (np.float32 if dtype == generate.F32 
     498                     else np.float64 if dtype == generate.F64 
     499                     else np.float16 if dtype == generate.F16 
    500500                     else np.float32)  # will never get here, so use np.float32 
    501501 
    502502    def __call__(self, call_details, weights, values, cutoff): 
    503503        # type: (CallDetails, np.ndarray, np.ndarray, float) -> np.ndarray 
    504  
    505504        context = self.queue.context 
    506505        # Arrange data transfer to card 
     
    508507                              hostbuf=call_details.buffer) 
    509508        weights_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 
    510                               hostbuf=weights) 
     509                              hostbuf=weights) if len(weights) else None 
    511510        values_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 
    512511                             hostbuf=values) 
     
    521520        cl.enqueue_copy(self.queue, self.result, self.result_b) 
    522521        for v in (details_b, weights_b, values_b): 
    523             v.release() 
     522            if v is not None: v.release() 
    524523 
    525524        return self.result[:self.q_input.nq] 
Note: See TracChangeset for help on using the changeset viewer.