Changeset f2f67a6 in sasmodels for sasmodels/kernelcl.py
- Timestamp:
- Apr 15, 2016 7:26:24 PM (8 years ago)
- Branches:
- master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children:
- ae2b6b5, 38a9b07, eb97b11
- Parents:
- 0ff62d4
- File:
- 1 edited
Legend:
- Unmodified
- Added
- Removed
sasmodels/kernelcl.py
r8d62008  rf2f67a6
  56   56
  57   57   try:
  58      - raise NotImplementedError("OpenCL not yet implemented for new kernel template")
       58 + #raise NotImplementedError("OpenCL not yet implemented for new kernel template")
  59   59   import pyopencl as cl # type: ignore
  60   60   # Ask OpenCL for the default context so that we know that one exists
   …    …
 264  264   key = "%s-%s-%s"%(name, dtype, fast)
 265  265   if key not in self.compiled:
 266      - print("compiling",name)
      266 + #print("OpenCL compile",name)
 267  267   dtype = np.dtype(dtype)
 268  268   program = compile_model(self.get_context(dtype),
   …    …
 373  373   kernel_name = generate.kernel_name(self.info, is_2d)
 374  374   kernel = getattr(self.program, kernel_name)
 375      - return GpuKernel(kernel, self.info, q_vectors)
      375 + return GpuKernel(kernel, self.dtype, self.info, q_vectors)
 376  376
 377  377   def release(self):
   …    …
 443  443   Free the memory.
 444  444   """
 445      - if self.q is not None:
 446      - self.q.release()
 447      - self.q = None
      445 + if self.q_b is not None:
      446 + self.q_b.release()
      447 + self.q_b = None
 448  448
 449  449   def __del__(self):
   …    …
 471  471   Call :meth:`release` when done with the kernel instance.
 472  472   """
 473      - def __init__(self, kernel, model_info, q_vectors):
 474      - # type: (cl.Kernel, ModelInfo, List[np.ndarray]) -> None
      473 + def __init__(self, kernel, dtype, model_info, q_vectors):
      474 + # type: (cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None
 475  475   max_pd = model_info.parameters.max_pd
 476  476   npars = len(model_info.parameters.kernel_parameters)-2
 477      - q_input = GpuInput(q_vectors, kernel.dtype)
      477 + q_input = GpuInput(q_vectors, dtype)
 478  478   self.kernel = kernel
 479  479   self.info = model_info
 480      - self.dtype = kernel.dtype
      480 + self.dtype = dtype
 481  481   self.dim = '2d' if q_input.is_2d else '1d'
 482  482   # plus three for the normalization values
 483      - self.result = np.empty(q_input.nq+3, q_input.dtype)
      483 + self.result = np.empty(q_input.nq+3, dtype)
 484  484
 485  485   # Inputs and outputs for each kernel call
 486  486   # Note: res may be shorter than res_b if global_size != nq
 487  487   env = environment()
 488      - self.queue = env.get_queue(kernel.dtype)
      488 + self.queue = env.get_queue(dtype)
 489  489
 490  490   # details is int32 data, padded to an 8 integer boundary
 491  491   size = ((max_pd*5 + npars*3 + 2 + 7)//8)*8
 492  492   self.result_b = cl.Buffer(self.queue.context, mf.READ_WRITE,
 493      - q_input.global_size[0] * kernel.dtype.itemsize)
      493 + q_input.global_size[0] * dtype.itemsize)
 494  494   self.q_input = q_input # allocated by GpuInput above
 495  495
 496  496   self._need_release = [ self.result_b, self.q_input ]
 497      - self.real = (np.float32 if self.q_input.dtype == generate.F32
 498      - else np.float64 if self.q_input.dtype == generate.F64
 499      - else np.float16 if self.q_input.dtype == generate.F16
      497 + self.real = (np.float32 if dtype == generate.F32
      498 + else np.float64 if dtype == generate.F64
      499 + else np.float16 if dtype == generate.F16
 500  500   else np.float32) # will never get here, so use np.float32
 501  501
 502  502   def __call__(self, call_details, weights, values, cutoff):
 503  503   # type: (CallDetails, np.ndarray, np.ndarray, float) -> np.ndarray
 504      -
 505  504   context = self.queue.context
 506  505   # Arrange data transfer to card
   …    …
 508  507   hostbuf=call_details.buffer)
 509  508   weights_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR,
 510      - hostbuf=weights)
      509 + hostbuf=weights) if len(weights) else None
 511  510   values_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR,
 512  511   hostbuf=values)
   …    …
 521  520   cl.enqueue_copy(self.queue, self.result, self.result_b)
 522  521   for v in (details_b, weights_b, values_b):
 523      - v.release()
      522 + if v is not None: v.release()
 524  523
 525  524   return self.result[:self.q_input.nq]
Note: See TracChangeset for help on using the changeset viewer.