Changeset 7126c04 in sasmodels for sasmodels/kernelcuda.py
- Timestamp:
- Nov 9, 2018 2:33:23 PM (5 years ago)
- Branches:
- master, core_shell_microgels, magnetic_model, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children:
- 0be86aa
- Parents:
- 63d4dd1
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
sasmodels/kernelcuda.py
rf872fd1 r7126c04 296 296 dtype = None # type: np.dtype 297 297 fast = False # type: bool 298 program = None # type: SourceModule299 _kernels = None # type: List[cuda.Function]298 _program = None # type: SourceModule 299 _kernels = None # type: Dict[str, cuda.Function] 300 300 301 301 def __init__(self, source, model_info, dtype=generate.F32, fast=False): … … 305 305 self.dtype = dtype 306 306 self.fast = fast 307 self.program = None # delay program creation308 self._kernels = None309 307 310 308 def __getstate__(self): … … 315 313 # type: (Tuple[ModelInfo, str, np.dtype, bool]) -> None 316 314 self.info, self.source, self.dtype, self.fast = state 317 self. program= None315 self._program = self._kernels = None 318 316 319 317 def make_kernel(self, q_vectors): 320 318 # type: (List[np.ndarray]) -> "GpuKernel" 321 if self.program is None: 322 compile_program = environment().compile_program 323 timestamp = generate.ocl_timestamp(self.info) 324 self.program = compile_program( 325 self.info.name, 326 self.source['opencl'], 327 self.dtype, 328 self.fast, 329 timestamp) 330 variants = ['Iq', 'Iqxy', 'Imagnetic'] 331 names = [generate.kernel_name(self.info, k) for k in variants] 332 kernels = [self.program.get_function(k) for k in names] 333 self._kernels = dict((k, v) for k, v in zip(variants, kernels)) 334 is_2d = len(q_vectors) == 2 335 if is_2d: 336 kernel = [self._kernels['Iqxy'], self._kernels['Imagnetic']] 337 else: 338 kernel = [self._kernels['Iq']]*2 339 return GpuKernel(kernel, self.dtype, self.info, q_vectors) 319 return GpuKernel(self, q_vectors) 320 321 def get_function(self, name): 322 # type: (str) -> cuda.Function 323 """ 324 Fetch the kernel from the environment by name, compiling it if it 325 does not already exist. 326 """ 327 if self._program is None: 328 self._prepare_program() 329 return self._kernels[name] 330 331 def _prepare_program(self): 332 # type: (str) -> None 333 env = environment() 334 timestamp = generate.ocl_timestamp(self.info) 335 program = env.compile_program( 336 self.info.name, 337 self.source['opencl'], 338 self.dtype, 339 self.fast, 340 timestamp) 341 variants = ['Iq', 'Iqxy', 'Imagnetic'] 342 names = [generate.kernel_name(self.info, k) for k in variants] 343 handles = [program.get_function(k) for k in names] 344 self._kernels = {k: v for k, v in zip(variants, kernels)} 345 # keep a handle to program so GC doesn't collect 346 self._program = program 340 347 341 348 def release(self): … … 394 401 self.global_size = [self.q.shape[0]] 395 402 #print("creating inputs of size", self.global_size) 403 404 # transfer input value to gpu 396 405 self.q_b = cuda.to_device(self.q) 397 406 … … 413 422 Callable SAS kernel. 414 423 415 *kernel* is the GpuKernel object to call 416 417 *model_info* is the module information 418 419 *q_vectors* is the q vectors at which the kernel should be evaluated 420 421 *dtype* is the kernel precision 422 423 The resulting call method takes the *pars*, a list of values for 424 the fixed parameters to the kernel, and *pd_pars*, a list of (value,weight) 425 vectors for the polydisperse parameters. *cutoff* determines the 426 integration limits: any points with combined weight less than *cutoff* 427 will not be calculated. 424 *model* is the GpuModel object to call 425 426 The kernel is derived from :class:`Kernel`, providing the 427 :meth:`call_kernel` method to evaluate the kernel for a given set of 428 parameters. Because of the need to move the q values to the GPU before 429 evaluation, the kernel is instantiated for a particular set of q vectors, 430 and can be called many times without transfering q each time. 428 431 429 432 Call :meth:`release` when done with the kernel instance. 430 433 """ 431 def __init__(self, kernel, dtype, model_info, q_vectors): 432 # type: (cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None 434 #: SAS model information structure 435 info = None # type: ModelInfo 436 #: kernel precision 437 dtype = None # type: np.dtype 438 #: kernel dimensions (1d or 2d) 439 dim = "" # type: str 440 #: calculation results, updated after each call to :meth:`_call_kernel` 441 result = None # type: np.ndarray 442 443 def __init__(self, model, q_vectors): 444 # type: (GpuModel, List[np.ndarray]) -> None 445 dtype = model.dtype 433 446 self.q_input = GpuInput(q_vectors, dtype) 434 self. kernel = kernel447 self._model = model 435 448 # F16 isn't sufficient, so don't support it 436 449 self._as_dtype = np.float64 if dtype == generate.F64 else np.float32 … … 438 451 # attributes accessed from the outside 439 452 self.dim = '2d' if self.q_input.is_2d else '1d' 440 self.info = model _info441 self.dtype = dtype453 self.info = model.info 454 self.dtype = model.dtype 442 455 443 456 # holding place for the returned value … … 446 459 self.result = np.empty(self.q_input.nq*nout+extra_q, dtype) 447 460 448 # Inputs and outputs for each kernel call 449 # Note: res may be shorter than res_b if global_size != nq 461 # allocate result value on gpu 450 462 width = ((self.result.size+31)//32)*32 * self.dtype.itemsize 451 self.result_b = cuda.mem_alloc(width) 452 self._need_release = [self.result_b] 463 self._result_b = cuda.mem_alloc(width) 453 464 454 465 def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): … … 458 469 values_b = cuda.to_device(values) 459 470 460 kernel = self.kernel[1 if magnetic else 0] 461 args = [ 471 name = 'Iq' if self.dim == '1d' else 'Imagnetic' if magnetic else 'Iqxy' 472 kernel = self._model.get_function(name) 473 kernel_args = [ 462 474 np.uint32(self.q_input.nq), None, None, 463 475 details_b, values_b, self.q_input.q_b, self.result_b, … … 496 508 Release resources associated with the kernel. 497 509 """ 498 for p in self._need_release: 499 p.free() 500 self._need_release = [] 510 self.q_input.release() 511 if self._result_b is not None: 512 self._result_b.free() 513 self._result_b = None 501 514 502 515 def __del__(self):
Note: See TracChangeset
for help on using the changeset viewer.