Changeset 7126c04 in sasmodels
- Timestamp: Nov 9, 2018 4:33:23 PM
- Branches: master, core_shell_microgels, magnetic_model, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children: 0be86aa
- Parents: 63d4dd1
- Location: sasmodels
- Files: 2 edited
sasmodels/kernelcl.py
```diff
--- sasmodels/kernelcl.py (rf872fd1)
+++ sasmodels/kernelcl.py (r7126c04)
         # Cache for compiled programs, and for items in context
         self.compiled = {}
-        self.cache = {}

     def has_type(self, dtype):
…
         return program

-    def free_buffer(self, key):
-        if key in self.cache:
-            self.cache[key].release()
-            del self.cache[key]
-
-    def __del__(self):
-        for v in self.cache.values():
-            release = getattr(v, 'release', lambda: None)
-            release()
-        self.cache = {}
-
 _CURRENT_ID = 0
-def unique_id():
-    global _CURRENT_ID
-    _CURRENT_ID += 1
-    return _CURRENT_ID
-
 def _create_some_context():
     # type: () -> cl.Context
…
     that the compiler is allowed to take shortcuts.
     """
+    info = None  # type: ModelInfo
+    source = ""  # type: str
+    dtype = None  # type: np.dtype
+    fast = False  # type: bool
+    _program = None  # type: cl.Program
+    _kernels = None  # type: Dict[str, cl.Kernel]
+
     def __init__(self, source, model_info, dtype=generate.F32, fast=False):
         # type: (Dict[str,str], ModelInfo, np.dtype, bool) -> None
…
         self.dtype = dtype
         self.fast = fast
-        self.timestamp = generate.ocl_timestamp(self.info)
-        self._cache_key = unique_id()

     def __getstate__(self):
…
         # type: (Tuple[ModelInfo, str, np.dtype, bool]) -> None
         self.info, self.source, self.dtype, self.fast = state
+        self._program = self._kernels = None

     def make_kernel(self, q_vectors):
…
         return GpuKernel(self, q_vectors)

-    @property
-    def Iq(self):
-        return self._fetch_kernel('Iq')
-
-    def fetch_kernel(self, name):
+    def get_function(self, name):
         # type: (str) -> cl.Kernel
         """
…
         does not already exist.
         """
-        gpu = environment()
-        key = self._cache_key
-        if key not in gpu.cache:
-            program = gpu.compile_program(
-                self.info.name,
-                self.source['opencl'],
-                self.dtype,
-                self.fast,
-                self.timestamp)
-            variants = ['Iq', 'Iqxy', 'Imagnetic']
-            names = [generate.kernel_name(self.info, k) for k in variants]
-            kernels = [getattr(program, k) for k in names]
-            data = dict((k, v) for k, v in zip(variants, kernels))
-            # keep a handle to program so GC doesn't collect
-            data['program'] = program
-            gpu.cache[key] = data
-        else:
-            data = gpu.cache[key]
-        return data[name]
+        if self._program is None:
+            self._prepare_program()
+        return self._kernels[name]
+
+    def _prepare_program(self):
+        # type: (str) -> None
+        env = environment()
+        timestamp = generate.ocl_timestamp(self.info)
+        program = env.compile_program(
+            self.info.name,
+            self.source['opencl'],
+            self.dtype,
+            self.fast,
+            timestamp)
+        variants = ['Iq', 'Iqxy', 'Imagnetic']
+        names = [generate.kernel_name(self.info, k) for k in variants]
+        handles = [getattr(program, k) for k in names]
+        self._kernels = {k: v for k, v in zip(variants, handles)}
+        # keep a handle to program so GC doesn't collect
+        self._program = program

 # TODO: check that we don't need a destructor for buffers which go out of scope
…
         self.q[:self.nq] = q_vectors[0]
         self.global_size = [self.q.shape[0]]
-        self._cache_key = unique_id()
-
-    @property
-    def q_b(self):
-        """Lazy creation of q buffer so it can survive context reset"""
+        #print("creating inputs of size", self.global_size)
+
+        # transfer input value to gpu
         env = environment()
-        key = self._cache_key
-        if key not in env.cache:
-            context = env.context[self.dtype]
-            #print("creating inputs of size", self.global_size)
-            buffer = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR,
-                               hostbuf=self.q)
-            env.cache[key] = buffer
-        return env.cache[key]
+        context = env.context[self.dtype]
+        self.q_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR,
+                             hostbuf=self.q)

…
         """
         Free the buffer associated with the q value
         """
-        environment().free_buffer(id(self))
+        if self.q_b is not None:
+            self.q_b.release()
+            self.q_b = None

     def __del__(self):
…
     *model* is the GpuModel object to call

-    The following attributes are defined:
-
-    *info* is the module information
-
-    *dtype* is the kernel precision
-
-    *dim* is '1d' or '2d'
-
-    *result* is a vector to contain the results of the call
-
-    The resulting call method takes the *pars*, a list of values for
-    the fixed parameters to the kernel, and *pd_pars*, a list of (value,weight)
-    vectors for the polydisperse parameters. *cutoff* determines the
-    integration limits: any points with combined weight less than *cutoff*
-    will not be calculated.
+    The kernel is derived from :class:`Kernel`, providing the
+    :meth:`call_kernel` method to evaluate the kernel for a given set of
+    parameters.  Because of the need to move the q values to the GPU before
+    evaluation, the kernel is instantiated for a particular set of q vectors,
+    and can be called many times without transfering q each time.

     Call :meth:`release` when done with the kernel instance.
     """
+    #: SAS model information structure
+    info = None  # type: ModelInfo
+    #: kernel precision
+    dtype = None  # type: np.dtype
+    #: kernel dimensions (1d or 2d)
+    dim = ""  # type: str
+    #: calculation results, updated after each call to :meth:`_call_kernel`
+    result = None  # type: np.ndarray
+
     def __init__(self, model, q_vectors):
-        # type: (cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None
+        # type: (GpuModel, List[np.ndarray]) -> None
         dtype = model.dtype
         self.q_input = GpuInput(q_vectors, dtype)
…
         # F16 isn't sufficient, so don't support it
         self._as_dtype = np.float64 if dtype == generate.F64 else np.float32
-        self._cache_key = unique_id()

         # attributes accessed from the outside
…
         self.result = np.empty(self.q_input.nq*nout+extra_q, dtype)

-    @property
-    def _result_b(self):
-        """Lazy creation of result buffer so it can survive context reset"""
+        # allocate result value on gpu
         env = environment()
-        key = self._cache_key
-        if key not in env.cache:
-            context = env.context[self.dtype]
-            width = ((self.result.size+31)//32)*32 * self.dtype.itemsize
-            buffer = cl.Buffer(context, mf.READ_WRITE, width)
-            env.cache[key] = buffer
-        return env.cache[key]
+        context = env.context[self.dtype]
+        width = ((self.result.size+31)//32)*32 * self.dtype.itemsize
+        self._result_b = cl.Buffer(context, mf.READ_WRITE, width)

     def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type):
…
         # Arrange data transfer to/from card
-        q_b = self.q_input.q_b
-        result_b = self._result_b
         details_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR,
                               hostbuf=call_details.buffer)
…
         name = 'Iq' if self.dim == '1d' else 'Imagnetic' if magnetic else 'Iqxy'
-        kernel = self._model.fetch_kernel(name)
+        kernel = self._model.get_function(name)
         kernel_args = [
             np.uint32(self.q_input.nq), None, None,
-            details_b, values_b, q_b, result_b,
+            details_b, values_b, self.q_input.q_b, self._result_b,
             self._as_dtype(cutoff),
             np.uint32(effective_radius_type),
…
                 time.sleep(0.001)
                 last_nap = current_time
-        cl.enqueue_copy(queue, self.result, result_b, wait_for=wait_for)
+        cl.enqueue_copy(queue, self.result, self._result_b, wait_for=wait_for)
         #print("result", self.result)

         # Free buffers
-        for v in (details_b, values_b):
-            if v is not None:
-                v.release()
+        details_b.release()
+        values_b.release()

     def release(self):
…
         Release resources associated with the kernel.
         """
-        environment().free_buffer(id(self))
         self.q_input.release()
+        if self._result_b is not None:
+            self._result_b.release()
+            self._result_b = None

     def __del__(self):
```
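The net effect on the OpenCL path is that `GpuModel` now compiles its program lazily through `get_function()`, and each `GpuKernel` owns its q and result buffers until `release()` is called, replacing the removed `unique_id()`/`free_buffer()` machinery that stashed buffers in a global environment cache. A minimal usage sketch follows; it is not part of the changeset, and it assumes PyOpenCL with a usable device plus the usual sasmodels helpers (`load_model_info`, `generate.make_source`, `direct_model.call_kernel`) behaving as they do at this revision, with arbitrary cylinder parameter values for illustration.

```python
# Illustrative sketch only -- not from this changeset. Assumes PyOpenCL is
# installed and that load_model_info/make_source/call_kernel keep their
# current signatures in the sasmodels tree.
import numpy as np

from sasmodels import generate
from sasmodels.core import load_model_info
from sasmodels.direct_model import call_kernel
from sasmodels.kernelcl import GpuModel

model_info = load_model_info("cylinder")
source = generate.make_source(model_info)       # dict including an 'opencl' variant
model = GpuModel(source, model_info, dtype=generate.F32)

q = np.logspace(-3, -1, 200)
kernel = model.make_kernel([q])   # q is copied to the GPU once, inside GpuInput
Iq = call_kernel(kernel, {"radius": 200.0, "length": 300.0})
kernel.release()                  # explicitly frees the q and result buffers
```

Tying buffer lifetime to the Python objects, with `release()` plus `__del__` as a backstop, trades the context-reset resilience of the old global cache for ownership that is easier to reason about.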
sasmodels/kernelcuda.py
```diff
--- sasmodels/kernelcuda.py (rf872fd1)
+++ sasmodels/kernelcuda.py (r7126c04)
     dtype = None  # type: np.dtype
     fast = False  # type: bool
-    program = None  # type: SourceModule
-    _kernels = None  # type: List[cuda.Function]
+    _program = None  # type: SourceModule
+    _kernels = None  # type: Dict[str, cuda.Function]

     def __init__(self, source, model_info, dtype=generate.F32, fast=False):
…
         self.dtype = dtype
         self.fast = fast
-        self.program = None  # delay program creation
-        self._kernels = None

     def __getstate__(self):
…
         # type: (Tuple[ModelInfo, str, np.dtype, bool]) -> None
         self.info, self.source, self.dtype, self.fast = state
-        self.program = None
+        self._program = self._kernels = None

     def make_kernel(self, q_vectors):
         # type: (List[np.ndarray]) -> "GpuKernel"
-        if self.program is None:
-            compile_program = environment().compile_program
-            timestamp = generate.ocl_timestamp(self.info)
-            self.program = compile_program(
-                self.info.name,
-                self.source['opencl'],
-                self.dtype,
-                self.fast,
-                timestamp)
-            variants = ['Iq', 'Iqxy', 'Imagnetic']
-            names = [generate.kernel_name(self.info, k) for k in variants]
-            kernels = [self.program.get_function(k) for k in names]
-            self._kernels = dict((k, v) for k, v in zip(variants, kernels))
-        is_2d = len(q_vectors) == 2
-        if is_2d:
-            kernel = [self._kernels['Iqxy'], self._kernels['Imagnetic']]
-        else:
-            kernel = [self._kernels['Iq']]*2
-        return GpuKernel(kernel, self.dtype, self.info, q_vectors)
+        return GpuKernel(self, q_vectors)
+
+    def get_function(self, name):
+        # type: (str) -> cuda.Function
+        """
+        Fetch the kernel from the environment by name, compiling it if it
+        does not already exist.
+        """
+        if self._program is None:
+            self._prepare_program()
+        return self._kernels[name]
+
+    def _prepare_program(self):
+        # type: (str) -> None
+        env = environment()
+        timestamp = generate.ocl_timestamp(self.info)
+        program = env.compile_program(
+            self.info.name,
+            self.source['opencl'],
+            self.dtype,
+            self.fast,
+            timestamp)
+        variants = ['Iq', 'Iqxy', 'Imagnetic']
+        names = [generate.kernel_name(self.info, k) for k in variants]
+        handles = [program.get_function(k) for k in names]
+        self._kernels = {k: v for k, v in zip(variants, kernels)}
+        # keep a handle to program so GC doesn't collect
+        self._program = program

     def release(self):
…
         self.global_size = [self.q.shape[0]]
         #print("creating inputs of size", self.global_size)
+
+        # transfer input value to gpu
         self.q_b = cuda.to_device(self.q)

…
     Callable SAS kernel.

-    *kernel* is the GpuKernel object to call
-
-    *model_info* is the module information
-
-    *q_vectors* is the q vectors at which the kernel should be evaluated
-
-    *dtype* is the kernel precision
-
-    The resulting call method takes the *pars*, a list of values for
-    the fixed parameters to the kernel, and *pd_pars*, a list of (value,weight)
-    vectors for the polydisperse parameters. *cutoff* determines the
-    integration limits: any points with combined weight less than *cutoff*
-    will not be calculated.
+    *model* is the GpuModel object to call
+
+    The kernel is derived from :class:`Kernel`, providing the
+    :meth:`call_kernel` method to evaluate the kernel for a given set of
+    parameters.  Because of the need to move the q values to the GPU before
+    evaluation, the kernel is instantiated for a particular set of q vectors,
+    and can be called many times without transfering q each time.

     Call :meth:`release` when done with the kernel instance.
     """
-    def __init__(self, kernel, dtype, model_info, q_vectors):
-        # type: (cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None
+    #: SAS model information structure
+    info = None  # type: ModelInfo
+    #: kernel precision
+    dtype = None  # type: np.dtype
+    #: kernel dimensions (1d or 2d)
+    dim = ""  # type: str
+    #: calculation results, updated after each call to :meth:`_call_kernel`
+    result = None  # type: np.ndarray
+
+    def __init__(self, model, q_vectors):
+        # type: (GpuModel, List[np.ndarray]) -> None
+        dtype = model.dtype
         self.q_input = GpuInput(q_vectors, dtype)
-        self.kernel = kernel
+        self._model = model
         # F16 isn't sufficient, so don't support it
         self._as_dtype = np.float64 if dtype == generate.F64 else np.float32
…
         # attributes accessed from the outside
         self.dim = '2d' if self.q_input.is_2d else '1d'
-        self.info = model_info
-        self.dtype = dtype
+        self.info = model.info
+        self.dtype = model.dtype

         # holding place for the returned value
…
         self.result = np.empty(self.q_input.nq*nout+extra_q, dtype)

-        # Inputs and outputs for each kernel call
-        # Note: res may be shorter than res_b if global_size != nq
+        # allocate result value on gpu
         width = ((self.result.size+31)//32)*32 * self.dtype.itemsize
-        self.result_b = cuda.mem_alloc(width)
-        self._need_release = [self.result_b]
+        self._result_b = cuda.mem_alloc(width)

     def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type):
…
         values_b = cuda.to_device(values)

-        kernel = self.kernel[1 if magnetic else 0]
-        args = [
+        name = 'Iq' if self.dim == '1d' else 'Imagnetic' if magnetic else 'Iqxy'
+        kernel = self._model.get_function(name)
+        kernel_args = [
             np.uint32(self.q_input.nq), None, None,
             details_b, values_b, self.q_input.q_b, self.result_b,
…
         Release resources associated with the kernel.
         """
-        for p in self._need_release:
-            p.free()
-        self._need_release = []
+        self.q_input.release()
+        if self._result_b is not None:
+            self._result_b.free()
+            self._result_b = None

     def __del__(self):
```
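Both files now share the same compile-on-first-use shape: `get_function()` checks whether `_program` has been built, calls `_prepare_program()` exactly once, and afterwards serves kernel handles out of the `_kernels` dict. The sketch below shows that idiom in isolation; every name in it (`LazyProgram`, `DummyProgram`, `build`) is hypothetical and stands in for the real PyOpenCL/PyCUDA objects.

```python
# Standalone illustration of the compile-on-first-use idiom adopted by both
# kernelcl.GpuModel and kernelcuda.GpuModel. All names here are hypothetical,
# not sasmodels APIs; DummyProgram plays the role of a compiled GPU program.
from typing import Callable, Dict, Optional


class DummyProgram:
    """Stand-in for a compiled GPU program exposing kernel entry points."""
    def Iq(self, *args): return "Iq%r" % (args,)
    def Iqxy(self, *args): return "Iqxy%r" % (args,)
    def Imagnetic(self, *args): return "Imagnetic%r" % (args,)


class LazyProgram:
    """Compile once, on the first request for any kernel function."""
    def __init__(self, build):
        # type: (Callable[[], DummyProgram]) -> None
        self._build = build
        self._program = None   # type: Optional[DummyProgram]
        self._kernels = {}     # type: Dict[str, Callable]

    def get_function(self, name):
        # type: (str) -> Callable
        if self._program is None:
            self._prepare_program()
        return self._kernels[name]

    def _prepare_program(self):
        # type: () -> None
        program = self._build()                 # the expensive compile step
        variants = ['Iq', 'Iqxy', 'Imagnetic']
        self._kernels = {k: getattr(program, k) for k in variants}
        self._program = program                 # keep a handle so GC doesn't collect it


lazy = LazyProgram(DummyProgram)
print(lazy.get_function('Iqxy')(1, 2))   # first call triggers the one-time "compile"
print(lazy.get_function('Iq')(3))        # later calls reuse the cached handles
```

The only difference between the two backends in this changeset is how the handles are pulled from the compiled program: `getattr(program, name)` for PyOpenCL versus `program.get_function(name)` for PyCUDA.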