Changeset 7126c04 in sasmodels for sasmodels/kernelcl.py
- Timestamp:
- Nov 9, 2018 2:33:23 PM (5 years ago)
- Branches:
- master, core_shell_microgels, magnetic_model, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children:
- 0be86aa
- Parents:
- 63d4dd1
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
sasmodels/kernelcl.py
rf872fd1 r7126c04 265 265 # Cache for compiled programs, and for items in context 266 266 self.compiled = {} 267 self.cache = {}268 267 269 268 def has_type(self, dtype): … … 297 296 return program 298 297 299 def free_buffer(self, key):300 if key in self.cache:301 self.cache[key].release()302 del self.cache[key]303 304 def __del__(self):305 for v in self.cache.values():306 release = getattr(v, 'release', lambda: None)307 release()308 self.cache = {}309 310 298 _CURRENT_ID = 0 311 def unique_id():312 global _CURRENT_ID313 _CURRENT_ID += 1314 return _CURRENT_ID315 316 299 def _create_some_context(): 317 300 # type: () -> cl.Context … … 413 396 that the compiler is allowed to take shortcuts. 414 397 """ 398 info = None # type: ModelInfo 399 source = "" # type: str 400 dtype = None # type: np.dtype 401 fast = False # type: bool 402 _program = None # type: cl.Program 403 _kernels = None # type: Dict[str, cl.Kernel] 404 415 405 def __init__(self, source, model_info, dtype=generate.F32, fast=False): 416 406 # type: (Dict[str,str], ModelInfo, np.dtype, bool) -> None … … 419 409 self.dtype = dtype 420 410 self.fast = fast 421 self.timestamp = generate.ocl_timestamp(self.info)422 self._cache_key = unique_id()423 411 424 412 def __getstate__(self): … … 429 417 # type: (Tuple[ModelInfo, str, np.dtype, bool]) -> None 430 418 self.info, self.source, self.dtype, self.fast = state 419 self._program = self._kernels = None 431 420 432 421 def make_kernel(self, q_vectors): … … 434 423 return GpuKernel(self, q_vectors) 435 424 436 @property 437 def Iq(self): 438 return self._fetch_kernel('Iq') 439 440 def fetch_kernel(self, name): 425 def get_function(self, name): 441 426 # type: (str) -> cl.Kernel 442 427 """ … … 444 429 does not already exist. 445 430 """ 446 gpu = environment() 447 key = self._cache_key 448 if key not in gpu.cache: 449 program = gpu.compile_program( 450 self.info.name, 451 self.source['opencl'], 452 self.dtype, 453 self.fast, 454 self.timestamp) 455 variants = ['Iq', 'Iqxy', 'Imagnetic'] 456 names = [generate.kernel_name(self.info, k) for k in variants] 457 kernels = [getattr(program, k) for k in names] 458 data = dict((k, v) for k, v in zip(variants, kernels)) 459 # keep a handle to program so GC doesn't collect 460 data['program'] = program 461 gpu.cache[key] = data 462 else: 463 data = gpu.cache[key] 464 return data[name] 431 if self._program is None: 432 self._prepare_program() 433 return self._kernels[name] 434 435 def _prepare_program(self): 436 # type: (str) -> None 437 env = environment() 438 timestamp = generate.ocl_timestamp(self.info) 439 program = env.compile_program( 440 self.info.name, 441 self.source['opencl'], 442 self.dtype, 443 self.fast, 444 timestamp) 445 variants = ['Iq', 'Iqxy', 'Imagnetic'] 446 names = [generate.kernel_name(self.info, k) for k in variants] 447 handles = [getattr(program, k) for k in names] 448 self._kernels = {k: v for k, v in zip(variants, handles)} 449 # keep a handle to program so GC doesn't collect 450 self._program = program 465 451 466 452 # TODO: check that we don't need a destructor for buffers which go out of scope … … 504 490 self.q[:self.nq] = q_vectors[0] 505 491 self.global_size = [self.q.shape[0]] 506 self._cache_key = unique_id() 507 508 @property 509 def q_b(self): 510 """Lazy creation of q buffer so it can survive context reset""" 492 #print("creating inputs of size", self.global_size) 493 494 # transfer input value to gpu 511 495 env = environment() 512 key = self._cache_key 513 if key not in env.cache: 514 context = env.context[self.dtype] 515 #print("creating inputs of size", self.global_size) 516 buffer = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 517 hostbuf=self.q) 518 env.cache[key] = buffer 519 return env.cache[key] 496 context = env.context[self.dtype] 497 self.q_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 498 hostbuf=self.q) 520 499 521 500 def release(self): … … 524 503 Free the buffer associated with the q value 525 504 """ 526 environment().free_buffer(id(self)) 505 if self.q_b is not None: 506 self.q_b.release() 507 self.q_b = None 527 508 528 509 def __del__(self): … … 536 517 *model* is the GpuModel object to call 537 518 538 The following attributes are defined: 539 540 *info* is the module information 541 542 *dtype* is the kernel precision 543 544 *dim* is '1d' or '2d' 545 546 *result* is a vector to contain the results of the call 547 548 The resulting call method takes the *pars*, a list of values for 549 the fixed parameters to the kernel, and *pd_pars*, a list of (value,weight) 550 vectors for the polydisperse parameters. *cutoff* determines the 551 integration limits: any points with combined weight less than *cutoff* 552 will not be calculated. 519 The kernel is derived from :class:`Kernel`, providing the 520 :meth:`call_kernel` method to evaluate the kernel for a given set of 521 parameters. Because of the need to move the q values to the GPU before 522 evaluation, the kernel is instantiated for a particular set of q vectors, 523 and can be called many times without transfering q each time. 553 524 554 525 Call :meth:`release` when done with the kernel instance. 555 526 """ 527 #: SAS model information structure 528 info = None # type: ModelInfo 529 #: kernel precision 530 dtype = None # type: np.dtype 531 #: kernel dimensions (1d or 2d) 532 dim = "" # type: str 533 #: calculation results, updated after each call to :meth:`_call_kernel` 534 result = None # type: np.ndarray 535 556 536 def __init__(self, model, q_vectors): 557 # type: ( cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None537 # type: (GpuModel, List[np.ndarray]) -> None 558 538 dtype = model.dtype 559 539 self.q_input = GpuInput(q_vectors, dtype) … … 561 541 # F16 isn't sufficient, so don't support it 562 542 self._as_dtype = np.float64 if dtype == generate.F64 else np.float32 563 self._cache_key = unique_id()564 543 565 544 # attributes accessed from the outside … … 573 552 self.result = np.empty(self.q_input.nq*nout+extra_q, dtype) 574 553 575 @property 576 def _result_b(self): 577 """Lazy creation of result buffer so it can survive context reset""" 554 # allocate result value on gpu 578 555 env = environment() 579 key = self._cache_key 580 if key not in env.cache: 581 context = env.context[self.dtype] 582 width = ((self.result.size+31)//32)*32 * self.dtype.itemsize 583 buffer = cl.Buffer(context, mf.READ_WRITE, width) 584 env.cache[key] = buffer 585 return env.cache[key] 556 context = env.context[self.dtype] 557 width = ((self.result.size+31)//32)*32 * self.dtype.itemsize 558 self._result_b = cl.Buffer(context, mf.READ_WRITE, width) 586 559 587 560 def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): … … 592 565 593 566 # Arrange data transfer to/from card 594 q_b = self.q_input.q_b595 result_b = self._result_b596 567 details_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 597 568 hostbuf=call_details.buffer) … … 600 571 601 572 name = 'Iq' if self.dim == '1d' else 'Imagnetic' if magnetic else 'Iqxy' 602 kernel = self._model. fetch_kernel(name)573 kernel = self._model.get_function(name) 603 574 kernel_args = [ 604 575 np.uint32(self.q_input.nq), None, None, 605 details_b, values_b, q_b,result_b,576 details_b, values_b, self.q_input.q_b, self._result_b, 606 577 self._as_dtype(cutoff), 607 578 np.uint32(effective_radius_type), … … 626 597 time.sleep(0.001) 627 598 last_nap = current_time 628 cl.enqueue_copy(queue, self.result, result_b, wait_for=wait_for)599 cl.enqueue_copy(queue, self.result, self._result_b, wait_for=wait_for) 629 600 #print("result", self.result) 630 601 631 602 # Free buffers 632 for v in (details_b, values_b): 633 if v is not None: 634 v.release() 603 details_b.release() 604 values_b.release() 635 605 636 606 def release(self): … … 639 609 Release resources associated with the kernel. 640 610 """ 641 environment().free_buffer(id(self))642 611 self.q_input.release() 612 if self._result_b is not None: 613 self._result_b.release() 614 self._result_b = None 643 615 644 616 def __del__(self):
Note: See TracChangeset
for help on using the changeset viewer.