Changes in / [2a12d8d8:df87acf] in sasmodels
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
sasmodels/kernelcl.py
r95f62aa rd86f0fc 74 74 75 75 from . import generate 76 from .generate import F32, F6477 76 from .kernel import KernelModel, Kernel 78 77 … … 163 162 Return true if device supports the requested precision. 164 163 """ 165 if dtype == F32:164 if dtype == generate.F32: 166 165 return True 167 166 elif dtype == generate.F64: … … 240 239 """ 241 240 GPU context, with possibly many devices, and one queue per device. 242 243 Because the environment can be reset during a live program (e.g., if the244 user changes the active GPU device in the GUI), everything associated245 with the device context must be cached in the environment and recreated246 if the environment changes. The *cache* attribute is a simple dictionary247 which holds keys and references to objects, such as compiled kernels and248 allocated buffers. The running program should check in the cache for249 long lived objects and create them if they are not there. The program250 should not hold onto cached objects, but instead only keep them active251 for the duration of a function call. When the environment is destroyed252 then the *release* method for each active cache item is called before253 the environment is freed. This means that each cl buffer should be254 in its own cache entry.255 241 """ 256 242 def __init__(self): 257 243 # type: () -> None 258 244 # find gpu context 259 context_list = _create_some_context() 260 261 # Find a context for F32 and for F64 (maybe the same one). 262 # F16 isn't good enough. 263 self.context = {} 264 for dtype in (F32, F64): 265 for context in context_list: 266 if has_type(context.devices[0], dtype): 267 self.context[dtype] = context 268 break 269 else: 270 self.context[dtype] = None 271 272 # Build a queue for each context 273 self.queue = {} 274 context = self.context[F32] 275 self.queue[F32] = cl.CommandQueue(context, context.devices[0]) 276 if self.context[F64] == self.context[F32]: 277 self.queue[F64] = self.queue[F32] 278 else: 279 context = self.context[F64] 280 self.queue[F64] = cl.CommandQueue(context, context.devices[0]) 245 #self.context = cl.create_some_context() 246 247 self.context = None 248 if 'SAS_OPENCL' in os.environ: 249 #Setting PYOPENCL_CTX as a SAS_OPENCL to create cl context 250 os.environ["PYOPENCL_CTX"] = os.environ["SAS_OPENCL"] 251 if 'PYOPENCL_CTX' in os.environ: 252 self._create_some_context() 253 254 if not self.context: 255 self.context = _get_default_context() 281 256 282 257 # Byte boundary for data alignment 283 #self.data_boundary = max( context.devices[0].min_data_type_align_size284 # for context in self.context.values())285 286 # Cache for compiled programs, and for items in context258 #self.data_boundary = max(d.min_data_type_align_size 259 # for d in self.context.devices) 260 self.queues = [cl.CommandQueue(context, context.devices[0]) 261 for context in self.context] 287 262 self.compiled = {} 288 self.cache = {}289 263 290 264 def has_type(self, dtype): … … 293 267 Return True if all devices support a given type. 294 268 """ 295 return self.context.get(dtype, None) is not None 269 return any(has_type(d, dtype) 270 for context in self.context 271 for d in context.devices) 272 273 def get_queue(self, dtype): 274 # type: (np.dtype) -> cl.CommandQueue 275 """ 276 Return a command queue for the kernels of type dtype. 277 """ 278 for context, queue in zip(self.context, self.queues): 279 if all(has_type(d, dtype) for d in context.devices): 280 return queue 281 282 def get_context(self, dtype): 283 # type: (np.dtype) -> cl.Context 284 """ 285 Return a OpenCL context for the kernels of type dtype. 286 """ 287 for context in self.context: 288 if all(has_type(d, dtype) for d in context.devices): 289 return context 290 291 def _create_some_context(self): 292 # type: () -> cl.Context 293 """ 294 Protected call to cl.create_some_context without interactivity. Use 295 this if SAS_OPENCL is set in the environment. Sets the *context* 296 attribute. 297 """ 298 try: 299 self.context = [cl.create_some_context(interactive=False)] 300 except Exception as exc: 301 warnings.warn(str(exc)) 302 warnings.warn("pyopencl.create_some_context() failed") 303 warnings.warn("the environment variable 'SAS_OPENCL' might not be set correctly") 296 304 297 305 def compile_program(self, name, source, dtype, fast, timestamp): … … 310 318 del self.compiled[key] 311 319 if key not in self.compiled: 312 context = self. context[dtype]320 context = self.get_context(dtype) 313 321 logging.info("building %s for OpenCL %s", key, 314 322 context.devices[0].name.strip()) 315 program = compile_model(self. context[dtype],323 program = compile_model(self.get_context(dtype), 316 324 str(source), dtype, fast) 317 325 self.compiled[key] = (program, timestamp) 318 326 return program 319 320 def free_buffer(self, key):321 if key in self.cache:322 self.cache[key].release()323 del self.cache[key]324 325 def __del__(self):326 for v in self.cache.values():327 release = getattr(v, 'release', lambda: None)328 release()329 self.cache = {}330 331 _CURRENT_ID = 0332 def unique_id():333 global _CURRENT_ID334 _CURRENT_ID += 1335 return _CURRENT_ID336 337 def _create_some_context():338 # type: () -> cl.Context339 """340 Protected call to cl.create_some_context without interactivity.341 342 Uses SAS_OPENCL or PYOPENCL_CTX if they are set in the environment,343 otherwise scans for the most appropriate device using344 :func:`_get_default_context`345 """346 if 'SAS_OPENCL' in os.environ:347 #Setting PYOPENCL_CTX as a SAS_OPENCL to create cl context348 os.environ["PYOPENCL_CTX"] = os.environ["SAS_OPENCL"]349 350 if 'PYOPENCL_CTX' in os.environ:351 try:352 return [cl.create_some_context(interactive=False)]353 except Exception as exc:354 warnings.warn(str(exc))355 warnings.warn("pyopencl.create_some_context() failed")356 warnings.warn("the environment variable 'SAS_OPENCL' or 'PYOPENCL_CTX' might not be set correctly")357 358 return _get_default_context()359 327 360 328 def _get_default_context(): … … 436 404 self.dtype = dtype 437 405 self.fast = fast 438 self. timestamp = generate.ocl_timestamp(self.info)439 self._ cache_key = unique_id()406 self.program = None # delay program creation 407 self._kernels = None 440 408 441 409 def __getstate__(self): … … 446 414 # type: (Tuple[ModelInfo, str, np.dtype, bool]) -> None 447 415 self.info, self.source, self.dtype, self.fast = state 416 self.program = None 448 417 449 418 def make_kernel(self, q_vectors): 450 419 # type: (List[np.ndarray]) -> "GpuKernel" 451 return GpuKernel(self, q_vectors) 452 453 @property 454 def Iq(self): 455 return self._fetch_kernel('Iq') 456 457 def fetch_kernel(self, name): 458 # type: (str) -> cl.Kernel 459 """ 460 Fetch the kernel from the environment by name, compiling it if it 461 does not already exist. 462 """ 463 gpu = environment() 464 key = self._cache_key 465 if key not in gpu.cache: 466 program = gpu.compile_program( 420 if self.program is None: 421 compile_program = environment().compile_program 422 timestamp = generate.ocl_timestamp(self.info) 423 self.program = compile_program( 467 424 self.info.name, 468 425 self.source['opencl'], 469 426 self.dtype, 470 427 self.fast, 471 self.timestamp)428 timestamp) 472 429 variants = ['Iq', 'Iqxy', 'Imagnetic'] 473 430 names = [generate.kernel_name(self.info, k) for k in variants] 474 kernels = [getattr( program, k) for k in names]475 data= dict((k, v) for k, v in zip(variants, kernels))476 # keep a handle to program so GC doesn't collect477 data['program'] = program478 gpu.cache[key] = data431 kernels = [getattr(self.program, k) for k in names] 432 self._kernels = dict((k, v) for k, v in zip(variants, kernels)) 433 is_2d = len(q_vectors) == 2 434 if is_2d: 435 kernel = [self._kernels['Iqxy'], self._kernels['Imagnetic']] 479 436 else: 480 data = gpu.cache[key] 481 return data[name] 437 kernel = [self._kernels['Iq']]*2 438 return GpuKernel(kernel, self.dtype, self.info, q_vectors) 439 440 def release(self): 441 # type: () -> None 442 """ 443 Free the resources associated with the model. 444 """ 445 if self.program is not None: 446 self.program = None 447 448 def __del__(self): 449 # type: () -> None 450 self.release() 482 451 483 452 # TODO: check that we don't need a destructor for buffers which go out of scope … … 504 473 # type: (List[np.ndarray], np.dtype) -> None 505 474 # TODO: do we ever need double precision q? 475 env = environment() 506 476 self.nq = q_vectors[0].size 507 477 self.dtype = np.dtype(dtype) … … 523 493 self.q[:self.nq] = q_vectors[0] 524 494 self.global_size = [self.q.shape[0]] 525 self._cache_key = unique_id() 526 527 @property 528 def q_b(self): 529 """Lazy creation of q buffer so it can survive context reset""" 530 env = environment() 531 key = self._cache_key 532 if key not in env.cache: 533 context = env.context[self.dtype] 534 #print("creating inputs of size", self.global_size) 535 buffer = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 536 hostbuf=self.q) 537 env.cache[key] = buffer 538 return env.cache[key] 495 context = env.get_context(self.dtype) 496 #print("creating inputs of size", self.global_size) 497 self.q_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 498 hostbuf=self.q) 539 499 540 500 def release(self): 541 501 # type: () -> None 542 502 """ 543 Free the buffer associated with the q value 544 """ 545 environment().free_buffer(id(self)) 503 Free the memory. 504 """ 505 if self.q_b is not None: 506 self.q_b.release() 507 self.q_b = None 546 508 547 509 def __del__(self): … … 553 515 Callable SAS kernel. 554 516 555 * model* is the GpuModel object to call556 557 The following attributes are defined:558 559 * info* is the module information517 *kernel* is the GpuKernel object to call 518 519 *model_info* is the module information 520 521 *q_vectors* is the q vectors at which the kernel should be evaluated 560 522 561 523 *dtype* is the kernel precision 562 563 *dim* is '1d' or '2d'564 565 *result* is a vector to contain the results of the call566 524 567 525 The resulting call method takes the *pars*, a list of values for … … 573 531 Call :meth:`release` when done with the kernel instance. 574 532 """ 575 def __init__(self, model, q_vectors):533 def __init__(self, kernel, dtype, model_info, q_vectors): 576 534 # type: (cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None 577 dtype = model.dtype 578 self.q_input = GpuInput(q_vectors, dtype) 579 self._model = model 580 self._as_dtype = (np.float32 if dtype == generate.F32 581 else np.float64 if dtype == generate.F64 582 else np.float16 if dtype == generate.F16 583 else np.float32) # will never get here, so use np.float32 584 self._cache_key = unique_id() 585 586 # attributes accessed from the outside 587 self.dim = '2d' if self.q_input.is_2d else '1d' 588 self.info = model.info 589 self.dtype = model.dtype 590 591 # holding place for the returned value 592 # plus one for the normalization values 593 self.result = np.empty(self.q_input.nq+1, dtype) 594 595 @property 596 def _result_b(self): 597 """Lazy creation of result buffer so it can survive context reset""" 535 q_input = GpuInput(q_vectors, dtype) 536 self.kernel = kernel 537 self.info = model_info 538 self.dtype = dtype 539 self.dim = '2d' if q_input.is_2d else '1d' 540 # plus three for the normalization values 541 self.result = np.empty(q_input.nq+1, dtype) 542 543 # Inputs and outputs for each kernel call 544 # Note: res may be shorter than res_b if global_size != nq 598 545 env = environment() 599 key = self._cache_key 600 if key not in env.cache: 601 context = env.context[self.dtype] 602 #print("creating inputs of size", self.global_size) 603 buffer = cl.Buffer(context, mf.READ_WRITE, 604 self.q_input.global_size[0] * self.dtype.itemsize) 605 env.cache[key] = buffer 606 return env.cache[key] 546 self.queue = env.get_queue(dtype) 547 548 self.result_b = cl.Buffer(self.queue.context, mf.READ_WRITE, 549 q_input.global_size[0] * dtype.itemsize) 550 self.q_input = q_input # allocated by GpuInput above 551 552 self._need_release = [self.result_b, self.q_input] 553 self.real = (np.float32 if dtype == generate.F32 554 else np.float64 if dtype == generate.F64 555 else np.float16 if dtype == generate.F16 556 else np.float32) # will never get here, so use np.float32 607 557 608 558 def __call__(self, call_details, values, cutoff, magnetic): 609 559 # type: (CallDetails, np.ndarray, np.ndarray, float, bool) -> np.ndarray 610 env = environment() 611 queue = env.queue[self._model.dtype] 612 context = queue.context 613 614 # Arrange data transfer to/from card 615 q_b = self.q_input.q_b 616 result_b = self._result_b 560 context = self.queue.context 561 # Arrange data transfer to card 617 562 details_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 618 563 hostbuf=call_details.buffer) … … 620 565 hostbuf=values) 621 566 622 name = 'Iq' if self.dim == '1d' else 'Imagnetic' if magnetic else 'Iqxy' 623 kernel = self._model.fetch_kernel(name) 624 kernel_args = [ 567 kernel = self.kernel[1 if magnetic else 0] 568 args = [ 625 569 np.uint32(self.q_input.nq), None, None, 626 details_b, values_b, q_b,result_b,627 self. _as_dtype(cutoff),570 details_b, values_b, self.q_input.q_b, self.result_b, 571 self.real(cutoff), 628 572 ] 629 573 #print("Calling OpenCL") … … 636 580 stop = min(start + step, call_details.num_eval) 637 581 #print("queuing",start,stop) 638 kernel_args[1:3] = [np.int32(start), np.int32(stop)]639 wait_for = [kernel( queue, self.q_input.global_size, None,640 * kernel_args, wait_for=wait_for)]582 args[1:3] = [np.int32(start), np.int32(stop)] 583 wait_for = [kernel(self.queue, self.q_input.global_size, None, 584 *args, wait_for=wait_for)] 641 585 if stop < call_details.num_eval: 642 586 # Allow other processes to run … … 646 590 time.sleep(0.05) 647 591 last_nap = current_time 648 cl.enqueue_copy( queue, self.result, result_b, wait_for=wait_for)592 cl.enqueue_copy(self.queue, self.result, self.result_b) 649 593 #print("result", self.result) 650 594 … … 666 610 Release resources associated with the kernel. 667 611 """ 668 environment().free_buffer(id(self)) 669 self.q_input.release() 612 for v in self._need_release: 613 v.release() 614 self._need_release = [] 670 615 671 616 def __del__(self):
Note: See TracChangeset
for help on using the changeset viewer.