Changes in [c6084f1:599993b9] in sasmodels
File: sasmodels/kernelcl.py (1 edited)

Legend: unmodified lines are shown with a leading space, added lines with '+', removed lines with '-'.
sasmodels/kernelcl.py
--- rd86f0fc
+++ r95f62aa

…
 
 from . import generate
+from .generate import F32, F64
 from .kernel import KernelModel, Kernel
 
…
     Return true if device supports the requested precision.
     """
-    if dtype == generate.F32:
+    if dtype == F32:
         return True
     elif dtype == generate.F64:
…
     """
     GPU context, with possibly many devices, and one queue per device.
+
+    Because the environment can be reset during a live program (e.g., if the
+    user changes the active GPU device in the GUI), everything associated
+    with the device context must be cached in the environment and recreated
+    if the environment changes.  The *cache* attribute is a simple dictionary
+    which holds keys and references to objects, such as compiled kernels and
+    allocated buffers.  The running program should check in the cache for
+    long lived objects and create them if they are not there.  The program
+    should not hold onto cached objects, but instead only keep them active
+    for the duration of a function call.  When the environment is destroyed
+    then the *release* method for each active cache item is called before
+    the environment is freed.  This means that each cl buffer should be
+    in its own cache entry.
     """
     def __init__(self):
         # type: () -> None
         # find gpu context
-        #self.context = cl.create_some_context()
-
-        self.context = None
-        if 'SAS_OPENCL' in os.environ:
-            #Setting PYOPENCL_CTX as a SAS_OPENCL to create cl context
-            os.environ["PYOPENCL_CTX"] = os.environ["SAS_OPENCL"]
-        if 'PYOPENCL_CTX' in os.environ:
-            self._create_some_context()
-
-        if not self.context:
-            self.context = _get_default_context()
+        context_list = _create_some_context()
+
+        # Find a context for F32 and for F64 (maybe the same one).
+        # F16 isn't good enough.
+        self.context = {}
+        for dtype in (F32, F64):
+            for context in context_list:
+                if has_type(context.devices[0], dtype):
+                    self.context[dtype] = context
+                    break
+            else:
+                self.context[dtype] = None
+
+        # Build a queue for each context
+        self.queue = {}
+        context = self.context[F32]
+        self.queue[F32] = cl.CommandQueue(context, context.devices[0])
+        if self.context[F64] == self.context[F32]:
+            self.queue[F64] = self.queue[F32]
+        else:
+            context = self.context[F64]
+            self.queue[F64] = cl.CommandQueue(context, context.devices[0])
 
         # Byte boundary for data alignment
-        #self.data_boundary = max(d.min_data_type_align_size
-        #                         for d in self.context.devices)
-        self.queues = [cl.CommandQueue(context, context.devices[0])
-                       for context in self.context]
+        #self.data_boundary = max(context.devices[0].min_data_type_align_size
+        #                         for context in self.context.values())
+
+        # Cache for compiled programs, and for items in context
         self.compiled = {}
+        self.cache = {}
 
     def has_type(self, dtype):
…
         Return True if all devices support a given type.
         """
-        return any(has_type(d, dtype)
-                   for context in self.context
-                   for d in context.devices)
-
-    def get_queue(self, dtype):
-        # type: (np.dtype) -> cl.CommandQueue
-        """
-        Return a command queue for the kernels of type dtype.
-        """
-        for context, queue in zip(self.context, self.queues):
-            if all(has_type(d, dtype) for d in context.devices):
-                return queue
-
-    def get_context(self, dtype):
-        # type: (np.dtype) -> cl.Context
-        """
-        Return a OpenCL context for the kernels of type dtype.
-        """
-        for context in self.context:
-            if all(has_type(d, dtype) for d in context.devices):
-                return context
-
-    def _create_some_context(self):
-        # type: () -> cl.Context
-        """
-        Protected call to cl.create_some_context without interactivity. Use
-        this if SAS_OPENCL is set in the environment. Sets the *context*
-        attribute.
-        """
-        try:
-            self.context = [cl.create_some_context(interactive=False)]
-        except Exception as exc:
-            warnings.warn(str(exc))
-            warnings.warn("pyopencl.create_some_context() failed")
-            warnings.warn("the environment variable 'SAS_OPENCL' might not be set correctly")
+        return self.context.get(dtype, None) is not None
 
     def compile_program(self, name, source, dtype, fast, timestamp):
…
             del self.compiled[key]
         if key not in self.compiled:
-            context = self.get_context(dtype)
+            context = self.context[dtype]
             logging.info("building %s for OpenCL %s", key,
                          context.devices[0].name.strip())
-            program = compile_model(self.get_context(dtype),
+            program = compile_model(self.context[dtype],
                                     str(source), dtype, fast)
             self.compiled[key] = (program, timestamp)
         return program
+
+    def free_buffer(self, key):
+        if key in self.cache:
+            self.cache[key].release()
+            del self.cache[key]
+
+    def __del__(self):
+        for v in self.cache.values():
+            release = getattr(v, 'release', lambda: None)
+            release()
+        self.cache = {}
+
+_CURRENT_ID = 0
+def unique_id():
+    global _CURRENT_ID
+    _CURRENT_ID += 1
+    return _CURRENT_ID
+
+def _create_some_context():
+    # type: () -> cl.Context
+    """
+    Protected call to cl.create_some_context without interactivity.
+
+    Uses SAS_OPENCL or PYOPENCL_CTX if they are set in the environment,
+    otherwise scans for the most appropriate device using
+    :func:`_get_default_context`
+    """
+    if 'SAS_OPENCL' in os.environ:
+        #Setting PYOPENCL_CTX as a SAS_OPENCL to create cl context
+        os.environ["PYOPENCL_CTX"] = os.environ["SAS_OPENCL"]
+
+    if 'PYOPENCL_CTX' in os.environ:
+        try:
+            return [cl.create_some_context(interactive=False)]
+        except Exception as exc:
+            warnings.warn(str(exc))
+            warnings.warn("pyopencl.create_some_context() failed")
+            warnings.warn("the environment variable 'SAS_OPENCL' or 'PYOPENCL_CTX' might not be set correctly")
+
+    return _get_default_context()
 
 def _get_default_context():
…
         self.dtype = dtype
         self.fast = fast
-        self.program = None  # delay program creation
-        self._kernels = None
+        self.timestamp = generate.ocl_timestamp(self.info)
+        self._cache_key = unique_id()
 
     def __getstate__(self):
…
         # type: (Tuple[ModelInfo, str, np.dtype, bool]) -> None
         self.info, self.source, self.dtype, self.fast = state
-        self.program = None
 
     def make_kernel(self, q_vectors):
         # type: (List[np.ndarray]) -> "GpuKernel"
-        if self.program is None:
-            compile_program = environment().compile_program
-            timestamp = generate.ocl_timestamp(self.info)
-            self.program = compile_program(
+        return GpuKernel(self, q_vectors)
+
+    @property
+    def Iq(self):
+        return self._fetch_kernel('Iq')
+
+    def fetch_kernel(self, name):
+        # type: (str) -> cl.Kernel
+        """
+        Fetch the kernel from the environment by name, compiling it if it
+        does not already exist.
+        """
+        gpu = environment()
+        key = self._cache_key
+        if key not in gpu.cache:
+            program = gpu.compile_program(
                 self.info.name,
                 self.source['opencl'],
                 self.dtype,
                 self.fast,
-                timestamp)
+                self.timestamp)
             variants = ['Iq', 'Iqxy', 'Imagnetic']
             names = [generate.kernel_name(self.info, k) for k in variants]
-            kernels = [getattr(self.program, k) for k in names]
-            self._kernels = dict((k, v) for k, v in zip(variants, kernels))
-        is_2d = len(q_vectors) == 2
-        if is_2d:
-            kernel = [self._kernels['Iqxy'], self._kernels['Imagnetic']]
+            kernels = [getattr(program, k) for k in names]
+            data = dict((k, v) for k, v in zip(variants, kernels))
+            # keep a handle to program so GC doesn't collect
+            data['program'] = program
+            gpu.cache[key] = data
         else:
-            kernel = [self._kernels['Iq']]*2
-        return GpuKernel(kernel, self.dtype, self.info, q_vectors)
-
-    def release(self):
-        # type: () -> None
-        """
-        Free the resources associated with the model.
-        """
-        if self.program is not None:
-            self.program = None
-
-    def __del__(self):
-        # type: () -> None
-        self.release()
+            data = gpu.cache[key]
+        return data[name]
 
 # TODO: check that we don't need a destructor for buffers which go out of scope
…
         # type: (List[np.ndarray], np.dtype) -> None
         # TODO: do we ever need double precision q?
-        env = environment()
         self.nq = q_vectors[0].size
         self.dtype = np.dtype(dtype)
…
         self.q[:self.nq] = q_vectors[0]
         self.global_size = [self.q.shape[0]]
-        context = env.get_context(self.dtype)
-        #print("creating inputs of size", self.global_size)
-        self.q_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR,
-                             hostbuf=self.q)
+        self._cache_key = unique_id()
+
+    @property
+    def q_b(self):
+        """Lazy creation of q buffer so it can survive context reset"""
+        env = environment()
+        key = self._cache_key
+        if key not in env.cache:
+            context = env.context[self.dtype]
+            #print("creating inputs of size", self.global_size)
+            buffer = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR,
+                               hostbuf=self.q)
+            env.cache[key] = buffer
+        return env.cache[key]
 
     def release(self):
         # type: () -> None
         """
-        Free the memory.
-        """
-        if self.q_b is not None:
-            self.q_b.release()
-            self.q_b = None
+        Free the buffer associated with the q value
+        """
+        environment().free_buffer(id(self))
 
     def __del__(self):
…
     Callable SAS kernel.
 
-    *kernel* is the GpuKernel object to call
-
-    *model_info* is the module information
-
-    *q_vectors* is the q vectors at which the kernel should be evaluated
+    *model* is the GpuModel object to call
+
+    The following attributes are defined:
+
+    *info* is the module information
 
     *dtype* is the kernel precision
+
+    *dim* is '1d' or '2d'
+
+    *result* is a vector to contain the results of the call
 
     The resulting call method takes the *pars*, a list of values for
…
     Call :meth:`release` when done with the kernel instance.
     """
-    def __init__(self, kernel, dtype, model_info, q_vectors):
+    def __init__(self, model, q_vectors):
         # type: (cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None
-        q_input = GpuInput(q_vectors, dtype)
-        self.kernel = kernel
-        self.info = model_info
-        self.dtype = dtype
-        self.dim = '2d' if q_input.is_2d else '1d'
-        # plus three for the normalization values
-        self.result = np.empty(q_input.nq+1, dtype)
-
-        # Inputs and outputs for each kernel call
-        # Note: res may be shorter than res_b if global_size != nq
+        dtype = model.dtype
+        self.q_input = GpuInput(q_vectors, dtype)
+        self._model = model
+        self._as_dtype = (np.float32 if dtype == generate.F32
+                          else np.float64 if dtype == generate.F64
+                          else np.float16 if dtype == generate.F16
+                          else np.float32)  # will never get here, so use np.float32
+        self._cache_key = unique_id()
+
+        # attributes accessed from the outside
+        self.dim = '2d' if self.q_input.is_2d else '1d'
+        self.info = model.info
+        self.dtype = model.dtype
+
+        # holding place for the returned value
+        # plus one for the normalization values
+        self.result = np.empty(self.q_input.nq+1, dtype)
+
+    @property
+    def _result_b(self):
+        """Lazy creation of result buffer so it can survive context reset"""
         env = environment()
-        self.queue = env.get_queue(dtype)
-
-        self.result_b = cl.Buffer(self.queue.context, mf.READ_WRITE,
-                                  q_input.global_size[0] * dtype.itemsize)
-        self.q_input = q_input  # allocated by GpuInput above
-
-        self._need_release = [self.result_b, self.q_input]
-        self.real = (np.float32 if dtype == generate.F32
-                     else np.float64 if dtype == generate.F64
-                     else np.float16 if dtype == generate.F16
-                     else np.float32)  # will never get here, so use np.float32
+        key = self._cache_key
+        if key not in env.cache:
+            context = env.context[self.dtype]
+            #print("creating inputs of size", self.global_size)
+            buffer = cl.Buffer(context, mf.READ_WRITE,
+                               self.q_input.global_size[0] * self.dtype.itemsize)
+            env.cache[key] = buffer
+        return env.cache[key]
 
     def __call__(self, call_details, values, cutoff, magnetic):
         # type: (CallDetails, np.ndarray, np.ndarray, float, bool) -> np.ndarray
-        context = self.queue.context
-        # Arrange data transfer to card
+        env = environment()
+        queue = env.queue[self._model.dtype]
+        context = queue.context
+
+        # Arrange data transfer to/from card
+        q_b = self.q_input.q_b
+        result_b = self._result_b
         details_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR,
                               hostbuf=call_details.buffer)
…
                              hostbuf=values)
 
-        kernel = self.kernel[1 if magnetic else 0]
-        args = [
+        name = 'Iq' if self.dim == '1d' else 'Imagnetic' if magnetic else 'Iqxy'
+        kernel = self._model.fetch_kernel(name)
+        kernel_args = [
             np.uint32(self.q_input.nq), None, None,
-            details_b, values_b, self.q_input.q_b, self.result_b,
-            self.real(cutoff),
+            details_b, values_b, q_b, result_b,
+            self._as_dtype(cutoff),
         ]
         #print("Calling OpenCL")
…
             stop = min(start + step, call_details.num_eval)
             #print("queuing",start,stop)
-            args[1:3] = [np.int32(start), np.int32(stop)]
-            wait_for = [kernel(self.queue, self.q_input.global_size, None,
-                               *args, wait_for=wait_for)]
+            kernel_args[1:3] = [np.int32(start), np.int32(stop)]
+            wait_for = [kernel(queue, self.q_input.global_size, None,
+                               *kernel_args, wait_for=wait_for)]
             if stop < call_details.num_eval:
                 # Allow other processes to run
…
                     time.sleep(0.05)
                    last_nap = current_time
-        cl.enqueue_copy(self.queue, self.result, self.result_b)
+        cl.enqueue_copy(queue, self.result, result_b, wait_for=wait_for)
         #print("result", self.result)
 
…
         Release resources associated with the kernel.
         """
-        for v in self._need_release:
-            v.release()
-        self._need_release = []
+        environment().free_buffer(id(self))
+        self.q_input.release()
 
     def __del__(self):
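
The thread running through this changeset is that GPU resources (compiled programs, q buffers, result buffers) are no longer held directly on the model, input, and kernel objects. Instead each object takes a key from unique_id() and looks its resources up in the environment's cache, recreating them lazily if the cache was cleared by a context reset. Below is a minimal, pyopencl-free sketch of that pattern; Environment, FakeBuffer, Input, and ENV are illustrative stand-ins for GpuEnvironment, cl.Buffer, GpuInput, and environment(), not names from sasmodels itself.

class FakeBuffer(object):
    """Stand-in for a cl.Buffer; only the release() protocol matters here."""
    def __init__(self, size):
        self.size = size
    def release(self):
        print("releasing buffer of size %d" % self.size)

class Environment(object):
    """Holds the cache of long-lived GPU objects, as GpuEnvironment does."""
    def __init__(self):
        self.cache = {}
    def free_buffer(self, key):
        if key in self.cache:
            self.cache[key].release()
            del self.cache[key]
    def reset(self):
        # e.g., the user picked a different GPU device in the GUI
        for v in self.cache.values():
            v.release()
        self.cache = {}

_CURRENT_ID = 0
def unique_id():
    global _CURRENT_ID
    _CURRENT_ID += 1
    return _CURRENT_ID

ENV = Environment()  # stand-in for environment()

class Input(object):
    """Sketch of the lazy-buffer property used by GpuInput and GpuKernel."""
    def __init__(self, nq):
        self.nq = nq
        self._cache_key = unique_id()  # the key outlives any context reset
    @property
    def q_b(self):
        # lazy creation: rebuild the buffer if the cache was cleared
        if self._cache_key not in ENV.cache:
            ENV.cache[self._cache_key] = FakeBuffer(self.nq)
        return ENV.cache[self._cache_key]
    def release(self):
        ENV.free_buffer(self._cache_key)

inp = Input(100)
buf1 = inp.q_b      # buffer created on first access
ENV.reset()         # context reset releases everything in the cache
buf2 = inp.q_b      # transparently recreated on next access
assert buf1 is not buf2
inp.release()

Keeping each buffer in its own cache entry, rather than on the object that uses it, is what lets free_buffer and the environment's __del__ release everything safely when the device context changes, as described in the new GpuEnvironment docstring above.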