Changeset d5ce7fa in sasmodels
- Timestamp: Oct 25, 2018 2:20:45 PM
- Branches: master, core_shell_microgels, magnetic_model, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children: c11d09f
- Parents: 8b31efa (diff), 2a12d8d8 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
- Files: 1 added, 13 edited
doc/guide/magnetism/magnetism.rst
rbefe905 → rdf87acf

    =========== ================================================================
  - M0:sld      $D_M M_0$
  - mtheta:sld  $\theta_M$
  - mphi:sld    $\phi_M$
  - up:angle    $\theta_\mathrm{up}$
  - up:frac_i   $u_i$ = (spin up)/(spin up + spin down) *before* the sample
  - up:frac_f   $u_f$ = (spin up)/(spin up + spin down) *after* the sample
  + sld_M0      $D_M M_0$
  + sld_mtheta  $\theta_M$
  + sld_mphi    $\phi_M$
  + up_frac_i   $u_i$ = (spin up)/(spin up + spin down) *before* the sample
  + up_frac_f   $u_f$ = (spin up)/(spin up + spin down) *after* the sample
  + up_angle    $\theta_\mathrm{up}$
    =========== ================================================================

    .. note::
  -     The values of the 'up:frac_i' and 'up:frac_f' must be in the range 0 to 1.
  +     The values of the 'up_frac_i' and 'up_frac_f' must be in the range 0 to 1.

    *Document History*
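The renamed parameters simply drop the old colon-separated forms in favour of plain identifiers. A usage sketch with the new names (the model choice, parameter values and 2-D setup are illustrative assumptions, not taken from this changeset)::

    # Sketch only: exercise the renamed magnetic parameters through the
    # direct model interface.  Values and model are illustrative.
    import numpy as np
    from sasmodels.core import load_model
    from sasmodels.data import empty_data2D
    from sasmodels.direct_model import DirectModel

    qx = qy = np.linspace(-0.1, 0.1, 64)
    calc = DirectModel(empty_data2D(qx, qy), load_model("sphere"))
    Iqxy = calc(
        radius=60.0, sld=1.0, sld_solvent=6.0,
        # magnetic parameters, using the new underscore-separated names
        sld_M0=8.0, sld_mtheta=0.0, sld_mphi=0.0,
        up_frac_i=0.5, up_frac_f=0.5, up_angle=0.0,
    )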
sasmodels/kernelcl.py
r8b31efa rd5ce7fa 76 76 77 77 from . import generate 78 from .generate import F32, F64 78 79 from .kernel import KernelModel, Kernel 79 80 … … 165 166 Return true if device supports the requested precision. 166 167 """ 167 if dtype == generate.F32:168 if dtype == F32: 168 169 return True 169 170 elif dtype == generate.F64: … … 219 220 """ 220 221 GPU context, with possibly many devices, and one queue per device. 222 223 Because the environment can be reset during a live program (e.g., if the 224 user changes the active GPU device in the GUI), everything associated 225 with the device context must be cached in the environment and recreated 226 if the environment changes. The *cache* attribute is a simple dictionary 227 which holds keys and references to objects, such as compiled kernels and 228 allocated buffers. The running program should check in the cache for 229 long lived objects and create them if they are not there. The program 230 should not hold onto cached objects, but instead only keep them active 231 for the duration of a function call. When the environment is destroyed 232 then the *release* method for each active cache item is called before 233 the environment is freed. This means that each cl buffer should be 234 in its own cache entry. 221 235 """ 222 236 def __init__(self): 223 237 # type: () -> None 224 238 # find gpu context 225 #self.context = cl.create_some_context() 226 227 self.context = None 228 if 'SAS_OPENCL' in os.environ: 229 # Set the PyOpenCL environment variable PYOPENCL_CTX 230 # from SAS_OPENCL=driver:device. Ignore the generic 231 # SAS_OPENCL=opencl, which is used to select the default 232 # OpenCL device. Don't need to check for "none" or 233 # "cuda" since use_opencl() would return False if they 234 # were defined, and we wouldn't get here. 235 dev_str = os.environ["SAS_OPENCL"] 236 if dev_str and dev_str.lower() != "opencl": 237 os.environ["PYOPENCL_CTX"] = dev_str 238 239 if 'PYOPENCL_CTX' in os.environ: 240 self._create_some_context() 241 242 if not self.context: 243 self.context = _get_default_context() 239 context_list = _create_some_context() 240 241 # Find a context for F32 and for F64 (maybe the same one). 242 # F16 isn't good enough. 243 self.context = {} 244 for dtype in (F32, F64): 245 for context in context_list: 246 if has_type(context.devices[0], dtype): 247 self.context[dtype] = context 248 break 249 else: 250 self.context[dtype] = None 251 252 # Build a queue for each context 253 self.queue = {} 254 context = self.context[F32] 255 self.queue[F32] = cl.CommandQueue(context, context.devices[0]) 256 if self.context[F64] == self.context[F32]: 257 self.queue[F64] = self.queue[F32] 258 else: 259 context = self.context[F64] 260 self.queue[F64] = cl.CommandQueue(context, context.devices[0]) 244 261 245 262 # Byte boundary for data alignment 246 #self.data_boundary = max( d.min_data_type_align_size247 # for d in self.context.devices)248 self.queues = [cl.CommandQueue(context, context.devices[0]) 249 for context in self.context]263 #self.data_boundary = max(context.devices[0].min_data_type_align_size 264 # for context in self.context.values()) 265 266 # Cache for compiled programs, and for items in context 250 267 self.compiled = {} 268 self.cache = {} 251 269 252 270 def has_type(self, dtype): … … 255 273 Return True if all devices support a given type. 
256 274 """ 257 return any(has_type(d, dtype) 258 for context in self.context 259 for d in context.devices) 260 261 def get_queue(self, dtype): 262 # type: (np.dtype) -> cl.CommandQueue 263 """ 264 Return a command queue for the kernels of type dtype. 265 """ 266 for context, queue in zip(self.context, self.queues): 267 if all(has_type(d, dtype) for d in context.devices): 268 return queue 269 270 def get_context(self, dtype): 271 # type: (np.dtype) -> cl.Context 272 """ 273 Return a OpenCL context for the kernels of type dtype. 274 """ 275 for context in self.context: 276 if all(has_type(d, dtype) for d in context.devices): 277 return context 278 279 def _create_some_context(self): 280 # type: () -> cl.Context 281 """ 282 Protected call to cl.create_some_context without interactivity. Use 283 this if SAS_OPENCL is set in the environment. Sets the *context* 284 attribute. 285 """ 286 try: 287 self.context = [cl.create_some_context(interactive=False)] 288 except Exception as exc: 289 warnings.warn(str(exc)) 290 warnings.warn("pyopencl.create_some_context() failed") 291 warnings.warn("the environment variable 'SAS_OPENCL' might not be set correctly") 275 return self.context.get(dtype, None) is not None 292 276 293 277 def compile_program(self, name, source, dtype, fast, timestamp): … … 306 290 del self.compiled[key] 307 291 if key not in self.compiled: 308 context = self. get_context(dtype)292 context = self.context[dtype] 309 293 logging.info("building %s for OpenCL %s", key, 310 294 context.devices[0].name.strip()) 311 program = compile_model(self. get_context(dtype),295 program = compile_model(self.context[dtype], 312 296 str(source), dtype, fast) 313 297 self.compiled[key] = (program, timestamp) 314 298 return program 299 300 def free_buffer(self, key): 301 if key in self.cache: 302 self.cache[key].release() 303 del self.cache[key] 304 305 def __del__(self): 306 for v in self.cache.values(): 307 release = getattr(v, 'release', lambda: None) 308 release() 309 self.cache = {} 310 311 _CURRENT_ID = 0 312 def unique_id(): 313 global _CURRENT_ID 314 _CURRENT_ID += 1 315 return _CURRENT_ID 316 317 def _create_some_context(): 318 # type: () -> cl.Context 319 """ 320 Protected call to cl.create_some_context without interactivity. 321 322 Uses SAS_OPENCL or PYOPENCL_CTX if they are set in the environment, 323 otherwise scans for the most appropriate device using 324 :func:`_get_default_context`. Ignore *SAS_OPENCL=OpenCL*, which 325 indicates that an OpenCL device should be used without specifying 326 which one (and not a CUDA device, or no GPU). 327 """ 328 dev_str = os.environ["SAS_OPENCL"] 329 if dev_str and dev_str.lower() != "opencl": 330 #Setting PYOPENCL_CTX as a SAS_OPENCL to create cl context 331 os.environ["PYOPENCL_CTX"] = dev_str 332 333 if 'PYOPENCL_CTX' in os.environ: 334 try: 335 return [cl.create_some_context(interactive=False)] 336 except Exception as exc: 337 warnings.warn(str(exc)) 338 warnings.warn("pyopencl.create_some_context() failed") 339 warnings.warn("the environment variable 'SAS_OPENCL' or 'PYOPENCL_CTX' might not be set correctly") 340 341 return _get_default_context() 315 342 316 343 def _get_default_context(): … … 392 419 self.dtype = dtype 393 420 self.fast = fast 394 self. 
program = None # delay program creation395 self._ kernels = None421 self.timestamp = generate.ocl_timestamp(self.info) 422 self._cache_key = unique_id() 396 423 397 424 def __getstate__(self): … … 402 429 # type: (Tuple[ModelInfo, str, np.dtype, bool]) -> None 403 430 self.info, self.source, self.dtype, self.fast = state 404 self.program = None405 431 406 432 def make_kernel(self, q_vectors): 407 433 # type: (List[np.ndarray]) -> "GpuKernel" 408 if self.program is None: 409 compile_program = environment().compile_program 410 timestamp = generate.ocl_timestamp(self.info) 411 self.program = compile_program( 434 return GpuKernel(self, q_vectors) 435 436 @property 437 def Iq(self): 438 return self._fetch_kernel('Iq') 439 440 def fetch_kernel(self, name): 441 # type: (str) -> cl.Kernel 442 """ 443 Fetch the kernel from the environment by name, compiling it if it 444 does not already exist. 445 """ 446 gpu = environment() 447 key = self._cache_key 448 if key not in gpu.cache: 449 program = gpu.compile_program( 412 450 self.info.name, 413 451 self.source['opencl'], 414 452 self.dtype, 415 453 self.fast, 416 timestamp)454 self.timestamp) 417 455 variants = ['Iq', 'Iqxy', 'Imagnetic'] 418 456 names = [generate.kernel_name(self.info, k) for k in variants] 419 kernels = [getattr( self.program, k) for k in names]420 self._kernels= dict((k, v) for k, v in zip(variants, kernels))421 is_2d = len(q_vectors) == 2422 if is_2d:423 kernel = [self._kernels['Iqxy'], self._kernels['Imagnetic']]457 kernels = [getattr(program, k) for k in names] 458 data = dict((k, v) for k, v in zip(variants, kernels)) 459 # keep a handle to program so GC doesn't collect 460 data['program'] = program 461 gpu.cache[key] = data 424 462 else: 425 kernel = [self._kernels['Iq']]*2 426 return GpuKernel(kernel, self.dtype, self.info, q_vectors) 427 428 def release(self): 429 # type: () -> None 430 """ 431 Free the resources associated with the model. 432 """ 433 if self.program is not None: 434 self.program = None 435 436 def __del__(self): 437 # type: () -> None 438 self.release() 463 data = gpu.cache[key] 464 return data[name] 439 465 440 466 # TODO: check that we don't need a destructor for buffers which go out of scope … … 461 487 # type: (List[np.ndarray], np.dtype) -> None 462 488 # TODO: do we ever need double precision q? 463 env = environment()464 489 self.nq = q_vectors[0].size 465 490 self.dtype = np.dtype(dtype) … … 481 506 self.q[:self.nq] = q_vectors[0] 482 507 self.global_size = [self.q.shape[0]] 483 context = env.get_context(self.dtype) 484 #print("creating inputs of size", self.global_size) 485 self.q_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 486 hostbuf=self.q) 508 self._cache_key = unique_id() 509 510 @property 511 def q_b(self): 512 """Lazy creation of q buffer so it can survive context reset""" 513 env = environment() 514 key = self._cache_key 515 if key not in env.cache: 516 context = env.context[self.dtype] 517 #print("creating inputs of size", self.global_size) 518 buffer = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 519 hostbuf=self.q) 520 env.cache[key] = buffer 521 return env.cache[key] 487 522 488 523 def release(self): 489 524 # type: () -> None 490 525 """ 491 Free the memory. 492 """ 493 if self.q_b is not None: 494 self.q_b.release() 495 self.q_b = None 526 Free the buffer associated with the q value 527 """ 528 environment().free_buffer(id(self)) 496 529 497 530 def __del__(self): … … 503 536 Callable SAS kernel. 
504 537 505 * kernel* is the GpuKernel object to call506 507 *model_info* is the module information508 509 * q_vectors* is the q vectors at which the kernel should be evaluated538 *model* is the GpuModel object to call 539 540 The following attributes are defined: 541 542 *info* is the module information 510 543 511 544 *dtype* is the kernel precision 545 546 *dim* is '1d' or '2d' 547 548 *result* is a vector to contain the results of the call 512 549 513 550 The resulting call method takes the *pars*, a list of values for … … 519 556 Call :meth:`release` when done with the kernel instance. 520 557 """ 521 def __init__(self, kernel, dtype, model_info, q_vectors):558 def __init__(self, model, q_vectors): 522 559 # type: (cl.Kernel, np.dtype, ModelInfo, List[np.ndarray]) -> None 523 q_input = GpuInput(q_vectors, dtype) 524 self.kernel = kernel 525 self.info = model_info 526 self.dtype = dtype 527 self.dim = '2d' if q_input.is_2d else '1d' 528 # plus three for the normalization values 529 self.result = np.empty(q_input.nq+1, dtype) 530 531 # Inputs and outputs for each kernel call 532 # Note: res may be shorter than res_b if global_size != nq 560 dtype = model.dtype 561 self.q_input = GpuInput(q_vectors, dtype) 562 self._model = model 563 self._as_dtype = (np.float32 if dtype == generate.F32 564 else np.float64 if dtype == generate.F64 565 else np.float16 if dtype == generate.F16 566 else np.float32) # will never get here, so use np.float32 567 self._cache_key = unique_id() 568 569 # attributes accessed from the outside 570 self.dim = '2d' if self.q_input.is_2d else '1d' 571 self.info = model.info 572 self.dtype = model.dtype 573 574 # holding place for the returned value 575 # plus one for the normalization values 576 self.result = np.empty(self.q_input.nq+1, dtype) 577 578 @property 579 def _result_b(self): 580 """Lazy creation of result buffer so it can survive context reset""" 533 581 env = environment() 534 self.queue = env.get_queue(dtype) 535 536 self.result_b = cl.Buffer(self.queue.context, mf.READ_WRITE, 537 q_input.global_size[0] * dtype.itemsize) 538 self.q_input = q_input # allocated by GpuInput above 539 540 self._need_release = [self.result_b, self.q_input] 541 self.real = (np.float32 if dtype == generate.F32 542 else np.float64 if dtype == generate.F64 543 else np.float16 if dtype == generate.F16 544 else np.float32) # will never get here, so use np.float32 582 key = self._cache_key 583 if key not in env.cache: 584 context = env.context[self.dtype] 585 #print("creating inputs of size", self.global_size) 586 buffer = cl.Buffer(context, mf.READ_WRITE, 587 self.q_input.global_size[0] * self.dtype.itemsize) 588 env.cache[key] = buffer 589 return env.cache[key] 545 590 546 591 def __call__(self, call_details, values, cutoff, magnetic): 547 592 # type: (CallDetails, np.ndarray, np.ndarray, float, bool) -> np.ndarray 548 context = self.queue.context 549 # Arrange data transfer to card 593 env = environment() 594 queue = env.queue[self._model.dtype] 595 context = queue.context 596 597 # Arrange data transfer to/from card 598 q_b = self.q_input.q_b 599 result_b = self._result_b 550 600 details_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, 551 601 hostbuf=call_details.buffer) … … 553 603 hostbuf=values) 554 604 555 kernel = self.kernel[1 if magnetic else 0] 556 args = [ 605 name = 'Iq' if self.dim == '1d' else 'Imagnetic' if magnetic else 'Iqxy' 606 kernel = self._model.fetch_kernel(name) 607 kernel_args = [ 557 608 np.uint32(self.q_input.nq), None, None, 558 details_b, 
values_b, self.q_input.q_b, self.result_b,559 self. real(cutoff),609 details_b, values_b, q_b, result_b, 610 self._as_dtype(cutoff), 560 611 ] 561 612 #print("Calling OpenCL") … … 568 619 stop = min(start + step, call_details.num_eval) 569 620 #print("queuing",start,stop) 570 args[1:3] = [np.int32(start), np.int32(stop)]571 wait_for = [kernel( self.queue, self.q_input.global_size, None,572 * args, wait_for=wait_for)]621 kernel_args[1:3] = [np.int32(start), np.int32(stop)] 622 wait_for = [kernel(queue, self.q_input.global_size, None, 623 *kernel_args, wait_for=wait_for)] 573 624 if stop < call_details.num_eval: 574 625 # Allow other processes to run … … 578 629 time.sleep(0.001) 579 630 last_nap = current_time 580 cl.enqueue_copy( self.queue, self.result, self.result_b)631 cl.enqueue_copy(queue, self.result, result_b, wait_for=wait_for) 581 632 #print("result", self.result) 582 633 … … 598 649 Release resources associated with the kernel. 599 650 """ 600 for v in self._need_release: 601 v.release() 602 self._need_release = [] 651 environment().free_buffer(id(self)) 652 self.q_input.release() 603 653 604 654 def __del__(self): -
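The long docstring added to the GPU environment above describes the new discipline: device objects live only in the environment cache, keyed per owner, and are recreated lazily if the user resets the GPU context. A stripped-down sketch of that pattern (the class and function names below are illustrative stand-ins, not the real kernelcl classes)::

    # Sketch of the cache-and-recreate pattern described in the docstring.
    # The real code caches cl.Buffer and compiled program objects instead.
    class Environment(object):
        def __init__(self):
            self.cache = {}                 # key -> object with a release() method

        def free_buffer(self, key):
            if key in self.cache:
                self.cache[key].release()
                del self.cache[key]

    _CURRENT_ID = 0
    def unique_id():
        global _CURRENT_ID
        _CURRENT_ID += 1
        return _CURRENT_ID

    class Input(object):
        """Owner of a device buffer that must survive a context reset."""
        def __init__(self, q):
            self.q = q
            self._cache_key = unique_id()   # stable key for this owner

        def q_buffer(self, env, make_buffer):
            # Lazy creation: rebuild the buffer if the environment was reset.
            if self._cache_key not in env.cache:
                env.cache[self._cache_key] = make_buffer(self.q)
            return env.cache[self._cache_key]

        def release(self, env):
            # Each buffer gets its own cache entry, so release is per entry.
            env.free_buffer(self._cache_key)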
sasmodels/models/spinodal.py
r475ff58 → r93fe8a1

    where $x=q/q_0$, $q_0$ is the peak position, $I_{max}$ is the intensity
    at $q_0$ (parameterised as the $scale$ parameter), and $B$ is a flat
  - background. The spinodal wavelength is given by $2\pi/q_0$.
  + background. The spinodal wavelength, $\Lambda$, is given by $2\pi/q_0$.
  +
  + The definition of $I_{max}$ in the literature varies. Hashimoto *et al* (1991)
  + define it as
  +
  + .. math::
  +     I_{max} = \Lambda^3\Delta\rho^2
  +
  + whereas Meier & Strobl (1987) give
  +
  + .. math::
  +     I_{max} = V_z\Delta\rho^2
  +
  + where $V_z$ is the volume per monomer unit.

    The exponent $\gamma$ is equal to $d+1$ for off-critical concentration
    …
    H. Furukawa. Dynamics-scaling theory for phase-separating unmixing mixtures:
  - Growth rates of droplets and scaling properties of autocorrelation functions.
  - Physica A 123,497 (1984).
  + Growth rates of droplets and scaling properties of autocorrelation functions.
  + Physica A 123, 497 (1984).
  +
  + H. Meier & G. Strobl. Small-Angle X-ray Scattering Study of Spinodal
  + Decomposition in Polystyrene/Poly(styrene-co-bromostyrene) Blends.
  + Macromolecules 20, 649-654 (1987).
  +
  + T. Hashimoto, M. Takenaka & H. Jinnai. Scattering Studies of Self-Assembling
  + Processes of Polymer Blends in Spinodal Decomposition.
  + J. Appl. Cryst. 24, 457-466 (1991).

    Revision History
    …
    * **Author:** Dirk Honecker **Date:** Oct 7, 2016
  - * **Revised:** Steve King **Date:** Sep 7, 2018
  + * **Revised:** Steve King **Date:** Oct 25, 2018
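For reference, the functional form the docstring is parameterising is the Furukawa scaling function; written out (a sketch from the cited literature, not text copied from the file) it reads

.. math::

    I(q) = \frac{I_{max}\,(1 + \gamma/2)\,x^2}{\gamma/2 + x^{2+\gamma}} + B,
    \qquad x = q/q_0, \quad \Lambda = 2\pi/q_0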
setup.py
r1f991d6 → r783e76f

        return version[1:-1]
        raise RuntimeError("Could not read version from %s/__init__.py"%package)
  +
  + install_requires = ['numpy', 'scipy']
  +
  + if sys.platform=='win32' or sys.platform=='cygwin':
  +     install_requires.append('tinycc')

    setup(
    …
        'sasmodels': ['*.c', '*.cl'],
    },
  - install_requires=[
  - ],
  + install_requires=install_requires,
    extras_require={
  +     'full': ['docutils', 'bumps', 'matplotlib'],
  +     'server': ['bumps'],
        'OpenCL': ["pyopencl"],
  -     'Bumps': ["bumps"],
  -     'TinyCC': ["tinycc"],
    },
    build_requires=['setuptools'],
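With the reworked dependency lists, the optional features map onto pip extras; for example (a usage sketch, assuming installation from a package index) ``pip install sasmodels[full]`` would pull in docutils, bumps and matplotlib, ``pip install sasmodels[OpenCL]`` adds pyopencl, and on Windows/Cygwin the tinycc compiler is now a hard requirement rather than an extra.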
doc/guide/gpu_setup.rst
r63602b1 → r8b31efa

    Device Selection
    ================
  + **OpenCL drivers**
  +
    If you have multiple GPU devices you can tell the program which device to use.
    By default, the program looks for one GPU and one CPU device from available
    …
    was used to run the model.

  - **If you don't want to use OpenCL, you can set** *SAS_OPENCL=None*
  - **in your environment settings, and it will only use normal programs.**
  -
  - If you want to use one of the other devices, you can run the following
  + If you want to use a specific driver and devices, you can run the following
    from the python console::
    …
    This will provide a menu of different OpenCL drivers available.
    When one is selected, it will say "set PYOPENCL_CTX=..."
  - Use that value as the value of *SAS_OPENCL*.
  + Use that value as the value of *SAS_OPENCL=driver:device*.
  +
  + To use the default OpenCL device (rather than CUDA or None),
  + set *SAS_OPENCL=opencl*.
  +
  + In batch queues, you may need to set *XDG_CACHE_HOME=~/.cache*
  + (Linux only) to a different directory, depending on how the filesystem
  + is configured. You should also set *SAS_DLL_PATH* for CPU-only modules.
  +
  +     -DSAS_MODELPATH=path sets directory containing custom models
  +     -DSAS_OPENCL=vendor:device|cuda:device|none sets the target GPU device
  +     -DXDG_CACHE_HOME=~/.cache sets the pyopencl cache root (linux only)
  +     -DSAS_COMPILER=tinycc|msvc|mingw|unix sets the DLL compiler
  +     -DSAS_OPENMP=1 turns on OpenMP for the DLLs
  +     -DSAS_DLL_PATH=path sets the path to the compiled modules
  +
  + **CUDA drivers**
  +
  + If OpenCL drivers are not available on your system, but NVidia CUDA
  + drivers are available, then set *SAS_OPENCL=cuda* or
  + *SAS_OPENCL=cuda:n* for a particular device number *n*. If no device
  + number is specified, then the CUDA drivers looks for look for
  + *CUDA_DEVICE=n* or a file ~/.cuda-device containing n for the device number.
  +
  + In batch queues, the SLURM command *sbatch --gres=gpu:1 ...* will set
  + *CUDA_VISIBLE_DEVICES=n*, which ought to set the correct device
  + number for *SAS_OPENCL=cuda*. If not, then set
  + *CUDA_DEVICE=$CUDA_VISIBLE_DEVICES* within the batch script. You may
  + need to set the CUDA cache directory to a folder accessible across the
  + cluster with *PYCUDA_CACHE_DIR* (or *PYCUDA_DISABLE_CACHE* to disable
  + caching), and you may need to set environment specific compiler flags
  + with *PYCUDA_DEFAULT_NVCC_FLAGS*. You should also set *SAS_DLL_PATH*
  + for CPU-only modules.
  +
  + **No GPU support**
  +
  + If you don't want to use OpenCL or CUDA, you can set *SAS_OPENCL=None*
  + in your environment settings, and it will only use normal programs.
  +
  + In batch queues, you may need to set *SAS_DLL_PATH* to a directory
  + accessible on the compute node.

    Device Testing
    …
    *Document History*

  - | 2017-09-27 Paul Kienzle
  + | 2018-10-15 Paul Kienzle
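The same selections can also be made from Python before sasmodels builds its first kernel; a minimal sketch (the device string and paths are placeholders, and the variables are assumed to be read when the first kernel is compiled)::

    # Sketch: configure the GPU target via environment variables before use.
    import os

    # "driver:device" for a specific OpenCL device, "opencl" for the default
    # OpenCL device, "cuda" or "cuda:n" for CUDA, or "none" for CPU-only DLLs.
    os.environ["SAS_OPENCL"] = "cuda:0"                            # placeholder

    # Optional cluster settings: caches and compiled-module path.
    os.environ.setdefault("XDG_CACHE_HOME", os.path.expanduser("~/.cache"))
    os.environ.setdefault("SAS_DLL_PATH", "/tmp/sasmodels-dll")    # placeholder

    import sasmodels.core   # import after the environment is configured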
sasmodels/compare.py
r610ef23 → r4de14584

    from . import kerneldll
    from . import kernelcl
  + from . import kernelcuda
    from .data import plot_theory, empty_data1D, empty_data2D, load_data
    from .direct_model import DirectModel, get_mesh
    …
    === environment variables ===
    -DSAS_MODELPATH=path sets directory containing custom models
  - -DSAS_OPENCL=vendor:device|none sets the target OpenCL device
  + -DSAS_OPENCL=vendor:device|cuda:device|none sets the target GPU device
    -DXDG_CACHE_HOME=~/.cache sets the pyopencl cache root (linux only)
    -DSAS_COMPILER=tinycc|msvc|mingw|unix sets the DLL compiler
    …
        set_integration_size(model_info, ngauss)

  - if dtype != "default" and not dtype.endswith('!') and not kernelcl.use_opencl():
  + if (dtype != "default" and not dtype.endswith('!')
  +         and not (kernelcl.use_opencl() or kernelcuda.use_cuda())):
        raise RuntimeError("OpenCL not available " + kernelcl.OPENCL_ERROR)
sasmodels/core.py
r2dcd6e7 rb0de252 21 21 from . import mixture 22 22 from . import kernelpy 23 from . import kernelcuda 23 24 from . import kernelcl 24 25 from . import kerneldll … … 210 211 #print("building dll", numpy_dtype) 211 212 return kerneldll.load_dll(source['dll'], model_info, numpy_dtype) 213 elif platform == "cuda": 214 return kernelcuda.GpuModel(source, model_info, numpy_dtype, fast=fast) 212 215 else: 213 216 #print("building ocl", numpy_dtype) … … 245 248 # type: (ModelInfo, str, str) -> (np.dtype, bool, str) 246 249 """ 247 Interpret dtype string, returning np.dtype and fast flag.250 Interpret dtype string, returning np.dtype, fast flag and platform. 248 251 249 252 Possible types include 'half', 'single', 'double' and 'quad'. If the … … 253 256 default for the model and platform. 254 257 255 Platform preference can be specfied ("ocl" vs "dll"), with the default 256 being OpenCL if it is availabe. If the dtype name ends with '!' then 257 platform is forced to be DLL rather than OpenCL. 258 Platform preference can be specfied ("ocl", "cuda", "dll"), with the 259 default being OpenCL or CUDA if available, otherwise DLL. If the dtype 260 name ends with '!' then platform is forced to be DLL rather than GPU. 261 The default platform is set by the environment variable SAS_OPENCL, 262 SAS_OPENCL=driver:device for OpenCL, SAS_OPENCL=cuda:device for CUDA 263 or SAS_OPENCL=none for DLL. 258 264 259 265 This routine ignores the preferences within the model definition. This … … 268 274 if platform is None: 269 275 platform = "ocl" 270 if not kernelcl.use_opencl() or not model_info.opencl:271 platform = "dll"272 276 273 277 # Check if type indicates dll regardless of which platform is given … … 275 279 platform = "dll" 276 280 dtype = dtype[:-1] 281 282 # Make sure model allows opencl/gpu 283 if not model_info.opencl: 284 platform = "dll" 285 286 # Make sure opencl is available, or fallback to cuda then to dll 287 if platform == "ocl" and not kernelcl.use_opencl(): 288 platform = "cuda" if kernelcuda.use_cuda() else "dll" 277 289 278 290 # Convert special type names "half", "fast", and "quad" … … 285 297 dtype = "float16" 286 298 287 # Convert dtype string to numpy dtype. 299 # Convert dtype string to numpy dtype. Use single precision for GPU 300 # if model allows it, otherwise use double precision. 288 301 if dtype is None or dtype == "default": 289 numpy_dtype = (generate.F32 if platform == "ocl" and model_info.single302 numpy_dtype = (generate.F32 if model_info.single and platform in ("ocl", "cuda") 290 303 else generate.F64) 291 304 else: 292 305 numpy_dtype = np.dtype(dtype) 293 306 294 # Make sure that the type is supported by opencl, otherwise use dll307 # Make sure that the type is supported by GPU, otherwise use dll 295 308 if platform == "ocl": 296 309 env = kernelcl.environment() 297 if not env.has_type(numpy_dtype): 298 platform = "dll" 299 if dtype is None: 300 numpy_dtype = generate.F64 310 elif platform == "cuda": 311 env = kernelcuda.environment() 312 else: 313 env = None 314 if env is not None and not env.has_type(numpy_dtype): 315 platform = "dll" 316 if dtype is None: 317 numpy_dtype = generate.F64 301 318 302 319 return numpy_dtype, fast, platform -
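The revised fallback order (OpenCL, then CUDA, then DLL) is easiest to see from the caller's side; a hedged usage sketch of the parser above (actual results depend on which drivers are installed)::

    # Sketch: how dtype strings resolve to (numpy dtype, fast flag, platform).
    # Comments give the intended resolution; real results depend on drivers.
    from sasmodels.core import load_model_info, parse_dtype

    model_info = load_model_info("cylinder")

    # A trailing '!' forces the DLL platform regardless of GPU availability.
    dtype, fast, platform = parse_dtype(model_info, "single!")

    # "default" prefers float32 on a GPU platform for single-precision models,
    # falling back ocl -> cuda -> dll when a platform is unavailable.
    dtype, fast, platform = parse_dtype(model_info, "default", platform="ocl")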
sasmodels/kernel_header.c
r108e70e r74e9b5f 1 1 #ifdef __OPENCL_VERSION__ 2 2 # define USE_OPENCL 3 #elif defined(__CUDACC__) 4 # define USE_CUDA 3 5 #elif defined(_OPENMP) 4 6 # define USE_OPENMP 5 7 #endif 8 9 // Use SAS_DOUBLE to force the use of double even for float kernels 10 #define SAS_DOUBLE dou ## ble 6 11 7 12 // If opencl is not available, then we are compiling a C function 8 13 // Note: if using a C++ compiler, then define kernel as extern "C" 9 14 #ifdef USE_OPENCL 15 16 #define USE_GPU 17 #define pglobal global 18 #define pconstant constant 19 10 20 typedef int int32_t; 11 # if defined(USE_SINCOS) 12 # define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar) 13 # else 14 # define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0) 15 # endif 21 22 #if defined(USE_SINCOS) 23 # define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar) 24 #else 25 # define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0) 26 #endif 16 27 // Intel CPU on Mac gives strange values for erf(); on the verified 17 28 // platforms (intel, nvidia, amd), the cephes erf() is significantly … … 24 35 # define erfcf erfc 25 36 #endif 26 #else // !USE_OPENCL 27 // Use SAS_DOUBLE to force the use of double even for float kernels 28 # define SAS_DOUBLE dou ## ble 29 # ifdef __cplusplus 37 38 #elif defined(USE_CUDA) 39 40 #define USE_GPU 41 #define local __shared__ 42 #define pglobal 43 #define constant __constant__ 44 #define pconstant const 45 #define kernel extern "C" __global__ 46 47 // OpenCL powr(a,b) = C99 pow(a,b), b >= 0 48 // OpenCL pown(a,b) = C99 pow(a,b), b integer 49 #define powr(a,b) pow(a,b) 50 #define pown(a,b) pow(a,b) 51 //typedef int int32_t; 52 #if defined(USE_SINCOS) 53 # define SINCOS(angle,svar,cvar) sincos(angle,&svar,&cvar) 54 #else 55 # define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0) 56 #endif 57 58 #else // !USE_OPENCL && !USE_CUDA 59 60 #define local 61 #define pglobal 62 #define constant const 63 #define pconstant const 64 65 #ifdef __cplusplus 30 66 #include <cstdio> 31 67 #include <cmath> … … 51 87 #endif 52 88 inline void SINCOS(double angle, double &svar, double &cvar) { svar=sin(angle); cvar=cos(angle); } 53 #else // !__cplusplus89 #else // !__cplusplus 54 90 #include <inttypes.h> // C99 guarantees that int32_t types is here 55 91 #include <stdio.h> … … 76 112 #define kernel 77 113 #define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0) 78 # endif // !__cplusplus 79 # define global 80 # define local 81 # define constant const 82 // OpenCL powr(a,b) = C99 pow(a,b), b >= 0 83 // OpenCL pown(a,b) = C99 pow(a,b), b integer 84 # define powr(a,b) pow(a,b) 85 # define pown(a,b) pow(a,b) 114 #endif // !__cplusplus 115 // OpenCL powr(a,b) = C99 pow(a,b), b >= 0 116 // OpenCL pown(a,b) = C99 pow(a,b), b integer 117 #define powr(a,b) pow(a,b) 118 #define pown(a,b) pow(a,b) 119 86 120 #endif // !USE_OPENCL 87 121 -
sasmodels/kernel_iq.c
r70530778 r74e9b5f 278 278 const int32_t pd_start, // where we are in the dispersity loop 279 279 const int32_t pd_stop, // where we are stopping in the dispersity loop 280 global const ProblemDetails *details,281 global const double *values,282 global const double *q, // nq q values, with padding to boundary283 global double *result, // nq+1 return values, again with padding280 pglobal const ProblemDetails *details, 281 pglobal const double *values, 282 pglobal const double *q, // nq q values, with padding to boundary 283 pglobal double *result, // nq+1 return values, again with padding 284 284 const double cutoff // cutoff in the dispersity weight product 285 285 ) 286 286 { 287 #if def USE_OPENCL287 #if defined(USE_GPU) 288 288 // who we are and what element we are working with 289 #if defined(USE_OPENCL) 289 290 const int q_index = get_global_id(0); 291 #else // USE_CUDA 292 const int q_index = threadIdx.x + blockIdx.x * blockDim.x; 293 #endif 290 294 if (q_index >= nq) return; 291 295 #else … … 340 344 // seeing one q value (stored in the variable "this_result") while the dll 341 345 // version must loop over all q. 342 #if def USE_OPENCL346 #if defined(USE_GPU) 343 347 double pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 344 348 double this_result = (pd_start == 0 ? 0.0 : result[q_index]); 345 #else // !USE_ OPENCL349 #else // !USE_GPU 346 350 double pd_norm = (pd_start == 0 ? 0.0 : result[nq]); 347 351 if (pd_start == 0) { … … 352 356 } 353 357 //if (q_index==0) printf("start %d %g %g\n", pd_start, pd_norm, result[0]); 354 #endif // !USE_ OPENCL358 #endif // !USE_GPU 355 359 356 360 … … 375 379 const int n4 = pd_length[4]; 376 380 const int p4 = pd_par[4]; 377 global const double *v4 = pd_value + pd_offset[4];378 global const double *w4 = pd_weight + pd_offset[4];381 pglobal const double *v4 = pd_value + pd_offset[4]; 382 pglobal const double *w4 = pd_weight + pd_offset[4]; 379 383 int i4 = (pd_start/pd_stride[4])%n4; // position in level 4 at pd_start 380 384 … … 562 566 const int n##_LOOP = details->pd_length[_LOOP]; \ 563 567 const int p##_LOOP = details->pd_par[_LOOP]; \ 564 global const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \565 global const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \568 pglobal const double *v##_LOOP = pd_value + details->pd_offset[_LOOP]; \ 569 pglobal const double *w##_LOOP = pd_weight + details->pd_offset[_LOOP]; \ 566 570 int i##_LOOP = (pd_start/details->pd_stride[_LOOP])%n##_LOOP; 567 571 … … 587 591 // Pointers to the start of the dispersity and weight vectors, if needed. 588 592 #if MAX_PD>0 589 global const double *pd_value = values + NUM_VALUES;590 global const double *pd_weight = pd_value + details->num_weights;593 pglobal const double *pd_value = values + NUM_VALUES; 594 pglobal const double *pd_weight = pd_value + details->num_weights; 591 595 #endif 592 596 … … 648 652 BUILD_ROTATION(); 649 653 650 #if ndef USE_OPENCL654 #if !defined(USE_GPU) 651 655 // DLL needs to explicitly loop over the q values. 
652 656 #ifdef USE_OPENMP … … 654 658 #endif 655 659 for (q_index=0; q_index<nq; q_index++) 656 #endif // !USE_ OPENCL660 #endif // !USE_GPU 657 661 { 658 662 … … 697 701 //printf("q_index:%d %g %g %g %g\n", q_index, scattering, weight0); 698 702 699 #if def USE_OPENCL703 #if defined(USE_GPU) 700 704 this_result += weight * scattering; 701 #else // !USE_ OPENCL705 #else // !USE_GPU 702 706 result[q_index] += weight * scattering; 703 #endif // !USE_ OPENCL707 #endif // !USE_GPU 704 708 } 705 709 } … … 725 729 726 730 // Remember the current result and the updated norm. 727 #if def USE_OPENCL731 #if defined(USE_GPU) 728 732 result[q_index] = this_result; 729 733 if (q_index == 0) result[nq] = pd_norm; 730 734 //if (q_index == 0) printf("res: %g/%g\n", result[0], pd_norm); 731 #else // !USE_ OPENCL735 #else // !USE_GPU 732 736 result[nq] = pd_norm; 733 737 //printf("res: %g/%g\n", result[0], pd_norm); 734 #endif // !USE_ OPENCL738 #endif // !USE_GPU 735 739 736 740 // ** clear the macros in preparation for the next kernel ** -
sasmodels/model_test.py
r012cd34 r74e9b5f 5 5 Usage:: 6 6 7 python -m sasmodels.model_test [opencl| dll|opencl_and_dll] model1 model2 ...7 python -m sasmodels.model_test [opencl|cuda|dll] model1 model2 ... 8 8 9 9 if model1 is 'all', then all except the remaining models will be tested … … 63 63 from .modelinfo import expand_pars 64 64 from .kernelcl import use_opencl 65 from .kernelcuda import use_cuda 65 66 66 67 # pylint: disable=unused-import … … 80 81 Construct the pyunit test suite. 81 82 82 *loaders* is the list of kernel drivers to use, which is one of 83 *["dll", "opencl"]*, *["dll"]* or *["opencl"]*. For python models, 84 the python driver is always used. 83 *loaders* is the list of kernel drivers to use (dll, opencl or cuda). 84 For python model the python driver is always used. 85 85 86 86 *models* is the list of models to test, or *["all"]* to test all models. … … 135 135 136 136 # test using dll if desired 137 if 'dll' in loaders or not use_opencl():137 if 'dll' in loaders: 138 138 test_name = "%s-dll"%model_name 139 139 test_method_name = "test_%s_dll" % model_info.id … … 156 156 test_method_name, 157 157 platform="ocl", dtype=None, 158 stash=stash) 159 #print("defining", test_name) 160 suite.addTest(test) 161 162 # test using cuda if desired and available 163 if 'cuda' in loaders and use_cuda(): 164 test_name = "%s-cuda"%model_name 165 test_method_name = "test_%s_cuda" % model_info.id 166 # Using dtype=None so that the models that are only 167 # correct for double precision are not tested using 168 # single precision. The choice is determined by the 169 # presence of *single=False* in the model file. 170 test = ModelTestCase(test_name, model_info, 171 test_method_name, 172 platform="cuda", dtype=None, 158 173 stash=stash) 159 174 #print("defining", test_name) … … 220 235 221 236 # Check for missing tests. Only do so for the "dll" tests 222 # to reduce noise from both opencl and dll, and because237 # to reduce noise from both opencl and cuda, and because 223 238 # python kernels use platform="dll". 224 239 if self.platform == "dll": … … 368 383 369 384 # Build a test suite containing just the model 370 loader s = ['opencl'] if use_opencl() else ['dll']385 loader = 'opencl' if use_opencl() else 'cuda' if use_cuda() else 'dll' 371 386 models = [model] 372 387 try: 373 suite = make_suite( loaders, models)388 suite = make_suite([loader], models) 374 389 except Exception: 375 390 import traceback … … 434 449 loaders = ['opencl'] 435 450 models = models[1:] 451 elif models and models[0] == 'cuda': 452 if not use_cuda(): 453 print("cuda is not available") 454 return 1 455 loaders = ['cuda'] 456 models = models[1:] 436 457 elif models and models[0] == 'dll': 437 458 # TODO: test if compiler is available? 438 459 loaders = ['dll'] 439 460 models = models[1:] 440 elif models and models[0] == 'opencl_and_dll':441 loaders = ['opencl', 'dll'] if use_opencl() else ['dll']442 models = models[1:]443 461 else: 444 loaders = ['opencl', 'dll'] if use_opencl() else ['dll'] 462 loaders = ['dll'] 463 if use_opencl(): 464 loaders.append('opencl') 465 if use_cuda(): 466 loaders.append('cuda') 445 467 if not models: 446 468 print("""\ 447 469 usage: 448 python -m sasmodels.model_test [-v] [opencl| dll] model1 model2 ...470 python -m sasmodels.model_test [-v] [opencl|cuda|dll] model1 model2 ... 449 471 450 472 If -v is included on the command line, then use verbose output. 
451 473 452 If n either opencl nor dll is specified, then models will be tested with453 both OpenCL and dll; the compute target is ignored for pure python models.474 If no platform is specified, then models will be tested with dll, and 475 if available, OpenCL and CUDA; the compute target is ignored for pure python models. 454 476 455 477 If model1 is 'all', then all except the remaining models will be tested. … … 471 493 Run "nosetests sasmodels" on the command line to invoke it. 472 494 """ 473 loaders = ['opencl', 'dll'] if use_opencl() else ['dll'] 495 loaders = ['dll'] 496 if use_opencl(): 497 loaders.append('opencl') 498 if use_cuda(): 499 loaders.append('cuda') 474 500 tests = make_suite(loaders, ['all']) 475 501 def build_test(test): -
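The suite can also be assembled programmatically with the same loader logic; a small sketch using the functions shown above (model names are examples)::

    # Sketch: build and run the model tests for the platforms available here.
    import unittest
    from sasmodels.model_test import make_suite
    from sasmodels.kernelcl import use_opencl
    from sasmodels.kernelcuda import use_cuda

    loaders = ['dll']
    if use_opencl():
        loaders.append('opencl')
    if use_cuda():
        loaders.append('cuda')

    suite = make_suite(loaders, ['sphere', 'cylinder'])
    unittest.TextTestRunner(verbosity=2).run(suite)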
sasmodels/models/lib/gauss76.c
r99b84ec → r74e9b5f

    // Gaussians
  - constant double Gauss76Wt[76] ={
  + constant double Gauss76Wt[76] = {
        .00126779163408536, //0
        .00294910295364247,
    …
    };

  - constant double Gauss76Z[76] ={
  + constant double Gauss76Z[76] = {
        -.999505948362153, //0
        -.997397786355355,
sasmodels/models/lib/polevl.c
r447e9aa → r74e9b5f

    */

  - double polevl( double x, constant double *coef, int N );
  - double polevl( double x, constant double *coef, int N )
  + static
  + double polevl( double x, pconstant double *coef, int N )
    {
    …
    */

  - double p1evl( double x, constant double *coef, int N );
  - double p1evl( double x, constant double *coef, int N )
  + static
  + double p1evl( double x, pconstant double *coef, int N )
    {
        int i=0;
sasmodels/models/lib/sas_J1.c
r5181ccc → r74e9b5f

    #if FLOAT_SIZE>4
    //Cephes double pression function
  - double cephes_j1(double x);

    constant double RPJ1[8] = {
    …
    0.0 };

  + static
    double cephes_j1(double x)
    {
    …
    #else
    //Single precission version of cephes
  - float cephes_j1f(float x);
  -
    constant float JPJ1[8] = {
        -4.878788132172128E-009,
    …
    };

  + static
    float cephes_j1f(float xx)
    {
    …
    //Finally J1c function that equals 2*J1(x)/x
  - double sas_2J1x_x(double x);
  + static
    double sas_2J1x_x(double x)
    {