Changeset 5d316e9 in sasmodels for sasmodels/kernelcl.py
- Timestamp:
- Dec 8, 2015 6:08:51 AM (8 years ago)
- Branches:
- master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children:
- cf404cb
- Parents:
- eaca9eb
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
sasmodels/kernelcl.py
r9404dd3 r5d316e9 62 62 63 63 from pyopencl import mem_flags as mf 64 from pyopencl.characterize import get_fast_inaccurate_build_options 64 65 65 66 from . import generate 66 67 F64_DEFS = """\68 #ifdef cl_khr_fp6469 # pragma OPENCL EXTENSION cl_khr_fp64: enable70 #endif71 """72 67 73 68 # The max loops number is limited by the amount of local memory available … … 92 87 return ENV 93 88 94 def has_double(device): 95 """ 96 Return true if device supports double precision. 97 """ 98 return "cl_khr_fp64" in device.extensions 89 def has_type(device, dtype): 90 """ 91 Return true if device supports the requested precision. 92 """ 93 if dtype == generate.F32: 94 return True 95 elif dtype == generate.F64: 96 return "cl_khr_fp64" in device.extensions 97 elif dtype == generate.F16: 98 return "cl_khr_fp16" in device.extensions 99 else: 100 return False 99 101 100 102 def get_warp(kernel, queue): … … 128 130 129 131 130 def compile_model(context, source, dtype ):132 def compile_model(context, source, dtype, fast=False): 131 133 """ 132 134 Build a model to run on the gpu. … … 137 139 """ 138 140 dtype = np.dtype(dtype) 139 if dtype == generate.F64 and not all(has_double(d) for d in context.devices): 140 raise RuntimeError("Double precision not supported for devices") 141 142 header = F64_DEFS if dtype == generate.F64 else "" 143 if dtype == generate.F32: 144 source = generate.use_single(source) 141 if not all(has_type(d, dtype) for d in context.devices): 142 raise RuntimeError("%s not supported for devices"%dtype) 143 144 source = generate.convert_type(source, dtype) 145 145 # Note: USE_SINCOS makes the intel cpu slower under opencl 146 146 if context.devices[0].type == cl.device_type.GPU: 147 header += "#define USE_SINCOS\n" 148 program = cl.Program(context, header + source).build() 147 source = "#define USE_SINCOS\n" + source 148 options = (get_fast_inaccurate_build_options(context.devices[0]) 149 if fast else []) 150 program = cl.Program(context, source).build(options=options) 149 151 return program 150 152 … … 178 180 self.queues = [cl.CommandQueue(self.context, d) 179 181 for d in self.context.devices] 180 self.has_double = all(has_double(d) for d in self.context.devices)181 182 self.compiled = {} 183 184 def has_type(self, dtype): 185 dtype = np.dtype(dtype) 186 return all(has_type(d, dtype) for d in self.context.devices) 182 187 183 188 def _create_some_context(self): … … 189 194 warnings.warn("the environment variable 'PYOPENCL_CTX' might not be set correctly") 190 195 191 def compile_program(self, name, source, dtype ):196 def compile_program(self, name, source, dtype, fast=False): 192 197 if name not in self.compiled: 193 198 #print("compiling",name) 194 self.compiled[name] = compile_model(self.context, source, dtype) 199 self.compiled[name] = compile_model(self.context, source, dtype, 200 fast) 195 201 return self.compiled[name] 196 202 … … 226 232 for single and 'd', 'float64' or 'double' for double. Double precision 227 233 is an optional extension which may not be available on all devices. 228 """ 229 def __init__(self, source, info, dtype=generate.F32): 234 235 *fast* is True if fast inaccurate math is acceptable (40% speed increase) 236 """ 237 def __init__(self, source, info, dtype=generate.F32, fast=False): 230 238 self.info = info 231 239 self.source = source 232 240 self.dtype = np.dtype(dtype) 241 self.fast = fast 233 242 self.program = None # delay program creation 234 243 … … 243 252 def __call__(self, q_input): 244 253 if self.dtype != q_input.dtype: 245 raise TypeError("data is %s kernel is %s" % (q_input.dtype, self.dtype)) 254 raise TypeError("data is %s kernel is %s" 255 % (q_input.dtype, self.dtype)) 246 256 if self.program is None: 247 257 compiler = environment().compile_program 248 self.program = compiler(self.info['name'], self.source, self.dtype) 258 self.program = compiler(self.info['name'], self.source, self.dtype, 259 self.fast) 249 260 kernel_name = generate.kernel_name(self.info, q_input.is_2D) 250 261 kernel = getattr(self.program, kernel_name) … … 347 358 348 359 def __call__(self, fixed_pars, pd_pars, cutoff=1e-5): 349 real = np.float32 if self.q_input.dtype == generate.F32 else np.float64 360 real = (np.float32 if self.q_input.dtype == generate.F32 361 else np.float64 if self.q_input.dtype == generate.F64 362 else np.float16 if self.q_input.dtype == generate.F16 363 else np.float32) # will never get here, so use np.float32 350 364 351 365 device_num = 0
Note: See TracChangeset
for help on using the changeset viewer.