Changeset 5d316e9 in sasmodels
- Timestamp:
- Dec 8, 2015 8:08:51 AM (9 years ago)
- Branches:
- master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children:
- cf404cb
- Parents:
- eaca9eb
- Location:
- sasmodels
- Files:
-
- 6 edited
Legend:
- Unmodified
- Added
- Removed
-
sasmodels/compare.py
r9404dd3 r5d316e9 190 190 return value, average_time 191 191 192 def eval_opencl(model_definition, pars, data, dtype='single', Nevals=1, cutoff=0.): 192 def eval_opencl(model_definition, pars, data, dtype='single', Nevals=1, 193 cutoff=0., fast=False): 193 194 try: 194 model = core.load_model(model_definition, dtype=dtype, platform="ocl") 195 model = core.load_model(model_definition, dtype=dtype, 196 platform="ocl", fast=fast) 195 197 except Exception as exc: 196 198 print(exc) 197 199 print("... trying again with single precision") 198 model = core.load_model(model_definition, dtype='single', platform="ocl") 200 model = core.load_model(model_definition, dtype='single', 201 platform="ocl", fast=fast) 199 202 calculator = DirectModel(data, model, cutoff=cutoff) 200 203 value = None # silence the linter … … 259 262 dtype = ('longdouble' if '-quad' in opts 260 263 else 'double' if '-double' in opts 264 else 'half' if '-half' in opts 261 265 else 'single') 262 266 cutoff = float(opt_values.get('-cutoff','1e-5')) 267 fast = "-fast" in opts and dtype is 'single' 263 268 264 269 # randomize parameters … … 282 287 base_name = target.name 283 288 base, base_time = eval_ctypes(target, pars, data, 284 289 dtype='longdouble', cutoff=0., Nevals=Ncomp) 285 290 elif Nbase > 0 and "-ctypes" in opts and "-sasview" in opts: 286 291 try: … … 295 300 elif Nbase > 0: 296 301 base, base_time = eval_opencl(model_definition, pars, data, 297 dtype=dtype, cutoff=cutoff, Nevals=Nbase) 302 dtype=dtype, cutoff=cutoff, Nevals=Nbase, fast=fast) 298 303 base_name = "ocl" 299 304 print("opencl t=%.1f ms, intensity=%.0f"%(base_time, sum(base))) … … 304 309 if Ncomp > 0 and "-ctypes" in opts: 305 310 comp, comp_time = eval_ctypes(model_definition, pars, data, 306 311 dtype=dtype, cutoff=cutoff, Nevals=Ncomp) 307 312 comp_name = "ctypes" 308 313 print("ctypes t=%.1f ms, intensity=%.0f"%(comp_time, sum(comp))) … … 398 403 399 404 -plot*/-noplot plots or suppress the plot of the model 400 -single*/-double/-quad use single/double/quad precision for comparison 405 -half/-single*/-double/-quad/-fast sets the calculation precision 401 406 -lowq*/-midq/-highq/-exq use q values up to 0.05, 0.2, 1.0, 10.0 402 407 -Nq=128 sets the number of Q points in the data set … … 411 416 -hist/-nohist* plot histogram of relative error 412 417 -res=0 sets the resolution width dQ/Q if calculating with resolution 413 -accuracy=Low resolution accuracy Low, Mid, High, Xhigh 418 -accuracy=Low accuracy of the resolution calculation Low, Mid, High, Xhigh 414 419 415 420 Key=value pairs allow you to set specific values to any of the model … … 421 426 422 427 NAME_OPTIONS = set([ 423 'plot', 'noplot', 424 'single','double','quad', 425 'lowq', 'midq','highq','exq', 426 '2d', '1d', 427 'preset', 'random', 428 'poly', 'mono', 429 'sasview', 'ctypes', 430 'nopars', 'pars', 431 'rel', 'abs', 428 'plot', 'noplot', 429 'half', 'single', 'double', 'quad', 'fast', 430 'lowq', 'midq', 'highq', 'exq', 431 '2d', '1d', 432 'preset', 'random', 433 'poly', 'mono', 434 'sasview', 'ctypes', 435 'nopars', 'pars', 436 'rel', 'abs', 432 437 'linear', 'log', 'q4', 433 'hist', 'nohist', 438 'hist', 'nohist', 434 439 ]) 435 440 VALUE_OPTIONS = [ -
sasmodels/compare_many.py
r9404dd3 r5d316e9 87 87 single_value = value # remember for single/double comparison 88 88 elif precision == 'double': 89 if environment().has_double: 89 if environment().has_type('double'): 90 90 label = 'GPU double' 91 91 value = try_model(eval_opencl, dtype='double', cutoff=cutoff) -
sasmodels/core.py
r9404dd3 r5d316e9 65 65 return True 66 66 67 def load_model(model_definition, dtype="single", platform="ocl"): 67 def load_model(model_definition, dtype="single", platform="ocl", fast=False): 68 68 """ 69 69 Prepare the model for the default execution platform. … … 83 83 *platform* should be "dll" to force the dll to be used for C models, 84 84 otherwise it uses the default "ocl". 85 86 *fast* is True if fast inaccurate math is acceptable (40% speed increase). 85 87 """ 86 88 if isstr(model_definition): … … 102 104 if (platform=="dll" 103 105 or not HAVE_OPENCL 104 or (dtype == np.float64 and not kernelcl.environment().has_double)): 106 or not kernelcl.environment().has_type(dtype)): 105 107 return kerneldll.load_dll(source, info, dtype) 106 108 else: 107 return kernelcl.GpuModel(source, info, dtype) 109 return kernelcl.GpuModel(source, info, dtype, fast) 108 110 109 111 def make_kernel(model, q_vectors): -
sasmodels/generate.py
r9404dd3 r5d316e9 197 197 # TODO: identify model files which have changed since loading and reload them. 198 198 199 __all__ = ["make", "doc", "sources", "use_single", "use_long_double"] 199 __all__ = ["make", "doc", "sources", "convert_type"] 200 200 201 201 import sys … … 208 208 C_KERNEL_TEMPLATE_PATH = joinpath(dirname(__file__), 'kernel_template.c') 209 209 210 F16 = np.dtype('float16') 210 211 F32 = np.dtype('float32') 211 212 F64 = np.dtype('float64') … … 316 317 return [_search(search_path, f) for f in info['source']] 317 318 318 def use_single(source): 319 """ 320 Convert code from double precision to single precision. 321 """ 322 # Convert double keyword to float. Accept an 'n' parameter for vector 323 # values, where n is 2, 4, 8 or 16. Assume complex numbers are represented 324 # as cdouble which is typedef'd to double2. 319 # Pragmas for enable OpenCL features. Be sure to protect them so that they 320 # still compile even if OpenCL is not present. 321 _F16_PRAGMA = """\ 322 #ifdef cl_khr_fp16 323 # pragma OPENCL EXTENSION cl_khr_fp16: enable 324 #endif 325 """ 326 327 _F64_PRAGMA = """\ 328 #ifdef cl_khr_fp64 329 # pragma OPENCL EXTENSION cl_khr_fp64: enable 330 #endif 331 """ 332 333 def convert_type(source, dtype): 334 """ 335 Convert code from double precision to the desired type. 336 """ 337 if dtype == F16: 338 source = _F16_PRAGMA + _convert_type(source, "half", "f") 339 elif dtype == F32: 340 source = _convert_type(source, "float", "f") 341 elif dtype == F64: 342 source = _F64_PRAGMA + source # Source is already double 343 elif dtype == F128: 344 source = _convert_type(source, "long double", "L") 345 else: 346 raise ValueError("Unexpected dtype in source conversion: %s"%dtype) 347 return source 348 349 350 def _convert_type(source, type_name, constant_flag): 351 # Convert double keyword to float/long double/half. 352 # Accept an 'n' # parameter for vector # values, where n is 2, 4, 8 or 16.
353 # Assume complex numbers are represented as cdouble which is typedef'd 354 # to double2. 325 355 source = re.sub(r'(^|[^a-zA-Z0-9_]c?)double(([248]|16)?($|[^a-zA-Z0-9_]))', 326 r'\1float\2', source) 327 # Convert floating point constants to single by adding 'f' to the end. 328 # OS/X driver complains if you don't do this. 356 r'\1%s\2'%type_name, source) 357 # Convert floating point constants to single by adding 'f' to the end, 358 # or long double with an 'L' suffix. OS/X complains if you don't do this. 329 359 source = re.sub(r'[^a-zA-Z_](\d*[.]\d+|\d+[.]\d*)([eE][+-]?\d+)?', 330 r'\g<0>f', source) 331 return source 332 333 def use_long_double(source): 334 """ 335 Convert code from double precision to long double precision. 336 """ 337 # Convert double keyword to float. Accept an 'n' parameter for vector 338 # values, where n is 2, 4, 8 or 16. Assume complex numbers are represented 339 # as cdouble which is typedef'd to double2. 340 source = re.sub(r'(^|[^a-zA-Z0-9_]c?)double(([248]|16)?($|[^a-zA-Z0-9_]))', 341 r'\1long double\2', source) 342 # Convert floating point constants to single by adding 'f' to the end. 343 # OS/X driver complains if you don't do this. 344 source = re.sub(r'[^a-zA-Z_](\d*[.]\d+|\d+[.]\d*)([eE][+-]?\d+)?', 345 r'\g<0>L', source) 360 r'\g<0>%s'%constant_flag, source) 346 361 return source 347 362 -
sasmodels/kernelcl.py
r9404dd3 r5d316e9 62 62 63 63 from pyopencl import mem_flags as mf 64 from pyopencl.characterize import get_fast_inaccurate_build_options 64 65 65 66 from . import generate 66 67 F64_DEFS = """\ 68 #ifdef cl_khr_fp64 69 # pragma OPENCL EXTENSION cl_khr_fp64: enable 70 #endif 71 """ 72 67 73 68 # The max loops number is limited by the amount of local memory available … … 92 87 return ENV 93 88 94 def has_double(device): 95 """ 96 Return true if device supports double precision. 97 """ 98 return "cl_khr_fp64" in device.extensions 89 def has_type(device, dtype): 90 """ 91 Return true if device supports the requested precision. 92 """ 93 if dtype == generate.F32: 94 return True 95 elif dtype == generate.F64: 96 return "cl_khr_fp64" in device.extensions 97 elif dtype == generate.F16: 98 return "cl_khr_fp16" in device.extensions 99 else: 100 return False 99 101 100 102 def get_warp(kernel, queue): … … 128 130 129 131 130 def compile_model(context, source, dtype): 132 def compile_model(context, source, dtype, fast=False): 131 133 """ 132 134 Build a model to run on the gpu.
… … 137 139 """ 138 140 dtype = np.dtype(dtype) 139 if dtype == generate.F64 and not all(has_double(d) for d in context.devices): 140 raise RuntimeError("Double precision not supported for devices") 141 142 header = F64_DEFS if dtype == generate.F64 else "" 143 if dtype == generate.F32: 144 source = generate.use_single(source) 141 if not all(has_type(d, dtype) for d in context.devices): 142 raise RuntimeError("%s not supported for devices"%dtype) 143 144 source = generate.convert_type(source, dtype) 145 145 # Note: USE_SINCOS makes the intel cpu slower under opencl 146 146 if context.devices[0].type == cl.device_type.GPU: 147 header += "#define USE_SINCOS\n" 148 program = cl.Program(context, header + source).build() 147 source = "#define USE_SINCOS\n" + source 148 options = (get_fast_inaccurate_build_options(context.devices[0]) 149 if fast else []) 150 program = cl.Program(context, source).build(options=options) 149 151 return program 150 152 … … 178 180 self.queues = [cl.CommandQueue(self.context, d) 179 181 for d in self.context.devices] 180 self.has_double = all(has_double(d) for d in self.context.devices) 181 182 self.compiled = {} 183 184 def has_type(self, dtype): 185 dtype = np.dtype(dtype) 186 return all(has_type(d, dtype) for d in self.context.devices) 182 187 183 188 def _create_some_context(self): … … 189 194 warnings.warn("the environment variable 'PYOPENCL_CTX' might not be set correctly") 190 195 191 def compile_program(self, name, source, dtype): 196 def compile_program(self, name, source, dtype, fast=False): 192 197 if name not in self.compiled: 193 198 #print("compiling",name) 194 self.compiled[name] = compile_model(self.context, source, dtype) 199 self.compiled[name] = compile_model(self.context, source, dtype, 200 fast) 195 201 return self.compiled[name] 196 202 … … 226 232 for single and 'd', 'float64' or 'double' for double. Double precision 227 233 is an optional extension which may not be available on all devices.
228 """ 229 def __init__(self, source, info, dtype=generate.F32): 234 235 *fast* is True if fast inaccurate math is acceptable (40% speed increase) 236 """ 237 def __init__(self, source, info, dtype=generate.F32, fast=False): 230 238 self.info = info 231 239 self.source = source 232 240 self.dtype = np.dtype(dtype) 241 self.fast = fast 233 242 self.program = None # delay program creation 234 243 … … 243 252 def __call__(self, q_input): 244 253 if self.dtype != q_input.dtype: 245 raise TypeError("data is %s kernel is %s" % (q_input.dtype, self.dtype)) 254 raise TypeError("data is %s kernel is %s" 255 % (q_input.dtype, self.dtype)) 246 256 if self.program is None: 247 257 compiler = environment().compile_program 248 self.program = compiler(self.info['name'], self.source, self.dtype) 258 self.program = compiler(self.info['name'], self.source, self.dtype, 259 self.fast) 249 260 kernel_name = generate.kernel_name(self.info, q_input.is_2D) 250 261 kernel = getattr(self.program, kernel_name) … … 347 358 348 359 def __call__(self, fixed_pars, pd_pars, cutoff=1e-5): 349 real = np.float32 if self.q_input.dtype == generate.F32 else np.float64 360 real = (np.float32 if self.q_input.dtype == generate.F32 361 else np.float64 if self.q_input.dtype == generate.F64 362 else np.float16 if self.q_input.dtype == generate.F16 363 else np.float32) # will never get here, so use np.float32 350 364 351 365 device_num = 0 -
sasmodels/kerneldll.py
r9404dd3 r5d316e9 85 85 DLL_PATH = tempfile.gettempdir() 86 86 87 ALLOW_SINGLE_PRECISION_DLLS = False 87 ALLOW_SINGLE_PRECISION_DLLS = True 88 88 89 89 … … 122 122 models are allowed as DLLs. 123 123 """ 124 if callable(info.get('Iq',None)): 125 return PyModel(info) 126 124 127 dtype = np.dtype(dtype) 128 if dtype == generate.F16: 129 raise ValueError("16 bit floats not supported") 125 130 if dtype == generate.F32 and not ALLOW_SINGLE_PRECISION_DLLS: 126 131 dtype = generate.F64 # Force 64-bit dll 127 132 128 if callable(info.get('Iq',None)): 129 return PyModel(info) 130 131 133 if dtype == generate.F32: # 32-bit dll 132 source = generate.use_single(source) 133 134 tempfile_prefix = 'sas_'+info['name']+'32_' 134 135 elif dtype == generate.F64: 135 136 tempfile_prefix = 'sas_'+info['name']+'64_' 136 137 else: 137 source = generate.use_long_double(source) 138 138 tempfile_prefix = 'sas_'+info['name']+'128_' 139 139 140 source = generate.convert_type(source, dtype) 140 141 source_files = generate.sources(info) + [info['filename']] 141 142 dll= dll_path(info, dtype)
Note: See TracChangeset
for help on using the changeset viewer.