← Previous Changeset
Next Changeset →

Changeset 5d316e9 in sasmodels

Timestamp:

Dec 8, 2015 8:08:51 AM (9 years ago)

Author:

Paul Kienzle <pkienzle@…>

Branches:

master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests

Children:

Parents:

Message:

support fast and loose single precision and half precision

Location:

Files:

: 6 edited

compare.py (modified) (8 diffs)
compare_many.py (modified) (1 diff)
core.py (modified) (3 diffs)
generate.py (modified) (3 diffs)
kernelcl.py (modified) (9 diffs)
kerneldll.py (modified) (2 diffs)

Legend:

: Unmodified
: Added
: Removed

sasmodels/compare.py

-                      r9404dd3
+                      r5d316e9
     return value, average_time
+def eval_opencl(model_definition, pars, data, dtype='single', Nevals=1, cutoff=0.):
+def eval_opencl(model_definition, pars, data, dtype='single', Nevals=1,
+                cutoff=0., fast=False):
     try:
+        model = core.load_model(model_definition, dtype=dtype, platform="ocl")
+        model = core.load_model(model_definition, dtype=dtype,
+                                platform="ocl", fast=fast)
     except Exception as exc:
         print(exc)
         print("... trying again with single precision")
+        model = core.load_model(model_definition, dtype='single', platform="ocl")
+        model = core.load_model(model_definition, dtype='single',
+                                platform="ocl", fast=fast)
     calculator = DirectModel(data, model, cutoff=cutoff)
     value = None  # silence the linter
 …
     dtype = ('longdouble' if '-quad' in opts
              else 'double' if '-double' in opts
+             else 'half' if '-half' in opts
              else 'single')
     cutoff = float(opt_values.get('-cutoff','1e-5'))
+    fast = "-fast" in opts and dtype is 'single'
     # randomize parameters
 …
         base_name = target.name
         base, base_time = eval_ctypes(target, pars, data,
                          dtype='longdouble', cutoff=0., Nevals=Ncomp)
+                dtype='longdouble', cutoff=0., Nevals=Ncomp)
     elif Nbase > 0 and "-ctypes" in opts and "-sasview" in opts:
         try:
 …
     elif Nbase > 0:
         base, base_time = eval_opencl(model_definition, pars, data,
                                     dtype=dtype, cutoff=cutoff, Nevals=Nbase)
+                dtype=dtype, cutoff=cutoff, Nevals=Nbase, fast=fast)
         base_name = "ocl"
         print("opencl t=%.1f ms, intensity=%.0f"%(base_time, sum(base)))
 …
     if Ncomp > 0 and "-ctypes" in opts:
         comp, comp_time = eval_ctypes(model_definition, pars, data,
                                     dtype=dtype, cutoff=cutoff, Nevals=Ncomp)
+                dtype=dtype, cutoff=cutoff, Nevals=Ncomp)
         comp_name = "ctypes"
         print("ctypes t=%.1f ms, intensity=%.0f"%(comp_time, sum(comp)))
 …
     -plot*/-noplot plots or suppress the plot of the model
     -single*/-double/-quad use single/double/quad precision for comparison
+    -half/-single*/-double/-quad/-fast sets the calculation precision
     -lowq*/-midq/-highq/-exq use q values up to 0.05, 0.2, 1.0, 10.0
     -Nq=128 sets the number of Q points in the data set
 …
     -hist/-nohist* plot histogram of relative error
     -res=0 sets the resolution width dQ/Q if calculating with resolution
     -accuracy=Low resolution accuracy Low, Mid, High, Xhigh
+    -accuracy=Low accuracy of the resolution calculation Low, Mid, High, Xhigh
 Key=value pairs allow you to set specific values to any of the model
 …
 NAME_OPTIONS = set([
     'plot','noplot',
     'single','double','quad',
     'lowq','midq','highq','exq',
     '2d','1d',
     'preset','random',
     'poly','mono',
     'sasview','ctypes',
     'nopars','pars',
     'rel','abs',
+    'plot', 'noplot',
+    'half', 'single', 'double', 'quad', 'fast',
+    'lowq', 'midq', 'highq', 'exq',
+    '2d', '1d',
+    'preset', 'random',
+    'poly', 'mono',
+    'sasview', 'ctypes',
+    'nopars', 'pars',
+    'rel', 'abs',
     'linear', 'log', 'q4',
     'hist','nohist',
+    'hist', 'nohist',
     ])
 VALUE_OPTIONS = [

sasmodels/compare_many.py

r9404dd3	r5d316e9
87	87	single_value = value # remember for single/double comparison
88	88	elif precision == 'double':
89		if environment().has_~~double~~:
	89	if environment().has_type('double'):
90	90	label = 'GPU double'
91	91	value = try_model(eval_opencl, dtype='double', cutoff=cutoff)

sasmodels/core.py

-                      r9404dd3
+                      r5d316e9
     return True
 def load_model(model_definition, dtype="single", platform="ocl"):
+def load_model(model_definition, dtype="single", platform="ocl", fast=False):
     """
     Prepare the model for the default execution platform.
 …
     *platform* should be "dll" to force the dll to be used for C models,
     otherwise it uses the default "ocl".
+    *fast* is True if fast inaccurate math is acceptable (40% speed increase).
     """
     if isstr(model_definition):
 …
     if (platform=="dll"
             or not HAVE_OPENCL
             or (dtype == np.float64 and not kernelcl.environment().has_double)):
+            or not kernelcl.environment().has_type(dtype)):
         return kerneldll.load_dll(source, info, dtype)
     else:
         return kernelcl.GpuModel(source, info, dtype)
+        return kernelcl.GpuModel(source, info, dtype, fast)
 def make_kernel(model, q_vectors):

sasmodels/generate.py

-                      r9404dd3
+                      r5d316e9
 # TODO: identify model files which have changed since loading and reload them.
 __all__ = ["make", "doc", "sources", "use_single", "use_long_double"]
+__all__ = ["make", "doc", "sources", "convert_type"]
 import sys
 …
 C_KERNEL_TEMPLATE_PATH = joinpath(dirname(__file__), 'kernel_template.c')
+F16 = np.dtype('float16')
 F32 = np.dtype('float32')
 F64 = np.dtype('float64')
 …
     return [_search(search_path, f) for f in info['source']]
+def use_single(source):
+    """
+    Convert code from double precision to single precision.
+    """
+    # Convert double keyword to float.  Accept an 'n' parameter for vector
+    # values, where n is 2, 4, 8 or 16. Assume complex numbers are represented
+    # as cdouble which is typedef'd to double2.
+# Pragmas for enabling OpenCL features.  Be sure to protect them so that they
+# still compile even if OpenCL is not present.
+_F16_PRAGMA = """\
+#ifdef cl_khr_fp16
+#  pragma OPENCL EXTENSION cl_khr_fp16: enable
+#endif
+"""
+_F64_PRAGMA = """\
+#ifdef cl_khr_fp64
+#  pragma OPENCL EXTENSION cl_khr_fp64: enable
+#endif
+"""
+def convert_type(source, dtype):
+    """
+    Convert code from double precision to the desired type.
+    """
+    if dtype == F16:
+        source = _F16_PRAGMA + _convert_type(source, "half", "f")
+    elif dtype == F32:
+        source = _convert_type(source, "float", "f")
+    elif dtype == F64:
+        source = _F64_PRAGMA + source  # Source is already double
+    elif dtype == F128:
+        source = _convert_type(source, "long double", "L")
+    else:
+        raise ValueError("Unexpected dtype in source conversion: %s"%dtype)
+    return source
+def _convert_type(source, type_name, constant_flag):
+    # Convert double keyword to float/long double/half.
+    # Accept an 'n' parameter for vector values, where n is 2, 4, 8 or 16.
+    # Assume complex numbers are represented as cdouble which is typedef'd
+    # to double2.
     source = re.sub(r'(^|[^a-zA-Z0-9_]c?)double(([248]|16)?($|[^a-zA-Z0-9_]))',
                     r'\1float\2', source)
     # Convert floating point constants to single by adding 'f' to the end.
     # OS/X driver complains if you don't do this.
+                    r'\1%s\2'%type_name, source)
+    # Convert floating point constants to single by adding 'f' to the end,
+    # or long double with an 'L' suffix.  OS/X complains if you don't do this.
     source = re.sub(r'[^a-zA-Z_](\d*[.]\d+|\d+[.]\d*)([eE][+-]?\d+)?',
+                    r'\g<0>f', source)
+    return source
+def use_long_double(source):
+    """
+    Convert code from double precision to long double precision.
+    """
+    # Convert double keyword to long double.  Accept an 'n' parameter for vector
+    # values, where n is 2, 4, 8 or 16. Assume complex numbers are represented
+    # as cdouble which is typedef'd to double2.
+    source = re.sub(r'(^|[^a-zA-Z0-9_]c?)double(([248]|16)?($|[^a-zA-Z0-9_]))',
+                    r'\1long double\2', source)
+    # Convert floating point constants to long double by adding 'L' to the end.
+    # OS/X driver complains if you don't do this.
+    source = re.sub(r'[^a-zA-Z_](\d*[.]\d+|\d+[.]\d*)([eE][+-]?\d+)?',
+                    r'\g<0>L', source)
+                    r'\g<0>%s'%constant_flag, source)
     return source

sasmodels/kernelcl.py

-                      r9404dd3
+                      r5d316e9
 from pyopencl import mem_flags as mf
+from pyopencl.characterize import get_fast_inaccurate_build_options
 from . import generate
-F64_DEFS = """\
-#ifdef cl_khr_fp64
-#  pragma OPENCL EXTENSION cl_khr_fp64: enable
-#endif
-"""
 # The max loops number is limited by the amount of local memory available
 …
     return ENV
+def has_double(device):
+    """
+    Return true if device supports double precision.
+    """
+    return "cl_khr_fp64" in device.extensions
+def has_type(device, dtype):
+    """
+    Return true if device supports the requested precision.
+    """
+    if dtype == generate.F32:
+        return True
+    elif dtype == generate.F64:
+        return "cl_khr_fp64" in device.extensions
+    elif dtype == generate.F16:
+        return "cl_khr_fp16" in device.extensions
+    else:
+        return False
 def get_warp(kernel, queue):
 …
 def compile_model(context, source, dtype):
+def compile_model(context, source, dtype, fast=False):
     """
     Build a model to run on the gpu.
 …
     """
     dtype = np.dtype(dtype)
+    if dtype == generate.F64 and not all(has_double(d) for d in context.devices):
+        raise RuntimeError("Double precision not supported for devices")
+    header = F64_DEFS if dtype == generate.F64 else ""
+    if dtype == generate.F32:
+        source = generate.use_single(source)
+    if not all(has_type(d, dtype) for d in context.devices):
+        raise RuntimeError("%s not supported for devices"%dtype)
+    source = generate.convert_type(source, dtype)
     # Note: USE_SINCOS makes the intel cpu slower under opencl
     if context.devices[0].type == cl.device_type.GPU:
+        header += "#define USE_SINCOS\n"
+    program = cl.Program(context, header + source).build()
+        source = "#define USE_SINCOS\n" + source
+    options = (get_fast_inaccurate_build_options(context.devices[0])
+               if fast else [])
+    program = cl.Program(context, source).build(options=options)
     return program
 …
         self.queues = [cl.CommandQueue(self.context, d)
                        for d in self.context.devices]
-        self.has_double = all(has_double(d) for d in self.context.devices)
         self.compiled = {}
+    def has_type(self, dtype):
+        dtype = np.dtype(dtype)
+        return all(has_type(d, dtype) for d in self.context.devices)
     def _create_some_context(self):
 …
             warnings.warn("the environment variable 'PYOPENCL_CTX' might not be set correctly")
     def compile_program(self, name, source, dtype):
+    def compile_program(self, name, source, dtype, fast=False):
         if name not in self.compiled:
             #print("compiling",name)
+            self.compiled[name] = compile_model(self.context, source, dtype)
+            self.compiled[name] = compile_model(self.context, source, dtype,
+                                                fast)
         return self.compiled[name]
 …
     for single and 'd', 'float64' or 'double' for double.  Double precision
     is an optional extension which may not be available on all devices.
+    """
+    def __init__(self, source, info, dtype=generate.F32):
+    *fast* is True if fast inaccurate math is acceptable (40% speed increase)
+    """
+    def __init__(self, source, info, dtype=generate.F32, fast=False):
         self.info = info
         self.source = source
         self.dtype = np.dtype(dtype)
+        self.fast = fast
         self.program = None # delay program creation
 …
     def __call__(self, q_input):
         if self.dtype != q_input.dtype:
+            raise TypeError("data is %s kernel is %s" % (q_input.dtype, self.dtype))
+            raise TypeError("data is %s kernel is %s"
+                            % (q_input.dtype, self.dtype))
         if self.program is None:
             compiler = environment().compile_program
+            self.program = compiler(self.info['name'], self.source, self.dtype)
+            self.program = compiler(self.info['name'], self.source, self.dtype,
+                                    self.fast)
         kernel_name = generate.kernel_name(self.info, q_input.is_2D)
         kernel = getattr(self.program, kernel_name)
 …
     def __call__(self, fixed_pars, pd_pars, cutoff=1e-5):
+        real = np.float32 if self.q_input.dtype == generate.F32 else np.float64
+        real = (np.float32 if self.q_input.dtype == generate.F32
+                else np.float64 if self.q_input.dtype == generate.F64
+                else np.float16 if self.q_input.dtype == generate.F16
+                else np.float32)  # will never get here, so use np.float32
         device_num = 0

sasmodels/kerneldll.py

-                      r9404dd3
+                      r5d316e9
 DLL_PATH = tempfile.gettempdir()
 ALLOW_SINGLE_PRECISION_DLLS = False
+ALLOW_SINGLE_PRECISION_DLLS = True
 …
     models are allowed as DLLs.
     """
+    if callable(info.get('Iq',None)):
+        return PyModel(info)
     dtype = np.dtype(dtype)
+    if dtype == generate.F16:
+        raise ValueError("16 bit floats not supported")
     if dtype == generate.F32 and not ALLOW_SINGLE_PRECISION_DLLS:
         dtype = generate.F64  # Force 64-bit dll
-    if callable(info.get('Iq',None)):
-        return PyModel(info)
     if dtype == generate.F32: # 32-bit dll
-        source = generate.use_single(source)
         tempfile_prefix = 'sas_'+info['name']+'32_'
     elif dtype == generate.F64:
         tempfile_prefix = 'sas_'+info['name']+'64_'
     else:
-        source = generate.use_long_double(source)
         tempfile_prefix = 'sas_'+info['name']+'128_'
+    source = generate.convert_type(source, dtype)
     source_files = generate.sources(info) + [info['filename']]
     dll= dll_path(info, dtype)

Note: See TracChangeset for help on using the changeset viewer.

Download in other formats: