Changeset 5d316e9 in sasmodels


Ignore:
Timestamp:
Dec 8, 2015 8:08:51 AM (9 years ago)
Author:
Paul Kienzle <pkienzle@…>
Branches:
master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
Children:
cf404cb
Parents:
eaca9eb
Message:

support fast and loose single precision and half precision

Location:
sasmodels
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • sasmodels/compare.py

    r9404dd3 r5d316e9  
    190190    return value, average_time 
    191191 
    192 def eval_opencl(model_definition, pars, data, dtype='single', Nevals=1, cutoff=0.): 
     192def eval_opencl(model_definition, pars, data, dtype='single', Nevals=1, 
     193                cutoff=0., fast=False): 
    193194    try: 
    194         model = core.load_model(model_definition, dtype=dtype, platform="ocl") 
     195        model = core.load_model(model_definition, dtype=dtype, 
     196                                platform="ocl", fast=fast) 
    195197    except Exception as exc: 
    196198        print(exc) 
    197199        print("... trying again with single precision") 
    198         model = core.load_model(model_definition, dtype='single', platform="ocl") 
     200        model = core.load_model(model_definition, dtype='single', 
     201                                platform="ocl", fast=fast) 
    199202    calculator = DirectModel(data, model, cutoff=cutoff) 
    200203    value = None  # silence the linter 
     
    259262    dtype = ('longdouble' if '-quad' in opts 
    260263             else 'double' if '-double' in opts 
     264             else 'half' if '-half' in opts 
    261265             else 'single') 
    262266    cutoff = float(opt_values.get('-cutoff','1e-5')) 
     267    fast = "-fast" in opts and dtype is 'single' 
    263268 
    264269    # randomize parameters 
     
    282287        base_name = target.name 
    283288        base, base_time = eval_ctypes(target, pars, data, 
    284                          dtype='longdouble', cutoff=0., Nevals=Ncomp) 
     289                dtype='longdouble', cutoff=0., Nevals=Ncomp) 
    285290    elif Nbase > 0 and "-ctypes" in opts and "-sasview" in opts: 
    286291        try: 
     
    295300    elif Nbase > 0: 
    296301        base, base_time = eval_opencl(model_definition, pars, data, 
    297                                     dtype=dtype, cutoff=cutoff, Nevals=Nbase) 
     302                dtype=dtype, cutoff=cutoff, Nevals=Nbase, fast=fast) 
    298303        base_name = "ocl" 
    299304        print("opencl t=%.1f ms, intensity=%.0f"%(base_time, sum(base))) 
     
    304309    if Ncomp > 0 and "-ctypes" in opts: 
    305310        comp, comp_time = eval_ctypes(model_definition, pars, data, 
    306                                     dtype=dtype, cutoff=cutoff, Nevals=Ncomp) 
     311                dtype=dtype, cutoff=cutoff, Nevals=Ncomp) 
    307312        comp_name = "ctypes" 
    308313        print("ctypes t=%.1f ms, intensity=%.0f"%(comp_time, sum(comp))) 
     
    398403 
    399404    -plot*/-noplot plots or suppress the plot of the model 
    400     -single*/-double/-quad use single/double/quad precision for comparison 
     405    -half/-single*/-double/-quad/-fast sets the calculation precision 
    401406    -lowq*/-midq/-highq/-exq use q values up to 0.05, 0.2, 1.0, 10.0 
    402407    -Nq=128 sets the number of Q points in the data set 
     
    411416    -hist/-nohist* plot histogram of relative error 
    412417    -res=0 sets the resolution width dQ/Q if calculating with resolution 
    413     -accuracy=Low resolution accuracy Low, Mid, High, Xhigh 
     418    -accuracy=Low accuracy of the resolution calculation Low, Mid, High, Xhigh 
    414419 
    415420Key=value pairs allow you to set specific values to any of the model 
     
    421426 
    422427NAME_OPTIONS = set([ 
    423     'plot','noplot', 
    424     'single','double','quad', 
    425     'lowq','midq','highq','exq', 
    426     '2d','1d', 
    427     'preset','random', 
    428     'poly','mono', 
    429     'sasview','ctypes', 
    430     'nopars','pars', 
    431     'rel','abs', 
     428    'plot', 'noplot', 
     429    'half', 'single', 'double', 'quad', 'fast', 
     430    'lowq', 'midq', 'highq', 'exq', 
     431    '2d', '1d', 
     432    'preset', 'random', 
     433    'poly', 'mono', 
     434    'sasview', 'ctypes', 
     435    'nopars', 'pars', 
     436    'rel', 'abs', 
    432437    'linear', 'log', 'q4', 
    433     'hist','nohist', 
     438    'hist', 'nohist', 
    434439    ]) 
    435440VALUE_OPTIONS = [ 
  • sasmodels/compare_many.py

    r9404dd3 r5d316e9  
    8787            single_value = value  # remember for single/double comparison 
    8888        elif precision == 'double': 
    89             if environment().has_double: 
     89            if environment().has_type('double'): 
    9090                label = 'GPU double' 
    9191                value = try_model(eval_opencl, dtype='double', cutoff=cutoff) 
  • sasmodels/core.py

    r9404dd3 r5d316e9  
    6565    return True 
    6666 
    67 def load_model(model_definition, dtype="single", platform="ocl"): 
     67def load_model(model_definition, dtype="single", platform="ocl", fast=False): 
    6868    """ 
    6969    Prepare the model for the default execution platform. 
     
    8383    *platform* should be "dll" to force the dll to be used for C models, 
    8484    otherwise it uses the default "ocl". 
     85 
     86    *fast* is True if fast inaccurate math is acceptable (40% speed increase). 
    8587    """ 
    8688    if isstr(model_definition): 
     
    102104    if (platform=="dll" 
    103105            or not HAVE_OPENCL 
    104             or (dtype == np.float64 and not kernelcl.environment().has_double)): 
     106            or not kernelcl.environment().has_type(dtype)): 
    105107        return kerneldll.load_dll(source, info, dtype) 
    106108    else: 
    107         return kernelcl.GpuModel(source, info, dtype) 
     109        return kernelcl.GpuModel(source, info, dtype, fast) 
    108110 
    109111def make_kernel(model, q_vectors): 
  • sasmodels/generate.py

    r9404dd3 r5d316e9  
    197197# TODO: identify model files which have changed since loading and reload them. 
    198198 
    199 __all__ = ["make", "doc", "sources", "use_single", "use_long_double"] 
     199__all__ = ["make", "doc", "sources", "convert_type"] 
    200200 
    201201import sys 
     
    208208C_KERNEL_TEMPLATE_PATH = joinpath(dirname(__file__), 'kernel_template.c') 
    209209 
     210F16 = np.dtype('float16') 
    210211F32 = np.dtype('float32') 
    211212F64 = np.dtype('float64') 
     
    316317    return [_search(search_path, f) for f in info['source']] 
    317318 
    318 def use_single(source): 
    319     """ 
    320     Convert code from double precision to single precision. 
    321     """ 
    322     # Convert double keyword to float.  Accept an 'n' parameter for vector 
    323     # values, where n is 2, 4, 8 or 16. Assume complex numbers are represented 
    324     # as cdouble which is typedef'd to double2. 
     319# Pragmas for enabling OpenCL features.  Be sure to protect them so that they 
     320# still compile even if OpenCL is not present. 
     321_F16_PRAGMA = """\ 
     322#ifdef cl_khr_fp16 
     323#  pragma OPENCL EXTENSION cl_khr_fp16: enable 
     324#endif 
     325""" 
     326 
     327_F64_PRAGMA = """\ 
     328#ifdef cl_khr_fp64 
     329#  pragma OPENCL EXTENSION cl_khr_fp64: enable 
     330#endif 
     331""" 
     332 
     333def convert_type(source, dtype): 
     334    """ 
     335    Convert code from double precision to the desired type. 
     336    """ 
     337    if dtype == F16: 
     338        source = _F16_PRAGMA + _convert_type(source, "half", "f") 
     339    elif dtype == F32: 
     340        source = _convert_type(source, "float", "f") 
     341    elif dtype == F64: 
     342        source = _F64_PRAGMA + source  # Source is already double 
     343    elif dtype == F128: 
     344        source = _convert_type(source, "long double", "L") 
     345    else: 
     346        raise ValueError("Unexpected dtype in source conversion: %s"%dtype) 
     347    return source 
     348 
     349 
     350def _convert_type(source, type_name, constant_flag): 
     351    # Convert double keyword to float/long double/half. 
     352    # Accept an 'n' parameter for vector values, where n is 2, 4, 8 or 16. 
     353    # Assume complex numbers are represented as cdouble which is typedef'd 
     354    # to double2. 
    325355    source = re.sub(r'(^|[^a-zA-Z0-9_]c?)double(([248]|16)?($|[^a-zA-Z0-9_]))', 
    326                     r'\1float\2', source) 
    327     # Convert floating point constants to single by adding 'f' to the end. 
    328     # OS/X driver complains if you don't do this. 
     356                    r'\1%s\2'%type_name, source) 
     357    # Convert floating point constants to single by adding 'f' to the end, 
     358    # or long double with an 'L' suffix.  OS/X complains if you don't do this. 
    329359    source = re.sub(r'[^a-zA-Z_](\d*[.]\d+|\d+[.]\d*)([eE][+-]?\d+)?', 
    330                     r'\g<0>f', source) 
    331     return source 
    332  
    333 def use_long_double(source): 
    334     """ 
    335     Convert code from double precision to long double precision. 
    336     """ 
    337     # Convert double keyword to float.  Accept an 'n' parameter for vector 
    338     # values, where n is 2, 4, 8 or 16. Assume complex numbers are represented 
    339     # as cdouble which is typedef'd to double2. 
    340     source = re.sub(r'(^|[^a-zA-Z0-9_]c?)double(([248]|16)?($|[^a-zA-Z0-9_]))', 
    341                     r'\1long double\2', source) 
    342     # Convert floating point constants to single by adding 'f' to the end. 
    343     # OS/X driver complains if you don't do this. 
    344     source = re.sub(r'[^a-zA-Z_](\d*[.]\d+|\d+[.]\d*)([eE][+-]?\d+)?', 
    345                     r'\g<0>L', source) 
     360                    r'\g<0>%s'%constant_flag, source) 
    346361    return source 
    347362 
  • sasmodels/kernelcl.py

    r9404dd3 r5d316e9  
    6262 
    6363from pyopencl import mem_flags as mf 
     64from pyopencl.characterize import get_fast_inaccurate_build_options 
    6465 
    6566from . import generate 
    66  
    67 F64_DEFS = """\ 
    68 #ifdef cl_khr_fp64 
    69 #  pragma OPENCL EXTENSION cl_khr_fp64: enable 
    70 #endif 
    71 """ 
    7267 
    7368# The max loops number is limited by the amount of local memory available 
     
    9287    return ENV 
    9388 
    94 def has_double(device): 
    95     """ 
    96     Return true if device supports double precision. 
    97     """ 
    98     return "cl_khr_fp64" in device.extensions 
     89def has_type(device, dtype): 
     90    """ 
     91    Return true if device supports the requested precision. 
     92    """ 
     93    if dtype == generate.F32: 
     94        return True 
     95    elif dtype == generate.F64: 
     96        return "cl_khr_fp64" in device.extensions 
     97    elif dtype == generate.F16: 
     98        return "cl_khr_fp16" in device.extensions 
     99    else: 
     100        return False 
    99101 
    100102def get_warp(kernel, queue): 
     
    128130 
    129131 
    130 def compile_model(context, source, dtype): 
     132def compile_model(context, source, dtype, fast=False): 
    131133    """ 
    132134    Build a model to run on the gpu. 
     
    137139    """ 
    138140    dtype = np.dtype(dtype) 
    139     if dtype == generate.F64 and not all(has_double(d) for d in context.devices): 
    140         raise RuntimeError("Double precision not supported for devices") 
    141  
    142     header = F64_DEFS if dtype == generate.F64 else "" 
    143     if dtype == generate.F32: 
    144         source = generate.use_single(source) 
     141    if not all(has_type(d, dtype) for d in context.devices): 
     142        raise RuntimeError("%s not supported for devices"%dtype) 
     143 
     144    source = generate.convert_type(source, dtype) 
    145145    # Note: USE_SINCOS makes the intel cpu slower under opencl 
    146146    if context.devices[0].type == cl.device_type.GPU: 
    147         header += "#define USE_SINCOS\n" 
    148     program = cl.Program(context, header + source).build() 
     147        source = "#define USE_SINCOS\n" + source 
     148    options = (get_fast_inaccurate_build_options(context.devices[0]) 
     149               if fast else []) 
     150    program = cl.Program(context, source).build(options=options) 
    149151    return program 
    150152 
     
    178180        self.queues = [cl.CommandQueue(self.context, d) 
    179181                       for d in self.context.devices] 
    180         self.has_double = all(has_double(d) for d in self.context.devices) 
    181182        self.compiled = {} 
     183 
     184    def has_type(self, dtype): 
     185        dtype = np.dtype(dtype) 
     186        return all(has_type(d, dtype) for d in self.context.devices) 
    182187 
    183188    def _create_some_context(self): 
     
    189194            warnings.warn("the environment variable 'PYOPENCL_CTX' might not be set correctly") 
    190195 
    191     def compile_program(self, name, source, dtype): 
     196    def compile_program(self, name, source, dtype, fast=False): 
    192197        if name not in self.compiled: 
    193198            #print("compiling",name) 
    194             self.compiled[name] = compile_model(self.context, source, dtype) 
     199            self.compiled[name] = compile_model(self.context, source, dtype, 
     200                                                fast) 
    195201        return self.compiled[name] 
    196202 
     
    226232    for single and 'd', 'float64' or 'double' for double.  Double precision 
    227233    is an optional extension which may not be available on all devices. 
    228     """ 
    229     def __init__(self, source, info, dtype=generate.F32): 
     234 
     235    *fast* is True if fast inaccurate math is acceptable (40% speed increase) 
     236    """ 
     237    def __init__(self, source, info, dtype=generate.F32, fast=False): 
    230238        self.info = info 
    231239        self.source = source 
    232240        self.dtype = np.dtype(dtype) 
     241        self.fast = fast 
    233242        self.program = None # delay program creation 
    234243 
     
    243252    def __call__(self, q_input): 
    244253        if self.dtype != q_input.dtype: 
    245             raise TypeError("data is %s kernel is %s" % (q_input.dtype, self.dtype)) 
     254            raise TypeError("data is %s kernel is %s" 
     255                            % (q_input.dtype, self.dtype)) 
    246256        if self.program is None: 
    247257            compiler = environment().compile_program 
    248             self.program = compiler(self.info['name'], self.source, self.dtype) 
     258            self.program = compiler(self.info['name'], self.source, self.dtype, 
     259                                    self.fast) 
    249260        kernel_name = generate.kernel_name(self.info, q_input.is_2D) 
    250261        kernel = getattr(self.program, kernel_name) 
     
    347358 
    348359    def __call__(self, fixed_pars, pd_pars, cutoff=1e-5): 
    349         real = np.float32 if self.q_input.dtype == generate.F32 else np.float64 
     360        real = (np.float32 if self.q_input.dtype == generate.F32 
     361                else np.float64 if self.q_input.dtype == generate.F64 
     362                else np.float16 if self.q_input.dtype == generate.F16 
     363                else np.float32)  # will never get here, so use np.float32 
    350364 
    351365        device_num = 0 
  • sasmodels/kerneldll.py

    r9404dd3 r5d316e9  
    8585DLL_PATH = tempfile.gettempdir() 
    8686 
    87 ALLOW_SINGLE_PRECISION_DLLS = False 
     87ALLOW_SINGLE_PRECISION_DLLS = True 
    8888 
    8989 
     
    122122    models are allowed as DLLs. 
    123123    """ 
     124    if callable(info.get('Iq',None)): 
     125        return PyModel(info) 
     126 
    124127    dtype = np.dtype(dtype) 
     128    if dtype == generate.F16: 
     129        raise ValueError("16 bit floats not supported") 
    125130    if dtype == generate.F32 and not ALLOW_SINGLE_PRECISION_DLLS: 
    126131        dtype = generate.F64  # Force 64-bit dll 
    127132 
    128     if callable(info.get('Iq',None)): 
    129         return PyModel(info) 
    130  
    131133    if dtype == generate.F32: # 32-bit dll 
    132         source = generate.use_single(source) 
    133134        tempfile_prefix = 'sas_'+info['name']+'32_' 
    134135    elif dtype == generate.F64: 
    135136        tempfile_prefix = 'sas_'+info['name']+'64_' 
    136137    else: 
    137         source = generate.use_long_double(source) 
    138138        tempfile_prefix = 'sas_'+info['name']+'128_' 
    139139 
     140    source = generate.convert_type(source, dtype) 
    140141    source_files = generate.sources(info) + [info['filename']] 
    141142    dll= dll_path(info, dtype) 
Note: See TracChangeset for help on using the changeset viewer.