Changeset c9e31e2 in sasmodels for sasmodels/kernelcl.py


Ignore:
Timestamp:
Mar 6, 2015 10:54:22 AM (9 years ago)
Author:
richardh
Branches:
master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
Children:
9f6f2f8, ab87a12
Parents:
d60b433 (diff), 3c56da87 (diff)
Note: this is a merge changeset; the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
Message:

Merge branch 'master' of https://github.com/SasView/sasmodels

File:
1 edited

Legend:

Unmodified
Added
Removed
  • sasmodels/kernelcl.py

    r676351f r3c56da87  
    3030try: 
    3131    import pyopencl as cl 
    32 except ImportError,exc: 
     32    # Ask OpenCL for the default context so that we know that one exists 
     33    cl.create_some_context(interactive=False) 
     34except Exception, exc: 
    3335    warnings.warn(str(exc)) 
    3436    raise RuntimeError("OpenCL not available") 
    3537 
    36 try: 
    37     context = cl.create_some_context(interactive=False) 
    38     del context 
    39 except cl.RuntimeError, exc: 
    40     warnings.warn(str(exc)) 
    41     raise RuntimeError("OpenCl not available") 
    42  
    4338from pyopencl import mem_flags as mf 
    4439 
    4540from . import generate 
    46 from .kernelpy import PyInput, PyKernel 
     41from .kernelpy import PyModel 
    4742 
    4843F64_DEFS = """\ 
     
    6863    """ 
    6964    source, info = generate.make(kernel_module) 
     65    if callable(info.get('Iq', None)): 
     66        return PyModel(info) 
    7067    ## for debugging, save source to a .cl file, edit it, and reload as model 
    7168    #open(info['name']+'.cl','w').write(source) 
     
    113110    device.min_data_type_align_size//4. 
    114111    """ 
    115     remainder = vector.size%boundary 
     112    remainder = vector.size % boundary 
    116113    if remainder != 0: 
    117114        size = vector.size + (boundary - remainder) 
    118         vector = np.hstack((vector, [extra]*(size-vector.size))) 
     115        vector = np.hstack((vector, [extra] * (size - vector.size))) 
    119116    return np.ascontiguousarray(vector, dtype=dtype) 
    120117 
     
    129126    """ 
    130127    dtype = np.dtype(dtype) 
    131     if dtype==generate.F64 and not all(has_double(d) for d in context.devices): 
     128    if dtype == generate.F64 and not all(has_double(d) for d in context.devices): 
    132129        raise RuntimeError("Double precision not supported for devices") 
    133130 
     
    138135    if context.devices[0].type == cl.device_type.GPU: 
    139136        header += "#define USE_SINCOS\n" 
    140     program  = cl.Program(context, header+source).build() 
     137    program = cl.Program(context, header + source).build() 
    141138    return program 
    142139 
     
    163160 
    164161        if not self.context: 
    165             self.context = self._find_context() 
     162            self.context = _get_default_context() 
    166163 
    167164        # Byte boundary for data alignment 
     
    176173        try: 
    177174            self.context = cl.create_some_context(interactive=False) 
    178         except Exception,exc: 
     175        except Exception, exc: 
    179176            warnings.warn(str(exc)) 
    180177            warnings.warn("pyopencl.create_some_context() failed") 
    181178            warnings.warn("the environment variable 'PYOPENCL_CTX' might not be set correctly") 
    182  
    183     def _find_context(self): 
    184         default = None 
    185         for platform in cl.get_platforms(): 
    186             for device in platform.get_devices(): 
    187                 if device.type == cl.device_type.GPU: 
    188                     return cl.Context([device]) 
    189                 if default is None: 
    190                     default = device 
    191  
    192         if not default: 
    193             raise RuntimeError("OpenCL device not found") 
    194  
    195         return cl.Context([default]) 
    196179 
    197180    def compile_program(self, name, source, dtype): 
     
    206189            del self.compiled[name] 
    207190 
     191def _get_default_context(): 
     192    default = None 
     193    for platform in cl.get_platforms(): 
     194        for device in platform.get_devices(): 
     195            if device.type == cl.device_type.GPU: 
     196                return cl.Context([device]) 
     197            if default is None: 
     198                default = device 
     199 
     200    if not default: 
     201        raise RuntimeError("OpenCL device not found") 
     202 
     203    return cl.Context([default]) 
     204 
    208205 
    209206class GpuModel(object): 
     
    233230        self.__dict__ = state.copy() 
    234231 
    235     def __call__(self, input): 
    236         # Support pure python kernel call 
    237         if input.is_2D and callable(self.info['Iqxy']): 
    238             return PyKernel(self.info['Iqxy'], self.info, input) 
    239         elif not input.is_2D and callable(self.info['Iq']): 
    240             return PyKernel(self.info['Iq'], self.info, input) 
    241  
    242         if self.dtype != input.dtype: 
     232    def __call__(self, input_value): 
     233        if self.dtype != input_value.dtype: 
    243234            raise TypeError("data and kernel have different types") 
    244235        if self.program is None: 
    245             self.program = environment().compile_program(self.info['name'],self.source, self.dtype) 
    246         kernel_name = generate.kernel_name(self.info, input.is_2D) 
     236            compiler = environment().compile_program 
     237            self.program = compiler(self.info['name'], self.source, self.dtype) 
     238        kernel_name = generate.kernel_name(self.info, input_value.is_2D) 
    247239        kernel = getattr(self.program, kernel_name) 
    248         return GpuKernel(kernel, self.info, input) 
     240        return GpuKernel(kernel, self.info, input_value) 
    249241 
    250242    def release(self): 
     
    261253        ctypes and some may be pure python. 
    262254        """ 
    263         # Support pure python kernel call 
    264         if len(q_vectors) == 1 and callable(self.info['Iq']): 
    265             return PyInput(q_vectors, dtype=self.dtype) 
    266         elif callable(self.info['Iqxy']): 
    267             return PyInput(q_vectors, dtype=self.dtype) 
    268         else: 
    269             return GpuInput(q_vectors, dtype=self.dtype) 
     255        return GpuInput(q_vectors, dtype=self.dtype) 
    270256 
    271257# TODO: check that we don't need a destructor for buffers which go out of scope 
     
    300286        self.q_vectors = [_stretch_input(q, self.dtype, 32) for q in q_vectors] 
    301287        self.q_buffers = [ 
    302             cl.Buffer(env.context,  mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=q) 
     288            cl.Buffer(env.context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=q) 
    303289            for q in self.q_vectors 
    304290        ] 
     
    318304    *info* is the module information 
    319305 
    320     *input* is the DllInput q vectors at which the kernel should be 
     306    *q_input* is the DllInput q vectors at which the kernel should be 
    321307    evaluated. 
    322308 
     
    329315    Call :meth:`release` when done with the kernel instance. 
    330316    """ 
    331     def __init__(self, kernel, info, input): 
    332         self.input = input 
     317    def __init__(self, kernel, info, q_input): 
     318        self.q_input = q_input 
    333319        self.kernel = kernel 
    334320        self.info = info 
    335         self.res = np.empty(input.nq, input.dtype) 
    336         dim = '2d' if input.is_2D else '1d' 
    337         self.fixed_pars = info['partype']['fixed-'+dim] 
    338         self.pd_pars = info['partype']['pd-'+dim] 
     321        self.res = np.empty(q_input.nq, q_input.dtype) 
     322        dim = '2d' if q_input.is_2D else '1d' 
     323        self.fixed_pars = info['partype']['fixed-' + dim] 
     324        self.pd_pars = info['partype']['pd-' + dim] 
    339325 
    340326        # Inputs and outputs for each kernel call 
     
    342328        env = environment() 
    343329        self.loops_b = [cl.Buffer(env.context, mf.READ_WRITE, 
    344                                   2*MAX_LOOPS*input.dtype.itemsize) 
     330                                  2 * MAX_LOOPS * q_input.dtype.itemsize) 
    345331                        for _ in env.queues] 
    346332        self.res_b = [cl.Buffer(env.context, mf.READ_WRITE, 
    347                                 input.global_size[0]*input.dtype.itemsize) 
     333                                q_input.global_size[0] * q_input.dtype.itemsize) 
    348334                      for _ in env.queues] 
    349335 
    350336 
    351     def __call__(self, pars, pd_pars, cutoff=1e-5): 
    352         real = np.float32 if self.input.dtype == generate.F32 else np.float64 
    353         fixed = [real(p) for p in pars] 
    354         cutoff = real(cutoff) 
    355         loops = np.hstack(pd_pars) 
    356         loops = np.ascontiguousarray(loops.T, self.input.dtype).flatten() 
    357         Nloops = [np.uint32(len(p[0])) for p in pd_pars] 
    358         #print "loops",Nloops, loops 
    359  
    360         #import sys; print >>sys.stderr,"opencl eval",pars 
    361         #print "opencl eval",pars 
    362         if len(loops) > 2*MAX_LOOPS: 
    363             raise ValueError("too many polydispersity points") 
     337    def __call__(self, fixed_pars, pd_pars, cutoff=1e-5): 
     338        real = np.float32 if self.q_input.dtype == generate.F32 else np.float64 
     339 
    364340        device_num = 0 
     341        queuei = environment().queues[device_num] 
    365342        res_bi = self.res_b[device_num] 
    366         queuei = environment().queues[device_num] 
    367         loops_bi = self.loops_b[device_num] 
    368         loops_l = cl.LocalMemory(len(loops.data)) 
    369         cl.enqueue_copy(queuei, loops_bi, loops) 
    370         #ctx = environment().context 
    371         #loops_bi = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=loops) 
    372         args = self.input.q_buffers + [res_bi,loops_bi,loops_l,cutoff] + fixed + Nloops 
    373         self.kernel(queuei, self.input.global_size, None, *args) 
     343        nq = np.uint32(self.q_input.nq) 
     344        if pd_pars: 
     345            cutoff = real(cutoff) 
     346            loops_N = [np.uint32(len(p[0])) for p in pd_pars] 
     347            loops = np.hstack(pd_pars) \ 
     348                if pd_pars else np.empty(0, dtype=self.q_input.dtype) 
     349            loops = np.ascontiguousarray(loops.T, self.q_input.dtype).flatten() 
     350            #print "loops",Nloops, loops 
     351 
     352            #import sys; print >>sys.stderr,"opencl eval",pars 
     353            #print "opencl eval",pars 
     354            if len(loops) > 2 * MAX_LOOPS: 
     355                raise ValueError("too many polydispersity points") 
     356 
     357            loops_bi = self.loops_b[device_num] 
     358            cl.enqueue_copy(queuei, loops_bi, loops) 
     359            loops_l = cl.LocalMemory(len(loops.data)) 
     360            #ctx = environment().context 
     361            #loops_bi = cl.Buffer(ctx, mf.READ_ONLY|mf.COPY_HOST_PTR, hostbuf=loops) 
     362            dispersed = [loops_bi, loops_l, cutoff] + loops_N 
     363        else: 
     364            dispersed = [] 
     365        fixed = [real(p) for p in fixed_pars] 
     366        args = self.q_input.q_buffers + [res_bi, nq] + dispersed + fixed 
     367        self.kernel(queuei, self.q_input.global_size, None, *args) 
    374368        cl.enqueue_copy(queuei, self.res, res_bi) 
    375369 
Note: See TracChangeset for help on using the changeset viewer.