Changeset 5d4777d in sasmodels for sasmodels/gpu.py


Ignore:
Timestamp:
Sep 1, 2014 11:24:38 PM (10 years ago)
Author:
Paul Kienzle <pkienzle@…>
Branches:
master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
Children:
f4cf580
Parents:
ff7119b
Message:

reorganize, check and update models

File:
1 edited

Legend:

Unmodified
Added
Removed
  • sasmodels/gpu.py

    rff7119b r5d4777d  
    2525 
    2626""" 
    27 import warnings 
    28  
    2927import numpy as np 
    3028import pyopencl as cl 
     
    3230 
    3331from . import gen 
    34  
    35 from .gen import F32, F64 
    3632 
    3733F32_DEFS = """\ 
     
    5248# larger than necessary given that cost grows as npts^k where k is the number 
    5349# of polydisperse parameters. 
    54 MAX_LOOPS = 1024 
     50MAX_LOOPS = 2048 
     51 
     52def load_model(kernel_module, dtype="single"): 
     53    """ 
     54    Load the OpenCL model defined by *kernel_module*. 
     55 
     56    Access to the OpenCL device is delayed until the kernel is called 
     57    so models can be defined without using too many resources. 
     58    """ 
     59    source, info = gen.make(kernel_module) 
     60    ## for debugging, save source to a .cl file, edit it, and reload as model 
     61    open(info['name']+'.cl','w').write(source) 
     62    #source = open(info['name']+'.cl','r').read() 
     63    return GpuModel(source, info, dtype) 
    5564 
    5665ENV = None 
     
    103112    """ 
    104113    dtype = np.dtype(dtype) 
    105     if dtype==F64 and not all(has_double(d) for d in context.devices): 
     114    if dtype==gen.F64 and not all(has_double(d) for d in context.devices): 
    106115        raise RuntimeError("Double precision not supported for devices") 
    107116 
    108     header = F64_DEFS if dtype == F64 else F32_DEFS 
     117    header = F64_DEFS if dtype == gen.F64 else F32_DEFS 
    109118    # Note: USE_SINCOS makes the intel cpu slower under opencl 
    110119    if context.devices[0].type == cl.device_type.GPU: 
     
    158167    is an optional extension which may not be available on all devices. 
    159168    """ 
    160     def __init__(self, source, info, dtype=F32): 
     169    def __init__(self, source, info, dtype=gen.F32): 
    161170        self.info = info 
    162171        self.source = source 
     
    221230    buffer will be released when the data object is freed. 
    222231    """ 
    223     def __init__(self, q_vectors, dtype=F32): 
     232    def __init__(self, q_vectors, dtype=gen.F32): 
    224233        env = environment() 
    225234        self.nq = q_vectors[0].size 
     
    273282        env = environment() 
    274283        self.loops_b = [cl.Buffer(env.context, mf.READ_WRITE, 
    275                                   MAX_LOOPS*input.dtype.itemsize) 
     284                                  2*MAX_LOOPS*input.dtype.itemsize) 
    276285                        for _ in env.queues] 
    277286        self.res_b = [cl.Buffer(env.context, mf.READ_WRITE, 
     
    281290 
    282291    def __call__(self, pars, pd_pars, cutoff=1e-5): 
    283         real = np.float32 if self.input.dtype == F32 else np.float64 
     292        real = np.float32 if self.input.dtype == gen.F32 else np.float64 
    284293        fixed = [real(p) for p in pars] 
    285294        cutoff = real(cutoff) 
    286295        loops = np.hstack(pd_pars) 
    287296        loops = np.ascontiguousarray(loops.T, self.input.dtype).flatten() 
    288         loops_N = [np.uint32(len(p[0])) for p in pd_pars] 
     297        Nloops = [np.uint32(len(p[0])) for p in pd_pars] 
     298        #print "loops",Nloops, loops 
    289299 
    290300        #import sys; print >>sys.stderr,"opencl eval",pars 
    291301        #print "opencl eval",pars 
    292         if len(loops) > MAX_LOOPS: 
     302        if len(loops) > 2*MAX_LOOPS: 
    293303            raise ValueError("too many polydispersity points") 
    294304        device_num = 0 
     
    300310        #ctx = environment().context 
    301311        #loops_bi = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=loops) 
    302         args = self.input.q_buffers + [res_bi,loops_bi,loops_l,cutoff] + fixed + loops_N 
     312        args = self.input.q_buffers + [res_bi,loops_bi,loops_l,cutoff] + fixed + Nloops 
    303313        self.kernel(queuei, self.input.global_size, None, *args) 
    304314        cl.enqueue_copy(queuei, self.res, res_bi) 
Note: See TracChangeset for help on using the changeset viewer.