Changeset f734e7d in sasmodels for sasmodels/kernelcl.py


Ignore:
Timestamp:
Feb 22, 2015 1:44:54 AM (9 years ago)
Author:
pkienzle
Branches:
master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
Children:
6137124
Parents:
711d8e2
Message:

Restructure C code generation for maintainability; extend test harness to allow OpenCL and ctypes tests

File:
1 edited

Legend:

Unmodified
Added
Removed
  • sasmodels/kernelcl.py

    rf1ecfa92 rf734e7d  
    4444 
    4545from . import generate 
    46 from .kernelpy import PyInput, PyKernel 
     46from .kernelpy import PyInput, PyModel 
    4747 
    4848F64_DEFS = """\ 
     
    6868    """ 
    6969    source, info = generate.make(kernel_module) 
     70    if callable(info.get('Iq',None)): 
     71        return PyModel(info) 
    7072    ## for debugging, save source to a .cl file, edit it, and reload as model 
    7173    #open(info['name']+'.cl','w').write(source) 
     
    234236 
    235237    def __call__(self, input): 
    236         # Support pure python kernel call 
    237         if input.is_2D and callable(self.info['Iqxy']): 
    238             return PyKernel(self.info['Iqxy'], self.info, input) 
    239         elif not input.is_2D and callable(self.info['Iq']): 
    240             return PyKernel(self.info['Iq'], self.info, input) 
    241  
    242238        if self.dtype != input.dtype: 
    243239            raise TypeError("data and kernel have different types") 
     
    261257        ctypes and some may be pure python. 
    262258        """ 
    263         # Support pure python kernel call 
    264         if len(q_vectors) == 1 and callable(self.info['Iq']): 
    265             return PyInput(q_vectors, dtype=self.dtype) 
    266         elif callable(self.info['Iqxy']): 
    267             return PyInput(q_vectors, dtype=self.dtype) 
    268         else: 
    269             return GpuInput(q_vectors, dtype=self.dtype) 
     259        return GpuInput(q_vectors, dtype=self.dtype) 
    270260 
    271261# TODO: check that we don't need a destructor for buffers which go out of scope 
     
    349339 
    350340 
    351     def __call__(self, pars, pd_pars, cutoff=1e-5): 
     341    def __call__(self, fixed_pars, pd_pars, cutoff=1e-5): 
    352342        real = np.float32 if self.input.dtype == generate.F32 else np.float64 
    353         fixed = [real(p) for p in pars] 
    354         cutoff = real(cutoff) 
    355         loops = np.hstack(pd_pars) if pd_pars else np.empty(0,dtype=self.input.dtype) 
    356         loops = np.ascontiguousarray(loops.T, self.input.dtype).flatten() 
    357         Nloops = [np.uint32(len(p[0])) for p in pd_pars] 
    358         #print "loops",Nloops, loops 
    359  
    360         #import sys; print >>sys.stderr,"opencl eval",pars 
    361         #print "opencl eval",pars 
    362         if len(loops) > 2*MAX_LOOPS: 
    363             raise ValueError("too many polydispersity points") 
     343 
    364344        device_num = 0 
     345        queuei = environment().queues[device_num] 
    365346        res_bi = self.res_b[device_num] 
    366         queuei = environment().queues[device_num] 
    367         loops_bi = self.loops_b[device_num] 
    368         loops_l = cl.LocalMemory(len(loops.data)) 
    369         cl.enqueue_copy(queuei, loops_bi, loops) 
    370         #ctx = environment().context 
    371         #loops_bi = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=loops) 
    372         args = self.input.q_buffers + [res_bi,loops_bi,loops_l,cutoff] + fixed + Nloops 
     347        nq = np.uint32(self.input.nq) 
     348        if pd_pars: 
     349            cutoff = real(cutoff) 
     350            loops_N = [np.uint32(len(p[0])) for p in pd_pars] 
     351            loops = np.hstack(pd_pars) if pd_pars else np.empty(0,dtype=self.input.dtype) 
     352            loops = np.ascontiguousarray(loops.T, self.input.dtype).flatten() 
     353            #print "loops",Nloops, loops 
     354 
     355            #import sys; print >>sys.stderr,"opencl eval",pars 
     356            #print "opencl eval",pars 
     357            if len(loops) > 2*MAX_LOOPS: 
     358                raise ValueError("too many polydispersity points") 
     359 
     360            loops_bi = self.loops_b[device_num] 
     361            cl.enqueue_copy(queuei, loops_bi, loops) 
     362            loops_l = cl.LocalMemory(len(loops.data)) 
     363            #ctx = environment().context 
     364            #loops_bi = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=loops) 
     365            dispersed = [loops_bi, loops_l, cutoff] + loops_N 
     366        else: 
     367            dispersed = [] 
     368        fixed = [real(p) for p in fixed_pars] 
     369        args = self.input.q_buffers + [res_bi, nq] + dispersed + fixed 
    373370        self.kernel(queuei, self.input.global_size, None, *args) 
    374371        cl.enqueue_copy(queuei, self.res, res_bi) 
Note: See TracChangeset for help on using the changeset viewer.