Changeset f734e7d in sasmodels for sasmodels/kernelcl.py
- Timestamp:
- Feb 22, 2015 1:44:54 AM (9 years ago)
- Branches:
- master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children:
- 6137124
- Parents:
- 711d8e2
- File:
  - 1 edited
Legend:
- Unmodified
- Added
- Removed
-
sasmodels/kernelcl.py
Columns: line number in rf1ecfa92 (old) | line number in rf734e7d (new) | marker (" " unmodified, "+" added, "-" removed) | content

 old  new
  44   44
  45   45     from . import generate
  46        - from .kernelpy import PyInput, PyKernel
       46   + from .kernelpy import PyInput, PyModel
  47   47
  48   48     F64_DEFS = """\
   …    …
  68   68         """
  69   69         source, info = generate.make(kernel_module)
       70   +     if callable(info.get('Iq',None)):
       71   +         return PyModel(info)
  70   72         ## for debugging, save source to a .cl file, edit it, and reload as model
  71   73         #open(info['name']+'.cl','w').write(source)
   …    …
 234  236
 235  237         def __call__(self, input):
 236        -         # Support pure python kernel call
 237        -         if input.is_2D and callable(self.info['Iqxy']):
 238        -             return PyKernel(self.info['Iqxy'], self.info, input)
 239        -         elif not input.is_2D and callable(self.info['Iq']):
 240        -             return PyKernel(self.info['Iq'], self.info, input)
 241        -
 242  238             if self.dtype != input.dtype:
 243  239                 raise TypeError("data and kernel have different types")
   …    …
 261  257             ctypes and some may be pure python.
 262  258             """
 263        -         # Support pure python kernel call
 264        -         if len(q_vectors) == 1 and callable(self.info['Iq']):
 265        -             return PyInput(q_vectors, dtype=self.dtype)
 266        -         elif callable(self.info['Iqxy']):
 267        -             return PyInput(q_vectors, dtype=self.dtype)
 268        -         else:
 269        -             return GpuInput(q_vectors, dtype=self.dtype)
      259   +         return GpuInput(q_vectors, dtype=self.dtype)
 270  260
 271  261     # TODO: check that we don't need a destructor for buffers which go out of scope
   …    …
 349  339
 350  340
 351        -     def __call__(self, pars, pd_pars, cutoff=1e-5):
      341   +     def __call__(self, fixed_pars, pd_pars, cutoff=1e-5):
 352  342             real = np.float32 if self.input.dtype == generate.F32 else np.float64
 353        -         fixed = [real(p) for p in pars]
 354        -         cutoff = real(cutoff)
 355        -         loops = np.hstack(pd_pars) if pd_pars else np.empty(0,dtype=self.input.dtype)
 356        -         loops = np.ascontiguousarray(loops.T, self.input.dtype).flatten()
 357        -         Nloops = [np.uint32(len(p[0])) for p in pd_pars]
 358        -         #print "loops",Nloops, loops
 359        -
 360        -         #import sys; print >>sys.stderr,"opencl eval",pars
 361        -         #print "opencl eval",pars
 362        -         if len(loops) > 2*MAX_LOOPS:
 363        -             raise ValueError("too many polydispersity points")
      343   +
 364  344             device_num = 0
      345   +         queuei = environment().queues[device_num]
 365  346             res_bi = self.res_b[device_num]
 366        -         queuei = environment().queues[device_num]
 367        -         loops_bi = self.loops_b[device_num]
 368        -         loops_l = cl.LocalMemory(len(loops.data))
 369        -         cl.enqueue_copy(queuei, loops_bi, loops)
 370        -         #ctx = environment().context
 371        -         #loops_bi = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=loops)
 372        -         args = self.input.q_buffers + [res_bi,loops_bi,loops_l,cutoff] + fixed + Nloops
      347   +         nq = np.uint32(self.input.nq)
      348   +         if pd_pars:
      349   +             cutoff = real(cutoff)
      350   +             loops_N = [np.uint32(len(p[0])) for p in pd_pars]
      351   +             loops = np.hstack(pd_pars) if pd_pars else np.empty(0,dtype=self.input.dtype)
      352   +             loops = np.ascontiguousarray(loops.T, self.input.dtype).flatten()
      353   +             #print "loops",Nloops, loops
      354   +
      355   +             #import sys; print >>sys.stderr,"opencl eval",pars
      356   +             #print "opencl eval",pars
      357   +             if len(loops) > 2*MAX_LOOPS:
      358   +                 raise ValueError("too many polydispersity points")
      359   +
      360   +             loops_bi = self.loops_b[device_num]
      361   +             cl.enqueue_copy(queuei, loops_bi, loops)
      362   +             loops_l = cl.LocalMemory(len(loops.data))
      363   +             #ctx = environment().context
      364   +             #loops_bi = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=loops)
      365   +             dispersed = [loops_bi, loops_l, cutoff] + loops_N
      366   +         else:
      367   +             dispersed = []
      368   +         fixed = [real(p) for p in fixed_pars]
      369   +         args = self.input.q_buffers + [res_bi, nq] + dispersed + fixed
 373  370             self.kernel(queuei, self.input.global_size, None, *args)
 374  371             cl.enqueue_copy(queuei, self.res, res_bi)
Note: See TracChangeset for help on using the changeset viewer.