Changeset 3c56da87 in sasmodels for sasmodels/kernelcl.py
- Timestamp:
- Mar 4, 2015 10:55:38 PM (9 years ago)
- Branches:
- master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children:
- 3a45c2c
- Parents:
- b89f519
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
sasmodels/kernelcl.py
rc85db69 r3c56da87
 30  30       try:
 31  31           import pyopencl as cl
 32      -        context = cl.create_some_context(interactive=False)
 33      -        del context
     32  +        # Ask OpenCL for the default context so that we know that one exists
     33  +        cl.create_some_context(interactive=False)
 34  34       except Exception, exc:
 35  35           warnings.warn(str(exc))
  …   …
160 160
161 161           if not self.context:
162      -            self.context = self._find_context()
    162  +            self.context = _get_default_context()
163 163
164 164           # Byte boundary for data alignment
  …   …
178 178               warnings.warn("the environment variable 'PYOPENCL_CTX' might not be set correctly")
179 179
180      -    def _find_context(self):
181      -        default = None
182      -        for platform in cl.get_platforms():
183      -            for device in platform.get_devices():
184      -                if device.type == cl.device_type.GPU:
185      -                    return cl.Context([device])
186      -                if default is None:
187      -                    default = device
188      -
189      -        if not default:
190      -            raise RuntimeError("OpenCL device not found")
191      -
192      -        return cl.Context([default])
193      -
194 180       def compile_program(self, name, source, dtype):
195 181           if name not in self.compiled:
  …   …
203 189               del self.compiled[name]
204 190
    191  +    def _get_default_context():
    192  +        default = None
    193  +        for platform in cl.get_platforms():
    194  +            for device in platform.get_devices():
    195  +                if device.type == cl.device_type.GPU:
    196  +                    return cl.Context([device])
    197  +                if default is None:
    198  +                    default = device
    199  +
    200  +        if not default:
    201  +            raise RuntimeError("OpenCL device not found")
    202  +
    203  +        return cl.Context([default])
    204  +
205 205
206 206   class GpuModel(object):
  …   …
234 234               raise TypeError("data and kernel have different types")
235 235           if self.program is None:
236      -            self.program = environment().compile_program(self.info['name'], self.source, self.dtype)
    236  +            compiler = environment().compile_program
    237  +            self.program = compiler(self.info['name'], self.source, self.dtype)
237 238           kernel_name = generate.kernel_name(self.info, input_value.is_2D)
238 239           kernel = getattr(self.program, kernel_name)
  …   …
303 304       *info* is the module information
304 305
305      -    *input* is the DllInput q vectors at which the kernel should be
    306  +    *q_input* is the DllInput q vectors at which the kernel should be
306 307       evaluated.
307 308
  …   …
314 315       Call :meth:`release` when done with the kernel instance.
315 316       """
316      -    def __init__(self, kernel, info, input):
317      -        self.input = input
    317  +    def __init__(self, kernel, info, q_input):
    318  +        self.q_input = q_input
318 319           self.kernel = kernel
319 320           self.info = info
320      -        self.res = np.empty(input.nq, input.dtype)
321      -        dim = '2d' if input.is_2D else '1d'
    321  +        self.res = np.empty(q_input.nq, q_input.dtype)
    322  +        dim = '2d' if q_input.is_2D else '1d'
322 323           self.fixed_pars = info['partype']['fixed-' + dim]
323 324           self.pd_pars = info['partype']['pd-' + dim]
  …   …
327 328           env = environment()
328 329           self.loops_b = [cl.Buffer(env.context, mf.READ_WRITE,
329      -                                  2 * MAX_LOOPS * input.dtype.itemsize)
    330  +                                  2 * MAX_LOOPS * q_input.dtype.itemsize)
330 331                           for _ in env.queues]
331 332           self.res_b = [cl.Buffer(env.context, mf.READ_WRITE,
332      -                                input.global_size[0] * input.dtype.itemsize)
    333  +                                q_input.global_size[0] * q_input.dtype.itemsize)
333 334                         for _ in env.queues]
334 335
335 336
336 337       def __call__(self, fixed_pars, pd_pars, cutoff=1e-5):
337      -        real = np.float32 if self.input.dtype == generate.F32 else np.float64
    338  +        real = np.float32 if self.q_input.dtype == generate.F32 else np.float64
338 339
339 340           device_num = 0
340 341           queuei = environment().queues[device_num]
341 342           res_bi = self.res_b[device_num]
342      -        nq = np.uint32(self.input.nq)
    343  +        nq = np.uint32(self.q_input.nq)
343 344           if pd_pars:
344 345               cutoff = real(cutoff)
345 346               loops_N = [np.uint32(len(p[0])) for p in pd_pars]
346      -            loops = np.hstack(pd_pars) if pd_pars else np.empty(0, dtype=self.input.dtype)
347      -            loops = np.ascontiguousarray(loops.T, self.input.dtype).flatten()
    347  +            loops = np.hstack(pd_pars) \
    348  +                if pd_pars else np.empty(0, dtype=self.q_input.dtype)
    349  +            loops = np.ascontiguousarray(loops.T, self.q_input.dtype).flatten()
348 350               #print "loops",Nloops, loops
349 351
  …   …
357 359               loops_l = cl.LocalMemory(len(loops.data))
358 360               #ctx = environment().context
359      -            #loops_bi = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=loops)
    361  +            #loops_bi = cl.Buffer(ctx, mf.READ_ONLY|mf.COPY_HOST_PTR, hostbuf=loops)
360 362               dispersed = [loops_bi, loops_l, cutoff] + loops_N
361 363           else:
362 364               dispersed = []
363 365           fixed = [real(p) for p in fixed_pars]
364      -        args = self.input.q_buffers + [res_bi, nq] + dispersed + fixed
365      -        self.kernel(queuei, self.input.global_size, None, *args)
    366  +        args = self.q_input.q_buffers + [res_bi, nq] + dispersed + fixed
    367  +        self.kernel(queuei, self.q_input.global_size, None, *args)
366 368           cl.enqueue_copy(queuei, self.res, res_bi)
367 369
Note: See TracChangeset
for help on using the changeset viewer.