Changeset c9e31e2 in sasmodels for sasmodels/kernelcl.py
- Timestamp:
- Mar 6, 2015 10:54:22 AM (9 years ago)
- Branches:
- master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children:
- 9f6f2f8, ab87a12
- Parents:
- d60b433 (diff), 3c56da87 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
sasmodels/kernelcl.py
r676351f r3c56da87 30 30 try: 31 31 import pyopencl as cl 32 except ImportError,exc: 32 # Ask OpenCL for the default context so that we know that one exists 33 cl.create_some_context(interactive=False) 34 except Exception, exc: 33 35 warnings.warn(str(exc)) 34 36 raise RuntimeError("OpenCL not available") 35 37 36 try:37 context = cl.create_some_context(interactive=False)38 del context39 except cl.RuntimeError, exc:40 warnings.warn(str(exc))41 raise RuntimeError("OpenCl not available")42 43 38 from pyopencl import mem_flags as mf 44 39 45 40 from . import generate 46 from .kernelpy import Py Input, PyKernel41 from .kernelpy import PyModel 47 42 48 43 F64_DEFS = """\ … … 68 63 """ 69 64 source, info = generate.make(kernel_module) 65 if callable(info.get('Iq', None)): 66 return PyModel(info) 70 67 ## for debugging, save source to a .cl file, edit it, and reload as model 71 68 #open(info['name']+'.cl','w').write(source) … … 113 110 device.min_data_type_align_size//4. 114 111 """ 115 remainder = vector.size %boundary112 remainder = vector.size % boundary 116 113 if remainder != 0: 117 114 size = vector.size + (boundary - remainder) 118 vector = np.hstack((vector, [extra] *(size-vector.size)))115 vector = np.hstack((vector, [extra] * (size - vector.size))) 119 116 return np.ascontiguousarray(vector, dtype=dtype) 120 117 … … 129 126 """ 130 127 dtype = np.dtype(dtype) 131 if dtype ==generate.F64 and not all(has_double(d) for d in context.devices):128 if dtype == generate.F64 and not all(has_double(d) for d in context.devices): 132 129 raise RuntimeError("Double precision not supported for devices") 133 130 … … 138 135 if context.devices[0].type == cl.device_type.GPU: 139 136 header += "#define USE_SINCOS\n" 140 program = cl.Program(context, header+source).build()137 program = cl.Program(context, header + source).build() 141 138 return program 142 139 … … 163 160 164 161 if not self.context: 165 self.context = self._find_context()162 self.context = _get_default_context() 
166 163 167 164 # Byte boundary for data alignment … … 176 173 try: 177 174 self.context = cl.create_some_context(interactive=False) 178 except Exception, exc:175 except Exception, exc: 179 176 warnings.warn(str(exc)) 180 177 warnings.warn("pyopencl.create_some_context() failed") 181 178 warnings.warn("the environment variable 'PYOPENCL_CTX' might not be set correctly") 182 183 def _find_context(self):184 default = None185 for platform in cl.get_platforms():186 for device in platform.get_devices():187 if device.type == cl.device_type.GPU:188 return cl.Context([device])189 if default is None:190 default = device191 192 if not default:193 raise RuntimeError("OpenCL device not found")194 195 return cl.Context([default])196 179 197 180 def compile_program(self, name, source, dtype): … … 206 189 del self.compiled[name] 207 190 191 def _get_default_context(): 192 default = None 193 for platform in cl.get_platforms(): 194 for device in platform.get_devices(): 195 if device.type == cl.device_type.GPU: 196 return cl.Context([device]) 197 if default is None: 198 default = device 199 200 if not default: 201 raise RuntimeError("OpenCL device not found") 202 203 return cl.Context([default]) 204 208 205 209 206 class GpuModel(object): … … 233 230 self.__dict__ = state.copy() 234 231 235 def __call__(self, input): 236 # Support pure python kernel call 237 if input.is_2D and callable(self.info['Iqxy']): 238 return PyKernel(self.info['Iqxy'], self.info, input) 239 elif not input.is_2D and callable(self.info['Iq']): 240 return PyKernel(self.info['Iq'], self.info, input) 241 242 if self.dtype != input.dtype: 232 def __call__(self, input_value): 233 if self.dtype != input_value.dtype: 243 234 raise TypeError("data and kernel have different types") 244 235 if self.program is None: 245 self.program = environment().compile_program(self.info['name'],self.source, self.dtype) 246 kernel_name = generate.kernel_name(self.info, input.is_2D) 236 compiler = environment().compile_program 237 
self.program = compiler(self.info['name'], self.source, self.dtype) 238 kernel_name = generate.kernel_name(self.info, input_value.is_2D) 247 239 kernel = getattr(self.program, kernel_name) 248 return GpuKernel(kernel, self.info, input )240 return GpuKernel(kernel, self.info, input_value) 249 241 250 242 def release(self): … … 261 253 ctypes and some may be pure python. 262 254 """ 263 # Support pure python kernel call 264 if len(q_vectors) == 1 and callable(self.info['Iq']): 265 return PyInput(q_vectors, dtype=self.dtype) 266 elif callable(self.info['Iqxy']): 267 return PyInput(q_vectors, dtype=self.dtype) 268 else: 269 return GpuInput(q_vectors, dtype=self.dtype) 255 return GpuInput(q_vectors, dtype=self.dtype) 270 256 271 257 # TODO: check that we don't need a destructor for buffers which go out of scope … … 300 286 self.q_vectors = [_stretch_input(q, self.dtype, 32) for q in q_vectors] 301 287 self.q_buffers = [ 302 cl.Buffer(env.context, 288 cl.Buffer(env.context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=q) 303 289 for q in self.q_vectors 304 290 ] … … 318 304 *info* is the module information 319 305 320 * input* is the DllInput q vectors at which the kernel should be306 *q_input* is the DllInput q vectors at which the kernel should be 321 307 evaluated. 322 308 … … 329 315 Call :meth:`release` when done with the kernel instance. 330 316 """ 331 def __init__(self, kernel, info, input):332 self. 
input =input317 def __init__(self, kernel, info, q_input): 318 self.q_input = q_input 333 319 self.kernel = kernel 334 320 self.info = info 335 self.res = np.empty( input.nq,input.dtype)336 dim = '2d' if input.is_2D else '1d'337 self.fixed_pars = info['partype']['fixed-' +dim]338 self.pd_pars = info['partype']['pd-' +dim]321 self.res = np.empty(q_input.nq, q_input.dtype) 322 dim = '2d' if q_input.is_2D else '1d' 323 self.fixed_pars = info['partype']['fixed-' + dim] 324 self.pd_pars = info['partype']['pd-' + dim] 339 325 340 326 # Inputs and outputs for each kernel call … … 342 328 env = environment() 343 329 self.loops_b = [cl.Buffer(env.context, mf.READ_WRITE, 344 2 *MAX_LOOPS*input.dtype.itemsize)330 2 * MAX_LOOPS * q_input.dtype.itemsize) 345 331 for _ in env.queues] 346 332 self.res_b = [cl.Buffer(env.context, mf.READ_WRITE, 347 input.global_size[0]*input.dtype.itemsize)333 q_input.global_size[0] * q_input.dtype.itemsize) 348 334 for _ in env.queues] 349 335 350 336 351 def __call__(self, pars, pd_pars, cutoff=1e-5): 352 real = np.float32 if self.input.dtype == generate.F32 else np.float64 353 fixed = [real(p) for p in pars] 354 cutoff = real(cutoff) 355 loops = np.hstack(pd_pars) 356 loops = np.ascontiguousarray(loops.T, self.input.dtype).flatten() 357 Nloops = [np.uint32(len(p[0])) for p in pd_pars] 358 #print "loops",Nloops, loops 359 360 #import sys; print >>sys.stderr,"opencl eval",pars 361 #print "opencl eval",pars 362 if len(loops) > 2*MAX_LOOPS: 363 raise ValueError("too many polydispersity points") 337 def __call__(self, fixed_pars, pd_pars, cutoff=1e-5): 338 real = np.float32 if self.q_input.dtype == generate.F32 else np.float64 339 364 340 device_num = 0 341 queuei = environment().queues[device_num] 365 342 res_bi = self.res_b[device_num] 366 queuei = environment().queues[device_num] 367 loops_bi = self.loops_b[device_num] 368 loops_l = cl.LocalMemory(len(loops.data)) 369 cl.enqueue_copy(queuei, loops_bi, loops) 370 #ctx = environment().context 371 
#loops_bi = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=loops) 372 args = self.input.q_buffers + [res_bi,loops_bi,loops_l,cutoff] + fixed + Nloops 373 self.kernel(queuei, self.input.global_size, None, *args) 343 nq = np.uint32(self.q_input.nq) 344 if pd_pars: 345 cutoff = real(cutoff) 346 loops_N = [np.uint32(len(p[0])) for p in pd_pars] 347 loops = np.hstack(pd_pars) \ 348 if pd_pars else np.empty(0, dtype=self.q_input.dtype) 349 loops = np.ascontiguousarray(loops.T, self.q_input.dtype).flatten() 350 #print "loops",Nloops, loops 351 352 #import sys; print >>sys.stderr,"opencl eval",pars 353 #print "opencl eval",pars 354 if len(loops) > 2 * MAX_LOOPS: 355 raise ValueError("too many polydispersity points") 356 357 loops_bi = self.loops_b[device_num] 358 cl.enqueue_copy(queuei, loops_bi, loops) 359 loops_l = cl.LocalMemory(len(loops.data)) 360 #ctx = environment().context 361 #loops_bi = cl.Buffer(ctx, mf.READ_ONLY|mf.COPY_HOST_PTR, hostbuf=loops) 362 dispersed = [loops_bi, loops_l, cutoff] + loops_N 363 else: 364 dispersed = [] 365 fixed = [real(p) for p in fixed_pars] 366 args = self.q_input.q_buffers + [res_bi, nq] + dispersed + fixed 367 self.kernel(queuei, self.q_input.global_size, None, *args) 374 368 cl.enqueue_copy(queuei, self.res, res_bi) 375 369
Note: See TracChangeset
for help on using the changeset viewer.