Changeset eafc9fa in sasmodels for sasmodels/kernelcl.py
- Timestamp:
- Jan 30, 2016 7:39:59 PM (8 years ago)
- Branches:
- master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children:
- 5925e90
- Parents:
- 823e620
- File:
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
sasmodels/kernelcl.py
r823e620 reafc9fa 1 1 """ 2 GPU support through OpenCL2 GPU driver for C kernels 3 3 4 4 There should be a single GPU environment running on the system. This … … 152 152 153 153 154 def make_result(self, size):155 self.res = np.empty(size, dtype=self.dtype)156 self.res_b = cl.Buffer(self.program.context, mf.READ_WRITE, self.res.nbytes)157 return self.res, self.res_b158 159 160 154 # for now, this returns one device in the context 161 155 # TODO: create a context that contains all devices on all platforms … … 183 177 184 178 def has_type(self, dtype): 179 """ 180 Return True if all devices support a given type. 181 """ 185 182 dtype = generate.F32 if dtype == 'fast' else np.dtype(dtype) 186 183 return all(has_type(d, dtype) for d in self.context.devices) 187 184 188 185 def _create_some_context(self): 186 """ 187 Protected call to cl.create_some_context without interactivity. Use 188 this if PYOPENCL_CTX is set in the environment. Sets the *context* 189 attribute. 190 """ 189 191 try: 190 192 self.context = cl.create_some_context(interactive=False) … … 195 197 196 198 def compile_program(self, name, source, dtype, fast=False): 199 """ 200 Compile the program for the device in the given context. 201 """ 197 202 key = "%s-%s-%s"%(name, dtype, fast) 198 203 if key not in self.compiled: … … 204 209 205 210 def release_program(self, name): 211 """ 212 Free memory associated with the program on the device. 213 """ 206 214 if name in self.compiled: 207 215 self.compiled[name].release() … … 209 217 210 218 def _get_default_context(): 219 """ 220 Get an OpenCL context, preferring GPU over CPU. 
221 """ 211 222 default = None 212 223 for platform in cl.get_platforms(): … … 246 257 247 258 def __getstate__(self): 248 state = self.__dict__.copy() 249 state['program'] = None 250 return state 259 return self.info, self.source, self.dtype, self.fast 251 260 252 261 def __setstate__(self, state): 253 self.__dict__ = state.copy() 254 255 def __call__(self, q_input): 256 if self.dtype != q_input.dtype: 257 raise TypeError("data is %s kernel is %s" 258 % (q_input.dtype, self.dtype)) 262 self.info, self.source, self.dtype, self.fast = state 263 self.program = None 264 265 def __call__(self, q_vectors): 259 266 if self.program is None: 260 267 compiler = environment().compile_program 261 268 self.program = compiler(self.info['name'], self.source, self.dtype, 262 269 self.fast) 263 kernel_name = generate.kernel_name(self.info, q_input.is_2D) 270 is_2d = len(q_vectors) == 2 271 kernel_name = generate.kernel_name(self.info, is_2d) 264 272 kernel = getattr(self.program, kernel_name) 265 return GpuKernel(kernel, self.info, q_ input)273 return GpuKernel(kernel, self.info, q_vectors, self.dtype) 266 274 267 275 def release(self): 276 """ 277 Free the resources associated with the model. 278 """ 268 279 if self.program is not None: 269 280 environment().release_program(self.info['name']) 270 281 self.program = None 271 282 272 def make_input(self, q_vectors): 273 """ 274 Make q input vectors available to the model. 275 276 Note that each model needs its own q vector even if the case of 277 mixture models because some models may be OpenCL, some may be 278 ctypes and some may be pure python. 
279 """ 280 return GpuInput(q_vectors, dtype=self.dtype) 283 def __del__(self): 284 self.release() 281 285 282 286 # TODO: check that we don't need a destructor for buffers which go out of scope … … 304 308 self.nq = q_vectors[0].size 305 309 self.dtype = np.dtype(dtype) 306 self.is_2 D= (len(q_vectors) == 2)310 self.is_2d = (len(q_vectors) == 2) 307 311 # TODO: stretch input based on get_warp() 308 312 # not doing it now since warp depends on kernel, which is not known … … 317 321 318 322 def release(self): 323 """ 324 Free the memory. 325 """ 319 326 for b in self.q_buffers: 320 327 b.release() 321 328 self.q_buffers = [] 322 329 330 def __del__(self): 331 self.release() 332 323 333 class GpuKernel(object): 324 334 """ 325 335 Callable SAS kernel. 326 336 327 *kernel* is the GpuKernel object to call .337 *kernel* is the GpuKernel object to call 328 338 329 339 *info* is the module information 330 340 331 *q_input* is the DllInput q vectors at which the kernel should be 332 evaluated. 341 *q_vectors* is the q vectors at which the kernel should be evaluated 342 343 *dtype* is the kernel precision 333 344 334 345 The resulting call method takes the *pars*, a list of values for … … 340 351 Call :meth:`release` when done with the kernel instance. 
341 352 """ 342 def __init__(self, kernel, info, q_ input):343 self.q_input = q_input353 def __init__(self, kernel, info, q_vectors, dtype): 354 q_input = GpuInput(q_vectors, dtype) 344 355 self.kernel = kernel 345 356 self.info = info 346 357 self.res = np.empty(q_input.nq, q_input.dtype) 347 dim = '2d' if q_input.is_2 Delse '1d'358 dim = '2d' if q_input.is_2d else '1d' 348 359 self.fixed_pars = info['partype']['fixed-' + dim] 349 360 self.pd_pars = info['partype']['pd-' + dim] … … 358 369 q_input.global_size[0] * q_input.dtype.itemsize) 359 370 for _ in env.queues] 360 371 self.q_input = q_input 361 372 362 373 def __call__(self, fixed_pars, pd_pars, cutoff=1e-5): … … 399 410 400 411 def release(self): 412 """ 413 Release resources associated with the kernel. 414 """ 401 415 for b in self.loops_b: 402 416 b.release() … … 405 419 b.release() 406 420 self.res_b = [] 421 self.q_input.release() 407 422 408 423 def __del__(self):
Note: See TracChangeset for help on using the changeset viewer.