Changeset d18582e in sasmodels
- Timestamp:
- Feb 4, 2016 12:44:23 PM (9 years ago)
- Branches:
- master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children:
- 5054e80
- Parents:
- bb6f0f3
- Location:
- sasmodels
- Files:
-
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
sasmodels/core.py
reafc9fa rd18582e 73 73 return True 74 74 75 def load_model(model_definition, dtype= "single", platform="ocl"):75 def load_model(model_definition, dtype=None, platform="ocl"): 76 76 """ 77 77 Prepare the model for the default execution platform. … … 87 87 for the calculation. Any valid numpy single or double precision identifier 88 88 is valid, such as 'single', 'f', 'f32', or np.float32 for single, or 89 'double', 'd', 'f64' and np.float64 for double. 89 'double', 'd', 'f64' and np.float64 for double. If *None*, then use 90 'single' unless the model defines single=False. 90 91 91 92 *platform* should be "dll" to force the dll to be used for C models, … … 94 95 if isstr(model_definition): 95 96 model_definition = load_model_definition(model_definition) 97 if dtype is None: 98 dtype = 'single' if getattr(model_definition, 'single', True) else 'double' 96 99 source, info = generate.make(model_definition) 97 100 if callable(info.get('Iq', None)): -
sasmodels/data.py
r5c962df rd18582e 242 242 243 243 244 def empty_data1D(q, resolution=0.0 5):244 def empty_data1D(q, resolution=0.0): 245 245 """ 246 246 Create empty 1D data using the given *q* as the x value. … … 252 252 #dIq = np.sqrt(Iq) 253 253 Iq, dIq = None, None 254 q = np.asarray(q) 254 255 data = Data1D(q, Iq, dx=resolution * q, dy=dIq) 255 256 data.filename = "fake data" … … 257 258 258 259 259 def empty_data2D(qx, qy=None, resolution=0.0 5):260 def empty_data2D(qx, qy=None, resolution=0.0): 260 261 """ 261 262 Create empty 2D data using the given mesh. … … 267 268 if qy is None: 268 269 qy = qx 270 qx, qy = np.asarray(qx), np.asarray(qy) 269 271 # 5% dQ/Q resolution 270 272 Qx, Qy = np.meshgrid(qx, qy) -
sasmodels/direct_model.py
reafc9fa rd18582e 234 234 235 235 model_definition = load_model_definition(model_name) 236 model = load_model(model_definition , dtype='single')236 model = load_model(model_definition) 237 237 calculator = DirectModel(data, model) 238 238 pars = dict((k, float(v)) -
sasmodels/kernelcl.py
reafc9fa rd18582e 172 172 #self.data_boundary = max(d.min_data_type_align_size 173 173 # for d in self.context.devices) 174 self.queues = [cl.CommandQueue( self.context, d)175 for d in self.context.devices]174 self.queues = [cl.CommandQueue(context, context.devices[0]) 175 for context in self.context] 176 176 self.compiled = {} 177 177 … … 181 181 """ 182 182 dtype = generate.F32 if dtype == 'fast' else np.dtype(dtype) 183 return all(has_type(d, dtype) for d in self.context.devices) 183 return any(has_type(d, dtype) 184 for context in self.context 185 for d in context.devices) 186 187 def get_queue(self, dtype): 188 """ 189 Return a command queue for the kernels of type dtype. 190 """ 191 for context, queue in zip(self.context, self.queues): 192 if all(has_type(d, dtype) for d in context.devices): 193 return queue 194 195 def get_context(self, dtype): 196 """ 197 Return a OpenCL context for the kernels of type dtype. 198 """ 199 for context, queue in zip(self.context, self.queues): 200 if all(has_type(d, dtype) for d in context.devices): 201 return context 184 202 185 203 def _create_some_context(self): … … 190 208 """ 191 209 try: 192 self.context = cl.create_some_context(interactive=False)210 self.context = [cl.create_some_context(interactive=False)] 193 211 except Exception as exc: 194 212 warnings.warn(str(exc)) … … 204 222 #print("compiling",name) 205 223 dtype = np.dtype(dtype) 206 program = compile_model(self. context, source, dtype, fast)224 program = compile_model(self.get_context(dtype), source, dtype, fast) 207 225 self.compiled[key] = program 208 226 return self.compiled[key] … … 218 236 def _get_default_context(): 219 237 """ 220 Get an OpenCL context, preferring GPU over CPU. 221 """ 222 default = None 238 Get an OpenCL context, preferring GPU over CPU, and preferring Intel 239 drivers over AMD drivers. 
240 """ 241 # Note: on mobile devices there is automatic clock scaling if either the 242 # CPU or the GPU is underutilized; probably doesn't affect us, but if 243 # it did, it would mean that putting a busy loop on the CPU while the GPU 244 # is running may increase throughput. 245 # 246 # Macbook pro, base install: 247 # {'Apple': [Intel CPU, NVIDIA GPU]} 248 # Macbook pro, base install: 249 # {'Apple': [Intel CPU, Intel GPU]} 250 # 2 x nvidia 295 with Intel and NVIDIA opencl drivers installed 251 # {'Intel': [CPU], 'NVIDIA': [GPU, GPU, GPU, GPU]} 252 gpu, cpu = None, None 223 253 for platform in cl.get_platforms(): 224 254 for device in platform.get_devices(): 225 255 if device.type == cl.device_type.GPU: 226 return cl.Context([device])227 if default is None:228 default= device229 230 if not default:231 raise RuntimeError("OpenCL device not found")232 233 return cl.Context([default])256 gpu = device 257 else: 258 cpu = device 259 single = gpu if gpu is not None else cpu 260 double = gpu if has_type(gpu, np.dtype('double')) else cpu 261 single_context = cl.Context([single]) 262 double_context = cl.Context([double]) if single != double else single_context 263 return single_context, double_context 234 264 235 265 … … 314 344 # architectures tested so far. 
315 345 self.q_vectors = [_stretch_input(q, self.dtype, 32) for q in q_vectors] 346 context = env.get_context(self.dtype) 316 347 self.q_buffers = [ 317 cl.Buffer( env.context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=q)348 cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=q) 318 349 for q in self.q_vectors 319 350 ] … … 363 394 # Note: res may be shorter than res_b if global_size != nq 364 395 env = environment() 365 self.loops_b = [cl.Buffer(env.context, mf.READ_WRITE, 366 2 * MAX_LOOPS * q_input.dtype.itemsize) 367 for _ in env.queues] 368 self.res_b = [cl.Buffer(env.context, mf.READ_WRITE, 369 q_input.global_size[0] * q_input.dtype.itemsize) 370 for _ in env.queues] 396 self.queue = env.get_queue(dtype) 397 self.loops_b = cl.Buffer(self.queue.context, mf.READ_WRITE, 398 2 * MAX_LOOPS * q_input.dtype.itemsize) 399 self.res_b = cl.Buffer(self.queue.context, mf.READ_WRITE, 400 q_input.global_size[0] * q_input.dtype.itemsize) 371 401 self.q_input = q_input 402 403 self._need_release = [self.loops_b, self.res_b, self.q_input] 372 404 373 405 def __call__(self, fixed_pars, pd_pars, cutoff=1e-5): … … 377 409 else np.float32) # will never get here, so use np.float32 378 410 379 device_num = 0 380 queuei = environment().queues[device_num] 381 res_bi = self.res_b[device_num] 411 res_bi = self.res_b 382 412 nq = np.uint32(self.q_input.nq) 383 413 if pd_pars: … … 394 424 raise ValueError("too many polydispersity points") 395 425 396 loops_bi = self.loops_b [device_num]397 cl.enqueue_copy( queuei, loops_bi, loops)426 loops_bi = self.loops_b 427 cl.enqueue_copy(self.queue, loops_bi, loops) 398 428 loops_l = cl.LocalMemory(len(loops.data)) 399 429 #ctx = environment().context … … 404 434 fixed = [real(p) for p in fixed_pars] 405 435 args = self.q_input.q_buffers + [res_bi, nq] + dispersed + fixed 406 self.kernel( queuei, self.q_input.global_size, None, *args)407 cl.enqueue_copy( queuei, self.res, res_bi)436 self.kernel(self.queue, self.q_input.global_size, None, 
*args) 437 cl.enqueue_copy(self.queue, self.res, res_bi) 408 438 409 439 return self.res … … 413 443 Release resources associated with the kernel. 414 444 """ 415 for b in self.loops_b: 416 b.release() 417 self.loops_b = [] 418 for b in self.res_b: 419 b.release() 420 self.res_b = [] 421 self.q_input.release() 445 for v in self._need_release: 446 v.release() 447 self._need_release = [] 422 448 423 449 def __del__(self): -
sasmodels/models/pearl_necklace.py
rf12357f rd18582e 96 96 97 97 source = ["lib/Si.c", "pearl_necklace.c"] 98 # new flag to let the compiler know to never use single precision 99 single = False 98 single = False # use double precision unless told otherwise 100 99 101 100 def volume(radius, edge_separation, string_thickness, number_of_pearls):
Note: See TracChangeset
for help on using the changeset viewer.