Changeset 5d4777d in sasmodels for sasmodels/gpu.py
- Timestamp: Sep 1, 2014 11:24:38 PM (10 years ago)
- Branches: master, core_shell_microgels, costrafo411, magnetic_model, release_v0.94, release_v0.95, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children: f4cf580
- Parents: ff7119b
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
sasmodels/gpu.py
rff7119b r5d4777d
  25   25
  26   26   """
  27      - import warnings
  28      -
  29   27   import numpy as np
  30   28   import pyopencl as cl
   …    …
  32   30
  33   31   from . import gen
  34      -
  35      - from .gen import F32, F64
  36   32
  37   33   F32_DEFS = """\
   …    …
  52   48   # larger than necessary given that cost grows as npts^k where k is the number
  53   49   # of polydisperse parameters.
  54      - MAX_LOOPS = 1024
       50 + MAX_LOOPS = 2048
       51 +
       52 + def load_model(kernel_module, dtype="single"):
       53 +     """
       54 +     Load the OpenCL model defined by *kernel_module*.
       55 +
       56 +     Access to the OpenCL device is delayed until the kernel is called
       57 +     so models can be defined without using too many resources.
       58 +     """
       59 +     source, info = gen.make(kernel_module)
       60 +     ## for debugging, save source to a .cl file, edit it, and reload as model
       61 +     open(info['name']+'.cl','w').write(source)
       62 +     #source = open(info['name']+'.cl','r').read()
       63 +     return GpuModel(source, info, dtype)
  55   64
  56   65   ENV = None
   …    …
 103  112       """
 104  113       dtype = np.dtype(dtype)
 105      -     if dtype==F64 and not all(has_double(d) for d in context.devices):
      114 +     if dtype==gen.F64 and not all(has_double(d) for d in context.devices):
 106  115           raise RuntimeError("Double precision not supported for devices")
 107  116
 108      -     header = F64_DEFS if dtype == F64 else F32_DEFS
      117 +     header = F64_DEFS if dtype == gen.F64 else F32_DEFS
 109  118       # Note: USE_SINCOS makes the intel cpu slower under opencl
 110  119       if context.devices[0].type == cl.device_type.GPU:
   …    …
 158  167       is an optional extension which may not be available on all devices.
 159  168       """
 160      -     def __init__(self, source, info, dtype=F32):
      169 +     def __init__(self, source, info, dtype=gen.F32):
 161  170           self.info = info
 162  171           self.source = source
   …    …
 221  230       buffer will be released when the data object is freed.
 222  231       """
 223      -     def __init__(self, q_vectors, dtype=F32):
      232 +     def __init__(self, q_vectors, dtype=gen.F32):
 224  233           env = environment()
 225  234           self.nq = q_vectors[0].size
   …    …
 273  282           env = environment()
 274  283           self.loops_b = [cl.Buffer(env.context, mf.READ_WRITE,
 275      -                                   MAX_LOOPS*input.dtype.itemsize)
      284 +                                   2*MAX_LOOPS*input.dtype.itemsize)
 276  285                           for _ in env.queues]
 277  286           self.res_b = [cl.Buffer(env.context, mf.READ_WRITE,
   …    …
 281  290
 282  291       def __call__(self, pars, pd_pars, cutoff=1e-5):
 283      -         real = np.float32 if self.input.dtype == F32 else np.float64
      292 +         real = np.float32 if self.input.dtype == gen.F32 else np.float64
 284  293           fixed = [real(p) for p in pars]
 285  294           cutoff = real(cutoff)
 286  295           loops = np.hstack(pd_pars)
 287  296           loops = np.ascontiguousarray(loops.T, self.input.dtype).flatten()
 288      -         loops_N = [np.uint32(len(p[0])) for p in pd_pars]
      297 +         Nloops = [np.uint32(len(p[0])) for p in pd_pars]
      298 +         #print "loops",Nloops, loops
 289  299
 290  300           #import sys; print >>sys.stderr,"opencl eval",pars
 291  301           #print "opencl eval",pars
 292      -         if len(loops) > MAX_LOOPS:
      302 +         if len(loops) > 2*MAX_LOOPS:
 293  303               raise ValueError("too many polydispersity points")
 294  304           device_num = 0
   …    …
 300  310           #ctx = environment().context
 301  311           #loops_bi = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=loops)
 302      -         args = self.input.q_buffers + [res_bi,loops_bi,loops_l,cutoff] + fixed + loops_N
      312 +         args = self.input.q_buffers + [res_bi,loops_bi,loops_l,cutoff] + fixed + Nloops
 303  313           self.kernel(queuei, self.input.global_size, None, *args)
 304  314           cl.enqueue_copy(queuei, self.res, res_bi)
Note: See TracChangeset for help on using the changeset viewer.