Changes in sasmodels/kerneldll.py [3199b17:e44432d] in sasmodels
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
sasmodels/kerneldll.py
r3199b17 re44432d 100 100 # pylint: enable=unused-import 101 101 102 # Compiler output is a byte stream that needs to be decode in python 3 .102 # Compiler output is a byte stream that needs to be decode in python 3 103 103 decode = (lambda s: s) if sys.version_info[0] < 3 else (lambda s: s.decode('utf8')) 104 104 … … 115 115 COMPILER = "tinycc" 116 116 elif "VCINSTALLDIR" in os.environ: 117 # If vcvarsall.bat has been called, then VCINSTALLDIR is in the 118 # environment and we can use the MSVC compiler. Otherwise, if 119 # tinycc is available then use it. Otherwise, hope that mingw 120 # is available. 117 # If vcvarsall.bat has been called, then VCINSTALLDIR is in the environment 118 # and we can use the MSVC compiler. Otherwise, if tinycc is available 119 # the use it. Otherwise, hope that mingw is available. 121 120 COMPILER = "msvc" 122 121 else: … … 125 124 COMPILER = "unix" 126 125 127 ARCH = "" if ct.sizeof(ct.c_void_p) > 4 else "x86" # 4 byte pointers on x86 .126 ARCH = "" if ct.sizeof(ct.c_void_p) > 4 else "x86" # 4 byte pointers on x86 128 127 if COMPILER == "unix": 129 # Generic unix compile .130 # On Mac users will need the X code command line tools installed.128 # Generic unix compile 129 # On mac users will need the X code command line tools installed 131 130 #COMPILE = "gcc-mp-4.7 -shared -fPIC -std=c99 -fopenmp -O2 -Wall %s -o %s -lm -lgomp" 132 131 CC = "cc -shared -fPIC -std=c99 -O2 -Wall".split() 133 # Add OpenMP support if not running on a Mac.132 # add openmp support if not running on a mac 134 133 if sys.platform != "darwin": 135 # OpenMP seems to be broken on gcc 5.4.0 (ubuntu 16.04.9) .134 # OpenMP seems to be broken on gcc 5.4.0 (ubuntu 16.04.9) 136 135 # Shut it off for all unix until we can investigate. 137 136 #CC.append("-fopenmp") … … 145 144 # vcomp90.dll on the path. 
One may be found here: 146 145 # C:/Windows/winsxs/x86_microsoft.vc90.openmp*/vcomp90.dll 147 # Copy this to the python directory and uncomment the OpenMP COMPILE .148 # TODO: Remove intermediate OBJ file created in the directory.149 # TODO: Maybe don't use randomized name for the c file.150 # TODO: Maybe ask distutils to find MSVC.146 # Copy this to the python directory and uncomment the OpenMP COMPILE 147 # TODO: remove intermediate OBJ file created in the directory 148 # TODO: maybe don't use randomized name for the c file 149 # TODO: maybe ask distutils to find MSVC 151 150 CC = "cl /nologo /Ox /MD /W3 /GS- /DNDEBUG".split() 152 151 if "SAS_OPENMP" in os.environ: … … 173 172 ALLOW_SINGLE_PRECISION_DLLS = True 174 173 175 176 174 def compile(source, output): 177 175 # type: (str, str) -> None … … 185 183 logging.info(command_str) 186 184 try: 187 # Need shell=True on windows to keep console box from popping up.185 # need shell=True on windows to keep console box from popping up 188 186 shell = (os.name == 'nt') 189 187 subprocess.check_output(command, shell=shell, stderr=subprocess.STDOUT) … … 194 192 raise RuntimeError("compile failed. 
File is in %r"%source) 195 193 196 197 194 def dll_name(model_info, dtype): 198 195 # type: (ModelInfo, np.dtype) -> str … … 205 202 basename += ARCH + ".so" 206 203 207 # Hack to find precompiled dlls .204 # Hack to find precompiled dlls 208 205 path = joinpath(generate.DATA_PATH, '..', 'compiled_models', basename) 209 206 if os.path.exists(path): … … 245 242 raise ValueError("16 bit floats not supported") 246 243 if dtype == F32 and not ALLOW_SINGLE_PRECISION_DLLS: 247 dtype = F64 # Force 64-bit dll .248 # Note: dtype may be F128 for long double precision .244 dtype = F64 # Force 64-bit dll 245 # Note: dtype may be F128 for long double precision 249 246 250 247 dll = dll_path(model_info, dtype) … … 257 254 need_recompile = dll_time < newest_source 258 255 if need_recompile: 259 # Make sure the DLL path exists .256 # Make sure the DLL path exists 260 257 if not os.path.exists(SAS_DLL_PATH): 261 258 os.makedirs(SAS_DLL_PATH) … … 266 263 file_handle.write(source) 267 264 compile(source=filename, output=dll) 268 # Comment the following to keep the generated C file.269 # Note: If there is a syntax error then compile raises an error265 # comment the following to keep the generated c file 266 # Note: if there is a syntax error then compile raises an error 270 267 # and the source file will not be deleted. 
271 268 os.unlink(filename) … … 306 303 self.dllpath = dllpath 307 304 self._dll = None # type: ct.CDLL 308 self._kernels = None 305 self._kernels = None # type: List[Callable, Callable] 309 306 self.dtype = np.dtype(dtype) 310 307 … … 341 338 # type: (List[np.ndarray]) -> DllKernel 342 339 q_input = PyInput(q_vectors, self.dtype) 343 # Note: DLL is lazy loaded.340 # Note: pickle not supported for DllKernel 344 341 if self._dll is None: 345 342 self._load_dll() … … 361 358 self._dll = None 362 359 363 364 360 class DllKernel(Kernel): 365 361 """ … … 383 379 def __init__(self, kernel, model_info, q_input): 384 380 # type: (Callable[[], np.ndarray], ModelInfo, PyInput) -> None 385 dtype = q_input.dtype 381 #,model_info,q_input) 382 self.kernel = kernel 383 self.info = model_info 386 384 self.q_input = q_input 387 self.kernel = kernel 388 389 # Attributes accessed from the outside. 385 self.dtype = q_input.dtype 390 386 self.dim = '2d' if q_input.is_2d else '1d' 391 self.info = model_info 392 self.dtype = dtype 393 394 # Converter to translate input to target type. 395 self._as_dtype = (np.float32 if dtype == generate.F32 396 else np.float64 if dtype == generate.F64 397 else np.float128) 398 399 # Holding place for the returned value. 
387 # leave room for f1/f2 results in case we need to compute beta for 1d models 400 388 nout = 2 if self.info.have_Fq else 1 401 extra_q = 4 # Total weight, form volume, shell volume and R_eff.402 self.result = np.empty( self.q_input.nq*nout + extra_q,dtype)403 404 def _call_kernel(self, call_details, values, cutoff, magnetic,405 e ffective_radius_type):406 # type: (CallDetails, np.ndarray, float, bool, int) -> np.ndarray 407 408 # Setup kernel function and arguments.389 # +4 for total weight, shell volume, effective radius, form volume 390 self.result = np.empty(q_input.nq*nout + 4, self.dtype) 391 self.real = (np.float32 if self.q_input.dtype == generate.F32 392 else np.float64 if self.q_input.dtype == generate.F64 393 else np.float128) 394 395 def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): 396 # type: (CallDetails, np.ndarray, np.ndarray, float, bool, int) -> np.ndarray 409 397 kernel = self.kernel[1 if magnetic else 0] 410 kernel_args = [411 self.q_input.nq, # Number of inputs.412 None, # Placeholder for pd_start.413 None, # Placeholder for pd_stop.414 call_details.buffer.ctypes.data, # Problem definition.415 values.ctypes.data, # Parameter values.416 self.q_input.q.ctypes.data, # Q values.417 self.result.ctypes.data, # Result storage.418 self. 
_as_dtype(cutoff), # Probability cutoff.419 effective_radius_type, # R_eff mode.398 args = [ 399 self.q_input.nq, # nq 400 None, # pd_start 401 None, # pd_stop pd_stride[MAX_PD] 402 call_details.buffer.ctypes.data, # problem 403 values.ctypes.data, # pars 404 self.q_input.q.ctypes.data, # q 405 self.result.ctypes.data, # results 406 self.real(cutoff), # cutoff 407 effective_radius_type, # cutoff 420 408 ] 421 422 # Call kernel and retrieve results.423 409 #print("Calling DLL") 424 410 #call_details.show(values) 425 411 step = 100 426 # TODO: Do we need the explicit sleep like the OpenCL and CUDA loops?427 412 for start in range(0, call_details.num_eval, step): 428 413 stop = min(start + step, call_details.num_eval) 429 kernel_args[1:3] = [start, stop]430 kernel(* kernel_args) # type: ignore414 args[1:3] = [start, stop] 415 kernel(*args) # type: ignore 431 416 432 417 def release(self): 433 418 # type: () -> None 434 419 """ 435 Release resources associated with the kernel.420 Release any resources associated with the kernel. 436 421 """ 437 # TODO: OpenCL/CUDA allocate q_input in __init__ and free it in release. 438 # Should we be doing the same for DLL? 439 #self.q_input.release() 440 pass 441 442 def __del__(self): 443 # type: () -> None 444 self.release() 422 self.q_input.release()
Note: See TracChangeset for help on using the changeset viewer.