Changeset 3199b17 in sasmodels for sasmodels/kerneldll.py
- Timestamp:
- Mar 6, 2019 2:24:03 PM (5 years ago)
- Branches:
- master, core_shell_microgels, magnetic_model, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
- Children:
- 4453136
- Parents:
- 00afc15
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
sasmodels/kerneldll.py
re44432d r3199b17 100 100 # pylint: enable=unused-import 101 101 102 # Compiler output is a byte stream that needs to be decode in python 3 102 # Compiler output is a byte stream that needs to be decode in python 3. 103 103 decode = (lambda s: s) if sys.version_info[0] < 3 else (lambda s: s.decode('utf8')) 104 104 … … 115 115 COMPILER = "tinycc" 116 116 elif "VCINSTALLDIR" in os.environ: 117 # If vcvarsall.bat has been called, then VCINSTALLDIR is in the environment 118 # and we can use the MSVC compiler. Otherwise, if tinycc is available 119 # the use it. Otherwise, hope that mingw is available. 117 # If vcvarsall.bat has been called, then VCINSTALLDIR is in the 118 # environment and we can use the MSVC compiler. Otherwise, if 119 # tinycc is available then use it. Otherwise, hope that mingw 120 # is available. 120 121 COMPILER = "msvc" 121 122 else: … … 124 125 COMPILER = "unix" 125 126 126 ARCH = "" if ct.sizeof(ct.c_void_p) > 4 else "x86" # 4 byte pointers on x86 127 ARCH = "" if ct.sizeof(ct.c_void_p) > 4 else "x86" # 4 byte pointers on x86. 127 128 if COMPILER == "unix": 128 # Generic unix compile 129 # On mac users will need the X code command line tools installed 129 # Generic unix compile. 130 # On Mac users will need the X code command line tools installed. 130 131 #COMPILE = "gcc-mp-4.7 -shared -fPIC -std=c99 -fopenmp -O2 -Wall %s -o %s -lm -lgomp" 131 132 CC = "cc -shared -fPIC -std=c99 -O2 -Wall".split() 132 # add openmp support if not running on a mac 133 # Add OpenMP support if not running on a Mac. 133 134 if sys.platform != "darwin": 134 # OpenMP seems to be broken on gcc 5.4.0 (ubuntu 16.04.9) 135 # OpenMP seems to be broken on gcc 5.4.0 (ubuntu 16.04.9). 135 136 # Shut it off for all unix until we can investigate. 136 137 #CC.append("-fopenmp") … … 144 145 # vcomp90.dll on the path.
One may be found here: 145 146 # C:/Windows/winsxs/x86_microsoft.vc90.openmp*/vcomp90.dll 146 # Copy this to the python directory and uncomment the OpenMP COMPILE 147 # TODO: remove intermediate OBJ file created in the directory 148 # TODO: maybe don't use randomized name for the c file 149 # TODO: maybe ask distutils to find MSVC 147 # Copy this to the python directory and uncomment the OpenMP COMPILE. 148 # TODO: Remove intermediate OBJ file created in the directory. 149 # TODO: Maybe don't use randomized name for the c file. 150 # TODO: Maybe ask distutils to find MSVC. 150 151 CC = "cl /nologo /Ox /MD /W3 /GS- /DNDEBUG".split() 151 152 if "SAS_OPENMP" in os.environ: … … 172 173 ALLOW_SINGLE_PRECISION_DLLS = True 173 174 175 174 176 def compile(source, output): 175 177 # type: (str, str) -> None … … 183 185 logging.info(command_str) 184 186 try: 185 # need shell=True on windows to keep console box from popping up 187 # Need shell=True on windows to keep console box from popping up. 186 188 shell = (os.name == 'nt') 187 189 subprocess.check_output(command, shell=shell, stderr=subprocess.STDOUT) … … 192 194 raise RuntimeError("compile failed. File is in %r"%source) 193 195 196 194 197 def dll_name(model_info, dtype): 195 198 # type: (ModelInfo, np.dtype) -> str … … 202 205 basename += ARCH + ".so" 203 206 204 # Hack to find precompiled dlls 207 # Hack to find precompiled dlls. 205 208 path = joinpath(generate.DATA_PATH, '..', 'compiled_models', basename) 206 209 if os.path.exists(path): … … 242 245 raise ValueError("16 bit floats not supported") 243 246 if dtype == F32 and not ALLOW_SINGLE_PRECISION_DLLS: 244 dtype = F64 # Force 64-bit dll 245 # Note: dtype may be F128 for long double precision 247 dtype = F64 # Force 64-bit dll. 248 # Note: dtype may be F128 for long double precision.
246 249 247 250 dll = dll_path(model_info, dtype) … … 254 257 need_recompile = dll_time < newest_source 255 258 if need_recompile: 256 # Make sure the DLL path exists 259 # Make sure the DLL path exists. 257 260 if not os.path.exists(SAS_DLL_PATH): 258 261 os.makedirs(SAS_DLL_PATH) … … 263 266 file_handle.write(source) 264 267 compile(source=filename, output=dll) 265 # comment the following to keep the generated c file 266 # Note: if there is a syntax error then compile raises an error 268 # Comment the following to keep the generated C file. 269 # Note: If there is a syntax error then compile raises an error 267 270 # and the source file will not be deleted. 268 271 os.unlink(filename) … … 303 306 self.dllpath = dllpath 304 307 self._dll = None # type: ct.CDLL 305 self._kernels = None # type: List[Callable, Callable] 308 self._kernels = None # type: List[Callable, Callable] 306 309 self.dtype = np.dtype(dtype) 307 310 … … 338 341 # type: (List[np.ndarray]) -> DllKernel 339 342 q_input = PyInput(q_vectors, self.dtype) 340 # Note: pickle not supported for DllKernel 343 # Note: DLL is lazy loaded. 341 344 if self._dll is None: 342 345 self._load_dll() … … 358 361 self._dll = None 359 362 363 360 364 class DllKernel(Kernel): 361 365 """ … … 379 383 def __init__(self, kernel, model_info, q_input): 380 384 # type: (Callable[[], np.ndarray], ModelInfo, PyInput) -> None 381 #,model_info,q_input) 385 dtype = q_input.dtype 386 self.q_input = q_input 382 387 self.kernel = kernel 388 389 # Attributes accessed from the outside. 390 self.dim = '2d' if q_input.is_2d else '1d' 383 391 self.info = model_info 384 self.q_input = q_input 385 self.dtype = q_input.dtype 386 self.dim = '2d' if q_input.is_2d else '1d' 387 # leave room for f1/f2 results in case we need to compute beta for 1d models 392 self.dtype = dtype 393 394 # Converter to translate input to target type.
395 self._as_dtype = (np.float32 if dtype == generate.F32 396 else np.float64 if dtype == generate.F64 397 else np.float128) 398 399 # Holding place for the returned value. 388 400 nout = 2 if self.info.have_Fq else 1 389 # +4 for total weight, shell volume, effective radius, form volume 390 self.result = np.empty(q_input.nq*nout + 4, self.dtype) 391 self.real = (np.float32 if self.q_input.dtype == generate.F32 392 else np.float64 if self.q_input.dtype == generate.F64 393 else np.float128) 394 395 def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): 396 # type: (CallDetails, np.ndarray, np.ndarray, float, bool, int) -> np.ndarray 401 extra_q = 4 # Total weight, form volume, shell volume and R_eff. 402 self.result = np.empty(self.q_input.nq*nout + extra_q, dtype) 403 404 def _call_kernel(self, call_details, values, cutoff, magnetic, 405 effective_radius_type): 406 # type: (CallDetails, np.ndarray, float, bool, int) -> np.ndarray 407 408 # Setup kernel function and arguments. 397 409 kernel = self.kernel[1 if magnetic else 0] 398 args = [ 399 self.q_input.nq, # nq 400 None, # pd_start 401 None, # pd_stop pd_stride[MAX_PD] 402 call_details.buffer.ctypes.data, # problem 403 values.ctypes.data, # pars 404 self.q_input.q.ctypes.data, # q 405 self.result.ctypes.data, # results 406 self.real(cutoff), # cutoff 407 effective_radius_type, # cutoff 410 kernel_args = [ 411 self.q_input.nq, # Number of inputs. 412 None, # Placeholder for pd_start. 413 None, # Placeholder for pd_stop. 414 call_details.buffer.ctypes.data, # Problem definition. 415 values.ctypes.data, # Parameter values. 416 self.q_input.q.ctypes.data, # Q values. 417 self.result.ctypes.data, # Result storage. 418 self._as_dtype(cutoff), # Probability cutoff. 419 effective_radius_type, # R_eff mode. 408 420 ] 421 422 # Call kernel and retrieve results.
409 423 #print("Calling DLL") 410 424 #call_details.show(values) 411 425 step = 100 426 # TODO: Do we need the explicit sleep like the OpenCL and CUDA loops? 412 427 for start in range(0, call_details.num_eval, step): 413 428 stop = min(start + step, call_details.num_eval) 414 args[1:3] = [start, stop] 415 kernel(*args) # type: ignore 429 kernel_args[1:3] = [start, stop] 430 kernel(*kernel_args) # type: ignore 416 431 417 432 def release(self): 418 433 # type: () -> None 419 434 """ 420 Release any resources associated with the kernel. 435 Release resources associated with the kernel. 421 436 """ 422 self.q_input.release() 437 # TODO: OpenCL/CUDA allocate q_input in __init__ and free it in release. 438 # Should we be doing the same for DLL? 439 #self.q_input.release() 440 pass 441 442 def __del__(self): 443 # type: () -> None 444 self.release()
Note: See TracChangeset
for help on using the changeset viewer.