Changeset 3199b17 in sasmodels for sasmodels/kerneldll.py


Ignore:
Timestamp:
Mar 6, 2019 2:24:03 PM (5 years ago)
Author:
Paul Kienzle <pkienzle@…>
Branches:
master, core_shell_microgels, magnetic_model, ticket-1257-vesicle-product, ticket_1156, ticket_1265_superball, ticket_822_more_unit_tests
Children:
4453136
Parents:
00afc15
Message:

PEP 8 changes and improved consistency between OpenCL/CUDA/DLL code

File:
1 edited

Legend:

Unmodified
Added
Removed
  • sasmodels/kerneldll.py

    re44432d r3199b17  
    100100# pylint: enable=unused-import 
    101101 
    102 # Compiler output is a byte stream that needs to be decode in python 3 
     102# Compiler output is a byte stream that needs to be decoded in Python 3. 
    103103decode = (lambda s: s) if sys.version_info[0] < 3 else (lambda s: s.decode('utf8')) 
    104104 
     
    115115        COMPILER = "tinycc" 
    116116    elif "VCINSTALLDIR" in os.environ: 
    117         # If vcvarsall.bat has been called, then VCINSTALLDIR is in the environment 
    118         # and we can use the MSVC compiler.  Otherwise, if tinycc is available 
    119         # the use it.  Otherwise, hope that mingw is available. 
     117        # If vcvarsall.bat has been called, then VCINSTALLDIR is in the 
     118        # environment and we can use the MSVC compiler.  Otherwise, if 
     119        # tinycc is available then use it.  Otherwise, hope that mingw 
     120        # is available. 
    120121        COMPILER = "msvc" 
    121122    else: 
     
    124125    COMPILER = "unix" 
    125126 
    126 ARCH = "" if ct.sizeof(ct.c_void_p) > 4 else "x86"  # 4 byte pointers on x86 
     127ARCH = "" if ct.sizeof(ct.c_void_p) > 4 else "x86"  # 4 byte pointers on x86. 
    127128if COMPILER == "unix": 
    128     # Generic unix compile 
    129     # On mac users will need the X code command line tools installed 
     129    # Generic unix compile. 
     130    # On Mac, users will need the Xcode command line tools installed. 
    130131    #COMPILE = "gcc-mp-4.7 -shared -fPIC -std=c99 -fopenmp -O2 -Wall %s -o %s -lm -lgomp" 
    131132    CC = "cc -shared -fPIC -std=c99 -O2 -Wall".split() 
    132     # add openmp support if not running on a mac 
     133    # Add OpenMP support if not running on a Mac. 
    133134    if sys.platform != "darwin": 
    134         # OpenMP seems to be broken on gcc 5.4.0 (ubuntu 16.04.9) 
     135        # OpenMP seems to be broken on gcc 5.4.0 (ubuntu 16.04.9). 
    135136        # Shut it off for all unix until we can investigate. 
    136137        #CC.append("-fopenmp") 
     
    144145    # vcomp90.dll on the path.  One may be found here: 
    145146    #       C:/Windows/winsxs/x86_microsoft.vc90.openmp*/vcomp90.dll 
    146     # Copy this to the python directory and uncomment the OpenMP COMPILE 
    147     # TODO: remove intermediate OBJ file created in the directory 
    148     # TODO: maybe don't use randomized name for the c file 
    149     # TODO: maybe ask distutils to find MSVC 
     147    # Copy this to the python directory and uncomment the OpenMP COMPILE. 
     148    # TODO: Remove intermediate OBJ file created in the directory. 
     149    # TODO: Maybe don't use randomized name for the c file. 
     150    # TODO: Maybe ask distutils to find MSVC. 
    150151    CC = "cl /nologo /Ox /MD /W3 /GS- /DNDEBUG".split() 
    151152    if "SAS_OPENMP" in os.environ: 
     
    172173ALLOW_SINGLE_PRECISION_DLLS = True 
    173174 
     175 
    174176def compile(source, output): 
    175177    # type: (str, str) -> None 
     
    183185    logging.info(command_str) 
    184186    try: 
    185         # need shell=True on windows to keep console box from popping up 
     187        # Need shell=True on windows to keep console box from popping up. 
    186188        shell = (os.name == 'nt') 
    187189        subprocess.check_output(command, shell=shell, stderr=subprocess.STDOUT) 
     
    192194        raise RuntimeError("compile failed.  File is in %r"%source) 
    193195 
     196 
    194197def dll_name(model_info, dtype): 
    195198    # type: (ModelInfo, np.dtype) ->  str 
     
    202205    basename += ARCH + ".so" 
    203206 
    204     # Hack to find precompiled dlls 
     207    # Hack to find precompiled dlls. 
    205208    path = joinpath(generate.DATA_PATH, '..', 'compiled_models', basename) 
    206209    if os.path.exists(path): 
     
    242245        raise ValueError("16 bit floats not supported") 
    243246    if dtype == F32 and not ALLOW_SINGLE_PRECISION_DLLS: 
    244         dtype = F64  # Force 64-bit dll 
    245     # Note: dtype may be F128 for long double precision 
     247        dtype = F64  # Force 64-bit dll. 
     248    # Note: dtype may be F128 for long double precision. 
    246249 
    247250    dll = dll_path(model_info, dtype) 
     
    254257        need_recompile = dll_time < newest_source 
    255258    if need_recompile: 
    256         # Make sure the DLL path exists 
     259        # Make sure the DLL path exists. 
    257260        if not os.path.exists(SAS_DLL_PATH): 
    258261            os.makedirs(SAS_DLL_PATH) 
     
    263266            file_handle.write(source) 
    264267        compile(source=filename, output=dll) 
    265         # comment the following to keep the generated c file 
    266         # Note: if there is a syntax error then compile raises an error 
     268        # Comment the following to keep the generated C file. 
     269        # Note: If there is a syntax error then compile raises an error 
    267270        # and the source file will not be deleted. 
    268271        os.unlink(filename) 
     
    303306        self.dllpath = dllpath 
    304307        self._dll = None  # type: ct.CDLL 
    305         self._kernels = None # type: List[Callable, Callable] 
     308        self._kernels = None  # type: List[Callable, Callable] 
    306309        self.dtype = np.dtype(dtype) 
    307310 
     
    338341        # type: (List[np.ndarray]) -> DllKernel 
    339342        q_input = PyInput(q_vectors, self.dtype) 
    340         # Note: pickle not supported for DllKernel 
     343        # Note: DLL is lazy loaded. 
    341344        if self._dll is None: 
    342345            self._load_dll() 
     
    358361        self._dll = None 
    359362 
     363 
    360364class DllKernel(Kernel): 
    361365    """ 
     
    379383    def __init__(self, kernel, model_info, q_input): 
    380384        # type: (Callable[[], np.ndarray], ModelInfo, PyInput) -> None 
    381         #,model_info,q_input) 
     385        dtype = q_input.dtype 
     386        self.q_input = q_input 
    382387        self.kernel = kernel 
     388 
     389        # Attributes accessed from the outside. 
     390        self.dim = '2d' if q_input.is_2d else '1d' 
    383391        self.info = model_info 
    384         self.q_input = q_input 
    385         self.dtype = q_input.dtype 
    386         self.dim = '2d' if q_input.is_2d else '1d' 
    387         # leave room for f1/f2 results in case we need to compute beta for 1d models 
     392        self.dtype = dtype 
     393 
     394        # Converter to translate input to target type. 
     395        self._as_dtype = (np.float32 if dtype == generate.F32 
     396                          else np.float64 if dtype == generate.F64 
     397                          else np.float128) 
     398 
     399        # Holding place for the returned value. 
    388400        nout = 2 if self.info.have_Fq else 1 
    389         # +4 for total weight, shell volume, effective radius, form volume 
    390         self.result = np.empty(q_input.nq*nout + 4, self.dtype) 
    391         self.real = (np.float32 if self.q_input.dtype == generate.F32 
    392                      else np.float64 if self.q_input.dtype == generate.F64 
    393                      else np.float128) 
    394  
    395     def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): 
    396         # type: (CallDetails, np.ndarray, np.ndarray, float, bool, int) -> np.ndarray 
     401        extra_q = 4  # Total weight, form volume, shell volume and R_eff. 
     402        self.result = np.empty(self.q_input.nq*nout + extra_q, dtype) 
     403 
     404    def _call_kernel(self, call_details, values, cutoff, magnetic, 
     405                     effective_radius_type): 
     406        # type: (CallDetails, np.ndarray, float, bool, int) -> np.ndarray 
     407 
     408        # Setup kernel function and arguments. 
    397409        kernel = self.kernel[1 if magnetic else 0] 
    398         args = [ 
    399             self.q_input.nq, # nq 
    400             None, # pd_start 
    401             None, # pd_stop pd_stride[MAX_PD] 
    402             call_details.buffer.ctypes.data, # problem 
    403             values.ctypes.data,  # pars 
    404             self.q_input.q.ctypes.data, # q 
    405             self.result.ctypes.data,   # results 
    406             self.real(cutoff), # cutoff 
    407             effective_radius_type, # cutoff 
     410        kernel_args = [ 
     411            self.q_input.nq,  # Number of inputs. 
     412            None,  # Placeholder for pd_start. 
     413            None,  # Placeholder for pd_stop. 
     414            call_details.buffer.ctypes.data,  # Problem definition. 
     415            values.ctypes.data,  # Parameter values. 
     416            self.q_input.q.ctypes.data,  # Q values. 
     417            self.result.ctypes.data,   # Result storage. 
     418            self._as_dtype(cutoff),  # Probability cutoff. 
     419            effective_radius_type,  # R_eff mode. 
    408420        ] 
     421 
     422        # Call kernel and retrieve results. 
    409423        #print("Calling DLL") 
    410424        #call_details.show(values) 
    411425        step = 100 
     426        # TODO: Do we need the explicit sleep like the OpenCL and CUDA loops? 
    412427        for start in range(0, call_details.num_eval, step): 
    413428            stop = min(start + step, call_details.num_eval) 
    414             args[1:3] = [start, stop] 
    415             kernel(*args) # type: ignore 
     429            kernel_args[1:3] = [start, stop] 
     430            kernel(*kernel_args) # type: ignore 
    416431 
    417432    def release(self): 
    418433        # type: () -> None 
    419434        """ 
    420         Release any resources associated with the kernel. 
     435        Release resources associated with the kernel. 
    421436        """ 
    422         self.q_input.release() 
     437        # TODO: OpenCL/CUDA allocate q_input in __init__ and free it in release. 
     438        # Should we be doing the same for DLL? 
     439        #self.q_input.release() 
     440        pass 
     441 
     442    def __del__(self): 
     443        # type: () -> None 
     444        self.release() 
Note: See TracChangeset for help on using the changeset viewer.