Changes in sasmodels/kerneldll.py [3199b17:e44432d] in sasmodels


Ignore:
File:
1 edited

Legend:

Unmodified
Added
Removed
  • sasmodels/kerneldll.py

    r3199b17 re44432d  
    100100# pylint: enable=unused-import 
    101101 
    102 # Compiler output is a byte stream that needs to be decoded in python 3. 
     102# Compiler output is a byte stream that needs to be decoded in python 3 
    103103decode = (lambda s: s) if sys.version_info[0] < 3 else (lambda s: s.decode('utf8')) 
    104104 
     
    115115        COMPILER = "tinycc" 
    116116    elif "VCINSTALLDIR" in os.environ: 
    117         # If vcvarsall.bat has been called, then VCINSTALLDIR is in the 
    118         # environment and we can use the MSVC compiler.  Otherwise, if 
    119         # tinycc is available then use it.  Otherwise, hope that mingw 
    120         # is available. 
     117        # If vcvarsall.bat has been called, then VCINSTALLDIR is in the environment 
     118        # and we can use the MSVC compiler.  Otherwise, if tinycc is available 
     119        # then use it.  Otherwise, hope that mingw is available. 
    121120        COMPILER = "msvc" 
    122121    else: 
     
    125124    COMPILER = "unix" 
    126125 
    127 ARCH = "" if ct.sizeof(ct.c_void_p) > 4 else "x86"  # 4 byte pointers on x86. 
     126ARCH = "" if ct.sizeof(ct.c_void_p) > 4 else "x86"  # 4 byte pointers on x86 
    128127if COMPILER == "unix": 
    129     # Generic unix compile. 
    130     # On Mac users will need the X code command line tools installed. 
     128    # Generic unix compile 
     129    # On mac users will need the X code command line tools installed 
    131130    #COMPILE = "gcc-mp-4.7 -shared -fPIC -std=c99 -fopenmp -O2 -Wall %s -o %s -lm -lgomp" 
    132131    CC = "cc -shared -fPIC -std=c99 -O2 -Wall".split() 
    133     # Add OpenMP support if not running on a Mac. 
     132    # add openmp support if not running on a mac 
    134133    if sys.platform != "darwin": 
    135         # OpenMP seems to be broken on gcc 5.4.0 (ubuntu 16.04.9). 
     134        # OpenMP seems to be broken on gcc 5.4.0 (ubuntu 16.04.9) 
    136135        # Shut it off for all unix until we can investigate. 
    137136        #CC.append("-fopenmp") 
     
    145144    # vcomp90.dll on the path.  One may be found here: 
    146145    #       C:/Windows/winsxs/x86_microsoft.vc90.openmp*/vcomp90.dll 
    147     # Copy this to the python directory and uncomment the OpenMP COMPILE. 
    148     # TODO: Remove intermediate OBJ file created in the directory. 
    149     # TODO: Maybe don't use randomized name for the c file. 
    150     # TODO: Maybe ask distutils to find MSVC. 
     146    # Copy this to the python directory and uncomment the OpenMP COMPILE 
     147    # TODO: remove intermediate OBJ file created in the directory 
     148    # TODO: maybe don't use randomized name for the c file 
     149    # TODO: maybe ask distutils to find MSVC 
    151150    CC = "cl /nologo /Ox /MD /W3 /GS- /DNDEBUG".split() 
    152151    if "SAS_OPENMP" in os.environ: 
     
    173172ALLOW_SINGLE_PRECISION_DLLS = True 
    174173 
    175  
    176174def compile(source, output): 
    177175    # type: (str, str) -> None 
     
    185183    logging.info(command_str) 
    186184    try: 
    187         # Need shell=True on windows to keep console box from popping up. 
     185        # need shell=True on windows to keep console box from popping up 
    188186        shell = (os.name == 'nt') 
    189187        subprocess.check_output(command, shell=shell, stderr=subprocess.STDOUT) 
     
    194192        raise RuntimeError("compile failed.  File is in %r"%source) 
    195193 
    196  
    197194def dll_name(model_info, dtype): 
    198195    # type: (ModelInfo, np.dtype) ->  str 
     
    205202    basename += ARCH + ".so" 
    206203 
    207     # Hack to find precompiled dlls. 
     204    # Hack to find precompiled dlls 
    208205    path = joinpath(generate.DATA_PATH, '..', 'compiled_models', basename) 
    209206    if os.path.exists(path): 
     
    245242        raise ValueError("16 bit floats not supported") 
    246243    if dtype == F32 and not ALLOW_SINGLE_PRECISION_DLLS: 
    247         dtype = F64  # Force 64-bit dll. 
    248     # Note: dtype may be F128 for long double precision. 
     244        dtype = F64  # Force 64-bit dll 
     245    # Note: dtype may be F128 for long double precision 
    249246 
    250247    dll = dll_path(model_info, dtype) 
     
    257254        need_recompile = dll_time < newest_source 
    258255    if need_recompile: 
    259         # Make sure the DLL path exists. 
     256        # Make sure the DLL path exists 
    260257        if not os.path.exists(SAS_DLL_PATH): 
    261258            os.makedirs(SAS_DLL_PATH) 
     
    266263            file_handle.write(source) 
    267264        compile(source=filename, output=dll) 
    268         # Comment the following to keep the generated C file. 
    269         # Note: If there is a syntax error then compile raises an error 
     265        # comment the following to keep the generated c file 
     266        # Note: if there is a syntax error then compile raises an error 
    270267        # and the source file will not be deleted. 
    271268        os.unlink(filename) 
     
    306303        self.dllpath = dllpath 
    307304        self._dll = None  # type: ct.CDLL 
    308         self._kernels = None  # type: List[Callable, Callable] 
     305        self._kernels = None # type: List[Callable, Callable] 
    309306        self.dtype = np.dtype(dtype) 
    310307 
     
    341338        # type: (List[np.ndarray]) -> DllKernel 
    342339        q_input = PyInput(q_vectors, self.dtype) 
    343         # Note: DLL is lazy loaded. 
     340        # Note: pickle not supported for DllKernel 
    344341        if self._dll is None: 
    345342            self._load_dll() 
     
    361358        self._dll = None 
    362359 
    363  
    364360class DllKernel(Kernel): 
    365361    """ 
     
    383379    def __init__(self, kernel, model_info, q_input): 
    384380        # type: (Callable[[], np.ndarray], ModelInfo, PyInput) -> None 
    385         dtype = q_input.dtype 
     381        #,model_info,q_input) 
     382        self.kernel = kernel 
     383        self.info = model_info 
    386384        self.q_input = q_input 
    387         self.kernel = kernel 
    388  
    389         # Attributes accessed from the outside. 
     385        self.dtype = q_input.dtype 
    390386        self.dim = '2d' if q_input.is_2d else '1d' 
    391         self.info = model_info 
    392         self.dtype = dtype 
    393  
    394         # Converter to translate input to target type. 
    395         self._as_dtype = (np.float32 if dtype == generate.F32 
    396                           else np.float64 if dtype == generate.F64 
    397                           else np.float128) 
    398  
    399         # Holding place for the returned value. 
     387        # leave room for f1/f2 results in case we need to compute beta for 1d models 
    400388        nout = 2 if self.info.have_Fq else 1 
    401         extra_q = 4  # Total weight, form volume, shell volume and R_eff. 
    402         self.result = np.empty(self.q_input.nq*nout + extra_q, dtype) 
    403  
    404     def _call_kernel(self, call_details, values, cutoff, magnetic, 
    405                      effective_radius_type): 
    406         # type: (CallDetails, np.ndarray, float, bool, int) -> np.ndarray 
    407  
    408         # Setup kernel function and arguments. 
     389        # +4 for total weight, shell volume, effective radius, form volume 
     390        self.result = np.empty(q_input.nq*nout + 4, self.dtype) 
     391        self.real = (np.float32 if self.q_input.dtype == generate.F32 
     392                     else np.float64 if self.q_input.dtype == generate.F64 
     393                     else np.float128) 
     394 
     395    def _call_kernel(self, call_details, values, cutoff, magnetic, effective_radius_type): 
     396        # type: (CallDetails, np.ndarray, np.ndarray, float, bool, int) -> np.ndarray 
    409397        kernel = self.kernel[1 if magnetic else 0] 
    410         kernel_args = [ 
    411             self.q_input.nq,  # Number of inputs. 
    412             None,  # Placeholder for pd_start. 
    413             None,  # Placeholder for pd_stop. 
    414             call_details.buffer.ctypes.data,  # Problem definition. 
    415             values.ctypes.data,  # Parameter values. 
    416             self.q_input.q.ctypes.data,  # Q values. 
    417             self.result.ctypes.data,   # Result storage. 
    418             self._as_dtype(cutoff),  # Probability cutoff. 
    419             effective_radius_type,  # R_eff mode. 
     398        args = [ 
     399            self.q_input.nq, # nq 
     400            None, # pd_start 
     401            None, # pd_stop pd_stride[MAX_PD] 
     402            call_details.buffer.ctypes.data, # problem 
     403            values.ctypes.data,  # pars 
     404            self.q_input.q.ctypes.data, # q 
     405            self.result.ctypes.data,   # results 
     406            self.real(cutoff), # cutoff 
     407            effective_radius_type, # R_eff mode 
    420408        ] 
    421  
    422         # Call kernel and retrieve results. 
    423409        #print("Calling DLL") 
    424410        #call_details.show(values) 
    425411        step = 100 
    426         # TODO: Do we need the explicit sleep like the OpenCL and CUDA loops? 
    427412        for start in range(0, call_details.num_eval, step): 
    428413            stop = min(start + step, call_details.num_eval) 
    429             kernel_args[1:3] = [start, stop] 
    430             kernel(*kernel_args) # type: ignore 
     414            args[1:3] = [start, stop] 
     415            kernel(*args) # type: ignore 
    431416 
    432417    def release(self): 
    433418        # type: () -> None 
    434419        """ 
    435         Release resources associated with the kernel. 
     420        Release any resources associated with the kernel. 
    436421        """ 
    437         # TODO: OpenCL/CUDA allocate q_input in __init__ and free it in release. 
    438         # Should we be doing the same for DLL? 
    439         #self.q_input.release() 
    440         pass 
    441  
    442     def __del__(self): 
    443         # type: () -> None 
    444         self.release() 
     422        self.q_input.release() 
Note: See TracChangeset for help on using the changeset viewer.