source: sasmodels/sasmodels/kerneldll.py @ e1454ab

core_shell_microgelscostrafo411magnetic_modelrelease_v0.94release_v0.95ticket-1257-vesicle-productticket_1156ticket_1265_superballticket_822_more_unit_tests
Last change on this file since e1454ab was e1454ab, checked in by Piotr Rozyczko <piotr.rozyczko@…>, 8 years ago

Ticket #539: Precompile model dlls and reuse them.

  • Property mode set to 100644
File size: 11.6 KB
Line 
1r"""
2DLL driver for C kernels
3
The global attribute *ALLOW_SINGLE_PRECISION_DLLS* controls whether single
precision floating point evaluation is allowed for the compiled models.  It
defaults to *True*; set it to *False* to have single precision requests
compiled as double precision instead.
7
8The compiler command line is stored in the attribute *COMPILE*, with string
9substitutions for %(source)s and %(output)s indicating what to compile and
10where to store it.  The actual command is system dependent.
11
12On windows systems, you have a choice of compilers.  *MinGW* is the GNU
13compiler toolchain, available in packages such as anaconda and PythonXY,
14or available stand alone. This toolchain has had difficulties on some
15systems, and may or may not work for you.  In order to build DLLs, *gcc*
16must be on your path.  If the environment variable *SAS_OPENMP* is given
17then -fopenmp is added to the compiler flags.  This requires a version
18of MinGW compiled with OpenMP support.
19
20An alternative toolchain uses the Microsoft Visual C++ compiler, available
21free from microsoft:
22
23    `<http://www.microsoft.com/en-us/download/details.aspx?id=44266>`_
24
25Again, this requires that the compiler is available on your path.  This is
26done by running vcvarsall.bat in a windows terminal.  Install locations are
27system dependent, such as:
28
29    C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python\9.0\vcvarsall.bat
30
31or maybe
32
33    C:\Users\yourname\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python\9.0\vcvarsall.bat
34
35And again, the environment variable *SAS_OPENMP* controls whether OpenMP is
36used to compile the C code.  This requires the Microsoft vcomp90.dll library,
37which doesn't seem to be included with the compiler, nor does there appear
38to be a public download location.  There may be one on your machine already
39in a location such as:
40
41    C:\Windows\winsxs\x86_microsoft.vc90.openmp*\vcomp90.dll
42
43If you copy this onto your path, such as the python directory or the install
44directory for this application, then OpenMP should be supported.
45"""
46from __future__ import print_function
47
48import sys
49import os
50import tempfile
51import ctypes as ct
52from ctypes import c_void_p, c_int, c_longdouble, c_double, c_float
53import _ctypes
54
55import numpy as np
56
57from . import generate
58from .kernelpy import PyInput, PyModel
59from .exception import annotate_exception
60
# Platform-specific compiler command.  COMPILE is a %-format template whose
# %(source)s and %(output)s fields name the C file to build and the DLL to
# produce.
if sys.platform == 'darwin':
    # No OpenMP flag on OS X.
    COMPILE = "gcc -shared -fPIC -std=c99 -O2 -Wall %(source)s -o %(output)s -lm"
elif os.name != 'nt':
    # Any other non-windows system: cc with OpenMP always enabled.
    COMPILE = "cc -shared -fPIC -fopenmp -std=c99 -O2 -Wall %(source)s -o %(output)s -lm"
elif "VCINSTALLDIR" not in os.environ:
    # Windows without the MSVC environment: use gcc.  -fPIC is unused on
    # windows so it is left out.  OpenMP is opt-in through SAS_OPENMP.
    COMPILE = "gcc -shared -std=c99 -O2 -Wall %(source)s -o %(output)s -lm"
    if "SAS_OPENMP" in os.environ:
        COMPILE += " -fopenmp"
else:
    # Windows with the MSVC compiler available (call vcvarsall.bat before
    # compiling to set path, headers, libs, etc.).  OpenMP requires a copy
    # of vcomp90.dll on the path.  One may be found here:
    #       C:/Windows/winsxs/x86_microsoft.vc90.openmp*/vcomp90.dll
    # TODO: remove intermediate OBJ file created in the directory
    # TODO: maybe don't use randomized name for the c file
    CC = "cl /nologo /Ox /MD /W3 /GS- /DNDEBUG /Tp%(source)s "
    LN = "/link /DLL /INCREMENTAL:NO /MANIFEST /OUT:%(output)s"
    COMPILE = " ".join(
        (CC, "/openmp", LN) if "SAS_OPENMP" in os.environ else (CC, LN))

# Assume the default location of module DLLs is within the sasmodel directory.
DLL_PATH = os.path.join(
    os.path.dirname(os.path.realpath(__file__)), "models", "dll")

# When False, single precision requests are compiled as double precision.
ALLOW_SINGLE_PRECISION_DLLS = True
93
94
def dll_path(model_info, dtype="double"):
    """
    Return the path of the compiled library for *model_info* at *dtype*.

    The file name is the base name of *model_info['filename']* without its
    extension, followed by "32", "64" or "128" for single, double or any
    other precision, and the extension ".so".  The file lives in *DLL_PATH*.
    """
    # Strip directory and extension from the model definition file name.
    stem = os.path.splitext(os.path.basename(model_info['filename']))[0]

    precision = np.dtype(dtype)
    if precision == generate.F32:
        bits = "32"
    elif precision == generate.F64:
        bits = "64"
    else:
        bits = "128"

    return os.path.join(DLL_PATH, stem + bits + '.so')
108
def make_dll(source, model_info, dtype="double"):
    """
    Compile the model defined by *source* and *model_info* if necessary and
    return the path to the resulting DLL.

    If *model_info['Iq']* is callable the model is implemented in python;
    no compilation happens and a :class:`PyModel` is returned instead of
    a path.

    A DLL that already exists at :func:`dll_path` is reused as is.  The
    check against the modification time of the source files is currently
    disabled, so the DLL must be deleted to force a rebuild.

    *dtype* is a numpy floating point precision specifier indicating whether
    the model should be single or double precision.  The default is double
    precision.  Half precision raises *ValueError*.  Single precision is
    promoted to double unless *ALLOW_SINGLE_PRECISION_DLLS* is true.

    Set *sasmodels.kerneldll.DLL_PATH* to the compiled dll output path.
    The default is the *models/dll* directory next to this module.

    Set *sasmodels.kerneldll.ALLOW_SINGLE_PRECISION_DLLS* to True if single
    precision models are allowed as DLLs.

    Raises *RuntimeError* if the compiler reports a failure or produces no
    output.  In that case the generated C file is left on disk and its
    location is given in the error message.
    """
    if callable(model_info.get('Iq', None)):
        return PyModel(model_info)

    dtype = np.dtype(dtype)
    if dtype == generate.F16:
        raise ValueError("16 bit floats not supported")
    if dtype == generate.F32 and not ALLOW_SINGLE_PRECISION_DLLS:
        dtype = generate.F64  # Force 64-bit dll

    dll = dll_path(model_info, dtype)
    if os.path.exists(dll):
        # Precompiled library is available; nothing to build.
        return dll

    if dtype == generate.F32:
        bits = '32'
    elif dtype == generate.F64:
        bits = '64'
    else:
        bits = '128'
    tempfile_prefix = 'sas_' + model_info['name'] + bits + '_'

    # Only convert the source when it is actually going to be compiled.
    source = generate.convert_type(source, dtype)

    fid, filename = tempfile.mkstemp(suffix=".c", prefix=tempfile_prefix)
    # Close the file explicitly so that the source is flushed to disk
    # before the compiler reads it.
    with os.fdopen(fid, "w") as fd:
        fd.write(source)

    command = COMPILE%{"source":filename, "output":dll}
    print("Compile command: "+command)
    status = os.system(command)
    if status != 0 or not os.path.exists(dll):
        raise RuntimeError("compile failed.  File is in %r"%filename)

    ## comment the following to keep the generated c file
    os.unlink(filename)
    return dll
164
165
def load_dll(source, model_info, dtype="double"):
    """
    Create and load a dll corresponding to the source, info pair returned
    from :func:`sasmodels.generate.make` compiled for the target precision.

    If :func:`make_dll` returns a python model rather than a path (which it
    does when *model_info['Iq']* is callable), that model is returned
    unchanged.

    See :func:`make_dll` for details on controlling the dll path and the
    allowed floating point precision.
    """
    compiled = make_dll(source, model_info, dtype=dtype)
    if isinstance(compiled, PyModel):
        # Not a file name: the model is pure python and ready to use.
        return compiled

    # Apply the same precision promotion as make_dll so that the ctypes
    # signatures built by DllModel match the library that was compiled.
    dtype = np.dtype(dtype)
    if dtype == generate.F32 and not ALLOW_SINGLE_PRECISION_DLLS:
        dtype = generate.F64
    return DllModel(compiled, model_info, dtype=dtype)
176
177
# Leading ctypes argument types shared by every kernel call.  The roles
# below follow the argument order assembled in DllKernel.__call__;
# presumably one q pointer for 1D and two (qx, qy) for 2D -- confirm
# against PyInput.q_pointers.
IQ_ARGS = [
    c_void_p,  # q
    c_void_p,  # result
    c_int,     # number of q points
]
IQXY_ARGS = [
    c_void_p,  # qx
    c_void_p,  # qy
    c_void_p,  # result
    c_int,     # number of q points
]
180
class DllModel(object):
    """
    ctypes wrapper for a single model.

    *dllpath* is the path to the compiled model library and *model_info*
    is the model interface as returned from
    :func:`sasmodels.generate.make`.

    *dtype* is the desired model precision.  Any numpy dtype for single
    or double precision floats will do, such as 'f', 'float32' or 'single'
    for single and 'd', 'float64' or 'double' for double.  Any other
    precision is passed to the library as long double.

    The library is not loaded until :meth:`make_kernel` is first called.

    Call :meth:`release` when done with the kernel.
    """

    def __init__(self, dllpath, model_info, dtype=generate.F32):
        self.info = model_info
        self.dllpath = dllpath
        self.dll = None
        self.dtype = np.dtype(dtype)

    def _load_dll(self):
        """
        Load the library and bind the 1D and 2D kernel entry points to
        *self.Iq* and *self.Iqxy* with matching ctypes signatures.
        """
        Nfixed1d = len(self.info['partype']['fixed-1d'])
        Nfixed2d = len(self.info['partype']['fixed-2d'])
        Npd1d = len(self.info['partype']['pd-1d'])
        Npd2d = len(self.info['partype']['pd-2d'])

        try:
            self.dll = ct.CDLL(self.dllpath)
        except Exception:
            # Add the library path to the error before passing it on.
            annotate_exception("while loading "+self.dllpath)
            raise

        # C floating point type matching the requested precision.
        fp = (c_float if self.dtype == generate.F32
              else c_double if self.dtype == generate.F64
              else c_longdouble)

        # Polydispersity arguments (loops pointer, cutoff, one length per
        # polydisperse parameter) are only present when the model has
        # polydisperse parameters; see DllKernel.__call__.
        pd_args_1d = ([c_void_p, fp] + [c_int]*Npd1d) if Npd1d else []
        pd_args_2d = ([c_void_p, fp] + [c_int]*Npd2d) if Npd2d else []

        self.Iq = self.dll[generate.kernel_name(self.info, False)]
        self.Iq.argtypes = IQ_ARGS + pd_args_1d + [fp]*Nfixed1d

        self.Iqxy = self.dll[generate.kernel_name(self.info, True)]
        self.Iqxy.argtypes = IQXY_ARGS + pd_args_2d + [fp]*Nfixed2d

        # On windows this resets self.dll to None and calls FreeLibrary on
        # the library handle; elsewhere it does nothing.
        self.release()

    def __getstate__(self):
        # The ctypes handles are not part of the state; the library is
        # reloaded on demand after unpickling.
        return self.info, self.dllpath, self.dtype

    def __setstate__(self, state):
        if len(state) == 2:
            # State written before dtype was included; fall back to the
            # constructor default so that make_kernel has a dtype to use.
            self.info, self.dllpath = state
            self.dtype = np.dtype(generate.F32)
        else:
            self.info, self.dllpath, self.dtype = state
        self.dll = None

    def make_kernel(self, q_vectors):
        """
        Return a :class:`DllKernel` for evaluating the model at *q_vectors*,
        loading the library first if it is not currently loaded.
        """
        q_input = PyInput(q_vectors, self.dtype)
        if self.dll is None:
            self._load_dll()
        kernel = self.Iqxy if q_input.is_2d else self.Iq
        return DllKernel(kernel, self.info, q_input)

    def release(self):
        """
        Release any resources associated with the model.
        """
        if os.name == 'nt':
            # Open the library again to obtain its handle, forget our own
            # reference, then ask windows to free the handle.
            dll = ct.CDLL(self.dllpath)
            lib_handle = dll._handle
            del dll, self.dll
            self.dll = None
            ct.windll.kernel32.FreeLibrary(lib_handle)
        else:
            pass
256
257
class DllKernel(object):
    """
    Callable SAS kernel.

    *kernel* is the c function to call.

    *model_info* is the module information

    *q_input* is the PyInput q vectors at which the kernel should be
    evaluated.

    The resulting call method takes *fixed_pars*, a list of values for
    the fixed parameters to the kernel, and *pd_pars*, a list of
    (value, weight) vectors for the polydisperse parameters.  *cutoff*
    is passed to the kernel along with the polydisperse parameters; it
    determines the integration limits: any points with combined weight
    less than *cutoff* will not be calculated.

    Call :meth:`release` when done with the kernel instance.
    """
    def __init__(self, kernel, model_info, q_input):
        self.info = model_info
        self.q_input = q_input
        self.kernel = kernel
        # Result buffer, reused (and returned) by every call.
        self.res = np.empty(q_input.nq, q_input.dtype)
        dim = '2d' if q_input.is_2d else '1d'
        self.fixed_pars = model_info['partype']['fixed-' + dim]
        self.pd_pars = model_info['partype']['pd-' + dim]

        # In dll kernel, but not in opencl kernel
        self.p_res = self.res.ctypes.data

    def __call__(self, fixed_pars, pd_pars, cutoff):
        """
        Evaluate the kernel and return the result array.

        The returned array is the kernel's own buffer; it is overwritten
        by the next call.
        """
        # numpy scalar type matching the precision of the q vectors.
        # np.longdouble is used for extended precision because np.float128
        # is not defined on every platform.
        real = (np.float32 if self.q_input.dtype == generate.F32
                else np.float64 if self.q_input.dtype == generate.F64
                else np.longdouble)

        nq = c_int(self.q_input.nq)
        if pd_pars:
            cutoff = real(cutoff)
            # Number of points in each polydisperse parameter.
            loops_N = [np.uint32(len(p[0])) for p in pd_pars]
            # Pack all (value, weight) vectors into one contiguous array
            # in the kernel's precision.
            loops = np.hstack(pd_pars)
            loops = np.ascontiguousarray(loops.T, self.q_input.dtype).flatten()
            p_loops = loops.ctypes.data
            dispersed = [p_loops, cutoff] + loops_N
        else:
            dispersed = []
        fixed = [real(p) for p in fixed_pars]
        args = self.q_input.q_pointers + [self.p_res, nq] + dispersed + fixed
        self.kernel(*args)

        return self.res

    def release(self):
        """
        Release any resources associated with the kernel.
        """
        pass
Note: See TracBrowser for help on using the repository browser.