""" C types wrapper for sasview models. """ import ctypes as ct from ctypes import c_void_p, c_int, c_double import numpy as np from . import gen from .gen import F32, F64 IQ_ARGS = [c_void_p, c_void_p, c_int, c_void_p, c_double] IQXY_ARGS = [c_void_p, c_void_p, c_void_p, c_int, c_void_p, c_double] class DllModel(object): """ ctypes wrapper for a single model. *source* and *info* are the model source and interface as returned from :func:`gen.make`. *dtype* is the desired model precision. Any numpy dtype for single or double precision floats will do, such as 'f', 'float32' or 'single' for single and 'd', 'float64' or 'double' for double. Double precision is an optional extension which may not be available on all devices. """ def __init__(self, dllpath, info): self.info = info self.dllpath = dllpath self.dll = None def _load_dll(self): Nfixed1d = len(self.info['partype']['fixed-1d']) Nfixed2d = len(self.info['partype']['fixed-2d']) Npd1d = len(self.info['partype']['pd-1d']) Npd2d = len(self.info['partype']['pd-2d']) self.dll = ct.CDLL(self.dllpath) self.Iq = self.dll[gen.kernel_name(self.info, False)] self.Iq.argtypes = IQ_ARGS + [c_double]*Nfixed1d + [c_int]*Npd1d self.Iqxy = self.dll[gen.kernel_name(self.info, True)] self.Iqxy.argtypes = IQXY_ARGS + [c_double]*Nfixed2d + [c_int]*Npd2d def __getstate__(self): return {'info': self.info, 'dllpath': self.dllpath, 'dll': None} def __setstate__(self, state): self.__dict__ = state def __call__(self, input): if self.dll is None: self._load_dll() kernel = self.Iqxy if input.is_2D else self.Iq return DllKernel(kernel, self.info, input) def make_input(self, q_vectors): """ Make q input vectors available to the model. This only needs to be done once for all models that operate on the same input. So for example, if you are adding two different models together to compare to a data set, then only one model needs to needs to call make_input, so long as the models have the same dtype. """ return DllInput(q_vectors) class DllInput(object): """ Make q data available to the gpu. *q_vectors* is a list of q vectors, which will be *[q]* for 1-D data, and *[qx, qy]* for 2-D data. Internally, the vectors will be reallocated to get the best performance on OpenCL, which may involve shifting and stretching the array to better match the memory architecture. Additional points will be evaluated with *q=1e-3*. *dtype* is the data type for the q vectors. The data type should be set to match that of the kernel, which is an attribute of :class:`GpuProgram`. Note that not all kernels support double precision, so even if the program was created for double precision, the *GpuProgram.dtype* may be single precision. Call :meth:`release` when complete. Even if not called directly, the buffer will be released when the data object is freed. """ def __init__(self, q_vectors): self.nq = q_vectors[0].size self.dtype = np.dtype('double') self.is_2D = (len(q_vectors) == 2) self.q_vectors = [np.ascontiguousarray(q,self.dtype) for q in q_vectors] self.q_pointers = [q.ctypes.data for q in q_vectors] def release(self): self.q_vectors = [] class DllKernel(object): def __init__(self, kernel, info, input): self.input = input self.kernel = kernel self.info = info self.res = np.empty(input.nq, input.dtype) dim = '2d' if input.is_2D else '1d' self.fixed_pars = info['partype']['fixed-'+dim] self.pd_pars = info['partype']['pd-'+dim] # In dll kernel, but not in opencl kernel self.p_res = self.res.ctypes.data def __call__(self, pars, pd_pars, cutoff): real = np.float32 if self.input.dtype == F32 else np.float64 fixed = [real(p) for p in pars] cutoff = real(cutoff) loops = np.hstack(pd_pars) loops = np.ascontiguousarray(loops.T, self.input.dtype).flatten() loops_N = [np.uint32(len(p[0])) for p in pd_pars] nq = c_int(self.input.nq) p_loops = loops.ctypes.data args = self.input.q_pointers + [self.p_res, nq, p_loops, cutoff] + fixed + loops_N #print pars self.kernel(*args) return self.res def release(self): pass