"""
ctypes wrapper for sasview models.
"""

import ctypes as ct
from ctypes import c_void_p, c_int, c_double

import numpy as np

from . import gen

from .gen import F32, F64

# C argument types that lead every 1-D kernel call: the q vector pointer,
# the result pointer, the number of q points, the polydispersity loops
# pointer and the cutoff.
IQ_ARGS = [c_void_p, c_void_p, c_int, c_void_p, c_double]
# The 2-D kernels receive two q vectors rather than one, so their signature
# is the 1-D signature with one extra leading pointer.
IQXY_ARGS = [c_void_p] + IQ_ARGS

class DllModel(object):
    """
    ctypes wrapper for a single compiled model.

    *dllpath* is the path of the shared library holding the compiled
    kernels for the model.

    *info* is the model interface definition.  Its *partype* entry supplies
    the *fixed-1d*, *fixed-2d*, *pd-1d* and *pd-2d* parameter lists which
    determine the kernel call signatures.

    The library is not loaded until the model is first called.  The loaded
    library is left out of the pickled state, so an unpickled model loads
    it again on first use.
    """
    def __init__(self, dllpath, info):
        self.dll = None  # loaded lazily by _load_dll
        self.dllpath = dllpath
        self.info = info

    def _load_dll(self):
        """
        Load the shared library and bind the 1-D and 2-D kernel functions.
        """
        partype = self.info['partype']
        n_fixed_1d = len(partype['fixed-1d'])
        n_fixed_2d = len(partype['fixed-2d'])
        n_pd_1d = len(partype['pd-1d'])
        n_pd_2d = len(partype['pd-2d'])

        # Each kernel takes the common leading arguments, then one double
        # per fixed parameter, then one int per polydisperse parameter.
        iq_argtypes = IQ_ARGS + [c_double]*n_fixed_1d + [c_int]*n_pd_1d
        iqxy_argtypes = IQXY_ARGS + [c_double]*n_fixed_2d + [c_int]*n_pd_2d

        self.dll = ct.CDLL(self.dllpath)

        self.Iq = self.dll[gen.kernel_name(self.info, False)]
        self.Iq.argtypes = iq_argtypes

        self.Iqxy = self.dll[gen.kernel_name(self.info, True)]
        self.Iqxy.argtypes = iqxy_argtypes

    def __getstate__(self):
        """
        Return the picklable state; the loaded library handle is dropped.
        """
        return dict(info=self.info, dllpath=self.dllpath, dll=None)

    def __setstate__(self, state):
        """
        Restore the state produced by :meth:`__getstate__`.
        """
        self.__dict__ = state

    def __call__(self, input):
        """
        Return a :class:`DllKernel` which evaluates the model on *input*.

        The 2-D kernel is selected when *input.is_2D* is true, otherwise
        the 1-D kernel is used.  The library is loaded on the first call.
        """
        if self.dll is None:
            self._load_dll()

        if input.is_2D:
            kernel = self.Iqxy
        else:
            kernel = self.Iq
        return DllKernel(kernel, self.info, input)

    def make_input(self, q_vectors):
        """
        Make q input vectors available to the model.

        Returns a :class:`DllInput` wrapping *q_vectors*.  This only needs
        to be done once for all models that operate on the same input.  So
        for example, if you are adding two different models together to
        compare to a data set, then only one model needs to call
        make_input.
        """
        return DllInput(q_vectors)


class DllInput(object):
    """
    Make q data available to the compiled dll kernels.

    *q_vectors* is a list of q vectors, which will be *[q]* for 1-D data,
    and *[qx, qy]* for 2-D data.  Each vector is converted to a contiguous
    double precision array, and it is the address of that converted array
    which is stored for the kernel call.

    The following attributes are set:

    * *nq* is the number of points in the first q vector
    * *dtype* is the numpy dtype of the stored vectors (always double)
    * *is_2D* is true when two q vectors were supplied
    * *q_vectors* holds the converted arrays
    * *q_pointers* holds the data address of each converted array

    Call :meth:`release` when complete.
    """
    def __init__(self, q_vectors):
        self.dtype = np.dtype('double')
        self.is_2D = (len(q_vectors) == 2)
        self.q_vectors = [np.ascontiguousarray(q, self.dtype)
                          for q in q_vectors]
        # Size is taken from the converted array so that plain sequences
        # are accepted as well as numpy arrays.
        self.nq = self.q_vectors[0].size
        # The addresses must come from the converted arrays: when the
        # caller's data is not already contiguous double precision,
        # ascontiguousarray returns a copy and the caller's buffer does not
        # have the layout the kernel expects.
        self.q_pointers = [q.ctypes.data for q in self.q_vectors]

    def release(self):
        """
        Drop the stored q vectors along with the addresses that refer to them.
        """
        self.q_vectors = []
        # The addresses are meaningless once the arrays are gone.
        self.q_pointers = []

class DllKernel(object):
    """
    Callable wrapper binding one kernel function to one set of q values.

    *kernel* is the ctypes function to call.

    *info* is the model interface definition.  Its *partype* entry supplies
    the fixed and polydisperse parameter lists for the input dimension.

    *input* is the q data for the kernel.  It must provide *nq*, *dtype*,
    *is_2D* and *q_pointers*.

    The result array is allocated once and reused, so each call overwrites
    the values returned by the previous call.
    """
    def __init__(self, kernel, info, input):
        self.input = input
        self.kernel = kernel
        self.info = info
        self.res = np.empty(input.nq, input.dtype)
        dim = '2d' if input.is_2D else '1d'
        self.fixed_pars = info['partype']['fixed-'+dim]
        self.pd_pars = info['partype']['pd-'+dim]

        # In dll kernel, but not in opencl kernel
        self.p_res = self.res.ctypes.data

    def __call__(self, pars, pd_pars, cutoff):
        """
        Evaluate the kernel and return the result array.

        *pars* are the fixed parameter values.

        *pd_pars* is a sequence with one entry per polydisperse parameter,
        each entry being a pair of equal length rows.  It may be empty.

        *cutoff* is passed through to the kernel.
        """
        real = np.float32 if self.input.dtype == F32 else np.float64
        fixed = [real(p) for p in pars]
        cutoff = real(cutoff)
        loops, loops_N = self._pack_loops(pd_pars)

        nq = c_int(self.input.nq)
        # The local name keeps the loops array alive until the call returns.
        p_loops = loops.ctypes.data
        args = (self.input.q_pointers
                + [self.p_res, nq, p_loops, cutoff]
                + fixed + loops_N)
        self.kernel(*args)

        return self.res

    def _pack_loops(self, pd_pars):
        """
        Return the flattened loops array and the per-parameter loop counts.

        The rows of all entries are joined side by side, and the array is
        then flattened column by column, so the values of the paired rows
        are interleaved.
        """
        # Plain ints, which is what the c_int argument types expect.
        counts = [len(p[0]) for p in pd_pars]
        if not counts:
            # np.hstack raises ValueError when given an empty sequence, so
            # hand the kernel an empty table instead.
            return np.empty(0, self.input.dtype), counts
        loops = np.hstack(pd_pars)
        loops = np.ascontiguousarray(loops.T, self.input.dtype).flatten()
        return loops, counts

    def release(self):
        """
        Nothing to release: the kernel holds no resources of its own.
        """
        pass