"""
    Module to perform P(r) inversion.
    The module contains the Invertor class.
"""
from sans.pr.core.pr_inversion import Cinvertor
import numpy
import sys
import math, time
from scipy.linalg.basic import lstsq

def help():
    """
        Return the general help text for the P(r) inversion.

        The text describes the inversion method, the fit function that is
        minimized and the user inputs. Topic selection is not supported.

        @return: help text as a single string
    """
    fragments = (
        "The inversion approach is based on Moore, J. Appl. Cryst. (1980) 13, 168-175.\n\n",
        "P(r) is set to be equal to an expansion of base functions of the type ",
        "phi_n(r) = 2*r*sin(pi*n*r/D_max). The coefficient of each base functions ",
        "in the expansion is found by performing a least square fit with the ",
        "following fit function:\n\n",
        "chi**2 = sum_i[ I_meas(q_i) - I_th(q_i) ]**2/error**2 + Reg_term\n\n",
        "where I_meas(q) is the measured scattering intensity and I_th(q) is ",
        "the prediction from the Fourier transform of the P(r) expansion. ",
        "The Reg_term term is a regularization term set to the second derivative ",
        "d**2P(r)/dr**2 integrated over r. It is used to produce a smooth P(r) output.\n\n",
        "The following are user inputs:\n\n",
        "   - Number of terms: the number of base functions in the P(r) expansion.\n\n",
        "   - Regularization constant: a multiplicative constant to set the size of ",
        "the regularization term.\n\n",
        "   - Maximum distance: the maximum distance between any two points in the system.\n",
    )
    return "".join(fragments)
    

class Invertor(Cinvertor):
    """
        Invertor class to perform P(r) inversion
        
        The problem is solved by posing the problem as  Ax = b,
        where x is the set of coefficients we are looking for.
        
        Npts is the number of points.
        
        In the following i refers to the ith base function coefficient.
        The matrix has its entries j in its first Npts rows set to
            A[j][i] = (Fourier transformed base function for point j) 
            
        We then choose a number of r-points, n_r, to evaluate the second
        derivative of P(r) at. This is used as our regularization term.
        For a vector r of length n_r, the following n_r rows are set to
            A[j+Npts][i] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
            
        The vector b has its first Npts entries set to
            b[j] = (I(q) observed for point j)
            
        The following n_r entries are set to zero.
        
        The result is found by using scipy.linalg.basic.lstsq to invert
        the matrix and find the coefficients x.
        
        Methods inherited from Cinvertor:
        - get_peaks(pars): returns the number of P(r) peaks
        - oscillations(pars): returns the oscillation parameters for the output P(r)
        - get_positive(pars): returns the fraction of P(r) that is above zero
        - get_pos_err(pars): returns the fraction of P(r) that is 1-sigma above zero
    """
    ## Chisqr of the last computation
    chi2  = 0
    ## Time elapsed for last computation
    elapsed = 0
    ## Alpha to get the reg term the same size as the signal
    suggested_alpha = 0
    ## Last number of base functions used
    nfunc = 10
    ## Last output values
    out = None
    ## Last errors on output values
    cov = None
    ## Background value
    background = 0
    
    
    def __init__(self):
        """
            Create an Invertor by delegating to the Cinvertor constructor.
        """
        Cinvertor.__init__(self)
        
    def __setattr__(self, name, value):
        """
            Set the value of an attribute.

            The names x, y, err, d_max, q_min, q_max, alpha, slit_height,
            slit_width and has_bck are routed to the matching setter method
            (set_x, set_y, ...). Any other name is passed on to
            Cinvertor.__setattr__.

            @param name: name of the attribute to set
            @param value: new value for the attribute
            @return: whatever the setter that was called returns
            @raise ValueError: if one of the x (q) values is zero, or if
                               has_bck is given something other than
                               True or False
        """
        if name == 'x':
            if 0.0 in value:
                raise ValueError("Invertor: one of your q-values is zero. Delete that entry before proceeding")
            return self.set_x(value)
        if name == 'err':
            # Errors are stored as absolute values
            return self.set_err(abs(value))
        if name == 'q_min':
            # None means "no lower bound", stored as -1.0
            if value is None:
                return self.set_qmin(-1.0)
            return self.set_qmin(value)
        if name == 'q_max':
            # None means "no upper bound", stored as -1.0
            if value is None:
                return self.set_qmax(-1.0)
            return self.set_qmax(value)
        if name == 'has_bck':
            # The flag is handed to set_has_bck as 1 or 0
            if value == True:
                return self.set_has_bck(1)
            elif value == False:
                return self.set_has_bck(0)
            raise ValueError("Invertor: has_bck can only be True or False")

        # Attributes forwarded unchanged to a setter method
        plain_setters = {'y': 'set_y',
                         'd_max': 'set_dmax',
                         'alpha': 'set_alpha',
                         'slit_height': 'set_slit_height',
                         'slit_width': 'set_slit_width'}
        if name in plain_setters:
            return getattr(self, plain_setters[name])(value)

        return Cinvertor.__setattr__(self, name, value)
    
    def __getattr__(self, name):
        """
           Return the value of an attribute.

           x, y and err are returned as newly allocated numpy arrays
           filled in by get_x, get_y and get_err. q_min and q_max are
           returned as None when the stored value is negative. has_bck
           is returned as a boolean. Names that are not recognized
           yield the instance dictionary entry if there is one, and
           None otherwise.

           @param name: name of the requested attribute
           @return: value of the attribute, or None
        """
        # Array-valued data: allocate, then let the getter fill it in
        if name == 'x':
            values = numpy.ones(self.get_nx())
            self.get_x(values)
            return values
        if name == 'y':
            values = numpy.ones(self.get_ny())
            self.get_y(values)
            return values
        if name == 'err':
            values = numpy.ones(self.get_nerr())
            self.get_err(values)
            return values

        # Q-range bounds: a negative stored value stands for "not set"
        if name == 'q_min':
            lower = self.get_qmin()
            return None if lower < 0 else lower
        if name == 'q_max':
            upper = self.get_qmax()
            return None if upper < 0 else upper

        # Plain scalar values
        if name == 'd_max':
            return self.get_dmax()
        if name == 'alpha':
            return self.get_alpha()
        if name == 'slit_height':
            return self.get_slit_height()
        if name == 'slit_width':
            return self.get_slit_width()
        if name == 'has_bck':
            return bool(self.get_has_bck() == 1)

        # Anything else: instance dictionary entry, or None
        return self.__dict__.get(name)
    
    def clone(self):
        """
            Return a new Invertor holding the same settings and data.

            Copied: chi2, elapsed, nfunc, alpha, d_max, q_min, q_max,
            x, y, err, has_bck, slit_height and slit_width.
            The last output (out, cov), the background value and
            suggested_alpha are not copied.

            @return: the new Invertor instance
        """
        duplicate = Invertor()
        copied_names = ('chi2', 'elapsed', 'nfunc', 'alpha',
                        'd_max', 'q_min', 'q_max',
                        'x', 'y', 'err',
                        'has_bck', 'slit_height', 'slit_width')
        # Attributes are transferred one by one, in the order listed
        for attr_name in copied_names:
            setattr(duplicate, attr_name, getattr(self, attr_name))
        return duplicate
    
    def invert(self, nfunc=10, nr=20):
        """
            Perform inversion to P(r)
            
            The background value is reset to zero and the work is
            delegated to the lstsq method.
            
            The problem is solved by posing the problem as  Ax = b,
            where x is the set of coefficients we are looking for.
            
            Npts is the number of points.
            
            In the following i refers to the ith base function coefficient.
            The matrix has its entries j in its first Npts rows set to
                A[j][i] = (Fourier transformed base function for point j) 
                
            We then choose a number of r-points, n_r, to evaluate the second
            derivative of P(r) at. This is used as our regularization term.
            For a vector r of length n_r, the following n_r rows are set to
                A[j+Npts][i] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
                
            The vector b has its first Npts entries set to
                b[j] = (I(q) observed for point j)
                
            The following n_r entries are set to zero.
            
            The result is found by using scipy.linalg.basic.lstsq to invert
            the matrix and find the coefficients x.
            
            @param nfunc: number of base functions to use.
            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
            @return: c_out, c_cov - the coefficients with covariance matrix 
        """
        # Reset the background value before proceeding
        self.background = 0.0
        return self.lstsq(nfunc, nr=nr)
    
    def iq(self, out, q):
        """
            Function to call to evaluate the scattering intensity.
            
            The value is the result of Cinvertor.iq for the given
            arguments, plus the current background value.
            
            @param out: c-parameters
            @param q: q-value to evaluate the intensity at
            @return: I(q)
        """
        return Cinvertor.iq(self, out, q)+self.background
    
    def invert_optimize(self, nfunc=10, nr=20):
        """
            Slower version of the P(r) inversion that uses scipy.optimize.leastsq.
            
            This probably produces more reliable results, but is much slower.
            The minimization function is set to sum_i[ (I_obs(q_i) - I_theo(q_i))/err**2 ] + alpha * reg_term,
            where the reg_term is given by Svergun: it is the integral of the square of the first derivative
            of P(r), d(P(r))/dr, integrated over the full range of r.
            
            The sum of the values returned by self.residuals for the fitted
            coefficients is stored in self.chi2, and the computation time
            in self.elapsed.
            
            @param nfunc: number of base functions to use.
            @param nr: number of r points for the reg. term (not used by this method).
            @return: c_out, c_cov - the coefficients with covariance matrix 
            @raise RuntimeError: if is_valid() reports invalid data
        """
        from scipy import optimize
        
        self.nfunc = nfunc
        # First, check that the current data is valid
        if self.is_valid() <= 0:
            raise RuntimeError("Invertor.invert: Data array are of different length")
        
        p = numpy.ones(nfunc)
        t_0 = time.time()
        # The 'warning' keyword is no longer accepted by leastsq; it
        # defaulted to True when it existed, so it is simply left out.
        out, cov_x, info, mesg, success = optimize.leastsq(self.residuals, p, full_output=1)
        
        # Compute chi^2
        self.chi2 = sum(self.residuals(out))

        # Store computation time
        self.elapsed = time.time() - t_0
        
        return out, cov_x
    
    def pr_fit(self, nfunc=5):
        """
            This is a direct fit to a given P(r). It assumes that the y data
            is set to some P(r) distribution that we are trying to reproduce
            with a set of base functions.
            
            This method is provided as a test. 
            
            The sum of the values returned by self.pr_residuals for the
            fitted coefficients is stored in self.chi2 (and, as before,
            in self.chisqr); the computation time goes to self.elapsed.
            
            @param nfunc: number of base functions to use.
            @return: c_out, c_cov - the coefficients with covariance matrix
            @raise RuntimeError: if is_valid() reports invalid data
        """
        from scipy import optimize
        
        # First, check that the current data is valid
        if self.is_valid() <= 0:
            raise RuntimeError("Invertor.invert: Data arrays are of different length")
        
        p = numpy.ones(nfunc)
        t_0 = time.time()
        # The 'warning' keyword is no longer accepted by leastsq; it
        # defaulted to True when it existed, so it is simply left out.
        out, cov_x, info, mesg, success = optimize.leastsq(self.pr_residuals, p, full_output=1)
        
        # Compute chi^2
        chisqr = sum(self.pr_residuals(out))
        
        # chi2 is the attribute used everywhere else in this class;
        # chisqr is still set for code that reads the old name.
        self.chi2 = chisqr
        self.chisqr = chisqr
        
        # Store computation time
        self.elapsed = time.time() - t_0

        return out, cov_x
    
    def pr_err(self, c, c_cov, r):
        """    
            Returns the value of P(r) for a given r, and base function
            coefficients, with error.
            
            This is a direct call to get_pr_err.
            
            @param c: base function coefficients
            @param c_cov: covariance matrix of the base function coefficients
            @param r: r-value to evaluate P(r) at
            @return: the result of get_pr_err; to_file unpacks it as
                     a (P(r), error on P(r)) pair
        """
        return self.get_pr_err(c, c_cov, r)
       
    def _accept_q(self, q):
        """
            Tell whether a q-value lies within the user-defined range.
            A bound that is None is not applied.
            
            @param q: q-value to test
            @return: True if q is accepted, False otherwise
        """
        lower = self.q_min
        if lower is not None and q < lower:
            return False
        upper = self.q_max
        return not (upper is not None and q > upper)
       
    def lstsq(self, nfunc=5, nr=20):
        """
            The problem is solved by posing the problem as  Ax = b,
            where x is the set of coefficients we are looking for.
            
            Npts is the number of points.
            
            In the following i refers to the ith base function coefficient.
            The matrix has its entries j in its first Npts rows set to
                A[j][i] = (Fourier transformed base function for point j) 
                
            We then choose a number of r-points, n_r, to evaluate the second
            derivative of P(r) at. This is used as our regularization term.
            For a vector r of length n_r, the following n_r rows are set to
                A[j+Npts][i] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])
                
            The vector b has its first Npts entries set to
                b[j] = (I(q) observed for point j)
                
            The following n_r entries are set to zero.
            
            The result is found by using scipy.linalg.basic.lstsq to invert
            the matrix and find the coefficients x.
            
            When has_bck is True, one extra term is fitted; its coefficient
            is stored in self.background and removed from the output.
            
            Side effects: self.nfunc, self.chi2, self.suggested_alpha,
            self.background, self.out and self.cov are updated.
            
            @param nfunc: number of base functions to use.
            @param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
            @return: self.out, self.cov - the coefficients and their error matrix
            @raise RuntimeError: if is_valid() reports invalid data

            If the result does not allow us to compute the covariance matrix,
            a matrix filled with zeros will be returned.

        """
        # Note: To make sure an array is contiguous:
        # blah = numpy.ascontiguousarray(blah_original)
        # ... before passing it to C
        
        if self.is_valid() < 0:
            raise RuntimeError("Invertor: invalid data; incompatible data lengths.")
        
        self.nfunc = nfunc
        # a -- An M x N matrix.
        # b -- An M x nrhs matrix or M vector.
        npts = len(self.x)
        # NOTE(review): the original code set nq to 0 when sqrt(|alpha|) < 0,
        # a condition that can never be true. The dead branch was removed;
        # the regularization rows are therefore always present.
        nq = nr

        # If we need to fit the background, add a term
        fit_background = self.has_bck
        nfunc_0 = nfunc
        if fit_background:
            nfunc += 1

        a = numpy.zeros([npts+nq, nfunc])
        b = numpy.zeros(npts+nq)
        err = numpy.zeros([nfunc, nfunc])
        
        # Construct the a matrix and b vector that represent the problem
        self._get_matrix(nfunc, nq, a, b)
             
        # Perform the inversion (least square fit).
        # 'lstsq' here is the module-level scipy function, not this method.
        c, chi2, rank, n = lstsq(a, b)
        # Sanity check: chi2 must be convertible to a single number
        try:
            float(chi2)
        except Exception:
            chi2 = -1.0
        self.chi2 = chi2
                
        inv_cov = numpy.zeros([nfunc, nfunc])
        # Get the covariance matrix, defined as inv_cov = a_transposed * a
        self._get_invcov_matrix(nfunc, nr, a, inv_cov)
                    
        # Compute the reg term size for the output
        sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)
                    
        # Alpha that would make the reg term the same size as the signal
        if math.fabs(self.alpha) > 0:
            new_alpha = sum_sig/(sum_reg/self.alpha)
        else:
            new_alpha = 0.0
        self.suggested_alpha = new_alpha
        
        try:
            cov = numpy.linalg.pinv(inv_cov)
            err = math.fabs(chi2/float(npts-nfunc)) * cov
        except Exception:
            # We were not able to estimate the errors:
            # err keeps the zero matrix allocated above
            pass
            
        # Keep a copy of the last output
        if not fit_background:
            self.background = 0
            self.out = c
            self.cov = err
        else:
            # The first coefficient is the background term
            self.background = c[0]
            
            # NOTE(review): the outputs keep the enlarged size (nfunc_0 + 1),
            # with the last entries left at zero, as in the original code.
            err_0 = numpy.zeros([nfunc, nfunc])
            c_0 = numpy.zeros(nfunc)
            
            # Shift everything by one to drop the background term
            c_0[:nfunc_0] = c[1:nfunc_0+1]
            err_0[:nfunc_0, :nfunc_0] = err[1:nfunc_0+1, 1:nfunc_0+1]
                    
            self.out = c_0
            self.cov = err_0
            
        return self.out, self.cov
        
    def estimate_numterms(self, isquit_func=None):
        """
            Return a reasonable guess for the number of terms.
            
            The estimate is delegated to a Num_terms object built on a
            clone of this invertor. If that fails for any reason, alpha is
            estimated for the current number of terms instead.
            
            @param isquit_func: reference to thread function to call to 
                                check whether the computation needs to
                                be stopped.
            
            @return: number of terms, alpha, message
        """
        from num_term import Num_terms
        term_estimator = Num_terms(self.clone())
        try:
            return term_estimator.num_terms(isquit_func)
        except:
            # Fall back on the current number of terms with an
            # estimated alpha; only the alpha part of the result is used
            fallback_alpha = self.estimate_alpha(self.nfunc)[0]
            return self.nfunc, fallback_alpha, "Could not estimate number of terms"
                    
    def estimate_alpha(self, nfunc):
        """
            Returns a reasonable guess for the
            regularization constant alpha
            
            The work is done on a clone of this invertor. A first inversion
            gives a suggested alpha; if the P(r) obtained with it has more
            than one peak, that value is returned. Otherwise up to ten
            smaller values are tried, and the smallest one that still gives
            a single peak is kept.
            
            Errors are not raised: they are reported through the message.
            
            @param nfunc: number of terms to use in the expansion.
            @return: alpha, message, elapsed
            
            where alpha is the estimate for alpha (0 on error),
            message is a message for the user,
            elapsed is the computation time of the first inversion
        """
        # Bound before the try block so that the error path can
        # always return it, even if cloning itself fails
        elapsed = 0
        try:
            pr = self.clone()
            
            # T_0 for computation time
            starttime = time.time()
            
            # If the current alpha is zero, try
            # another value
            if pr.alpha <= 0:
                pr.alpha = 0.0001
                 
            # Perform inversion to find the largest alpha
            out, cov = pr.invert(nfunc)
            elapsed = time.time() - starttime
            initial_alpha = pr.alpha
            initial_peaks = pr.get_peaks(out)
    
            # Try the inversion with the estimated alpha
            pr.alpha = pr.suggested_alpha
            out, cov = pr.invert(nfunc)
    
            # If there is more than one peak to start with,
            # just return the estimate
            if pr.get_peaks(out) > 1:
                return pr.suggested_alpha, None, elapsed
                
            # Look at smaller values
            # We assume that for the suggested alpha, we have 1 peak
            # if not, send a message to change parameters
            alpha = pr.suggested_alpha
            best_alpha = pr.suggested_alpha
            found = False
            for i in range(10):
                pr.alpha = (0.33)**(i+1)*alpha
                out, cov = pr.invert(nfunc)
                
                if pr.get_peaks(out) > 1:
                    found = True
                    break
                best_alpha = pr.alpha
                
            # If we didn't find a turning point for alpha and
            # the initial alpha already had only one peak,
            # just return that
            if not found and initial_peaks == 1 and initial_alpha < best_alpha:
                best_alpha = initial_alpha
                
            # Check whether the size makes sense
            message = ''
            if not found:
                message = "None"
            elif best_alpha >= 0.5*pr.suggested_alpha:
                # best alpha is too big, return a 
                # reasonable value
                message  = "The estimated alpha for your system is too large. "
                message += "Try increasing your maximum distance."
            
            return best_alpha, message, elapsed
    
        except Exception:
            message = "Invertor.estimate_alpha: %s" % sys.exc_info()[1]
            return 0, message, elapsed
    
        
    def to_file(self, path, npts=100):
        """
            Save the state to a file that will be readable
            by SliceView.
            
            The file starts with '#name=value' header lines, followed by
            one '#C_i=value+-error' line per coefficient when an output is
            available, and a table of r, P(r) and its error.
            The file is closed even if writing fails.
            
            @param path: path of the file to write
            @param npts: number of P(r) points to be written
        """
        out_file = open(path, 'w')
        try:
            out_file.write("#d_max=%g\n" % self.d_max)
            out_file.write("#nfunc=%g\n" % self.nfunc)
            out_file.write("#alpha=%g\n" % self.alpha)
            out_file.write("#chi2=%g\n" % self.chi2)
            out_file.write("#elapsed=%g\n" % self.elapsed)
            out_file.write("#qmin=%s\n" % str(self.q_min))
            out_file.write("#qmax=%s\n" % str(self.q_max))
            out_file.write("#slit_height=%g\n" % self.slit_height)
            out_file.write("#slit_width=%g\n" % self.slit_width)
            out_file.write("#background=%g\n" % self.background)
            if self.has_bck == True:
                out_file.write("#has_bck=1\n")
            else:
                out_file.write("#has_bck=0\n")
            out_file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
            # Identity test: comparing a numpy array to None with '=='
            # is an element-wise operation, not a None check
            if self.out is not None:
                if len(self.out) == len(self.cov):
                    for i in range(len(self.out)):
                        out_file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]), str(self.cov[i][i])))
            out_file.write("<r>  <Pr>  <dPr>\n")
            # npts evenly spaced r-values starting at 0, d_max excluded
            r = numpy.arange(0.0, self.d_max, self.d_max/npts)
            
            for r_i in r:
                (value, err) = self.pr_err(self.out, self.cov, r_i)
                out_file.write("%g  %g  %g\n" % (r_i, value, err))
        finally:
            out_file.close()
     
        
    def from_file(self, path):
        """
            Load the state of the Invertor from a file,
            to be able to generate P(r) from a set of
            parameters.
            
            Only the '#name=value' header lines and the '#C_i=value+-error'
            coefficient lines are used; all other lines are ignored.
            The '#nfunc=' line allocates the coefficient arrays, so it has
            to come before the coefficient lines.
            
            @param path: path of the file to load
            @raise RuntimeError: if path is not a file, or if the file
                                 content cannot be interpreted
        """
        import os
        import re
        
        if not os.path.isfile(path):
            raise RuntimeError("Invertor.from_file: '%s' is not a file" % str(path))
        
        # Header names whose value is a float, with the attribute they set
        float_attrs = {'#d_max': 'd_max',
                       '#alpha': 'alpha',
                       '#chi2': 'chi2',
                       '#elapsed': 'elapsed',
                       '#alpha_estimate': 'suggested_alpha',
                       '#slit_height': 'slit_height',
                       '#slit_width': 'slit_width',
                       '#background': 'background'}
        coeff_pattern = re.compile('#C_([0-9]+)')
        
        try:
            fd = open(path, 'r')
            try:
                buff = fd.read()
            finally:
                # Release the file handle whether or not reading worked
                fd.close()
            
            for line in buff.split('\n'):
                if not line.startswith('#'):
                    continue
                toks = line.split('=')
                key = toks[0]
                
                # Coefficient lines: '#C_<index>=<value>+-<error>'
                if key.startswith('#C_'):
                    m = coeff_pattern.search(key)
                    toks2 = toks[1].split('+-')
                    i = int(m.group(1))
                    self.out[i] = float(toks2[0])
                    self.cov[i][i] = float(toks2[1])
                    continue
                
                # Header names only count when followed by '='
                if len(toks) < 2:
                    continue
                
                if key in float_attrs:
                    setattr(self, float_attrs[key], float(toks[1]))
                elif key == '#nfunc':
                    self.nfunc = int(toks[1])
                    self.out = numpy.zeros(self.nfunc)
                    self.cov = numpy.zeros([self.nfunc, self.nfunc])
                elif key == '#qmin':
                    # to_file writes 'None' when no bound is set
                    try:
                        self.q_min = float(toks[1])
                    except ValueError:
                        self.q_min = None
                elif key == '#qmax':
                    try:
                        self.q_max = float(toks[1])
                    except ValueError:
                        self.q_max = None
                elif key == '#has_bck':
                    self.has_bck = (int(toks[1]) == 1)
        
        except Exception:
            raise RuntimeError("Invertor.from_file: corrupted file\n%s" % sys.exc_info()[1])
        
        
    
    
if __name__ == "__main__":
    # When run as a script, only check that an Invertor can be created
    o = Invertor()