/*
 *******************************************************************************
 *
 *
 *                       Copyright (c) 2002
 *                       Henrik Vestermark
 *                       Denmark
 *
 *                       All Rights Reserved
 *
 *   This source file is subject to the terms and conditions of the
 *   Henrik Vestermark Software License Agreement which restricts the manner
 *   in which it may be used.
 *
 *******************************************************************************
 *
 *
 * Module name     :   cpoly.cpp
 * Module ID Nbr   :   
 * Description     :   cpoly.cpp -- Jenkins-Traub real polynomial root finder.
 *                     Translation of TOMS493 from FORTRAN to C. This
 *                     implementation of Jenkins-Traub partially adapts
 *                     the original code to a C environment by restruction
 *                     many of the 'goto' controls to better fit a block
 *                     structured form. It also eliminates the global memory
 *                     allocation in favor of local, dynamic memory management.
 *
 *                     The calling conventions are slightly modified to return
 *                     the number of roots found as the function value.
 *
 *                     INPUT:
 *                     opr - double precision vector of real coefficients in order of
 *                          decreasing powers.
 *                     opi - double precision vector of imaginary coefficients in order of
 *                          decreasing powers.
 *                     degree - integer degree of polynomial
 *
 *                     OUTPUT:
 *                     zeror,zeroi - output double precision vectors of the
 *                          real and imaginary parts of the zeros.
 *                            to be consistent with rpoly.cpp the zeros is inthe index
 *                            [0..max_degree-1]
 *
 *                     RETURN:
 *                     returnval:   -1 if leading coefficient is zero, otherwise
 *                          number of roots found. 
 * --------------------------------------------------------------------------
 * Change Record   :   
 *
 * Version	Author/Date		Description of changes
 * -------  -----------		----------------------
 * 01.01		HVE/021101		Initial release
 *
 * -- SRK changed name of *pi to *piw (and all occurances in this file) to avoid the conflict with pi in Apple's fp.h
 *
 * End of Change Record
 * --------------------------------------------------------------------------
 */


/* define version string */

#include <stdlib.h>
#include <math.h>
#include <float.h>

static double sr, si, tr, ti, pvr, pvi, are, mre, eta, infin;
static int nn;
static double *pr, *piw, *hr, *hi, *qpr, *qpi, *qhr, *qhi, *shr, *shi; 

static void noshft( const int l1 );
static void fxshft( const int l2, double *zr, double *zi, int *conv );
static void vrshft( const int l3, double *zr, double *zi, int *conv );
static void calct( int *bol );
static void nexth( const int bol );
static void polyev( const int nn, const double sr, const double si, const double pr[], const double piw[], double qr[], double qi[], double *pvr, double *pvi );
static double errev( const int nn, const double qr[], const double qi[], const double ms, const double mp, const double are, const double mre );
static void cauchy( const int nn, double pt[], double q[], double *fn_val );
static double scale( const int nn, const double pt[], const double eta, const double infin, const double smalno, const double base );
static void cdivid( const double ar, const double ai, const double br, const double bi, double *cr, double *ci );
static double cmod( const double r, const double i );
static void mcon( double *eta, double *infiny, double *smalno, double *base );

int cpoly( const double *opr, const double *opi, int degree, double *zeror, double *zeroi ) 
{
	int cnt1, cnt2, idnn2, i, conv;
	double xx, yy, cosr, sinr, smalno, base, xxx, zr, zi, bnd;
	
	mcon( &eta, &infin, &smalno, &base );
	are = eta;
	mre = 2.0 * sqrt( 2.0 ) * eta;
	xx = 0.70710678;
	yy = -xx;
	cosr = -0.060756474;
	sinr = -0.99756405;
	nn = degree;  
	
	// Algorithm fails if the leading coefficient is zero
	if( opr[ 0 ] == 0 && opi[ 0 ] == 0 )
		return -1;
	
	// Allocate arrays
	pr = malloc( sizeof( double ) * ( degree + 1 ) );
	piw = malloc( sizeof( double ) * ( degree + 1 ) );
	hr =malloc( sizeof( double ) * ( degree + 1 ) );
	hi = malloc( sizeof( double ) * ( degree + 1 ) );
	qpr= malloc( sizeof( double ) * ( degree + 1 ) );
	qpi= malloc( sizeof( double ) * ( degree + 1 ) );
	qhr= malloc( sizeof( double ) * ( degree + 1 ) );
	qhi= malloc( sizeof( double ) * ( degree + 1 ) );
	shr= malloc( sizeof( double ) * ( degree + 1 ) );
	shi= malloc( sizeof( double ) * ( degree + 1 ) );
	
	// Remove the zeros at the origin if any
	while( opr[ nn ] == 0 && opi[ nn ] == 0 )
	{
		idnn2 = degree - nn;
		zeror[ idnn2 ] = 0;
		zeroi[ idnn2 ] = 0;
		nn--;
	}
	
	// Make a copy of the coefficients
	for( i = 0; i <= nn; i++ )
	{
		pr[ i ] = opr[ i ];
		piw[ i ] = opi[ i ];
		shr[ i ] = cmod( pr[ i ], piw[ i ] );
	}
	
	// Scale the polynomial
	bnd = scale( nn, shr, eta, infin, smalno, base );
	if( bnd != 1 )
		for( i = 0; i <= nn; i++ )
		{
			pr[ i ] *= bnd;
			piw[ i ] *= bnd;
		}
	
search: 
	if( nn <= 1 )
	{
		cdivid( -pr[ 1 ], -piw[ 1 ], pr[ 0 ], piw[ 0 ], &zeror[ degree-1 ], &zeroi[ degree-1 ] );
		goto finish;
	}
	
	// Calculate bnd, alower bound on the modulus of the zeros
	for( i = 0; i<= nn; i++ )
		shr[ i ] = cmod( pr[ i ], piw[ i ] );
	
	cauchy( nn, shr, shi, &bnd );
	
	// Outer loop to control 2 Major passes with different sequences of shifts
	for( cnt1 = 1; cnt1 <= 2; cnt1++ )
	{
		// First stage  calculation , no shift
		noshft( 5 );
		
		// Inner loop to select a shift
		for( cnt2 = 1; cnt2 <= 9; cnt2++ )
		{
			// Shift is chosen with modulus bnd and amplitude rotated by 94 degree from the previous shif
			xxx = cosr * xx - sinr * yy;
			yy = sinr * xx + cosr * yy;
			xx = xxx;
			sr = bnd * xx;
			si = bnd * yy;
			
			// Second stage calculation, fixed shift
			fxshft( 10 * cnt2, &zr, &zi, &conv );
			if( conv )
            {
				// The second stage jumps directly to the third stage ieration
				// If successful the zero is stored and the polynomial deflated
				idnn2 = degree - nn;
				zeror[ idnn2 ] = zr;
				zeroi[ idnn2 ] = zi;
				nn--;
				for( i = 0; i <= nn; i++ )
				{
					pr[ i ] = qpr[ i ];
					piw[ i ] = qpi[ i ];
				}
				goto search;
            }
			// If the iteration is unsuccessful another shift is chosen
		}
		// if 9 shifts fail, the outer loop is repeated with another sequence of shifts
	}
	
	// The zerofinder has failed on two major passes
	// return empty handed with the number of roots found (less than the original degree)
	degree -= nn;
	
finish:
	// Deallocate arrays
	free( pr );
	free( piw );
	free( hr );
	free( hi );
	free( qpr );
	free( qpi );
	free( qhr );
	free( qhi );
	free( shr );
	free( shi );
	
	return degree;       
}


// COMPUTES  THE DERIVATIVE  POLYNOMIAL AS THE INITIAL H
// POLYNOMIAL AND COMPUTES L1 NO-SHIFT H POLYNOMIALS.
//
static void noshft( const int l1 )
{
	int i, j, jj, n, nm1;
	double xni, t1, t2;
	
	n = nn;
	nm1 = n - 1;
	for( i = 0; i < n; i++ )
	{
		xni = nn - i;
		hr[ i ] = xni * pr[ i ] / n;
		hi[ i ] = xni * piw[ i ] / n;
	}
	for( jj = 1; jj <= l1; jj++ )
	{
		if( cmod( hr[ n - 1 ], hi[ n - 1 ] ) > eta * 10 * cmod( pr[ n - 1 ], piw[ n - 1 ] ) )
		{
			cdivid( -pr[ nn ], -piw[ nn ], hr[ n - 1 ], hi[ n - 1 ], &tr, &ti );
			for( i = 0; i < nm1; i++ )
            {
				j = nn - i - 1;
				t1 = hr[ j - 1 ];
				t2 = hi[ j - 1 ];
				hr[ j ] = tr * t1 - ti * t2 + pr[ j ];
				hi[ j ] = tr * t2 + ti * t1 + piw[ j ];
            }
			hr[ 0 ] = pr[ 0 ];
			hi[ 0 ] = piw[ 0 ];
		}
		else
		{
			// If the constant term is essentially zero, shift H coefficients
			for( i = 0; i < nm1; i++ )
            {
				j = nn - i - 1;
				hr[ j ] = hr[ j - 1 ];
				hi[ j ] = hi[ j - 1 ];
            }
			hr[ 0 ] = 0;
			hi[ 0 ] = 0;
		}
	}
}

// COMPUTES L2 FIXED-SHIFT H POLYNOMIALS AND TESTS FOR CONVERGENCE.
// INITIATES A VARIABLE-SHIFT ITERATION AND RETURNS WITH THE
// APPROXIMATE ZERO IF SUCCESSFUL.
// L2 - LIMIT OF FIXED SHIFT STEPS
// ZR,ZI - APPROXIMATE ZERO IF CONV IS .TRUE.
// CONV  - LOGICAL INDICATING CONVERGENCE OF STAGE 3 ITERATION
//
static void fxshft( const int l2, double *zr, double *zi, int *conv )
{
	int i, j, n;
	int test, pasd, bol;
	double otr, oti, svsr, svsi;
	
	n = nn;
	polyev( nn, sr, si, pr, piw, qpr, qpi, &pvr, &pvi );
	test = 1;
	pasd = 0;
	
	// Calculate first T = -P(S)/H(S)
	calct( &bol );
	
	// Main loop for second stage
	for( j = 1; j <= l2; j++ )
	{
		otr = tr;
		oti = ti;
		
		// Compute the next H Polynomial and new t
		nexth( bol );
		calct( &bol );
		*zr = sr + tr;
		*zi = si + ti;
		
		// Test for convergence unless stage 3 has failed once or this
		// is the last H Polynomial
		if( !( bol || !test || j == 12 ) )
			if( cmod( tr - otr, ti - oti ) < 0.5 * cmod( *zr, *zi ) )
            {
				if( pasd )
				{
					// The weak convergence test has been passwed twice, start the third stage
					// Iteration, after saving the current H polynomial and shift
					for( i = 0; i < n; i++ )
					{
						shr[ i ] = hr[ i ];
						shi[ i ] = hi[ i ];
					}
					svsr = sr;
					svsi = si;
					vrshft( 10, zr, zi, conv );
					if( *conv ) return;
					
					//The iteration failed to converge. Turn off testing and restore h,s,pv and T
					test = 0;
					for( i = 0; i < n; i++ )
					{
						hr[ i ] = shr[ i ];
						hi[ i ] = shi[ i ];
					}
					sr = svsr;
					si = svsi;
					polyev( nn, sr, si, pr, piw, qpr, qpi, &pvr, &pvi );
					calct( &bol );
					continue;
				}
				pasd = 1;
            }
			else
				pasd = 0;
	}
	
	// Attempt an iteration with final H polynomial from second stage
	vrshft( 10, zr, zi, conv );
}

// CARRIES OUT THE THIRD STAGE ITERATION.
// L3 - LIMIT OF STEPS IN STAGE 3.
// ZR,ZI   - ON ENTRY CONTAINS THE INITIAL ITERATE, IF THE
//           ITERATION CONVERGES IT CONTAINS THE FINAL ITERATE ON EXIT.
// CONV    -  .TRUE. IF ITERATION CONVERGES
//
static void vrshft( const int l3, double *zr, double *zi, int *conv )
{
	int b, bol;
	int i, j;
	double mp, ms, omp, relstp, r1, r2, tp;
	
	*conv = 0;
	b = 0;
	sr = *zr;
	si = *zi;
	
	// Main loop for stage three
	for( i = 1; i <= l3; i++ )
	{
		// Evaluate P at S and test for convergence
		polyev( nn, sr, si, pr, piw, qpr, qpi, &pvr, &pvi );
		mp = cmod( pvr, pvi );
		ms = cmod( sr, si );
		if( mp <= 20 * errev( nn, qpr, qpi, ms, mp, are, mre ) )
		{
			// Polynomial value is smaller in value than a bound onthe error
			// in evaluationg P, terminate the ietartion
			*conv = 1;
			*zr = sr;
			*zi = si;
			return;
		}
		if( i != 1 )
		{
			if( !( b || mp < omp || relstp >= 0.05 ) )
            {
				// Iteration has stalled. Probably a cluster of zeros. Do 5 fixed 
				// shift steps into the cluster to force one zero to dominate
				tp = relstp;
				b = 1;
				if( relstp < eta ) tp = eta;
				r1 = sqrt( tp );
				r2 = sr * ( 1 + r1 ) - si * r1;
				si = sr * r1 + si * ( 1 + r1 );
				sr = r2;
				polyev( nn, sr, si, pr, piw, qpr, qpi, &pvr, &pvi );
				for( j = 1; j <= 5; j++ )
				{
					calct( &bol );
					nexth( bol );
				}
				omp = infin;
				goto _20;
            }
			
			// Exit if polynomial value increase significantly
			if( mp *0.1 > omp ) return;
		}
		
		omp = mp;
		
		// Calculate next iterate
	_20:  calct( &bol );
		nexth( bol );
		calct( &bol );
		if( !bol )
		{
			relstp = cmod( tr, ti ) / cmod( sr, si );
			sr += tr;
			si += ti;
		}
	}
}

// COMPUTES  T = -P(S)/H(S).
// BOOL   - LOGICAL, SET TRUE IF H(S) IS ESSENTIALLY ZERO.
static void calct( int *bol )
{
	int n;
	double hvr, hvi;
	
	n = nn;
	
	// evaluate h(s)
	polyev( n - 1, sr, si, hr, hi, qhr, qhi, &hvr, &hvi );
	*bol = cmod( hvr, hvi ) <= are * 10 * cmod( hr[ n - 1 ], hi[ n - 1 ] ) ? 1 : 0;
	if( !*bol )
	{
		cdivid( -pvr, -pvi, hvr, hvi, &tr, &ti );
		return;
	}
	
	tr = 0;
	ti = 0;
}

// CALCULATES THE NEXT SHIFTED H POLYNOMIAL.
// BOOL   -  LOGICAL, IF .TRUE. H(S) IS ESSENTIALLY ZERO
//
static void nexth( const int bol )
{
	int j, n;
	double t1, t2;
	
	n = nn;
	if( !bol )
	{
		for( j = 1; j < n; j++ )
		{
			t1 = qhr[ j - 1 ];
			t2 = qhi[ j - 1 ];
			hr[ j ] = tr * t1 - ti * t2 + qpr[ j ];
			hi[ j ] = tr * t2 + ti * t1 + qpi[ j ];
		}
		hr[ 0 ] = qpr[ 0 ];
		hi[ 0 ] = qpi[ 0 ];
		return;
	}
	
	// If h[s] is zero replace H with qh
	for( j = 1; j < n; j++ )
	{
		hr[ j ] = qhr[ j - 1 ];
		hi[ j ] = qhi[ j - 1 ];
	}
	hr[ 0 ] = 0;
	hi[ 0 ] = 0;
}

// EVALUATES A POLYNOMIAL  P  AT  S  BY THE HORNER RECURRENCE
// PLACING THE PARTIAL SUMS IN Q AND THE COMPUTED VALUE IN PV.
//  
static void polyev( const int nn, const double sr, const double si, const double pr[], const double piw[], double qr[], double qi[], double *pvr, double *pvi )  
{
	int i;
	double t;
	
	qr[ 0 ] = pr[ 0 ];
	qi[ 0 ] = piw[ 0 ];
	*pvr = qr[ 0 ];
	*pvi = qi[ 0 ];
	
	for( i = 1; i <= nn; i++ )
	{
		t = ( *pvr ) * sr - ( *pvi ) * si + pr[ i ];
		*pvi = ( *pvr ) * si + ( *pvi ) * sr + piw[ i ];
		*pvr = t;
		qr[ i ] = *pvr;
		qi[ i ] = *pvi;
	}
}

// BOUNDS THE ERROR IN EVALUATING THE POLYNOMIAL BY THE HORNER RECURRENCE.
// QR,QI - THE PARTIAL SUMS
// MS    -MODULUS OF THE POINT
// MP    -MODULUS OF POLYNOMIAL VALUE
// ARE, MRE -ERROR BOUNDS ON COMPLEX ADDITION AND MULTIPLICATION
//
static double errev( const int nn, const double qr[], const double qi[], const double ms, const double mp, const double are, const double mre )
{
	int i;
	double e;
	
	e = cmod( qr[ 0 ], qi[ 0 ] ) * mre / ( are + mre );
	for( i = 0; i <= nn; i++ )
		e = e * ms + cmod( qr[ i ], qi[ i ] );
	
	return e * ( are + mre ) - mp * mre;
}

// CAUCHY COMPUTES A LOWER BOUND ON THE MODULI OF THE ZEROS OF A
// POLYNOMIAL - PT IS THE MODULUS OF THE COEFFICIENTS.
//
static void cauchy( const int nn, double pt[], double q[], double *fn_val )
{
	int i, n;
	double x, xm, f, dx, df;
	
	pt[ nn ] = -pt[ nn ];
	
	// Compute upper estimate bound
	n = nn;
	x = exp( log( -pt[ nn ] ) - log( pt[ 0 ] ) ) / n;
	if( pt[ n - 1 ] != 0 )
	{
		// Newton step at the origin is better, use it
		xm = -pt[ nn ] / pt[ n - 1 ];
		if( xm < x ) x = xm;
	}
	
	// Chop the interval (0,x) until f < 0
	while(1)
	{
		xm = x * 0.1;
		f = pt[ 0 ];
		for( i = 1; i <= nn; i++ )
			f = f * xm + pt[ i ];
		if( f <= 0 )
			break;
		x = xm;
	}
	dx = x;
	
	// Do Newton iteration until x converges to two decimal places
	while( fabs( dx / x ) > 0.005 )
	{
		q[ 0 ] = pt[ 0 ];
		for( i = 1; i <= nn; i++ )
			q[ i ] = q[ i - 1 ] * x + pt[ i ];
		f = q[ nn ];
		df = q[ 0 ];
		for( i = 1; i < n; i++ )
			df = df * x + q[ i ];
		dx = f / df;
		x -= dx;
	}
	
	*fn_val = x;
}

// RETURNS A SCALE FACTOR TO MULTIPLY THE COEFFICIENTS OF THE POLYNOMIAL.
// THE SCALING IS DONE TO AVOID OVERFLOW AND TO AVOID UNDETECTED UNDERFLOW
// INTERFERING WITH THE CONVERGENCE CRITERION.  THE FACTOR IS A POWER OF THE
// BASE.
// PT - MODULUS OF COEFFICIENTS OF P
// ETA, INFIN, SMALNO, BASE - CONSTANTS DESCRIBING THE FLOATING POINT ARITHMETIC.
//
static double scale( const int nn, const double pt[], const double eta, const double infin, const double smalno, const double base )
{
	int i, l;
	double hi, lo, max, min, x, sc;
	double fn_val;
	
	// Find largest and smallest moduli of coefficients
	hi = sqrt( infin );
	lo = smalno / eta;
	max = 0;
	min = infin;
	
	for( i = 0; i <= nn; i++ )
	{
		x = pt[ i ];
		if( x > max ) max = x;
		if( x != 0 && x < min ) min = x;
	}
	
	// Scale only if there are very large or very small components
	fn_val = 1;
	if( min >= lo && max <= hi ) return fn_val;
	x = lo / min;
	if( x <= 1 )
		sc = 1 / ( sqrt( max )* sqrt( min ) );
	else
	{
		sc = x;
		if( infin / sc > max ) sc = 1;
	}
	l = (int)( log( sc ) / log(base ) + 0.5 );
	fn_val = pow( base , l );
	return fn_val;
}

// COMPLEX DIVISION C = A/B, AVOIDING OVERFLOW.
//
static void cdivid( const double ar, const double ai, const double br, const double bi, double *cr, double *ci )
{
	double r, d, t, infin;
	
	if( br == 0 && bi == 0 )
	{
		// Division by zero, c = infinity
		mcon( &t, &infin, &t, &t );
		*cr = infin;
		*ci = infin;
		return;
	}
	
	if( fabs( br ) < fabs( bi ) )
	{
		r = br/ bi;
		d = bi + r * br;
		*cr = ( ar * r + ai ) / d;
		*ci = ( ai * r - ar ) / d;
		return;
	}
	
	r = bi / br;
	d = br + r * bi;
	*cr = ( ar + ai * r ) / d;
	*ci = ( ai - ar * r ) / d;
}

// MODULUS OF A COMPLEX NUMBER AVOIDING OVERFLOW.
//
static double cmod( const double r, const double i )
{
	double ar, ai;
	
	ar = fabs( r );
	ai = fabs( i );
	if( ar < ai )
		return ai * sqrt( 1.0 + pow( ( ar / ai ), 2.0 ) );
	
	if( ar > ai )
		return ar * sqrt( 1.0 + pow( ( ai / ar ), 2.0 ) );
	
	return ar * sqrt( 2.0 );
}

// MCON PROVIDES MACHINE CONSTANTS USED IN VARIOUS PARTS OF THE PROGRAM.
// THE USER MAY EITHER SET THEM DIRECTLY OR USE THE STATEMENTS BELOW TO
// COMPUTE THEM. THE MEANING OF THE FOUR CONSTANTS ARE -
// ETA       THE MAXIMUM RELATIVE REPRESENTATION ERROR WHICH CAN BE DESCRIBED
//           AS THE SMALLEST POSITIVE FLOATING-POINT NUMBER SUCH THAT
//           1.0_dp + ETA &gt; 1.0.
// INFINY    THE LARGEST FLOATING-POINT NUMBER
// SMALNO    THE SMALLEST POSITIVE FLOATING-POINT NUMBER
// BASE      THE BASE OF THE FLOATING-POINT NUMBER SYSTEM USED
//
static void mcon( double *eta, double *infiny, double *smalno, double *base )
{
	*base = 10;//DBL_RADIX;
	*eta = DBL_EPSILON;
	*infiny = DBL_MAX;
	*smalno = DBL_MIN;
}