
/* tests/test-fields.C
 * Written by Dan Roche
 * Copyright (C) June 2004 Dan Roche
 *
 * ========LICENCE========
 * This file is part of the library LinBox.
 *
 * LinBox is free software: you can redistribute it and/or modify
 * it under the terms of the  GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 * ========LICENCE========
 */


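/*! @file  tests/benchmark-fields.C
 * @ingroup tests
 * @brief  Benchmarks basic arithmetic (add, mul, axpy, dot products, ...) over
 * the available LinBox field and ring implementations and prints the rates in
 * mops (million operations per second).
 */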

#include "linbox/linbox-config.h"
#include "linbox/util/timer.h"
// #include "linbox/field/givaro-gfq.h"

#ifdef __LINBOX_HAVE_NTL
#include "linbox/ring/ntl.h"
#include "linbox/ring/pid-ntl-lzz_p.h"
#include "linbox/ring/pir-ntl-zz_p.h"
#endif

#include "linbox/field/modular.h"
#include <givaro/modular-balanced.h>
#include "linbox/field/Modular/modular-crooked-double.h"
#include "linbox/field/field-traits.h"
#include "linbox/vector/stream.h"
#include "linbox/integer.h"
#include "linbox/ring/pir-modular-int32.h"
// #include "linbox/field/gf2.h"
#include "linbox/field/gmp-rational.h"
#include "linbox/ring/local2_32.h"

#ifdef __LINBOX_HAVE_LIDIA
#include "linbox/field/lidia.h"
#endif

#include <iostream>
#include <iomanip>


using namespace LinBox;

/* fieldTest is a template function that measures the arithmetic throughput of
 * a given field on this machine.
 *
 * Arguments:
 *   f        - the field object to benchmark.
 *   array    - caller-provided buffer of at least nine doubles; it does not
 *              need to be initialized.  On return, slots 0..8 hold the rates,
 *              in mops (million operations per second), of add, sub, neg, mul,
 *              inv, div, axpy, dot1 and dot2, respectively.  (Dot1 is
 *              dense*dense, Dot2 is dense*sparse.)
 *   iter     - optional; how many loop iterations to use per measurement.
 *   fulltest - optional; when false only add, mul, axpy and dot1 are timed.
 *
 * A slot whose operation was skipped, or whose measured time did not exceed
 * the loop overhead, is converted with a floating-point division by zero
 * (+inf under IEEE 754 arithmetic), exactly as before.
 *
 * NOTE(review): iter is a long, which is only 32 bits wide on some platforms;
 * callers passing an int64_t count may be truncated there.
 */

template< class Field >
void fieldTest( const Field& f, double* array, long iter = 1000000, bool fulltest = false )
{

	size_t vectorSize = 10000;
	float sparsity = .01f;
	// The counter must be as wide as iter: an int counter overflows (undefined
	// behaviour, in practice a loop that never ends) once iter exceeds INT_MAX.
	long i;

	// The conversion loop at the end reads all nine slots, including those of
	// the measurements skipped when fulltest is false, so do not rely on the
	// caller having initialized the buffer.
	for( i = 0; i < 9; i++ ) array[(size_t)i] = 0;

	// initialize a few field elements,
	typedef typename Field::Element Element;
	Element returnValue; f.assign(returnValue, f.one);
	Element s; f.assign(s, f.zero);

	Element a, b, c;
	typename Field::RandIter r(f);
	r.random( a ); r.random( b ); r.random( c );
	// b is the divisor of the div measurement and a random element may be
	// zero; use the same guard the inv loop applies to its operand.
	if (f.isZero(b)) f.assign(b, f.one);

	BlasVector<Field> dv1( f,  vectorSize ), dv2( f, vectorSize );
	for (i = 0; i < (long) vectorSize; ++i ) {
		r.random( dv1[(size_t)i] );
		r.random( dv2[(size_t)i] );
	}
	RandomSparseStream<Field> sparse( f, sparsity, vectorSize );
	typename RandomSparseStream<Field>::Vector sv; sparse.get( sv );

	VectorWrapper::ensureDim (dv1,vectorSize);
	VectorWrapper::ensureDim (dv2,vectorSize);
	VectorWrapper::ensureDim (sv,vectorSize);

	VectorDomain<Field> VD( f );

	UserTimer timer;
	double overHeadTime;

	// Every scalar loop below does init + <operation> + addin.  Time the
	// init + addin part alone so it can be subtracted from each measurement.
	// (int)i keeps the init overload that was used with the former int
	// counter; the value only serves to vary the operand.
	timer.clear(); timer.start();
	f.assign(s, f.zero);
	for( i = 0; i < iter; i++ ) { f.init(returnValue, (int)i); f.addin(s, returnValue); }
	timer.stop(); overHeadTime = timer.time();

	// add
	timer.clear(); timer.start();
	for( i = 0; i < iter; i++ ) {
		f.init(a, (int)i);
		f.add( returnValue, a, b);
		f.addin(s, returnValue);
	}
	timer.stop(); array[0] = timer.time() - overHeadTime;
	//std::cout << iter << " add done " << array[0] << std::endl;

	if (fulltest) {
		// sub
		timer.clear(); timer.start();
		for( i = 0; i < iter; i++ ) {
			f.init(a, (int)i);
			f.sub( returnValue, a, b);
			f.addin(s, returnValue);
		}
		timer.stop(); array[1] = timer.time() - overHeadTime;

		// neg
		timer.clear(); timer.start();
		for( i = 0; i < iter; i++ ) {
			f.init(a, (int)i);
			f.neg( returnValue, a);
			f.addin(s, returnValue);
		}
		timer.stop(); array[2] = timer.time() - overHeadTime;
	} // end if (fulltest)

	// mul
	timer.clear(); timer.start();
	for( i = 0; i < iter; i++ ) {
		f.init(a, (int)i);
		f.mul( returnValue, a, b);
		f.addin(s, returnValue);
	}
	timer.stop(); array[3] = timer.time() - overHeadTime;
	//std::cout << iter << " mul done " << array[3] << std::endl;

	if (fulltest) {
		// inv (a zero operand is replaced by one)
		timer.clear(); timer.start();
		for( i = 0; i < iter; i++ ) {
			f.init(a, (int)i);  if (f.isZero(a)) f.assign(a, f.one);
			f.inv( returnValue, a);
			f.addin(s, returnValue);
		}
		timer.stop(); array[4] = timer.time() - overHeadTime;

		// div (b was made nonzero above)
		timer.clear(); timer.start();
		for( i = 0; i < iter; i++ ) {
			f.init(a, (int)i);
			f.div( returnValue, a, b);
			f.addin(s, returnValue);
		}
		timer.stop(); array[5] = timer.time() - overHeadTime;
	} // end if (fulltest)

	// axpy
	timer.clear(); timer.start();
	for( i = 0; i < iter; i++ ) {
		f.init(a, (int)i);
		f.axpy( returnValue, a, b, c);
		f.addin(s, returnValue);
	}
	timer.stop(); array[6] = timer.time() - overHeadTime;
	//std::cout << timer.time() << "  - " << overHeadTime << " = " << array[6] << std::endl;;

	// The dot products run iter/vectorSize rounds; the scalar-loop overhead
	// is not subtracted from them.
	const long dotRounds = iter/(long)vectorSize;

	// DotProduct1 ( dense * dense )
	timer.clear(); timer.start();
	for( i = 0; i < dotRounds; i++ ) {
		f.init(dv1.back(), (int)i);
		VD.dot( returnValue, dv1, dv2 );
		f.addin(s, returnValue);
	}
	timer.stop(); array[7] = timer.time();
	//	std::cout << (iter/vectorSize) << " dd " << timer.time() << std::endl;;

	if (fulltest) {
		// DotProduct2 ( dense * sparse )
		// 100 sparse dots per round; presumably chosen as 1/sparsity so that
		// one round costs about as much as one dense dot -- TODO confirm.
		const int sparsity_inv = 100;
		timer.clear(); timer.start();
		for( i = 0; i < dotRounds; i++ ) {
			f.init(dv1.back(), (int)i);
			for ( int j = 0; j < sparsity_inv; ++ j ) {
				f.init(dv1.front(), j);
				VD.dot( returnValue, dv1, sv );
				f.addin(s, returnValue);
			}
		}
		timer.stop(); array[8] = timer.time();
		//std::cout << "ds " << timer.time() << std::endl;;
	} // end if (fulltest)

	// Convert timings to mops (million operations per second)
	for( i = 0; i < 9; i++ ) {
		double t = array[(size_t)i];
		array[(size_t)i] = (double)iter / (t > 0 ? (t * 1000000) : 0) ;
	}
	// use s (just in case compiler cares)
	if (f.isZero(s)) std::cout << "zero sum" << std::endl;
}

/* getOps calibrates the benchmarks.  It takes the length of the calibration
 * interval, unit (compared against opsClock.time(), presumably seconds), and
 * keeps doubling an iteration count until one timed run of the inner loop
 * (one floating-point and one integer multiplication per iteration) takes at
 * least that long.  The count of that last run is returned, so for unit == 1
 * the result is roughly the number of times per second the inner loop can be
 * executed on the current machine.
 */
int64_t getOps(int unit)
{
	int64_t ops = 1;
	int64_t i = 0;
	// unsigned: the repeated squaring wraps around instead of overflowing a
	// signed int, which would be undefined behaviour.
	unsigned int a = 13;
	double b = 1.3;
	// Storing the result in a volatile makes the loop's work observable, so
	// the compiler cannot remove the timed loop altogether.
	volatile double sink = 0;
	UserTimer opsClock;
	opsClock.clear();
	// The bound on ops keeps the doubling from overflowing int64_t should the
	// timer never reach unit.
	while( opsClock.time() < unit && ops < (int64_t(1) << 62) ) {
		ops *= 2;
		i = 0;
		opsClock.start();
		while( ++i < ops ) {
			a *= a;
			b *= b;
		}
		opsClock.stop();
		// mix the two results (outside the timed region) and publish them
		if (a<b)
			b=a;
		else
			b = 2*a;
		sink = b;
	}
	return ops;
}

/* printTimings writes one row of benchmark results to std::cout, without a
 * trailing newline.  timings must hold at least nine values in the slot order
 * add, sub, neg, mul, inv, div, axpy, dot1, dot2.
 *
 * With fulltest the row shows slots 0..5, then 6 and 7, then 8; without it
 * only slots 6 and 7.  Every value is right-aligned in 11 columns and followed
 * by a blank.  The row ends with the axpy/(mul+add) ratio, which has no
 * trailing blank.
 */
void printTimings( double* timings, bool fulltest = false )
{
	const int width = 11;

	if (fulltest) {
		// add, sub, neg, mul, inv, div
		for (int col = 0; col <= 5; ++col)
			std::cout << std::setw(width) << timings[col] << ' ';
	}

	// axpy and dense dot product are always shown
	for (int col = 6; col <= 7; ++col)
		std::cout << std::setw(width) << timings[col] << ' ';

	if (fulltest) {
		// sparse dot product
		std::cout << std::setw(width) << timings[8] << ' ';
	}

	// rate of doing one add followed by one mul, from the separate rates
	const double addThenMul = 1/(1/timings[0] + 1/timings[3]);
	std::cout << std::setw(width) << timings[6]/addThenMul; // axpy/(mul+add) ratio
}

/* doTest benchmarks one field type and prints its row of the results table.
 *
 * name is the label printed in the first column, p and exp are handed to the
 * Field constructor, iter is the iteration count given to fieldTest, and
 * fulltest selects the long or the short set of measurements.
 *
 * When FieldTraits<Field> rejects the modulus or the exponent, no field is
 * built and an "out of range" line is printed instead.
 */
template <class Field>
void doTest(const char* name, integer& p, integer& exp, int64_t& iter, bool fulltest = false)
{
	const bool supported = FieldTraits<Field>::goodModulus( p )
	                    && FieldTraits<Field>::goodExponent( exp );

	if( !supported ) {
		std::cout << std::setw(20) << name << ": " << p << "^" << exp << " is out of range" << std::endl;
		return;
	}

	// result buffer handed to fieldTest and then to printTimings
	static double mops[11];
	Field fld( p, exp );
	fieldTest( fld, mops, iter, fulltest);

	// one table row: the name, then the timings
	std::cout << std::setw(20) << name;
	printTimings( mops, fulltest);
	std::cout << std::endl;
}

/* Benchmark driver.
 *
 * Usage: <program> [prime [exponent [fulltest]]]
 *   prime    - modulus handed to every field constructor (default 101)
 *   exponent - exponent handed to every field constructor (default 1)
 *   fulltest - the full set of operations is timed when this argument starts
 *              with the character '1'; otherwise only the short set is timed
 *
 * Returns 0 on success and 1 when too many arguments are given.
 */
int main(int argc, char** argv)
{
	// Reject a bad command line before spending time on calibration.
	if( argc > 4 ) {
		std::cerr << "usage: " << argv[0] << " [prime [exponent [fulltest]]]" << std::endl;
		return 1;
	}

	int64_t ops = getOps(1);
	std::cout << "timings recorded in mops.  Bigger is better." << std::endl;
	std::cout << "Ops per sec, roughly: " << ops << std::endl;
	//int64_t iterations = ops/16;
	int64_t iterations = ops;
	integer prime(101), exp(1);
	if( argc >= 2 ) prime = integer( argv[1] );
	if( argc >= 3 ) exp = integer( argv[2] );
	//bool fulltest = true;
	bool fulltest = false;
	// Compare against the character '1'; comparing against the integer 1
	// could never match a command-line argument.
	if( argc > 3 ) fulltest = ( argv[3][0] == '1' );

	// Table header; the columns match what printTimings emits.
	std::cout << std::setw(20) << "Field Name";
	if (fulltest) {
		std::cout
		<< std::setw(12) << "add "
		<< std::setw(12) << "sub "
		<< std::setw(12) << "neg "
		<< std::setw(12) << "mul "
		<< std::setw(12) << "inv "
		<< std::setw(12) << "div "
		;
	}
	std::cout
	<< std::setw(12) << "axpy"
	<< std::setw(12) << "dot d*d "
	;
	if (fulltest) {
		std::cout
		<< std::setw(12) << "dot d*s "
		;
	}
	std::cout
	<< std::setw(12) << "axpy/(mul+add)"
	<< std::endl;

	doTest< Givaro::Modular<int8_t> >( "Givaro::Modular<int8_t>", prime, exp, iterations, fulltest );
	doTest< Givaro::Modular<int16_t> >( "Givaro::Modular<int16_t>", prime, exp, iterations, fulltest );
	doTest< Givaro::Modular<int32_t> >( "Givaro::Modular<int32_t>", prime, exp, iterations, fulltest );
	//doTest< Givaro::Modular<int> >( "Givaro::Modular<int>", prime, exp, iterations, fulltest );
	doTest< Givaro::Modular<double> >( "Givaro::Modular<double>", prime, exp, iterations, fulltest );
	doTest< Givaro::Modular<float> >( "Givaro::Modular<float>", prime, exp, iterations, fulltest );

	//doTest< Givaro::ModularBalanced<int8_t> >( "Givaro::ModularBalanced<int8_t>", prime, exp, iterations, fulltest );
	//doTest< Givaro::ModularBalanced<int16_t> >( "Givaro::ModularBalanced<int16_t>", prime, exp, iterations, fulltest );
	doTest< Givaro::ModularBalanced<int32_t> >( "Givaro::ModularBalanced<int32_t>", prime, exp, iterations, fulltest );
	doTest< Givaro::ModularBalanced<double> >( "Givaro::ModularBalanced<double>", prime, exp, iterations, fulltest );
	doTest< Givaro::ModularBalanced<float> >( "Givaro::ModularBalanced<float>", prime, exp, iterations, fulltest );

	doTest< ModularCrooked<double> >( "ModularCrooked<double>", prime, exp, iterations, fulltest );

#ifdef __LINBOX_HAVE_NTL
	doTest< NTL_zz_p >( "NTL_zz_p", prime, exp, iterations, fulltest );
	doTest< NTL_PID_zz_p >( "NTL_PID_zz_p", prime, exp, iterations, fulltest );
	doTest< NTL_ZZ_p >( "NTL_ZZ_p", prime, exp, iterations, fulltest );
	doTest< PIR_ntl_ZZ_p >( "PIR_ntl_ZZ_p", prime, exp, iterations, fulltest );
	doTest< NTL_ZZ >( "NTL_ZZ", prime, exp, iterations, fulltest );
#endif
#ifdef __LINBOX_HAVE_LIDIA
	doTest< LidiaGfq >( "LidiaGfq", prime, exp, iterations, fulltest );
#endif
	//	doTest< GF2 >( "GF2", prime, exp, iterations, fulltest );
	doTest< GMPRationalField >( "GMPRationalField", prime, exp, iterations, fulltest );
	//if (prime == 2)
	doTest< PIRModular<int32_t> >( "PIRModular<int32_t>", prime, exp, iterations, fulltest );
	doTest< Local2_32 >( "Local2_32", prime, exp, iterations, fulltest );

	return 0;
}

// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,:0,t0,+0,=s
// Local Variables:
// mode: C++
// tab-width: 8
// indent-tabs-mode: nil
// c-basic-offset: 8
// End:

