/*
 * skel.cpp. skeleton for rdtsc benchmarks
 *
 * Copyright (C) 1999, 2001 by Manfred Spraul.
 *	All rights reserved except the rights granted by the GPL.
 *
 * Redistribution of this file is permitted under the terms of the GNU 
 * General Public License (GPL) version 2 or later.
 * $Header: /pub/home/manfred/cvs-tree/timetest/rep_nop.cpp,v 1.1 2001/04/07 19:38:33 manfred Exp $
 */

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <getopt.h>

// Build-time switch: when USE_CLI is defined, the timed region is wrapped in
// cli/sti so that local interrupts are disabled during the benchmark.
// It is forced off here.
#undef USE_CLI

// Build-time switch: when CACHE_FLUSH is defined, drain_cache() is compiled
// as a real cache flushing function instead of an empty stub.
// It is forced off here.
#undef CACHE_FLUSH

#ifdef USE_CLI
#include <sys/io.h>
// Instruction strings spliced into the rdtsc asm blocks: CLI at the start of
// read_rdtsc_before(), STI at the end of read_rdtsc_after().
// NOTE(review): cli/sti normally require raised I/O privilege (iopl) in user
// space, but no iopl() call is visible in this file - confirm before enabling.
#define CLI	"cli\n\t"
#define STI	"sti\n\t"
#else
// Interrupt masking disabled: CLI and STI expand to nothing, and iopl()
// becomes a no-op statement so callers need no #ifdef.
#define CLI
#define STI
#define iopl(a)	do { } while(0)
#endif

// read_rdtsc_before(time): take the time stamp that starts a measurement.
//
// Intel recommends that a serializing instruction
// should be called before and after rdtsc.
// CPUID is a serializing instruction.
//
// Instruction sequence: zero eax, CLI (empty unless USE_CLI is defined),
// cpuid, rdtsc, store eax at offset 0 and edx at offset 4 of `time`,
// then zero eax and execute cpuid once more.
//
// `time` must be an lvalue at least 8 bytes wide; its address is handed to
// the asm through the "S" register constraint. eax, ebx, ecx, edx and memory
// are declared as clobbered.
//
// ".align 128": P 4 L2 cache line size
#define read_rdtsc_before(time)		\
	__asm__ __volatile__(		\
		".align 128\n\t"	\
		"xor %%eax,%%eax\n\t"	\
		CLI			\
		"cpuid\n\t"		\
		"rdtsc\n\t"		\
		"mov %%eax,(%0)\n\t"	\
		"mov %%edx,4(%0)\n\t"	\
		"xor %%eax,%%eax\n\t"	\
		"cpuid\n\t"		\
		: /* no output */	\
		: "S"(&time)		\
		: "eax", "ebx", "ecx", "edx", "memory")

// read_rdtsc_after(time): take the time stamp that ends a measurement.
//
// Instruction sequence: zero eax, cpuid, rdtsc, store eax at offset 0 and
// edx at offset 4 of `time`, zero eax, cpuid again, then STI (empty unless
// USE_CLI is defined).
//
// Unlike read_rdtsc_before() there is no ".align" directive here.
// `time` must be an lvalue at least 8 bytes wide; its address is handed to
// the asm through the "S" register constraint. eax, ebx, ecx, edx and memory
// are declared as clobbered.
#define read_rdtsc_after(time)		\
	__asm__ __volatile__(		\
		"xor %%eax,%%eax\n\t"	\
		"cpuid\n\t"		\
		"rdtsc\n\t"		\
		"mov %%eax,(%0)\n\t"	\
		"mov %%edx,4(%0)\n\t"	\
		"xor %%eax,%%eax\n\t"	\
		"cpuid\n\t"		\
		STI			\
		: /* no output */	\
		: "S"(&time)		\
		: "eax", "ebx", "ecx", "edx", "memory")

/*
 * BUILD_TESTFNC(name, text, instructions) - generate one benchmark function.
 *
 * Expands to three file-scope definitions:
 *   name##_dummy()  a function whose body is only ".align 4096" followed by
 *                   "xor eax,eax"; nothing in this macro calls it.
 *   name##_best     smallest tick count measured so far for this test,
 *                   initialised to 1024*1024*1024.
 *   name()          runs `instructions` once between read_rdtsc_before() and
 *                   read_rdtsc_after(). When the elapsed tick count is lower
 *                   than name##_best, prints `text` together with the elapsed
 *                   count minus zerotest_best and stores the new best.
 *
 * name:         identifier used for the generated symbols.
 * text:         string literal; it is concatenated into the printf format,
 *               so it must not contain '%'.
 * instructions: statement(s) to be timed.
 *
 * zerotest_best must already be declared at the point of expansion, so the
 * first instantiation has to be the one named `zerotest`.
 *
 * The printed difference may be negative (e.g. while zerotest_best still
 * holds its initial value), so it is converted to long long and printed with
 * the standard "%lld" conversion instead of the non-portable "%Ld".
 */
#define BUILD_TESTFNC(name, text, instructions) \
void name##_dummy(void)				\
{						\
	__asm__ __volatile__(			\
		".align 4096\n\t"		\
		"xor %%eax, %%eax\n\t"		\
		: : : "eax");			\
}						\
static unsigned long name##_best = 1024*1024*1024; \
\
static void name(void) \
{ \
	unsigned long long t_begin; \
	unsigned long long t_end; \
	unsigned long long elapsed; \
 \
	read_rdtsc_before(t_begin); \
	instructions; \
	read_rdtsc_after(t_end); \
	elapsed = t_end - t_begin; \
	if (elapsed < name##_best) { \
		printf(text ":\t%10lld ticks; \n", \
			(long long)(elapsed - zerotest_best)); \
		name##_best = elapsed; \
	} \
}

/*
 * Small piece of ordinary work that DO_T() executes before and after the
 * instruction under test: squares one static int and stores the product in
 * another static int.
 */
void filler(void)
{
	static int factor = 3;
	static int square;

	square = factor * factor;
}

/* Execute statement x three times in a row. */
#define DO_3(x) \
	do { \
		x; \
		x; \
		x; \
	} while(0)

/* Execute statement x ten times in a row (3 + 3 + 3 + 1). */
#define DO_10(x) \
	do { \
		DO_3(x); \
		DO_3(x); \
		DO_3(x); \
		x; \
	} while(0)

/* Execute statement x fifty times in a row (5 groups of 10). */
#define DO_50(x) \
	do { \
		DO_10(x); \
		DO_10(x); \
		DO_10(x); \
		DO_10(x); \
		DO_10(x); \
	} while(0)


/*
 * Run statement y once, with three filler() calls in front of it and three
 * more behind it.
 */
#define DO_T(y) \
	do { \
		filler(); filler(); filler(); \
		y; \
		filler(); filler(); filler(); \
	} while(0)

#ifdef CACHE_FLUSH
/* Number of ints touched by one drain pass. */
#define DRAIN_SZ	(4*1024*1024)
/* Scratch array, three DRAIN_SZ-sized parts; only the middle one is touched. */
int other[3*DRAIN_SZ] __attribute ((aligned (4096)));

/*
 * Cache flushing function: zero the middle third of `other`, then walk the
 * same range again reading every element (stopping early on a non-zero one).
 */
static inline void drain_cache(void)
{
	int *const window = &other[DRAIN_SZ];
	int idx;

	for (idx = 0; idx < DRAIN_SZ; idx++)
		window[idx] = 0;

	for (idx = 0; idx < DRAIN_SZ; idx++) {
		if (window[idx] != 0)
			break;
	}
}
#else
/* CACHE_FLUSH not defined: draining the cache is a no-op. */
static inline void drain_cache(void)
{
}
#endif

/* Execute statement x 5000 times; the loop counter `i` is local to the macro. */
#define DO_TEST(x) \
	do { \
		int i = 0; \
		for (; i < 5000; ++i) { \
			x; \
		} \
	} while(0)

//////////////////////////////////////////////////////////////////////////////

// Instructions under test. Each macro expands to one asm statement with a
// "memory" clobber; note that the trailing ';' is part of the expansion.
//
// REP_NOP(): a nop carrying a rep prefix ("rep;nop").
// NOTE(review): "rep;nop" is commonly used as the spelling of the PAUSE
// spin-wait hint - confirm against the Intel instruction set reference.
#define REP_NOP()	__asm__ __volatile__ ("rep;nop\n\t": : : "memory");
// NOP(): a plain nop, measured for comparison.
#define NOP()		__asm__ __volatile__ ("nop\n\t": : : "memory");

/*
 * Benchmark functions generated by BUILD_TESTFNC:
 *   zerotest() - empty payload, i.e. the overhead of the timing harness and
 *                the filler() calls; must come first because the other
 *                tests print their results relative to zerotest_best.
 *   rnop()     - one "rep;nop".
 *   nop()      - one plain "nop".
 *
 * No ';' follows the invocations: each expansion already ends with the
 * closing brace of a function definition, and an extra ';' at file scope is
 * not permitted by ISO C.
 */
BUILD_TESTFNC(zerotest,"zerotest", DO_T((void)0))
BUILD_TESTFNC(rnop,"rep nop", DO_T(REP_NOP()))
BUILD_TESTFNC(nop,"nop", DO_T(NOP()))

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
/*
 * Program entry point.
 *
 * When running with effective uid 0 the process is reniced to -20 first;
 * otherwise it keeps its normal priority. It then loops forever, running
 * each benchmark 5000 times per round via DO_TEST(); the benchmark functions
 * print a line whenever they measure a new best time.
 *
 * Returns 1 if renicing fails. The final return is never reached because the
 * benchmark loop has no exit; stop the program with a signal.
 */
int main(void)
{
	if (geteuid() == 0) {
		int res;

		/*
		 * nice() returns the new nice value, which is legitimately
		 * negative (-20) on success. Failure is only indicated by a
		 * return of -1 together with errno being set, so errno has to
		 * be cleared before the call.
		 */
		errno = 0;
		res = nice(-20);
		if (res == -1 && errno != 0) {
			perror("nice(-20)");
			return 1;
		}
		printf("MOVETEST, reniced to (-20).\n");
	} else {
		printf("MOVETEST called by non-superuser, running with normal priority.\n");
	}

	for (;;) {
		DO_TEST(zerotest());
		DO_TEST(rnop());
		DO_TEST(nop());
	}
	return 0;
}
