/*
 * Sample program to test the effect of PAUSE/YIELD instruction in a highly
 * contended scenario.  The Intel and ARM docs recommend the use of PAUSE and
 * YIELD respectively, in spinlock tight loops.
 *
 * This program can be run with :
 * gcc -O3 -o spin spin.c -lrt ; ./spin [number_of_processes]
 * By default, 4 processes are spawned.
 *
 * Child processes wait in a tight loop for a shared variable to become 0,
 * while the parent process continuously increments a sequence number for a
 * fixed amount of time, after which, it sets the shared variable to 0. The
 * child tight loop calls YIELD/PAUSE in each iteration.
 *
 * The intention is to create many more processes than there are available
 * CPUs, so that the scheduler hopefully pre-empts the spinning children
 * because of the PAUSE and gives the main process a larger share of the CPU,
 * letting it increment its sequence number more times. The expectation is
 * therefore that, with PAUSE, the program ends up with a much higher sequence
 * number than without PAUSE. Similarly, the child processes should consume
 * fewer CPU cycles with PAUSE than without it.
 *
 * Author: Amit Khandekar
 */

#include <fcntl.h>
#include <float.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>        /* For mode constants */
#include <sys/wait.h>
#include <unistd.h>

/* Size in bytes of the shared-memory object: it holds a single int flag. */
#define SIZE sizeof(int)
/* Name of the POSIX shared-memory object used by the parent and children. */
#define SHM_NAME "/shm"
/* Seconds the parent keeps counting before it releases the children. */
#define RUN_DURATION 15

/*
 * Set to 1 by handle_sig_alarm() and polled by the parent's counting loop in
 * main().  volatile sig_atomic_t is the only object type the C standard
 * guarantees can be written from an asynchronous signal handler and read
 * from the interrupted code.
 */
volatile sig_atomic_t timer_exceeded = 0;

/* Pointer to a signal handler taking the signal number. */
typedef void     (*sigfunc_type)(int);

static void pqsignal(int signo, sigfunc_type func);
static void handle_sig_alarm(int dummy);

/*
 * spin_delay
 *
 * Execute the CPU's spin-wait hint once.  Called on every iteration of a
 * busy-wait loop so that the spinning process slows its tight loop down.
 *
 * The instruction is chosen per architecture: PAUSE on x86/x86-64 and YIELD
 * on AArch64.  On any other architecture an empty asm statement is emitted
 * so the program still builds; it then measures the "no hint" case.
 */
static __inline__ void
spin_delay(void)
{
#if defined(__i386__) || defined(__x86_64__)
    __asm__ __volatile__(
        " pause          \n");
#elif defined(__aarch64__)
    __asm__ __volatile__(
        " yield          \n");
#else
    /* No known spin-wait hint for this target: emit nothing. */
    __asm__ __volatile__("");
#endif
}

/*
 * main
 *
 * Usage: spin [number_of_processes]   (default: 4)
 *
 * Creates a one-int POSIX shared-memory flag, sets it to 1 and forks the
 * requested number of children.  Each child spins, calling spin_delay() on
 * every iteration, until the flag becomes 0, and then prints the processor
 * time it consumed as reported by clock().  The parent increments a counter
 * until the RUN_DURATION-second alarm fires, prints the counter, clears the
 * flag to release the children, removes the shared-memory object and waits
 * for the children to exit.
 *
 * Returns 0 on success, or -1 if the argument is invalid or a system call
 * fails.
 */
int main(int argn, char *argv[])
{
	int		i, fd;
	int		nprocs = 4;
	int		childpid = 0;
	int		status = 0;
	volatile int *spin_flag;

	if (argn > 1 && (sscanf(argv[1], "%d", &nprocs) != 1 || nprocs < 0))
	{
		fprintf(stderr, "usage: %s [number_of_processes]\n", argv[0]);
		return -1;
	}

	fd = shm_open(SHM_NAME, O_RDWR | O_CREAT, S_IRWXU | S_IRWXG | S_IRWXO);
	if (fd == -1)
	{
		perror("Could not create shared memory");
		return -1;
	}

	if (ftruncate(fd, SIZE) < 0)
	{
		perror("ftruncate failed");
		close(fd);
		shm_unlink(SHM_NAME);	/* do not leave the object behind */
		return -1;
	}

	spin_flag = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (spin_flag == MAP_FAILED)
	{
		/* Without a mapping there is nothing to run: clean up and stop. */
		perror("mmap failed");
		close(fd);
		if (shm_unlink(SHM_NAME) != 0)
			perror("could not destroy shared memory");
		return -1;
	}
	/* The mapping stays valid after the descriptor is closed. */
	close(fd);

	/* This will cause children to keep on spinning until it is set back to 0 */
	*spin_flag = 1;

	/* Spawn children */
	for (i = 0; i < nprocs; i++)
	{
		pid_t	pid = fork();

		if (pid == 0)
		{
			childpid = getpid();
			break;
		}
		if (pid < 0)
		{
			/*
			 * Stop spawning.  The parent branch below still releases and
			 * reaps the children that were already created.
			 */
			perror("fork failed");
			status = -1;
			break;
		}
	}

	if (childpid == 0) /* Am I a parent ? */
	{
		if (status == 0)
		{
			double	dbl = -10000000; /* Some random initial value */

			/* For RUN_DURATION seconds, keep incrementing the double value */
			pqsignal(SIGALRM, handle_sig_alarm);
			alarm(RUN_DURATION);
			while (!timer_exceeded)
			{
				dbl += 1;
			}
			printf("Final sequence number: %g\n", dbl);
			/* Emit the result before any child output when stdout is piped */
			fflush(stdout);
		}

		/* Unblock the children */
		*spin_flag = 0;

		if (shm_unlink(SHM_NAME) != 0)
		{
			perror("could not destroy shared memory");
			status = -1;
		}

		/* Reap every child so their reports are printed before we exit */
		while (wait(NULL) > 0)
			;
	}
	else /* I am a child */
	{
		clock_t	cpu_time = clock();

		/* Keep on spinning with delay, until parent unblocks me */
		do
		{
			spin_delay();
		}
		while (*spin_flag == 1);

		/* We have come out of loop, that means parent set the flag to 0 */
		printf("pid: %d; cpu cycles by me: %ld\n",
			   childpid, (long) (clock() - cpu_time));
	}

	return status;
}

/*
 * pqsignal
 *
 * Install func as the handler for signal signo via sigaction(), with an
 * empty set of additionally blocked signals and SA_RESTART so that system
 * calls interrupted by the handler are restarted.
 *
 * Does not return on failure: the error is reported with perror() and the
 * process exits.
 */
static void
pqsignal(int signo, sigfunc_type func)
{
    /* Start from all-zero so no field is passed to the kernel uninitialised */
    struct sigaction act = {0};

    act.sa_handler = func;
    act.sa_flags = SA_RESTART;
    sigemptyset(&act.sa_mask);

    /* The previous disposition is never used, so do not ask for it. */
    if (sigaction(signo, &act, NULL) < 0)
    {
        perror("sigaction returned error");
        exit(-1);
    }
}

/*
 * handle_sig_alarm
 *
 * Signal handler that raises the timer_exceeded flag.  main() installs it
 * for SIGALRM and polls the flag to know when to stop counting.  The signal
 * number passed in is not used.
 */
static void
handle_sig_alarm(int dummy)
{
    (void) dummy;

    timer_exceeded = 1;
}

