/*
 * lat_ctx.c - context switch timer 
 *
 * usage: lat_ctx [-s size] #procs [#procs....]
 *
 * Copyright (c) 1994 Larry McVoy.  Distributed under the FSF GPL with
 * additional restriction that results may published only if
 * (1) the benchmark is unmodified, and
 * (2) the version in the sccsid below is included in the report.
 * Support for this development by Sun Microsystems is gratefully acknowledged.
 */
/*
 * RCS identification string for this file.  The license above requires
 * the version recorded here to be included in any published results.
 */
char	*id = "$Id: lat_ctx.c,v 1.3 1995/10/26 04:03:09 lm Exp lm $\n";

#include "timing.h"
#include "bench.h"
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <limits.h>

/*
 * Passed as the time argument to LOOP_FIRST/LOOP_LAST alongside a variable
 * named usecs.
 * NOTE(review): presumably the target run length in microseconds (2 seconds);
 * confirm against timing.h.
 */
#define	CTX_TIME	2000000
/* Number of slots in pids[] and p[]: the largest ring that can be stored. */
#define	MAXPIPE	100

/* Bytes of data each process sums per token; main() sets it from -s (kbytes * 1024). */
int	process_size;
/* Buffer of process_size bytes allocated in main() and summed by sumit(). */
char	*data;
/* Child pids from fork() in ctx(); ctx() and killem() only use slots 1 and up. */
int	pids[MAXPIPE];
/* Pipe descriptor pairs filled in by pipe() in pipe_cost(): [0] read end, [1] write end. */
int	p[MAXPIPE][2];
/* Sums the data buffer and returns the total. */
int	sumit();
/* Times the token going around the ring of processes, minus the given overhead. */
double	ctx(double overhead, int procs);
/* Creates the pipes and times the token going around them within one process. */
double	pipe_cost(int procs);
/* doit() is the child loop (never returns); killem() sends SIGTERM to the children. */
void	doit(), killem();

/*
 * Entry point.
 *
 * Parses an optional leading "-s kbytes" (the amount of data each process
 * sums per token), allocates and zeroes that buffer, then for every
 * remaining argument measures the pipe overhead with pipe_cost() and the
 * context switch time with ctx() and reports both on stderr.
 *
 * Returns 0 when every process count has been measured.  Prints the usage
 * message and exits with status 1 when no process count is given or the
 * size is negative or too large for process_size; exits with status 1 when
 * the data buffer cannot be set up.
 */
int
main(int ac, char **av)
{
	/* Saved now because av is advanced past "-s kbytes" below. */
	char	*prog = (ac > 0 && av[0]) ? av[0] : "lat_ctx";
	int	i;

	if (ac < 2) {
		goto usage;
	}

	/*
	 * If they specified a context size, get it.
	 */
	if (!strcmp(av[1], "-s")) {
		int	kbytes;

		if (ac < 3) {
			goto usage;
		}
		kbytes = atoi(av[2]);
		/* The byte count is kept in an int; refuse what will not fit. */
		if (kbytes < 0 || kbytes > INT_MAX / 1024) {
			goto usage;
		}
		process_size = kbytes * 1024;
		if (process_size != 0) {
#ifdef USE_MMAP
#define	D "/tmp/foo"
			int	fd = open(D, O_RDWR);

			if (fd == -1) {
				perror(D);
				exit(1);
			}
			data = mmap(0, process_size,
			    PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
			/*
			 * Compare as pointers: casting the result to int
			 * truncates it where pointers are wider than int.
			 */
			if (data == (char *)MAP_FAILED) {
				perror("mmap");
				exit(1);
			}
			close(fd);
#else
			data = malloc(process_size);
			if (!data) {
				perror("malloc");
				exit(1);
			}
#endif
			bzero(data, process_size);
		}
		ac -= 2;
		av += 2;
	}

	/* "-s kbytes" alone leaves nothing to measure. */
	if (ac < 2) {
		goto usage;
	}

	for (i = 1; i < ac; ++i) {
		int	procs = atoi(av[i]);
		double	overhead, result;

		overhead = pipe_cost(procs);
		result = ctx(overhead, procs);
		fprintf(stderr,
		    "Context switch of %d %dk procs: %.2f (%.2f overhead)\n",
		    procs, process_size/1024, result, overhead);
	}
	return (0);

usage:
	printf("Usage: %s [-s kbytes] processes [processes ...]\n", prog);
	exit(1);
	/* NOTREACHED */
	return (1);
}


/*
 * Fatal error on the parent side of the ring: report `what` with perror(),
 * SIGTERM the children recorded in pids[1..nkids-1] so they are not left
 * blocked on their pipes, and exit with status 1.
 */
static void
ctx_die(const char *what, int nkids)
{
	perror(what);		/* first, while errno is still intact */
	killem(nkids);
	exit(1);
}

/*
 * Time a token travelling around a ring of `procs` processes.
 *
 * The pipes in p[0..procs-1] must already be open (main() calls pipe_cost()
 * first, which creates them).  The parent is process 0: it writes to p[0]
 * and reads from p[procs-1]; child i (1 <= i < procs) runs doit(), reading
 * p[i-1] and writing p[i].  Every process calls sumit() for each token it
 * receives.  When the measurement is done the children are killed and
 * reaped and the pipes are closed.
 *
 * overhead: per-hop pipe cost to subtract, as returned by pipe_cost().
 * procs:    number of processes in the ring, 1..MAXPIPE.
 *
 * Returns the measured time per hop minus `overhead`, in the units of the
 * usecs value produced by LOOP_LAST.  Exits with status 1 if procs is out
 * of range, if fork() fails, or on a pipe read/write error.
 *
 * NOTE(review): LOOP_FIRST/LOOP_LAST are not defined in this file; they are
 * assumed to repeat the statements between them and to set N (iterations)
 * and usecs (elapsed time) - confirm against timing.h.
 */
double
ctx(double overhead, int procs)
{
	int	msg = 0, i;
	int	sum = 0;
	int	N;
	uint64	usecs;

	/* p[procs-1] and pids[1..procs-1] are indexed below. */
	if (procs < 1 || procs > (int)(sizeof(pids) / sizeof(pids[0]))) {
		fprintf(stderr, "ctx: process count %d is out of range\n",
		    procs);
		exit(1);
	}

	/*
	 * Use the pipes as a ring, and fork off a bunch of processes
	 * to pass the token through their part of the ring.
	 */
	signal(SIGTERM, SIG_IGN);
	for (i = 1; i < procs; ++i) {
		switch (pids[i] = fork()) {
		    case -1:
			/*
			 * Only pids[1..i-1] were filled in by this call, so
			 * only those are signalled.  Exit here: falling
			 * through would run the child loop in the parent.
			 */
			ctx_die("fork", i);
			/* NOTREACHED */

		    case 0:	/* child */
			doit(p, i-1, i);
			/* NOTREACHED */

		    default:	/* parent */
			;
		}
	}

	if (process_size) {
		bzero(data, process_size);
	}
	/*
	 * Go once around the loop to make sure that everyone is ready and
	 * to get the token in the pipeline.
	 */
	if (write(p[0][1], &msg, sizeof(msg)) != sizeof(msg) ||
	    read(p[procs-1][0], &msg, sizeof(msg)) != sizeof(msg) ||
	    write(p[0][1], &msg, sizeof(msg)) != sizeof(msg)) {
		ctx_die("write/read/write on pipe", procs);
	}

	LOOP_FIRST(N, usecs, CTX_TIME);
	if (read(p[procs-1][0], &msg, sizeof(msg)) != sizeof(msg)) {
		ctx_die("read/write on pipe", procs);
	}
	sum = sumit();
	if (write(p[0][1], &msg, sizeof(msg)) != sizeof(msg)) {
		ctx_die("read/write on pipe", procs);
	}
	LOOP_LAST(N, usecs, CTX_TIME);

	/*
	 * Close the pipes and kill the children.
	 */
	killem(procs);
	for (i = 0; i < procs; ++i) {
		close(p[i][0]);
		close(p[i][1]);
		if (i > 0) {
			wait(0);	/* one wait per forked child */
		}
	}

	use_result(sum);

	/*
	 * usecs covers N trips around the ring and each trip is procs hops,
	 * so divide by both to get the time per hop, then take off the
	 * per-hop pipe overhead.  The product is formed in double so that a
	 * large N cannot overflow int.
	 */
	return (((double)usecs / ((double)N * procs)) - overhead);
}

/*
 * Send SIGTERM to every child recorded in pids[1..procs-1].
 *
 * Slot 0 is never used (the parent is process 0 of the ring).  Each slot is
 * cleared once its child has been signalled: the pid can be reused by an
 * unrelated process after the child is reaped, and a stale entry must not
 * be signalled again by a later call.  procs is clamped to the size of
 * pids[].
 */
void
killem(int procs)
{
	int	limit = (int)(sizeof(pids) / sizeof(pids[0]));
	int	i;

	if (procs > limit) {
		procs = limit;
	}
	for (i = 1; i < procs; ++i) {
		if (pids[i] > 0) {
			kill(pids[i], SIGTERM);
			pids[i] = 0;
		}
	}
}

void
doit(p, rd, wr)
	int	p[MAXPIPE][2];
	int	rd, wr;
{
	int	msg, sum;

	signal(SIGTERM, SIG_DFL);
	if (process_size) {
		bzero(data, process_size);
	}
	for ( ;; ) {
		if (read(p[rd][0], &msg, sizeof(msg)) != sizeof(msg)) {
			perror("read/write on pipe");
			break;
		}
		sum = sumit();
		if (write(p[wr][1], &msg, sizeof(msg)) != sizeof(msg)) {
			perror("read/write on pipe");
			break;
		}
	}
	use_result(sum);
	exit(1);
}

/*
 * Calculate the cost of passing a token (one int) through a pipe.  I do it
 * with a bunch of pipes to try and burn through the onboard caches.  Note
 * that on a Sun SPARC ss2, it made little difference if the loop was over
 * one or thirty pipes.
 *
 * Creates the pipes p[0..procs-1] and leaves them open and empty for the
 * caller; ctx() uses them and closes them.
 *
 * procs: number of pipes to create, 1..MAXPIPE.
 *
 * Returns usecs / N as produced by LOOP_LAST, i.e. the time for one
 * read-plus-write of the token.  Exits with status 1 if procs is out of
 * range, if pipe() fails, or on a read/write error.
 *
 * NOTE(review): LOOP_FIRST/LOOP_LAST are not defined in this file; they are
 * assumed to repeat the statements between them and to set N and usecs -
 * confirm against timing.h.
 * NOTE(review): unlike ctx() and doit(), the timed loop does not call
 * sumit(), so the summing cost is not part of the overhead returned here;
 * confirm that is intended.
 */
double
pipe_cost(int procs)
{
	int	msg = 0, sum = 0, i, k = 0, N;
	uint64	usecs;

	/* p[] has a fixed number of slots; refuse counts it cannot hold. */
	if (procs < 1 || procs > (int)(sizeof(p) / sizeof(p[0]))) {
		fprintf(stderr,
		    "pipe_cost: process count %d is out of range (1..%d)\n",
		    procs, (int)(sizeof(p) / sizeof(p[0])));
		exit(1);
	}

	/*
	 * Get a bunch of pipes.
	 */
	for (i = 0; i < procs; ++i) {
		if (pipe(p[i]) == -1) {
			perror("pipe");
			exit(1);
		}
	}

	/*
	 * Measure the overhead of passing a token around the ring.
	 */
	if (write(p[k = 0][1], &msg, sizeof(msg)) != sizeof(msg)) {
		perror("read/write on pipe");
		exit(1);
	}
	LOOP_FIRST(N, usecs, CTX_TIME)
	if (read(p[k][0], &msg, sizeof(msg)) != sizeof(msg)) {
		perror("read/write on pipe");
		exit(1);
	}
	if (++k == procs) {
		k = 0;
	}
	if (write(p[k][1], &msg, sizeof(msg)) != sizeof(msg)) {
		perror("read/write on pipe");
		exit(1);
	}
	LOOP_LAST(N, usecs, CTX_TIME);

	/*
	 * Every pass above ends by writing the token into p[k], so exactly
	 * one token is still buffered there.  Take it back out: ctx() puts
	 * its own token into these same pipes, and a leftover one would
	 * leave two tokens circulating in the ring being timed.
	 */
	if (read(p[k][0], &msg, sizeof(msg)) != sizeof(msg)) {
		perror("read/write on pipe");
		exit(1);
	}

	use_result(sum);
	return ((double)usecs / N);
}

/*
 * Add up the ints in the data buffer so that the process touches its
 * working set.
 *
 * The buffer is read in fully unrolled chunks of 512 ints (HALFK expands to
 * 5*100 + 10 + 2 loads).  Every complete chunk is read, including the last
 * one when the size is an exact multiple of 512 ints; a trailing partial
 * chunk is not read.  Nothing is read when process_size is smaller than
 * one chunk.
 *
 * Returns the sum; callers pass it to use_result(), presumably so the
 * loads cannot be discarded - confirm against bench.h.
 */
int
sumit(void)
{
	int	i, sum = 0;
	int	*d = (int*)data;

#define	TEN	sum+=d[0]+d[1]+d[2]+d[3]+d[4]+d[5]+d[6]+d[7]+d[8]+d[9];d+=10;
#define	FIFTY	TEN TEN TEN TEN TEN
#define	HUNDRED	FIFTY FIFTY
#define	HALFK	HUNDRED HUNDRED HUNDRED HUNDRED HUNDRED TEN sum+=*d++;sum+=*d++;

	/*
	 * i counts the ints not yet read.  The test is >= so that a chunk
	 * of exactly 512 remaining ints is still read; the division is done
	 * in int so a negative size cannot turn into a huge count.
	 */
	for (i = process_size / (int)sizeof(int); i >= 512; i -= 512) {
		HALFK
	}
	return (sum);
}

/*
 * Disabled code: everything down to the matching #endif is compiled out.
 *
 * caches() allocates a SIZE-byte (1<<20) buffer, zeroes it, adds its bytes
 * up in unrolled chunks of 8192 (eight KILOs of 1024 loads each), frees the
 * buffer and returns the sum.
 *
 * NOTE(review): if this is ever re-enabled, note that the malloc() result
 * is not checked, that SIZE is not parenthesized, and that the i > 8192
 * test leaves the final 8192 bytes unread.
 */
#if 0
#define	SIZE	1<<20
caches()
{
        int     i, sum = 0;
        char    *d = (char *)malloc(SIZE);
        char    *save = d;	/* d is advanced by the macros; keep the start for free() */

        bzero(d, SIZE);

#define TEN     sum+=d[0]+d[1]+d[2]+d[3]+d[4]+d[5]+d[6]+d[7]+d[8]+d[9];d+=10;
#define FIFTY   TEN TEN TEN TEN TEN
#define HUNDRED FIFTY FIFTY
#define HALFK   HUNDRED HUNDRED HUNDRED HUNDRED HUNDRED TEN sum+=*d++;sum+=*d++;
#define KILO    HALFK HALFK

        /* Each pass reads 8 * 1024 bytes. */
        for (i = SIZE; i > 8192; i -= 8192) {
                KILO KILO KILO KILO
                KILO KILO KILO KILO
        }
        free(save);
        return (sum);
}
#endif
