/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
* (C) 2008 by University of Illinois
* See COPYRIGHT in top-level directory.
*/
/*
* This code is intended to test the trace overhead when using an
* MPI tracing package. To perform the test, follow these steps:
*
* 1) Run with the versbose mode selected to determine the delay argument
* to use in subsequent tests:
* mpiexec -n 4096 allredtrace -v
* Assume that the computed delay count is 6237; that value is used in
* the following.
*
* 2) Run with an explicit delay count, without tracing enabled:
* mpiexec -n 4096 allredtrace -delaycount 6237
*
* 3) Build allredtrace with tracing enabled, then run:
* mpiexec -n 4096 allredtrace -delaycount 6237
*
* Compare the total times. The tracing version should take slightly
* longer but no more than, for example, 15%.
*/
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
static int verbose = 0;
static int lCount = 0;
void Delay(int);
void SetupDelay(double);
int main(int argc, char *argv[])
{
double usecPerCall = 100;
double t, t1, tsum;
int i, nLoop = 100;
int rank;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
/* Process arguments. We allow the delay count to be set from the
* command line to ensure reproducibility */
for (i = 1; i < argc; i++) {
if (strcmp(argv[i], "-delaycount") == 0) {
i++;
lCount = atoi(argv[i]);
} else if (strcmp(argv[i], "-v") == 0) {
verbose = 1;
} else {
fprintf(stderr, "Unrecognized argument %s\n", argv[i]);
exit(1);
}
}
if (lCount == 0) {
SetupDelay(usecPerCall);
}
MPI_Barrier(MPI_COMM_WORLD);
t = MPI_Wtime();
for (i = 0; i < nLoop; i++) {
MPI_Allreduce(&t1, &tsum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
Delay(lCount);
}
t = MPI_Wtime() - t;
MPI_Barrier(MPI_COMM_WORLD);
if (rank == 0) {
printf("For delay count %d, time is %e\n", lCount, t);
}
MPI_Barrier(MPI_COMM_WORLD);
MPI_Finalize();
return 0;
}
void SetupDelay(double usec)
{
double t, tick;
double sec = 1.0e-6 * usec;
int nLoop, i, direction;
/* Compute the number of times to run the tests to get an accurate
* number given the timer resolution. */
nLoop = 1;
tick = 100 * MPI_Wtick();
do {
nLoop = 2 * nLoop;
t = MPI_Wtime();
for (i = 0; i < nLoop; i++) {
MPI_Wtime();
}
t = MPI_Wtime() - t;
}
while (t < tick && nLoop < 100000);
if (verbose)
printf("nLoop = %d\n", nLoop);
/* Start with an estimated count */
lCount = 128;
direction = 0;
while (1) {
t = MPI_Wtime();
for (i = 0; i < nLoop; i++) {
Delay(lCount);
}
t = MPI_Wtime() - t;
t = t / nLoop;
if (verbose)
printf("lCount = %d, time = %e\n", lCount, t);
if (t > 10 * tick)
nLoop = nLoop / 2;
/* Compare measured delay */
if (t > 2 * sec) {
lCount = lCount / 2;
if (direction == 1)
break;
direction = -1;
} else if (t < sec / 2) {
lCount = lCount * 2;
if (direction == -1)
break;
direction = 1;
} else if (t < sec) {
/* sec/2 <= t < sec , so estimate the lCount to hit sec */
lCount = (sec / t) * lCount;
} else
break;
}
if (verbose)
printf("lCount = %d, t = %e\n", lCount, t);
/* Should coordinate with the other processes - take the max? */
}
volatile double delayCounter = 0;
void Delay(int count)
{
int i;
delayCounter = 0.0;
for (i = 0; i < count; i++) {
delayCounter += 2.73;
}
}