|
Packit |
0848f5 |
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
|
Packit |
0848f5 |
/*
|
|
Packit |
0848f5 |
* (C) 2006 by Argonne National Laboratory.
|
|
Packit |
0848f5 |
* See COPYRIGHT in top-level directory.
|
|
Packit |
0848f5 |
*/
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* This program provides a simple test of send-receive performance between
   two (or more) processes.  This is sometimes called a head-to-head or
   ping-ping test, as both processes send at the same time.
*/
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
#include "mpi.h"
|
|
Packit |
0848f5 |
#include <stdio.h>
|
|
Packit |
0848f5 |
#include <stdlib.h>
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* Capacity of each row of the per-benchmark timing table declared in main(). */
#define MAXTESTS 32
/* Slack added to the factor of 2 when main() compares the time for one message
 * size against the time for the previous (half as large) size.
 * FIXME: This number is pretty much randomly chosen */
#define ERROR_MARGIN 1.0

/* Set to 1 by main() when MPITEST_VERBOSE is present in the environment;
 * enables printing of the timing tables. */
static int verbose = 0;
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
int main(int argc, char *argv[])
|
|
Packit |
0848f5 |
{
|
|
Packit |
0848f5 |
int wsize, wrank, partner, len, maxlen, k, reps, repsleft;
|
|
Packit |
0848f5 |
double t1;
|
|
Packit |
0848f5 |
MPI_Request rreq;
|
|
Packit |
0848f5 |
char *rbuf, *sbuf;
|
|
Packit |
0848f5 |
double times[3][MAXTESTS];
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
MPI_Init(&argc, &argv);
|
|
Packit |
0848f5 |
if (getenv("MPITEST_VERBOSE"))
|
|
Packit |
0848f5 |
verbose = 1;
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
MPI_Comm_size(MPI_COMM_WORLD, &wsize);
|
|
Packit |
0848f5 |
MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
if (wsize < 2) {
|
|
Packit |
0848f5 |
fprintf(stderr, "This program requires at least 2 processes\n");
|
|
Packit |
0848f5 |
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
/* Set partner based on whether rank is odd or even */
|
|
Packit |
0848f5 |
if (wrank & 0x1) {
|
|
Packit |
0848f5 |
partner = wrank - 1;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
else if (wrank < wsize - 1) {
|
|
Packit |
0848f5 |
partner = wrank + 1;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
else
|
|
Packit |
0848f5 |
/* Handle wsize odd */
|
|
Packit |
0848f5 |
partner = MPI_PROC_NULL;
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* Allocate and initialize buffers */
|
|
Packit |
0848f5 |
maxlen = 1024 * 1024;
|
|
Packit |
0848f5 |
rbuf = (char *) malloc(maxlen);
|
|
Packit |
0848f5 |
sbuf = (char *) malloc(maxlen);
|
|
Packit |
0848f5 |
if (!rbuf || !sbuf) {
|
|
Packit |
0848f5 |
fprintf(stderr, "Could not allocate %d byte buffers\n", maxlen);
|
|
Packit |
0848f5 |
MPI_Abort(MPI_COMM_WORLD, 2);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
for (k = 0; k < maxlen; k++) {
|
|
Packit |
0848f5 |
rbuf[k] = 0;
|
|
Packit |
0848f5 |
sbuf[k] = 0;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
MPI_Barrier(MPI_COMM_WORLD);
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* Test Irecv and send, head to head */
|
|
Packit |
0848f5 |
if (wrank == 0 && verbose) {
|
|
Packit |
0848f5 |
printf("Irecv-send\n");
|
|
Packit |
0848f5 |
printf("len\ttime \trate\n");
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* Send powers of 2 bytes */
|
|
Packit |
0848f5 |
len = 1;
|
|
Packit |
0848f5 |
for (k = 0; k < 20; k++) {
|
|
Packit |
0848f5 |
/* We use a simple linear form for the number of tests to
|
|
Packit |
0848f5 |
* reduce the impact of the granularity of the timer */
|
|
Packit |
0848f5 |
reps = 50 - k;
|
|
Packit |
0848f5 |
repsleft = reps;
|
|
Packit |
0848f5 |
/* Make sure that both processes are ready to start */
|
|
Packit |
0848f5 |
MPI_Sendrecv(MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
|
|
Packit |
0848f5 |
MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
|
Packit |
0848f5 |
t1 = MPI_Wtime();
|
|
Packit |
0848f5 |
while (repsleft--) {
|
|
Packit |
0848f5 |
MPI_Irecv(rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, &rreq);
|
|
Packit |
0848f5 |
MPI_Send(sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD);
|
|
Packit |
0848f5 |
MPI_Wait(&rreq, MPI_STATUS_IGNORE);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
t1 = MPI_Wtime() - t1;
|
|
Packit |
0848f5 |
times[0][k] = t1 / reps;
|
|
Packit |
0848f5 |
if (wrank == 0) {
|
|
Packit |
0848f5 |
t1 = t1 / reps;
|
|
Packit |
0848f5 |
if (t1 > 0) {
|
|
Packit |
0848f5 |
double rate;
|
|
Packit |
0848f5 |
rate = (len / t1) / 1.e6;
|
|
Packit |
0848f5 |
t1 = t1 * 1.e6;
|
|
Packit |
0848f5 |
if (verbose)
|
|
Packit |
0848f5 |
printf("%d\t%g\t%g\n", len, t1, len / t1);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
else {
|
|
Packit |
0848f5 |
t1 = t1 * 1.e6;
|
|
Packit |
0848f5 |
if (verbose)
|
|
Packit |
0848f5 |
printf("%d\t%g\tINF\n", len, t1);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
if (verbose)
|
|
Packit |
0848f5 |
fflush(stdout);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
len *= 2;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
MPI_Barrier(MPI_COMM_WORLD);
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* Test Sendrecv, head to head */
|
|
Packit |
0848f5 |
if (wrank == 0 && verbose) {
|
|
Packit |
0848f5 |
printf("Sendrecv\n");
|
|
Packit |
0848f5 |
printf("len\ttime (usec)\trate (MB/s)\n");
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* Send powers of 2 bytes */
|
|
Packit |
0848f5 |
len = 1;
|
|
Packit |
0848f5 |
for (k = 0; k < 20; k++) {
|
|
Packit |
0848f5 |
/* We use a simple linear form for the number of tests to
|
|
Packit |
0848f5 |
* reduce the impact of the granularity of the timer */
|
|
Packit |
0848f5 |
reps = 50 - k;
|
|
Packit |
0848f5 |
repsleft = reps;
|
|
Packit |
0848f5 |
/* Make sure that both processes are ready to start */
|
|
Packit |
0848f5 |
MPI_Sendrecv(MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
|
|
Packit |
0848f5 |
MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
|
Packit |
0848f5 |
t1 = MPI_Wtime();
|
|
Packit |
0848f5 |
while (repsleft--) {
|
|
Packit |
0848f5 |
MPI_Sendrecv(sbuf, len, MPI_BYTE, partner, k,
|
|
Packit |
0848f5 |
rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
t1 = MPI_Wtime() - t1;
|
|
Packit |
0848f5 |
times[1][k] = t1 / reps;
|
|
Packit |
0848f5 |
if (wrank == 0) {
|
|
Packit |
0848f5 |
t1 = t1 / reps;
|
|
Packit |
0848f5 |
if (t1 > 0) {
|
|
Packit |
0848f5 |
double rate;
|
|
Packit |
0848f5 |
rate = (len / t1) / 1.e6;
|
|
Packit |
0848f5 |
t1 = t1 * 1.e6;
|
|
Packit |
0848f5 |
if (verbose)
|
|
Packit |
0848f5 |
printf("%d\t%g\t%g\n", len, t1, len / t1);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
else {
|
|
Packit |
0848f5 |
t1 = t1 * 1.e6;
|
|
Packit |
0848f5 |
if (verbose)
|
|
Packit |
0848f5 |
printf("%d\t%g\tINF\n", len, t1);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
if (verbose)
|
|
Packit |
0848f5 |
fflush(stdout);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
len *= 2;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
MPI_Barrier(MPI_COMM_WORLD);
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* Test Send/recv, ping-pong */
|
|
Packit |
0848f5 |
if (wrank == 0 && verbose) {
|
|
Packit |
0848f5 |
printf("Pingpong\n");
|
|
Packit |
0848f5 |
printf("len\ttime (usec)\trate (MB/s)\n");
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* Send powers of 2 bytes */
|
|
Packit |
0848f5 |
len = 1;
|
|
Packit |
0848f5 |
for (k = 0; k < 20; k++) {
|
|
Packit |
0848f5 |
/* We use a simple linear form for the number of tests to
|
|
Packit |
0848f5 |
* reduce the impact of the granularity of the timer */
|
|
Packit |
0848f5 |
reps = 50 - k;
|
|
Packit |
0848f5 |
repsleft = reps;
|
|
Packit |
0848f5 |
/* Make sure that both processes are ready to start */
|
|
Packit |
0848f5 |
MPI_Sendrecv(MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
|
|
Packit |
0848f5 |
MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
|
Packit |
0848f5 |
t1 = MPI_Wtime();
|
|
Packit |
0848f5 |
while (repsleft--) {
|
|
Packit |
0848f5 |
if (wrank & 0x1) {
|
|
Packit |
0848f5 |
MPI_Send(sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD);
|
|
Packit |
0848f5 |
MPI_Recv(rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
else {
|
|
Packit |
0848f5 |
MPI_Recv(rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
|
Packit |
0848f5 |
MPI_Send(sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
t1 = MPI_Wtime() - t1;
|
|
Packit |
0848f5 |
times[2][k] = t1 / reps;
|
|
Packit |
0848f5 |
if (wrank == 0) {
|
|
Packit |
0848f5 |
t1 = t1 / reps;
|
|
Packit |
0848f5 |
if (t1 > 0) {
|
|
Packit |
0848f5 |
double rate;
|
|
Packit |
0848f5 |
rate = (len / t1) / 1.e6;
|
|
Packit |
0848f5 |
t1 = t1 * 1.e6;
|
|
Packit |
0848f5 |
if (verbose)
|
|
Packit |
0848f5 |
printf("%d\t%g\t%g\n", len, t1, len / t1);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
else {
|
|
Packit |
0848f5 |
t1 = t1 * 1.e6;
|
|
Packit |
0848f5 |
if (verbose)
|
|
Packit |
0848f5 |
printf("%d\t%g\tINF\n", len, t1);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
if (verbose)
|
|
Packit |
0848f5 |
fflush(stdout);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
len *= 2;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* At this point, we could optionally analyze the results and report
|
|
Packit |
0848f5 |
* success or failure based on some criteria, such as near monotone
|
|
Packit |
0848f5 |
* increases in bandwidth. This test was created because of a
|
|
Packit |
0848f5 |
* fall-off in performance noted in the ch3:sock device:channel */
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
if (wrank == 0) {
|
|
Packit |
0848f5 |
int nPerfErrors = 0;
|
|
Packit |
0848f5 |
len = 1;
|
|
Packit |
0848f5 |
for (k = 0; k < 20; k++) {
|
|
Packit |
0848f5 |
double T0, T1, T2;
|
|
Packit |
0848f5 |
T0 = times[0][k] * 1.e6;
|
|
Packit |
0848f5 |
T1 = times[1][k] * 1.e6;
|
|
Packit |
0848f5 |
T2 = times[2][k] * 1.e6;
|
|
Packit |
0848f5 |
if (verbose)
|
|
Packit |
0848f5 |
printf("%d\t%12.2f\t%12.2f\t%12.2f\n", len, T0, T1, T2);
|
|
Packit |
0848f5 |
/* Lets look at long messages only */
|
|
Packit |
0848f5 |
if (k > 10) {
|
|
Packit |
0848f5 |
double T0Old, T1Old, T2Old;
|
|
Packit |
0848f5 |
T0Old = times[0][k - 1] * 1.0e6;
|
|
Packit |
0848f5 |
T1Old = times[1][k - 1] * 1.0e6;
|
|
Packit |
0848f5 |
T2Old = times[2][k - 1] * 1.0e6;
|
|
Packit |
0848f5 |
if (T0 > (2 + ERROR_MARGIN) * T0Old) {
|
|
Packit |
0848f5 |
nPerfErrors++;
|
|
Packit |
0848f5 |
if (verbose)
|
|
Packit |
0848f5 |
printf("Irecv-Send:\t%d\t%12.2f\t%12.2f\n", len, T0Old, T0);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
if (T1 > (2 + ERROR_MARGIN) * T1Old) {
|
|
Packit |
0848f5 |
nPerfErrors++;
|
|
Packit |
0848f5 |
if (verbose)
|
|
Packit |
0848f5 |
printf("Sendrecv:\t%d\t%12.2f\t%12.2f\n", len, T1Old, T1);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
if (T2 > (2 + ERROR_MARGIN) * T2Old) {
|
|
Packit |
0848f5 |
nPerfErrors++;
|
|
Packit |
0848f5 |
if (verbose)
|
|
Packit |
0848f5 |
printf("Pingpong:\t%d\t%12.2f\t%12.2f\n", len, T2Old, T2);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
len *= 2;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
if (nPerfErrors > 8) {
|
|
Packit |
0848f5 |
/* Allow for 1-2 errors for eager-rendezvous shifting
|
|
Packit |
0848f5 |
* point and cache effects. There should be a better way
|
|
Packit |
0848f5 |
* of doing this. */
|
|
Packit |
0848f5 |
printf(" Found %d performance errors\n", nPerfErrors);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
else {
|
|
Packit |
0848f5 |
printf(" No Errors\n");
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
fflush(stdout);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
free(sbuf);
|
|
Packit |
0848f5 |
free(rbuf);
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
MPI_Finalize();
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
return 0;
|
|
Packit |
0848f5 |
}
|