/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 * (C) 2006 by Argonne National Laboratory.
 *     See COPYRIGHT in top-level directory.
 */

/* This program provides a simple test of send-receive performance between
   two (or more) processes.  This is sometimes called a head-to-head or
   ping-ping test, as both processes send at the same time. */

#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include "mpitest.h"

#define MAXTESTS 32
#define ERROR_MARGIN 1.0        /* FIXME: This number is pretty much randomly chosen */

static int verbose = 0;

int main(int argc, char *argv[])
{
    int wsize, wrank, partner, len, maxlen, k, reps, repsleft;
    double t1;
    MPI_Request rreq;
    char *rbuf, *sbuf;
    double times[3][MAXTESTS];

    MTest_Init(&argc, &argv);
    if (getenv("MPITEST_VERBOSE"))
        verbose = 1;

    MPI_Comm_size(MPI_COMM_WORLD, &wsize);
    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);

    if (wsize < 2) {
        fprintf(stderr, "This program requires at least 2 processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* Set partner based on whether rank is odd or even */
    if (wrank & 0x1) {
        partner = wrank - 1;
    } else if (wrank < wsize - 1) {
        partner = wrank + 1;
    } else {
        /* Handle wsize odd */
        partner = MPI_PROC_NULL;
    }

    /* Allocate and initialize buffers */
    maxlen = 1024 * 1024;
    rbuf = (char *) malloc(maxlen);
    sbuf = (char *) malloc(maxlen);
    if (!rbuf || !sbuf) {
        fprintf(stderr, "Could not allocate %d byte buffers\n", maxlen);
        MPI_Abort(MPI_COMM_WORLD, 2);
    }
    for (k = 0; k < maxlen; k++) {
        rbuf[k] = 0;
        sbuf[k] = 0;
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Test Irecv and send, head to head */
    if (wrank == 0 && verbose) {
        printf("Irecv-send\n");
        printf("len\ttime (usec)\trate (MB/s)\n");
    }

    /* Send powers of 2 bytes */
    len = 1;
    for (k = 0; k < 20; k++) {
        /* We use a simple linear form for the number of tests to
         * reduce the impact of the granularity of the timer */
        reps = 50 - k;
        repsleft = reps;
        /* Make sure that both processes are ready to start */
        MPI_Sendrecv(MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
                     MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        t1 = MPI_Wtime();
        while (repsleft--) {
            MPI_Irecv(rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, &rreq);
            MPI_Send(sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD);
            MPI_Wait(&rreq, MPI_STATUS_IGNORE);
        }
        t1 = MPI_Wtime() - t1;
        times[0][k] = t1 / reps;
        if (wrank == 0) {
            t1 = t1 / reps;
            if (t1 > 0) {
                double rate = (len / t1) / 1.e6;        /* bytes/sec scaled to MB/s */
                t1 = t1 * 1.e6;                         /* seconds to microseconds */
                if (verbose)
                    printf("%d\t%g\t%g\n", len, t1, rate);
            } else {
                t1 = t1 * 1.e6;
                if (verbose)
                    printf("%d\t%g\tINF\n", len, t1);
            }
            if (verbose)
                fflush(stdout);
        }
        len *= 2;
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Test Sendrecv, head to head */
    if (wrank == 0 && verbose) {
        printf("Sendrecv\n");
        printf("len\ttime (usec)\trate (MB/s)\n");
    }

    /* Send powers of 2 bytes */
    len = 1;
    for (k = 0; k < 20; k++) {
        /* We use a simple linear form for the number of tests to
         * reduce the impact of the granularity of the timer */
        reps = 50 - k;
        repsleft = reps;
        /* Make sure that both processes are ready to start */
        MPI_Sendrecv(MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
                     MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        t1 = MPI_Wtime();
        while (repsleft--) {
            MPI_Sendrecv(sbuf, len, MPI_BYTE, partner, k,
                         rbuf, len, MPI_BYTE, partner, k,
                         MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }
        t1 = MPI_Wtime() - t1;
        times[1][k] = t1 / reps;
        if (wrank == 0) {
            t1 = t1 / reps;
            if (t1 > 0) {
                double rate = (len / t1) / 1.e6;        /* bytes/sec scaled to MB/s */
                t1 = t1 * 1.e6;                         /* seconds to microseconds */
                if (verbose)
                    printf("%d\t%g\t%g\n", len, t1, rate);
            } else {
                t1 = t1 * 1.e6;
                if (verbose)
                    printf("%d\t%g\tINF\n", len, t1);
            }
            if (verbose)
                fflush(stdout);
        }
        len *= 2;
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Test Send/recv, ping-pong */
    if (wrank == 0 && verbose) {
        printf("Pingpong\n");
        printf("len\ttime (usec)\trate (MB/s)\n");
    }

    /* Send powers of 2 bytes */
    len = 1;
    for (k = 0; k < 20; k++) {
        /* We use a simple linear form for the number of tests to
         * reduce the impact of the granularity of the timer */
        reps = 50 - k;
        repsleft = reps;
        /* Make sure that both processes are ready to start */
        MPI_Sendrecv(MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
                     MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        t1 = MPI_Wtime();
        while (repsleft--) {
            if (wrank & 0x1) {
                MPI_Send(sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD);
                MPI_Recv(rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            } else {
                MPI_Recv(rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
                MPI_Send(sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD);
            }
        }
        t1 = MPI_Wtime() - t1;
        times[2][k] = t1 / reps;
        if (wrank == 0) {
            t1 = t1 / reps;
            if (t1 > 0) {
                double rate = (len / t1) / 1.e6;        /* bytes/sec scaled to MB/s */
                t1 = t1 * 1.e6;                         /* seconds to microseconds */
                if (verbose)
                    printf("%d\t%g\t%g\n", len, t1, rate);
            } else {
                t1 = t1 * 1.e6;
                if (verbose)
                    printf("%d\t%g\tINF\n", len, t1);
            }
            if (verbose)
                fflush(stdout);
        }
        len *= 2;
    }

    /* At this point, we could optionally analyze the results and report
     * success or failure based on some criteria, such as near monotone
     * increases in bandwidth.  This test was created because of a
     * fall-off in performance noted in the ch3:sock device:channel */
    int nPerfErrors = 0;
    if (wrank == 0) {
        len = 1;
        for (k = 0; k < 20; k++) {
            double T0, T1, T2;
            T0 = times[0][k] * 1.e6;
            T1 = times[1][k] * 1.e6;
            T2 = times[2][k] * 1.e6;
            if (verbose)
                printf("%d\t%12.2f\t%12.2f\t%12.2f\n", len, T0, T1, T2);
            /* Let's look at long messages only */
            if (k > 10) {
                double T0Old, T1Old, T2Old;
                T0Old = times[0][k - 1] * 1.0e6;
                T1Old = times[1][k - 1] * 1.0e6;
                T2Old = times[2][k - 1] * 1.0e6;
                /* Doubling the message size should roughly double the time;
                 * flag a performance error if the time grows by more than
                 * a factor of (2 + ERROR_MARGIN) over the previous size */
                if (T0 > (2 + ERROR_MARGIN) * T0Old) {
                    nPerfErrors++;
                    if (verbose)
                        printf("Irecv-Send:\t%d\t%12.2f\t%12.2f\n", len, T0Old, T0);
                }
                if (T1 > (2 + ERROR_MARGIN) * T1Old) {
                    nPerfErrors++;
                    if (verbose)
                        printf("Sendrecv:\t%d\t%12.2f\t%12.2f\n", len, T1Old, T1);
                }
                if (T2 > (2 + ERROR_MARGIN) * T2Old) {
                    nPerfErrors++;
                    if (verbose)
                        printf("Pingpong:\t%d\t%12.2f\t%12.2f\n", len, T2Old, T2);
                }
            }
            len *= 2;
        }
        fflush(stdout);
    }

    free(sbuf);
    free(rbuf);

    MTest_Finalize(nPerfErrors);
    return MTestReturnValue(nPerfErrors);
}
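
/*
 * Usage sketch (an assumption, not part of the test source): run under an
 * MPI launcher such as mpiexec with at least two processes.  The binary
 * name "sendrecvperf" below is illustrative.
 *
 *     MPITEST_VERBOSE=1 mpiexec -n 2 ./sendrecvperf
 *
 * With MPITEST_VERBOSE set (checked via getenv above), per-message-size
 * timings for the Irecv-send, Sendrecv, and ping-pong phases are printed;
 * otherwise only the MTest summary derived from nPerfErrors is reported.
 */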