Blame test/mpi/perf/sendrecvl.c

Packit 0848f5
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
Packit 0848f5
/*
Packit 0848f5
 *  (C) 2006 by Argonne National Laboratory.
Packit 0848f5
 *      See COPYRIGHT in top-level directory.
Packit 0848f5
 */
Packit 0848f5
Packit 0848f5
/* This program provides a simple test of send-receive performance between
Packit 0848f5
   two (or more) processes.  This is sometimes called a head-to-head or
Packit 0848f5
   ping-ping test, as both processes send at the same time.
Packit 0848f5
*/
Packit 0848f5
Packit 0848f5
#include "mpi.h"
Packit 0848f5
#include <stdio.h>
Packit 0848f5
#include <stdlib.h>
Packit 0848f5
Packit 0848f5
/* Second dimension of the times[3][MAXTESTS] table declared in main();
 * one slot per message size for each of the three test kinds. */
#define MAXTESTS 32
Packit 0848f5
/* Slack used by the final analysis in main(): a timing is flagged when it
 * exceeds (2 + ERROR_MARGIN) times the timing of the previous message size. */
#define ERROR_MARGIN 1.0        /* FIXME: This number is pretty much randomly chosen */
Packit 0848f5
Packit 0848f5
/* Set to 1 in main() when the MPITEST_VERBOSE environment variable is
 * present; gates the per-size timing output. */
static int verbose = 0;
Packit 0848f5
Packit 0848f5
int main(int argc, char *argv[])
Packit 0848f5
{
Packit 0848f5
    int wsize, wrank, partner, len, maxlen, k, reps, repsleft;
Packit 0848f5
    double t1;
Packit 0848f5
    MPI_Request rreq;
Packit 0848f5
    char *rbuf, *sbuf;
Packit 0848f5
    double times[3][MAXTESTS];
Packit 0848f5
Packit 0848f5
    MPI_Init(&argc, &argv);
Packit 0848f5
    if (getenv("MPITEST_VERBOSE"))
Packit 0848f5
        verbose = 1;
Packit 0848f5
Packit 0848f5
    MPI_Comm_size(MPI_COMM_WORLD, &wsize);
Packit 0848f5
    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
Packit 0848f5
Packit 0848f5
    if (wsize < 2) {
Packit 0848f5
        fprintf(stderr, "This program requires at least 2 processes\n");
Packit 0848f5
        MPI_Abort(MPI_COMM_WORLD, 1);
Packit 0848f5
    }
Packit 0848f5
    /* Set partner based on whether rank is odd or even */
Packit 0848f5
    if (wrank & 0x1) {
Packit 0848f5
        partner = wrank - 1;
Packit 0848f5
    }
Packit 0848f5
    else if (wrank < wsize - 1) {
Packit 0848f5
        partner = wrank + 1;
Packit 0848f5
    }
Packit 0848f5
    else
Packit 0848f5
        /* Handle wsize odd */
Packit 0848f5
        partner = MPI_PROC_NULL;
Packit 0848f5
Packit 0848f5
    /* Allocate and initialize buffers */
Packit 0848f5
    maxlen = 1024 * 1024;
Packit 0848f5
    rbuf = (char *) malloc(maxlen);
Packit 0848f5
    sbuf = (char *) malloc(maxlen);
Packit 0848f5
    if (!rbuf || !sbuf) {
Packit 0848f5
        fprintf(stderr, "Could not allocate %d byte buffers\n", maxlen);
Packit 0848f5
        MPI_Abort(MPI_COMM_WORLD, 2);
Packit 0848f5
    }
Packit 0848f5
    for (k = 0; k < maxlen; k++) {
Packit 0848f5
        rbuf[k] = 0;
Packit 0848f5
        sbuf[k] = 0;
Packit 0848f5
    }
Packit 0848f5
Packit 0848f5
    MPI_Barrier(MPI_COMM_WORLD);
Packit 0848f5
Packit 0848f5
    /* Test Irecv and send, head to head */
Packit 0848f5
    if (wrank == 0 && verbose) {
Packit 0848f5
        printf("Irecv-send\n");
Packit 0848f5
        printf("len\ttime    \trate\n");
Packit 0848f5
    }
Packit 0848f5
Packit 0848f5
    /* Send powers of 2 bytes */
Packit 0848f5
    len = 1;
Packit 0848f5
    for (k = 0; k < 20; k++) {
Packit 0848f5
        /* We use a simple linear form for the number of tests to
Packit 0848f5
         * reduce the impact of the granularity of the timer */
Packit 0848f5
        reps = 50 - k;
Packit 0848f5
        repsleft = reps;
Packit 0848f5
        /* Make sure that both processes are ready to start */
Packit 0848f5
        MPI_Sendrecv(MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
Packit 0848f5
                     MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
Packit 0848f5
        t1 = MPI_Wtime();
Packit 0848f5
        while (repsleft--) {
Packit 0848f5
            MPI_Irecv(rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, &rreq);
Packit 0848f5
            MPI_Send(sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD);
Packit 0848f5
            MPI_Wait(&rreq, MPI_STATUS_IGNORE);
Packit 0848f5
        }
Packit 0848f5
        t1 = MPI_Wtime() - t1;
Packit 0848f5
        times[0][k] = t1 / reps;
Packit 0848f5
        if (wrank == 0) {
Packit 0848f5
            t1 = t1 / reps;
Packit 0848f5
            if (t1 > 0) {
Packit 0848f5
                double rate;
Packit 0848f5
                rate = (len / t1) / 1.e6;
Packit 0848f5
                t1 = t1 * 1.e6;
Packit 0848f5
                if (verbose)
Packit 0848f5
                    printf("%d\t%g\t%g\n", len, t1, len / t1);
Packit 0848f5
            }
Packit 0848f5
            else {
Packit 0848f5
                t1 = t1 * 1.e6;
Packit 0848f5
                if (verbose)
Packit 0848f5
                    printf("%d\t%g\tINF\n", len, t1);
Packit 0848f5
            }
Packit 0848f5
            if (verbose)
Packit 0848f5
                fflush(stdout);
Packit 0848f5
        }
Packit 0848f5
Packit 0848f5
        len *= 2;
Packit 0848f5
    }
Packit 0848f5
Packit 0848f5
    MPI_Barrier(MPI_COMM_WORLD);
Packit 0848f5
Packit 0848f5
    /* Test Sendrecv, head to head */
Packit 0848f5
    if (wrank == 0 && verbose) {
Packit 0848f5
        printf("Sendrecv\n");
Packit 0848f5
        printf("len\ttime (usec)\trate (MB/s)\n");
Packit 0848f5
    }
Packit 0848f5
Packit 0848f5
    /* Send powers of 2 bytes */
Packit 0848f5
    len = 1;
Packit 0848f5
    for (k = 0; k < 20; k++) {
Packit 0848f5
        /* We use a simple linear form for the number of tests to
Packit 0848f5
         * reduce the impact of the granularity of the timer */
Packit 0848f5
        reps = 50 - k;
Packit 0848f5
        repsleft = reps;
Packit 0848f5
        /* Make sure that both processes are ready to start */
Packit 0848f5
        MPI_Sendrecv(MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
Packit 0848f5
                     MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
Packit 0848f5
        t1 = MPI_Wtime();
Packit 0848f5
        while (repsleft--) {
Packit 0848f5
            MPI_Sendrecv(sbuf, len, MPI_BYTE, partner, k,
Packit 0848f5
                         rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
Packit 0848f5
        }
Packit 0848f5
        t1 = MPI_Wtime() - t1;
Packit 0848f5
        times[1][k] = t1 / reps;
Packit 0848f5
        if (wrank == 0) {
Packit 0848f5
            t1 = t1 / reps;
Packit 0848f5
            if (t1 > 0) {
Packit 0848f5
                double rate;
Packit 0848f5
                rate = (len / t1) / 1.e6;
Packit 0848f5
                t1 = t1 * 1.e6;
Packit 0848f5
                if (verbose)
Packit 0848f5
                    printf("%d\t%g\t%g\n", len, t1, len / t1);
Packit 0848f5
            }
Packit 0848f5
            else {
Packit 0848f5
                t1 = t1 * 1.e6;
Packit 0848f5
                if (verbose)
Packit 0848f5
                    printf("%d\t%g\tINF\n", len, t1);
Packit 0848f5
            }
Packit 0848f5
            if (verbose)
Packit 0848f5
                fflush(stdout);
Packit 0848f5
        }
Packit 0848f5
Packit 0848f5
        len *= 2;
Packit 0848f5
    }
Packit 0848f5
Packit 0848f5
    MPI_Barrier(MPI_COMM_WORLD);
Packit 0848f5
Packit 0848f5
    /* Test Send/recv, ping-pong */
Packit 0848f5
    if (wrank == 0 && verbose) {
Packit 0848f5
        printf("Pingpong\n");
Packit 0848f5
        printf("len\ttime (usec)\trate (MB/s)\n");
Packit 0848f5
    }
Packit 0848f5
Packit 0848f5
    /* Send powers of 2 bytes */
Packit 0848f5
    len = 1;
Packit 0848f5
    for (k = 0; k < 20; k++) {
Packit 0848f5
        /* We use a simple linear form for the number of tests to
Packit 0848f5
         * reduce the impact of the granularity of the timer */
Packit 0848f5
        reps = 50 - k;
Packit 0848f5
        repsleft = reps;
Packit 0848f5
        /* Make sure that both processes are ready to start */
Packit 0848f5
        MPI_Sendrecv(MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
Packit 0848f5
                     MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
Packit 0848f5
        t1 = MPI_Wtime();
Packit 0848f5
        while (repsleft--) {
Packit 0848f5
            if (wrank & 0x1) {
Packit 0848f5
                MPI_Send(sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD);
Packit 0848f5
                MPI_Recv(rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
Packit 0848f5
            }
Packit 0848f5
            else {
Packit 0848f5
                MPI_Recv(rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
Packit 0848f5
                MPI_Send(sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD);
Packit 0848f5
            }
Packit 0848f5
        }
Packit 0848f5
        t1 = MPI_Wtime() - t1;
Packit 0848f5
        times[2][k] = t1 / reps;
Packit 0848f5
        if (wrank == 0) {
Packit 0848f5
            t1 = t1 / reps;
Packit 0848f5
            if (t1 > 0) {
Packit 0848f5
                double rate;
Packit 0848f5
                rate = (len / t1) / 1.e6;
Packit 0848f5
                t1 = t1 * 1.e6;
Packit 0848f5
                if (verbose)
Packit 0848f5
                    printf("%d\t%g\t%g\n", len, t1, len / t1);
Packit 0848f5
            }
Packit 0848f5
            else {
Packit 0848f5
                t1 = t1 * 1.e6;
Packit 0848f5
                if (verbose)
Packit 0848f5
                    printf("%d\t%g\tINF\n", len, t1);
Packit 0848f5
            }
Packit 0848f5
            if (verbose)
Packit 0848f5
                fflush(stdout);
Packit 0848f5
        }
Packit 0848f5
Packit 0848f5
        len *= 2;
Packit 0848f5
    }
Packit 0848f5
Packit 0848f5
Packit 0848f5
    /* At this point, we could optionally analyze the results and report
Packit 0848f5
     * success or failure based on some criteria, such as near monotone
Packit 0848f5
     * increases in bandwidth.  This test was created because of a
Packit 0848f5
     * fall-off in performance noted in the ch3:sock device:channel */
Packit 0848f5
Packit 0848f5
    if (wrank == 0) {
Packit 0848f5
        int nPerfErrors = 0;
Packit 0848f5
        len = 1;
Packit 0848f5
        for (k = 0; k < 20; k++) {
Packit 0848f5
            double T0, T1, T2;
Packit 0848f5
            T0 = times[0][k] * 1.e6;
Packit 0848f5
            T1 = times[1][k] * 1.e6;
Packit 0848f5
            T2 = times[2][k] * 1.e6;
Packit 0848f5
            if (verbose)
Packit 0848f5
                printf("%d\t%12.2f\t%12.2f\t%12.2f\n", len, T0, T1, T2);
Packit 0848f5
            /* Lets look at long messages only */
Packit 0848f5
            if (k > 10) {
Packit 0848f5
                double T0Old, T1Old, T2Old;
Packit 0848f5
                T0Old = times[0][k - 1] * 1.0e6;
Packit 0848f5
                T1Old = times[1][k - 1] * 1.0e6;
Packit 0848f5
                T2Old = times[2][k - 1] * 1.0e6;
Packit 0848f5
                if (T0 > (2 + ERROR_MARGIN) * T0Old) {
Packit 0848f5
                    nPerfErrors++;
Packit 0848f5
                    if (verbose)
Packit 0848f5
                        printf("Irecv-Send:\t%d\t%12.2f\t%12.2f\n", len, T0Old, T0);
Packit 0848f5
                }
Packit 0848f5
                if (T1 > (2 + ERROR_MARGIN) * T1Old) {
Packit 0848f5
                    nPerfErrors++;
Packit 0848f5
                    if (verbose)
Packit 0848f5
                        printf("Sendrecv:\t%d\t%12.2f\t%12.2f\n", len, T1Old, T1);
Packit 0848f5
                }
Packit 0848f5
                if (T2 > (2 + ERROR_MARGIN) * T2Old) {
Packit 0848f5
                    nPerfErrors++;
Packit 0848f5
                    if (verbose)
Packit 0848f5
                        printf("Pingpong:\t%d\t%12.2f\t%12.2f\n", len, T2Old, T2);
Packit 0848f5
                }
Packit 0848f5
            }
Packit 0848f5
            len *= 2;
Packit 0848f5
        }
Packit 0848f5
        if (nPerfErrors > 8) {
Packit 0848f5
            /* Allow for 1-2 errors for eager-rendezvous shifting
Packit 0848f5
             * point and cache effects. There should be a better way
Packit 0848f5
             * of doing this. */
Packit 0848f5
            printf(" Found %d performance errors\n", nPerfErrors);
Packit 0848f5
        }
Packit 0848f5
        else {
Packit 0848f5
            printf(" No Errors\n");
Packit 0848f5
        }
Packit 0848f5
        fflush(stdout);
Packit 0848f5
    }
Packit 0848f5
Packit 0848f5
    free(sbuf);
Packit 0848f5
    free(rbuf);
Packit 0848f5
Packit 0848f5
    MPI_Finalize();
Packit 0848f5
Packit 0848f5
    return 0;
Packit 0848f5
}