|
Packit |
0848f5 |
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
|
Packit |
0848f5 |
/*
|
|
Packit |
0848f5 |
* (C) 2010 by Argonne National Laboratory.
|
|
Packit |
0848f5 |
* See COPYRIGHT in top-level directory.
|
|
Packit |
0848f5 |
*/
|
|
Packit |
0848f5 |
#include "mpi.h"
|
|
Packit |
0848f5 |
#include <stdio.h>
|
|
Packit |
0848f5 |
#include <stdlib.h>
|
|
Packit |
0848f5 |
#include "mpitest.h"
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
#include <math.h> /* for fabs(3) */
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* Measure and compare the relative performance of MPI_Group_translate_ranks
 * with small and large group2 sizes but a constant number of ranks.  This
 * serves as a performance sanity check for the Scalasca use case where we
 * translate to MPI_COMM_WORLD ranks.  The performance should only depend on the
 * number of ranks passed, not the size of either group (especially group2).
 *
 * This test is probably only meaningful for large-ish process counts, so we may
 * not be able to run this test by default in the nightlies. */
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* number of iterations used for timing */
|
|
Packit |
0848f5 |
#define NUM_LOOPS (1000000)
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
int main(int argc, char *argv[])
|
|
Packit |
0848f5 |
{
|
|
Packit |
0848f5 |
int errs = 0;
|
|
Packit |
0848f5 |
int *ranks;
|
|
Packit |
0848f5 |
int *ranksout;
|
|
Packit |
0848f5 |
MPI_Group gworld, grev, gself;
|
|
Packit |
0848f5 |
MPI_Comm comm;
|
|
Packit |
0848f5 |
MPI_Comm commrev;
|
|
Packit |
0848f5 |
int rank, size, i;
|
|
Packit |
0848f5 |
double start, end, time1, time2;
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
MTest_Init(&argc, &argv);
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
comm = MPI_COMM_WORLD;
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
MPI_Comm_size(comm, &size);
|
|
Packit |
0848f5 |
MPI_Comm_rank(comm, &rank;;
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
ranks = malloc(size * sizeof(int));
|
|
Packit |
0848f5 |
ranksout = malloc(size * sizeof(int));
|
|
Packit |
0848f5 |
if (!ranks || !ranksout) {
|
|
Packit |
0848f5 |
fprintf(stderr, "out of memory\n");
|
|
Packit |
0848f5 |
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* generate a comm with the rank order reversed */
|
|
Packit |
0848f5 |
MPI_Comm_split(comm, 0, (size - rank - 1), &commrev);
|
|
Packit |
0848f5 |
MPI_Comm_group(commrev, &grev);
|
|
Packit |
0848f5 |
MPI_Comm_group(MPI_COMM_SELF, &gself);
|
|
Packit |
0848f5 |
MPI_Comm_group(comm, &gworld);
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* sanity check correctness first */
|
|
Packit |
0848f5 |
for (i = 0; i < size; i++) {
|
|
Packit |
0848f5 |
ranks[i] = i;
|
|
Packit |
0848f5 |
ranksout[i] = -1;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
MPI_Group_translate_ranks(grev, size, ranks, gworld, ranksout);
|
|
Packit |
0848f5 |
for (i = 0; i < size; i++) {
|
|
Packit |
0848f5 |
if (ranksout[i] != (size - i - 1)) {
|
|
Packit |
0848f5 |
if (rank == 0)
|
|
Packit |
0848f5 |
printf("%d: (gworld) expected ranksout[%d]=%d, got %d\n", rank, i,
|
|
Packit |
0848f5 |
(size - rank - 1), ranksout[i]);
|
|
Packit |
0848f5 |
++errs;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
MPI_Group_translate_ranks(grev, size, ranks, gself, ranksout);
|
|
Packit |
0848f5 |
for (i = 0; i < size; i++) {
|
|
Packit |
0848f5 |
int expected = (i == (size - rank - 1) ? 0 : MPI_UNDEFINED);
|
|
Packit |
0848f5 |
if (ranksout[i] != expected) {
|
|
Packit |
0848f5 |
if (rank == 0)
|
|
Packit |
0848f5 |
printf("%d: (gself) expected ranksout[%d]=%d, got %d\n", rank, i, expected,
|
|
Packit |
0848f5 |
ranksout[i]);
|
|
Packit |
0848f5 |
++errs;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* now compare relative performance */
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* we needs lots of procs to get a group large enough to have meaningful
|
|
Packit |
0848f5 |
* numbers. On most testing machines this means that we're oversubscribing
|
|
Packit |
0848f5 |
* cores in a big way, which might perturb the timing results. So we make
|
|
Packit |
0848f5 |
* sure everyone started up and then everyone but rank 0 goes to sleep to
|
|
Packit |
0848f5 |
* let rank 0 do all the timings. */
|
|
Packit |
0848f5 |
MPI_Barrier(comm);
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
if (rank != 0) {
|
|
Packit |
0848f5 |
MTestSleep(10);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
else { /* rank==0 */
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
MTestSleep(1); /* try to avoid timing while everyone else is making syscalls */
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
MPI_Group_translate_ranks(grev, size, ranks, gworld, ranksout); /*throwaway iter */
|
|
Packit |
0848f5 |
start = MPI_Wtime();
|
|
Packit |
0848f5 |
for (i = 0; i < NUM_LOOPS; ++i) {
|
|
Packit |
0848f5 |
MPI_Group_translate_ranks(grev, size, ranks, gworld, ranksout);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
end = MPI_Wtime();
|
|
Packit |
0848f5 |
time1 = end - start;
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
MPI_Group_translate_ranks(grev, size, ranks, gself, ranksout); /*throwaway iter */
|
|
Packit |
0848f5 |
start = MPI_Wtime();
|
|
Packit |
0848f5 |
for (i = 0; i < NUM_LOOPS; ++i) {
|
|
Packit |
0848f5 |
MPI_Group_translate_ranks(grev, size, ranks, gself, ranksout);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
end = MPI_Wtime();
|
|
Packit |
0848f5 |
time2 = end - start;
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* complain if the "gworld" time exceeds 2x the "gself" time */
|
|
Packit |
0848f5 |
if (fabs(time1 - time2) > (2.00 * time2)) {
|
|
Packit |
0848f5 |
printf("too much difference in MPI_Group_translate_ranks performance:\n");
|
|
Packit |
0848f5 |
printf("time1=%f time2=%f\n", time1, time2);
|
|
Packit |
0848f5 |
printf("(fabs(time1-time2)/time2)=%f\n", (fabs(time1 - time2) / time2));
|
|
Packit |
0848f5 |
if (time1 < time2) {
|
|
Packit |
0848f5 |
printf("also, (time1
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
++errs;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
free(ranks);
|
|
Packit |
0848f5 |
free(ranksout);
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
MPI_Group_free(&grev);
|
|
Packit |
0848f5 |
MPI_Group_free(&gself);
|
|
Packit |
0848f5 |
MPI_Group_free(&gworld);
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
MPI_Comm_free(&commrev);
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
MTest_Finalize(errs);
|
|
Packit |
0848f5 |
MPI_Finalize();
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
return 0;
|
|
Packit |
0848f5 |
}
|