|
Packit Service |
c5cf8c |
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 *
 * (C) 2009 by Argonne National Laboratory.
 * See COPYRIGHT in top-level directory.
 */
|
|
Packit Service |
c5cf8c |
#include "mpi.h"
|
|
Packit Service |
c5cf8c |
#include <stdio.h>
|
|
Packit Service |
c5cf8c |
#include <stdlib.h>
|
|
Packit Service |
c5cf8c |
#include "mpitest.h"
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/*
static char MTEST_Descrip[] = "Test error reporting from faults with point to point communication";
*/
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* Report an MPI error code on stderr, labelled with the name of the
 * operation that failed; returns 1 so callers can add it to an error count. */
int ReportErr(int errcode, const char name[]);
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
int main(int argc, char *argv[])
|
|
Packit Service |
c5cf8c |
{
|
|
Packit Service |
c5cf8c |
int wrank, wsize, rank, size, color;
|
|
Packit Service |
c5cf8c |
int j, tmp;
|
|
Packit Service |
c5cf8c |
int err, toterrs, errs = 0;
|
|
Packit Service |
c5cf8c |
MPI_Comm newcomm;
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
MTest_Init(&argc, &argv);
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
MPI_Comm_size(MPI_COMM_WORLD, &wsize);
|
|
Packit Service |
c5cf8c |
MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* Color is 0 or 1; 1 will be the processes that "fault" */
|
|
Packit Service |
c5cf8c |
/* process 0 and wsize/2+1...wsize-1 are in non-faulting group */
|
|
Packit Service |
c5cf8c |
color = (wrank > 0) && (wrank <= wsize / 2);
|
|
Packit Service |
c5cf8c |
MPI_Comm_split(MPI_COMM_WORLD, color, wrank, &newcomm);
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
MPI_Comm_size(newcomm, &size);
|
|
Packit Service |
c5cf8c |
MPI_Comm_rank(newcomm, &rank;;
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* Set errors return on COMM_WORLD and the new comm */
|
|
Packit Service |
c5cf8c |
MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
|
|
Packit Service |
c5cf8c |
MPI_Comm_set_errhandler(newcomm, MPI_ERRORS_RETURN);
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
err = MPI_Barrier(MPI_COMM_WORLD);
|
|
Packit Service |
c5cf8c |
if (err)
|
|
Packit Service |
c5cf8c |
errs += ReportErr(err, "Barrier");
|
|
Packit Service |
c5cf8c |
if (color) {
|
|
Packit Service |
c5cf8c |
/* Simulate a fault on some processes */
|
|
Packit Service |
c5cf8c |
exit(1);
|
|
Packit Service |
c5cf8c |
} else {
|
|
Packit Service |
c5cf8c |
/* To improve the chance that the "faulted" processes will have
|
|
Packit Service |
c5cf8c |
* exited, wait for 1 second */
|
|
Packit Service |
c5cf8c |
MTestSleep(1);
|
|
Packit Service |
c5cf8c |
}
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* Can we still use newcomm? */
|
|
Packit Service |
c5cf8c |
for (j = 0; j < rank; j++) {
|
|
Packit Service |
c5cf8c |
err = MPI_Recv(&tmp, 1, MPI_INT, j, 0, newcomm, MPI_STATUS_IGNORE);
|
|
Packit Service |
c5cf8c |
if (err)
|
|
Packit Service |
c5cf8c |
errs += ReportErr(err, "Recv");
|
|
Packit Service |
c5cf8c |
}
|
|
Packit Service |
c5cf8c |
for (j = rank + 1; j < size; j++) {
|
|
Packit Service |
c5cf8c |
err = MPI_Send(&rank, 1, MPI_INT, j, 0, newcomm);
|
|
Packit Service |
c5cf8c |
if (err)
|
|
Packit Service |
c5cf8c |
errs += ReportErr(err, "Recv");
|
|
Packit Service |
c5cf8c |
}
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* Now, try sending in MPI_COMM_WORLD on dead processes */
|
|
Packit Service |
c5cf8c |
/* There is a race condition here - we don't know for sure that the faulted
|
|
Packit Service |
c5cf8c |
* processes have exited. However, we can ensure a failure by using
|
|
Packit Service |
c5cf8c |
* synchronous sends - the sender will wait until the reciever handles
|
|
Packit Service |
c5cf8c |
* receives the message, which will not happen (the process will exit
|
|
Packit Service |
c5cf8c |
* without matching the message, even if it has not yet exited). */
|
|
Packit Service |
c5cf8c |
for (j = 1; j <= wsize / 2; j++) {
|
|
Packit Service |
c5cf8c |
err = MPI_Ssend(&rank, 1, MPI_INT, j, 0, MPI_COMM_WORLD);
|
|
Packit Service |
c5cf8c |
if (!err) {
|
|
Packit Service |
c5cf8c |
errs++;
|
|
Packit Service |
c5cf8c |
fprintf(stderr, "Ssend succeeded to dead process %d\n", j);
|
|
Packit Service |
c5cf8c |
}
|
|
Packit Service |
c5cf8c |
}
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
err = MPI_Allreduce(&errs, &toterrs, 1, MPI_INT, MPI_SUM, newcomm);
|
|
Packit Service |
c5cf8c |
if (err)
|
|
Packit Service |
c5cf8c |
errs += ReportErr(err, "Allreduce");
|
|
Packit Service |
c5cf8c |
MPI_Comm_free(&newcomm);
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
MTest_Finalize(toterrs);
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
return MTestReturnValue(errs);
|
|
Packit Service |
c5cf8c |
}
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
int ReportErr(int errcode, const char name[])
|
|
Packit Service |
c5cf8c |
{
|
|
Packit Service |
c5cf8c |
int errclass, errlen;
|
|
Packit Service |
c5cf8c |
char errmsg[MPI_MAX_ERROR_STRING];
|
|
Packit Service |
c5cf8c |
MPI_Error_class(errcode, &errclass);
|
|
Packit Service |
c5cf8c |
MPI_Error_string(errcode, errmsg, &errlen);
|
|
Packit Service |
c5cf8c |
fprintf(stderr, "In %s, error code %d(class %d) = %s\n", name, errcode, errclass, errmsg);
|
|
Packit Service |
c5cf8c |
return 1;
|
|
Packit Service |
c5cf8c |
}
|