/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* (C) 2001 by Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#include "mpiimpl.h"
/* Algorithm: Blocked Alltoallw
*
* Since each process sends/receives different amounts of data to every other
* process, we don't know the total message size for all processes without
* additional communication. Therefore we simply use the "middle of the road"
* isend/irecv algorithm that works reasonably well in all cases.
*
* We post all irecvs and isends and then do a waitall. We scatter the order of
* sources and destinations among the processes, so that all processes don't
* try to send/recv to/from the same process at the same time.
*
* *** Modification: We post only a small number of isends and irecvs at a time
* and wait on them as suggested by Tony Ladd. ***
*/
#undef FUNCNAME
#define FUNCNAME MPIR_Alltoallw_intra_scattered
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
/*
 * MPIR_Alltoallw_intra_scattered - throttled isend/irecv Alltoallw.
 *
 * For every peer the caller supplies a count, a displacement and a datatype
 * for both the send and the receive side.  Displacements are applied to the
 * buffers as byte offsets.  Peers whose count is zero, or whose datatype has
 * size zero, are skipped entirely.
 *
 * Communication is issued in batches of at most MPIR_CVAR_ALLTOALL_THROTTLE
 * peers: each batch posts its receives, then its sends, then waits for all of
 * them before starting the next batch.  A throttle of 0 means "one batch
 * covering every peer".
 *
 * sendbuf must not be MPI_IN_PLACE (asserted when error checking is enabled).
 *
 * errflag is in/out: it is handed to the send and wait primitives and is
 * updated here when a completed request reports an error in its status.
 *
 * Returns MPI_SUCCESS or an MPI error code.  Errors reported through request
 * statuses are accumulated and returned after all batches have been processed;
 * other failures abort the operation immediately.
 */
int MPIR_Alltoallw_intra_scattered(const void *sendbuf, const int sendcounts[], const int sdispls[],
                                   const MPI_Datatype sendtypes[], void *recvbuf,
                                   const int recvcounts[], const int rdispls[],
                                   const MPI_Datatype recvtypes[], MPIR_Comm * comm_ptr,
                                   MPIR_Errflag_t * errflag)
{
    int comm_size, i;
    int mpi_errno = MPI_SUCCESS;
    int mpi_errno_ret = MPI_SUCCESS;
    MPI_Status *starray;
    MPIR_Request **reqarray;
    int dst, rank;
    int outstanding_requests;
    int ii, ss, bblock;
    int type_size;
    MPIR_CHKLMEM_DECL(2);

    comm_size = comm_ptr->local_size;
    rank = comm_ptr->rank;

#ifdef HAVE_ERROR_CHECKING
    /* This algorithm reads sendbuf and writes recvbuf concurrently, so it
     * cannot service the MPI_IN_PLACE case; callers are expected to route
     * in-place requests elsewhere. */
    MPIR_Assert(sendbuf != MPI_IN_PLACE);
#endif

    /* Batch size.  0 requests a single batch.  Negative values are meaningless
     * and values above comm_size cannot post more requests than comm_size
     * would, so both are folded into comm_size as well.  This keeps the
     * scratch arrays at no more than 2 * comm_size entries and guarantees the
     * batch loop below always advances. */
    bblock = MPIR_CVAR_ALLTOALL_THROTTLE;
    if (bblock <= 0 || bblock > comm_size)
        bblock = comm_size;

    /* One slot per possible receive plus one per possible send in a batch. */
    MPIR_CHKLMEM_MALLOC(starray, MPI_Status *, 2 * bblock * sizeof(MPI_Status), mpi_errno,
                        "starray", MPL_MEM_BUFFER);
    MPIR_CHKLMEM_MALLOC(reqarray, MPIR_Request **, 2 * bblock * sizeof(MPIR_Request *), mpi_errno,
                        "reqarray", MPL_MEM_BUFFER);

    /* post only bblock isends/irecvs at a time as suggested by Tony Ladd */
    for (ii = 0; ii < comm_size; ii += bblock) {
        outstanding_requests = 0;
        /* the final batch may be shorter than bblock */
        ss = comm_size - ii < bblock ? comm_size - ii : bblock;

        /* Receives: walk upwards from our own rank so that different ranks
         * start with different peers. */
        for (i = 0; i < ss; i++) {
            dst = (rank + i + ii) % comm_size;
            if (recvcounts[dst]) {
                MPIR_Datatype_get_size_macro(recvtypes[dst], type_size);
                if (type_size) {
                    mpi_errno = MPIC_Irecv((char *) recvbuf + rdispls[dst],
                                           recvcounts[dst], recvtypes[dst], dst,
                                           MPIR_ALLTOALLW_TAG, comm_ptr,
                                           &reqarray[outstanding_requests]);
                    /* NOTE(review): bailing out here appears to skip completing
                     * requests already posted in this batch -- confirm whether
                     * they need to be waited on or released. */
                    if (mpi_errno) {
                        MPIR_ERR_POP(mpi_errno);
                    }
                    outstanding_requests++;
                }
            }
        }

        /* Sends: walk downwards from our own rank; adding comm_size keeps the
         * left operand of % non-negative. */
        for (i = 0; i < ss; i++) {
            dst = (rank - i - ii + comm_size) % comm_size;
            if (sendcounts[dst]) {
                MPIR_Datatype_get_size_macro(sendtypes[dst], type_size);
                if (type_size) {
                    mpi_errno = MPIC_Isend((char *) sendbuf + sdispls[dst],
                                           sendcounts[dst], sendtypes[dst], dst,
                                           MPIR_ALLTOALLW_TAG, comm_ptr,
                                           &reqarray[outstanding_requests], errflag);
                    /* NOTE(review): same concern as for the receive path above. */
                    if (mpi_errno) {
                        MPIR_ERR_POP(mpi_errno);
                    }
                    outstanding_requests++;
                }
            }
        }

        /* Complete the whole batch before posting the next one. */
        mpi_errno = MPIC_Waitall(outstanding_requests, reqarray, starray, errflag);
        if (mpi_errno && mpi_errno != MPI_ERR_IN_STATUS)
            MPIR_ERR_POP(mpi_errno);

        /* --BEGIN ERROR HANDLING-- */
        if (mpi_errno == MPI_ERR_IN_STATUS) {
            for (i = 0; i < outstanding_requests; i++) {
                if (starray[i].MPI_ERROR != MPI_SUCCESS) {
                    mpi_errno = starray[i].MPI_ERROR;
                    /* for communication errors, just record the error but continue */
                    *errflag =
                        MPIX_ERR_PROC_FAILED ==
                        MPIR_ERR_GET_CLASS(mpi_errno) ? MPIR_ERR_PROC_FAILED : MPIR_ERR_OTHER;
                    MPIR_ERR_SET(mpi_errno, *errflag, "**fail");
                    MPIR_ERR_ADD(mpi_errno_ret, mpi_errno);
                }
            }
        }
        /* --END ERROR HANDLING-- */
    }

  fn_exit:
    MPIR_CHKLMEM_FREEALL();
    /* Errors accumulated from request statuses take precedence; otherwise a
     * raised error flag is reported as a generic collective failure. */
    if (mpi_errno_ret)
        mpi_errno = mpi_errno_ret;
    else if (*errflag != MPIR_ERR_NONE)
        MPIR_ERR_SET(mpi_errno, *errflag, "**coll_fail");
    return mpi_errno;

  fn_fail:
    goto fn_exit;
}