/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 *  (C) 2017 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include "mpiimpl.h"

/* Algorithm: Inplace Alltoallw
 *
 * We use pair-wise sendrecv_replace in order to conserve memory usage, which
 * is in keeping with the spirit of the MPI-2.2 Standard.  But because of this
 * approach, all processes must agree on the global schedule of
 * sendrecv_replace operations to avoid deadlock.
 *
 * Note that this is not an especially efficient algorithm in terms of time:
 * the pairwise exchanges are serialized with schedule barriers, and a single
 * scratch buffer (sized for the largest receive) is reused across the whole
 * loop.  Something like MADRE is probably the best solution for the
 * MPI_IN_PLACE scenario. */

#undef FUNCNAME
#define FUNCNAME MPIR_Ialltoallw_sched_intra_inplace
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
int MPIR_Ialltoallw_sched_intra_inplace(const void *sendbuf, const int sendcounts[],
                                        const int sdispls[], const MPI_Datatype sendtypes[],
                                        void *recvbuf, const int recvcounts[],
                                        const int rdispls[], const MPI_Datatype recvtypes[],
                                        MPIR_Comm * comm_ptr, MPIR_Sched_t s)
{
    int mpi_errno = MPI_SUCCESS;
    int comm_size, i, j;
    int dst, rank;
    MPI_Aint recv_extent;
    MPI_Aint true_extent, true_lb;
    MPI_Aint max_size;
    void *tmp_buf = NULL, *adj_tmp_buf = NULL;
    MPIR_SCHED_CHKPMEM_DECL(1);

    comm_size = comm_ptr->local_size;
    rank = comm_ptr->rank;

    /* The regular MPI_Alltoallw handles MPI_IN_PLACE using pairwise
     * sendrecv_replace calls.  We don't have a sendrecv_replace, so just
     * malloc the maximum of the counts array entries and then perform the
     * pairwise exchanges manually with schedule barriers instead.
     *
     * Because of this approach, all processes must agree on the global
     * schedule of "sendrecv_replace" operations to avoid deadlock.
     *
     * This is in keeping with the spirit of the MPI-2.2 standard, which is to
     * conserve memory when using MPI_IN_PLACE for these routines.
     * Something like MADRE would probably generate a more optimal
     * algorithm. */
    max_size = 0;
    for (i = 0; i < comm_size; ++i) {
        /* only look at recvtypes/recvcounts because the send vectors are
         * ignored when sendbuf==MPI_IN_PLACE */
        MPIR_Type_get_true_extent_impl(recvtypes[i], &true_lb, &true_extent);
        MPIR_Datatype_get_extent_macro(recvtypes[i], recv_extent);
        max_size = MPL_MAX(max_size, recvcounts[i] * MPL_MAX(recv_extent, true_extent));
    }
    MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, max_size, mpi_errno, "Ialltoallw tmp_buf",
                              MPL_MEM_BUFFER);

    for (i = 0; i < comm_size; ++i) {
        /* start inner loop at i to avoid re-exchanging data */
        for (j = i; j < comm_size; ++j) {
            if (rank == i && rank == j) {
                /* no need to "sendrecv_replace" for ourselves */
            } else if (rank == i || rank == j) {
                if (rank == i)
                    dst = j;
                else
                    dst = i;

                /* the scratch buffer receives data of type recvtypes[dst], so
                 * the true_lb adjustment must come from that type */
                MPIR_Type_get_true_extent_impl(recvtypes[dst], &true_lb, &true_extent);
                adj_tmp_buf = (void *) ((char *) tmp_buf - true_lb);

                mpi_errno = MPIR_Sched_send(((char *) recvbuf + rdispls[dst]),
                                            recvcounts[dst], recvtypes[dst], dst, comm_ptr, s);
                if (mpi_errno)
                    MPIR_ERR_POP(mpi_errno);
                mpi_errno = MPIR_Sched_recv(adj_tmp_buf, recvcounts[dst], recvtypes[dst],
                                            dst, comm_ptr, s);
                if (mpi_errno)
                    MPIR_ERR_POP(mpi_errno);
                MPIR_SCHED_BARRIER(s);

                mpi_errno = MPIR_Sched_copy(adj_tmp_buf, recvcounts[dst], recvtypes[dst],
                                            ((char *) recvbuf + rdispls[dst]),
                                            recvcounts[dst], recvtypes[dst], s);
                if (mpi_errno)
                    MPIR_ERR_POP(mpi_errno);
                MPIR_SCHED_BARRIER(s);
            }
        }
    }

    MPIR_SCHED_CHKPMEM_COMMIT(s);

  fn_exit:
    return mpi_errno;
  fn_fail:
    MPIR_SCHED_CHKPMEM_REAP(s);
    goto fn_exit;
}
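
/* Illustrative sketch (not part of the build): the deadlock-freedom of the
 * schedule above relies on every rank walking the same global (i,j) ordering
 * and only posting operations when it is one of the two endpoints.  The
 * hypothetical standalone program below mirrors that double loop and prints,
 * for each rank of a given comm_size, the peers in the order they would be
 * exchanged with.  The function and program names here are made up for
 * illustration only and do not exist in MPICH. */
#if 0
#include <stdio.h>
#include <stdlib.h>

/* Print the peer sequence a given rank follows in the in-place schedule. */
static void print_inplace_schedule(int comm_size, int rank)
{
    int i, j;

    printf("rank %d exchanges with:", rank);
    for (i = 0; i < comm_size; ++i) {
        for (j = i; j < comm_size; ++j) {
            if (i == j)
                continue;       /* a rank never exchanges with itself */
            if (rank == i)
                printf(" %d", j);
            else if (rank == j)
                printf(" %d", i);
            /* ranks that are neither i nor j skip this step entirely */
        }
    }
    printf("\n");
}

int main(int argc, char **argv)
{
    int comm_size = (argc > 1) ? atoi(argv[1]) : 4;
    int rank;

    /* e.g. "./a.out 4" prints the peer order for every rank of a
     * four-process job, showing that all ranks agree on the pairing order */
    for (rank = 0; rank < comm_size; ++rank)
        print_inplace_schedule(comm_size, rank);
    return 0;
}
#endif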