/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 *  (C) 2017 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include "mpiimpl.h"

#undef FUNCNAME
#define FUNCNAME MPIR_Ialltoallv_sched_intra_inplace
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
/*
 * Append to schedule `s` the operations of an in-place all-to-all-v exchange
 * on `comm_ptr`.
 *
 * Only recvbuf, recvcounts, rdispls and recvtype are read; the send-side
 * arguments (sendbuf, sendcounts, sdispls, sendtype) are ignored by this
 * routine.  For every peer `dst` other than the calling rank, the block at
 * recvbuf + rdispls[dst] * extent(recvtype) holding recvcounts[dst] elements
 * is sent to `dst` and then overwritten with the block received from `dst`.
 * The calling rank's own block is left untouched.
 *
 * Returns MPI_SUCCESS when every operation was enqueued; otherwise the
 * nonzero code reported by the failing step, after the scratch-buffer
 * bookkeeping has been released via MPIR_SCHED_CHKPMEM_REAP.
 */
int MPIR_Ialltoallv_sched_intra_inplace(const void *sendbuf, const int sendcounts[],
                                        const int sdispls[], MPI_Datatype sendtype,
                                        void *recvbuf, const int recvcounts[],
                                        const int rdispls[], MPI_Datatype recvtype,
                                        MPIR_Comm * comm_ptr, MPIR_Sched_t s)
{
    int max_count = 0;
    void *tmp_buf = NULL;
    int mpi_errno = MPI_SUCCESS;
    int comm_size;
    int i;
    MPI_Aint recv_extent;
    int dst, rank;
    MPIR_SCHED_CHKPMEM_DECL(1);

    comm_size = comm_ptr->local_size;
    rank = comm_ptr->rank;

    /* Get extent of recvtype, don't look at sendtype for MPI_IN_PLACE */
    MPIR_Datatype_get_extent_macro(recvtype, recv_extent);

    /* The regular MPI_Alltoallv handles MPI_IN_PLACE using pairwise
     * sendrecv_replace calls.  We don't have a sendrecv_replace, so just
     * malloc the maximum of the counts array entries and then perform the
     * pairwise exchanges manually with schedule barriers instead.
     *
     * Because of this approach all processes must agree on the global
     * schedule of "sendrecv_replace" operations to avoid deadlock.
     *
     * This keeps with the spirit of the MPI-2.2 standard, which is to
     * conserve memory when using MPI_IN_PLACE for these routines.
     * Something like MADRE would probably generate a more optimal
     * algorithm. */

    /* One scratch buffer, sized for the largest incoming block, is reused
     * for every peer. */
    for (i = 0; i < comm_size; ++i) {
        max_count = MPL_MAX(max_count, recvcounts[i]);
    }

    /* NOTE(review): the buffer is sized from the extent of recvtype while
     * data is received into it as recvtype; this presumably assumes the
     * type's true extent fits within its extent (zero lower bound, no
     * shrunken extent) -- confirm against the callers. */
    MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, max_count * recv_extent,
                              mpi_errno, "Ialltoallv tmp_buf", MPL_MEM_BUFFER);

    /* The global exchange order is every pair (i, j) with i <= j, visited in
     * lexicographic order.  A rank only acts on the pairs it belongs to, and
     * in that order its partners are 0 .. rank-1 (pairs (i, rank)) followed
     * by rank+1 .. comm_size-1 (pairs (rank, j)).  Walking the peers once in
     * ascending order therefore enqueues exactly the same operations as
     * scanning all pairs, in O(comm_size) instead of O(comm_size^2). */
    for (dst = 0; dst < comm_size; ++dst) {
        char *peer_block;

        if (dst == rank) {
            /* no need to "sendrecv_replace" for ourselves */
            continue;
        }

        peer_block = (char *) recvbuf + rdispls[dst] * recv_extent;

        /* Send the current contents of the peer's block while receiving
         * the replacement into the scratch buffer. */
        mpi_errno = MPIR_Sched_send(peer_block, recvcounts[dst], recvtype, dst, comm_ptr, s);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
        mpi_errno = MPIR_Sched_recv(tmp_buf, recvcounts[dst], recvtype, dst, comm_ptr, s);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
        MPIR_SCHED_BARRIER(s);

        /* Move the received data into place only after the exchange above;
         * the trailing barrier keeps the scratch buffer from being reused
         * by the next peer before this copy. */
        mpi_errno = MPIR_Sched_copy(tmp_buf, recvcounts[dst], recvtype,
                                    peer_block, recvcounts[dst], recvtype, s);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
        MPIR_SCHED_BARRIER(s);
    }

    MPIR_SCHED_BARRIER(s);

    MPIR_SCHED_CHKPMEM_COMMIT(s);

  fn_exit:
    return mpi_errno;
  fn_fail:
    MPIR_SCHED_CHKPMEM_REAP(s);
    goto fn_exit;
}