Blame src/mpi/coll/iallgather/iallgather_intra_ring.c

Packit Service c5cf8c
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
Packit Service c5cf8c
/*
Packit Service c5cf8c
 *  (C) 2017 by Argonne National Laboratory.
Packit Service c5cf8c
 *      See COPYRIGHT in top-level directory.
Packit Service c5cf8c
 */
Packit Service c5cf8c
Packit Service c5cf8c
#include "mpiimpl.h"
Packit Service c5cf8c
Packit Service c5cf8c
/* Algorithm: Ring
Packit Service c5cf8c
 *
Packit Service c5cf8c
 * In the first step, each process i sends its contribution to process
Packit Service c5cf8c
 * i+1 and receives the contribution from process i-1 (with
Packit Service c5cf8c
 * wrap-around).  From the second step onwards, each process i
Packit Service c5cf8c
 * forwards to process i+1 the data it received from process i-1 in
Packit Service c5cf8c
 * the previous step.  This takes a total of p-1 steps.
Packit Service c5cf8c
 *
Packit Service c5cf8c
 * Cost = (p-1).alpha + n.((p-1)/p).beta
Packit Service c5cf8c
 *
Packit Service c5cf8c
 * This algorithm is preferred to recursive doubling for long messages
Packit Service c5cf8c
 * because we find that this communication pattern (nearest neighbor)
Packit Service c5cf8c
 * performs twice as fast as recursive doubling for long messages (on
Packit Service c5cf8c
 * Myrinet and IBM SP).
Packit Service c5cf8c
 */
Packit Service c5cf8c
#undef FUNCNAME
Packit Service c5cf8c
#define FUNCNAME MPIR_Iallgather_sched_intra_ring
Packit Service c5cf8c
#undef FCNAME
Packit Service c5cf8c
#define FCNAME MPL_QUOTE(FUNCNAME)
Packit Service c5cf8c
int MPIR_Iallgather_sched_intra_ring(const void *sendbuf, int sendcount, MPI_Datatype
Packit Service c5cf8c
                                     sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype,
Packit Service c5cf8c
                                     MPIR_Comm * comm_ptr, MPIR_Sched_t s)
Packit Service c5cf8c
{
Packit Service c5cf8c
    int mpi_errno = MPI_SUCCESS;
Packit Service c5cf8c
    int rank, comm_size;
Packit Service c5cf8c
    int i, j, jnext, left, right;
Packit Service c5cf8c
    MPI_Aint recvtype_extent;
Packit Service c5cf8c
Packit Service c5cf8c
    comm_size = comm_ptr->local_size;
Packit Service c5cf8c
    rank = comm_ptr->rank;
Packit Service c5cf8c
Packit Service c5cf8c
    MPIR_Datatype_get_extent_macro(recvtype, recvtype_extent);
Packit Service c5cf8c
Packit Service c5cf8c
    /* This is the largest offset we add to recvbuf */
Packit Service c5cf8c
    MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
Packit Service c5cf8c
                                     (comm_size * recvcount * recvtype_extent));
Packit Service c5cf8c
Packit Service c5cf8c
    /* First, load the "local" version in the recvbuf. */
Packit Service c5cf8c
    if (sendbuf != MPI_IN_PLACE) {
Packit Service c5cf8c
        mpi_errno = MPIR_Sched_copy(sendbuf, sendcount, sendtype,
Packit Service c5cf8c
                                    ((char *) recvbuf + rank * recvcount * recvtype_extent),
Packit Service c5cf8c
                                    recvcount, recvtype, s);
Packit Service c5cf8c
        if (mpi_errno)
Packit Service c5cf8c
            MPIR_ERR_POP(mpi_errno);
Packit Service c5cf8c
        MPIR_SCHED_BARRIER(s);
Packit Service c5cf8c
    }
Packit Service c5cf8c
Packit Service c5cf8c
    /* Now, send left to right.  This fills in the receive area in
Packit Service c5cf8c
     * reverse order. */
Packit Service c5cf8c
    left = (comm_size + rank - 1) % comm_size;
Packit Service c5cf8c
    right = (rank + 1) % comm_size;
Packit Service c5cf8c
Packit Service c5cf8c
    j = rank;
Packit Service c5cf8c
    jnext = left;
Packit Service c5cf8c
    for (i = 1; i < comm_size; i++) {
Packit Service c5cf8c
        mpi_errno = MPIR_Sched_send(((char *) recvbuf + j * recvcount * recvtype_extent),
Packit Service c5cf8c
                                    recvcount, recvtype, right, comm_ptr, s);
Packit Service c5cf8c
        if (mpi_errno)
Packit Service c5cf8c
            MPIR_ERR_POP(mpi_errno);
Packit Service c5cf8c
        /* concurrent, no barrier here */
Packit Service c5cf8c
        mpi_errno = MPIR_Sched_recv(((char *) recvbuf + jnext * recvcount * recvtype_extent),
Packit Service c5cf8c
                                    recvcount, recvtype, left, comm_ptr, s);
Packit Service c5cf8c
        if (mpi_errno)
Packit Service c5cf8c
            MPIR_ERR_POP(mpi_errno);
Packit Service c5cf8c
        MPIR_SCHED_BARRIER(s);
Packit Service c5cf8c
Packit Service c5cf8c
        j = jnext;
Packit Service c5cf8c
        jnext = (comm_size + jnext - 1) % comm_size;
Packit Service c5cf8c
    }
Packit Service c5cf8c
Packit Service c5cf8c
  fn_exit:
Packit Service c5cf8c
    return mpi_errno;
Packit Service c5cf8c
  fn_fail:
Packit Service c5cf8c
    goto fn_exit;
Packit Service c5cf8c
}