Blob Blame History Raw
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include "mpiimpl.h"

#if defined(HAVE_LIMITS_H)
#include <limits.h>
#endif
#if defined(HAVE_UNISTD_H)
#include <unistd.h>
#endif
#if defined(HAVE_ERRNO_H)
#include <errno.h>
#endif

/* MPIR_Find_local_and_external -- from the list of processes in comm,
   builds a list of local processes, i.e., processes on this same
   node, and a list of external processes, i.e., one process from each
   node.

   Note that this will not work correctly for spawned or attached
   processes.

   external processes: For a communicator, there is one external
                       process per node.  You can think of this as the
                       root or master process for that node.

   OUT:
     local_size_p      - number of processes on this node
     local_rank_p      - rank of this processes among local processes
     local_ranks_p     - (*local_ranks_p)[i] = the rank in comm
                         of the process with local rank i.
                         This is of size (*local_size_p)
     external_size_p   - number of external processes
     external_rank_p   - rank of this process among the external
                         processes, or -1 if this process is not external
     external_ranks_p  - (*external_ranks_p)[i] = the rank in comm
                         of the process with external rank i.
                         This is of size (*external_size_p)
     intranode_table_p - (*internode_table_p)[i] gives the rank in
     (optional)          *local_ranks_p of rank i in comm or -1 if not
                         applicable.  It is of size comm->remote_size.
                         No return if NULL is specified.
     internode_table_p - (*internode_table_p)[i] gives the rank in
     (optional)          *external_ranks_p of the root of the node
                         containing rank i in comm.  It is of size
                         comm->remote_size. No return if NULL is specified.
*/
#undef FUNCNAME
#define FUNCNAME MPIR_Find_local_and_external
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)

int MPIR_Find_local_and_external(MPIR_Comm * comm, int *local_size_p, int *local_rank_p,
                                 int **local_ranks_p, int *external_size_p, int *external_rank_p,
                                 int **external_ranks_p, int **intranode_table_p,
                                 int **internode_table_p)
{
    int mpi_errno = MPI_SUCCESS;
    int *nodes;
    int external_size;
    int external_rank;
    int *external_ranks;
    int local_size;
    int local_rank;
    int *local_ranks;
    int *internode_table;
    int *intranode_table;
    int i;
    int max_node_id;
    int node_id;
    int my_node_id;
    MPIR_CHKLMEM_DECL(1);
    MPIR_CHKPMEM_DECL(4);

    /* Scan through the list of processes in comm and add one
     * process from each node to the list of "external" processes.  We
     * add the first process we find from each node.  nodes[] is an
     * array where we keep track of whether we have already added that
     * node to the list. */

    /* these two will be realloc'ed later to the appropriate size (currently unknown) */
    /* FIXME: realloc doesn't guarantee that the allocated area will be
     * shrunk - so using realloc is not an appropriate strategy. */
    MPIR_CHKPMEM_MALLOC(external_ranks, int *, sizeof(int) * comm->remote_size, mpi_errno,
                        "external_ranks", MPL_MEM_COMM);
    MPIR_CHKPMEM_MALLOC(local_ranks, int *, sizeof(int) * comm->remote_size, mpi_errno,
                        "local_ranks", MPL_MEM_COMM);

    MPIR_CHKPMEM_MALLOC(internode_table, int *, sizeof(int) * comm->remote_size, mpi_errno,
                        "internode_table", MPL_MEM_COMM);
    MPIR_CHKPMEM_MALLOC(intranode_table, int *, sizeof(int) * comm->remote_size, mpi_errno,
                        "intranode_table", MPL_MEM_COMM);

    mpi_errno = MPID_Get_max_node_id(comm, &max_node_id);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);
    MPIR_Assert(max_node_id >= 0);
    MPIR_CHKLMEM_MALLOC(nodes, int *, sizeof(int) * (max_node_id + 1), mpi_errno, "nodes",
                        MPL_MEM_COMM);

    /* nodes maps node_id to rank in external_ranks of leader for that node */
    for (i = 0; i < (max_node_id + 1); ++i)
        nodes[i] = -1;

    for (i = 0; i < comm->remote_size; ++i)
        intranode_table[i] = -1;

    external_size = 0;

    mpi_errno = MPID_Get_node_id(comm, comm->rank, &my_node_id);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);
    MPIR_Assert(my_node_id >= 0);
    MPIR_Assert(my_node_id <= max_node_id);

    local_size = 0;
    local_rank = -1;
    external_rank = -1;

    for (i = 0; i < comm->remote_size; ++i) {
        mpi_errno = MPID_Get_node_id(comm, i, &node_id);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        /* The upper level can catch this non-fatal error and should be
         * able to recover gracefully. */
        MPIR_ERR_CHKANDJUMP(node_id < 0, mpi_errno, MPI_ERR_OTHER, "**dynamic_node_ids");

        MPIR_Assert(node_id <= max_node_id);

        /* build list of external processes */
        if (nodes[node_id] == -1) {
            if (i == comm->rank)
                external_rank = external_size;
            nodes[node_id] = external_size;
            external_ranks[external_size] = i;
            ++external_size;
        }

        /* build the map from rank in comm to rank in external_ranks */
        internode_table[i] = nodes[node_id];

        /* build list of local processes */
        if (node_id == my_node_id) {
            if (i == comm->rank)
                local_rank = local_size;

            intranode_table[i] = local_size;
            local_ranks[local_size] = i;
            ++local_size;
        }
    }

    /*
     * printf("------------------------------------------------------------------------\n");
     * printf("comm = %p\n", comm);
     * printf("comm->size = %d\n", comm->remote_size);
     * printf("comm->rank = %d\n", comm->rank);
     * printf("local_size = %d\n", local_size);
     * printf("local_rank = %d\n", local_rank);
     * printf("local_ranks = %p\n", local_ranks);
     * for (i = 0; i < local_size; ++i)
     * printf("  local_ranks[%d] = %d\n", i, local_ranks[i]);
     * printf("external_size = %d\n", external_size);
     * printf("external_rank = %d\n", external_rank);
     * printf("external_ranks = %p\n", external_ranks);
     * for (i = 0; i < external_size; ++i)
     * printf("  external_ranks[%d] = %d\n", i, external_ranks[i]);
     * printf("intranode_table = %p\n", intranode_table);
     * for (i = 0; i < comm->remote_size; ++i)
     * printf("  intranode_table[%d] = %d\n", i, intranode_table[i]);
     * printf("internode_table = %p\n", internode_table);
     * for (i = 0; i < comm->remote_size; ++i)
     * printf("  internode_table[%d] = %d\n", i, internode_table[i]);
     * printf("nodes = %p\n", nodes);
     * for (i = 0; i < (max_node_id + 1); ++i)
     * printf("  nodes[%d] = %d\n", i, nodes[i]);
     */

    MPIR_CHKPMEM_COMMIT();

    *local_size_p = local_size;
    *local_rank_p = local_rank;
    *local_ranks_p = MPL_realloc(local_ranks, sizeof(int) * local_size, MPL_MEM_COMM);
    MPIR_ERR_CHKANDJUMP(*local_ranks_p == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem2");

    *external_size_p = external_size;
    *external_rank_p = external_rank;
    *external_ranks_p = MPL_realloc(external_ranks, sizeof(int) * external_size, MPL_MEM_COMM);
    MPIR_ERR_CHKANDJUMP(*external_ranks_p == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem2");

    /* no need to realloc */
    if (intranode_table_p)
        *intranode_table_p = intranode_table;
    else
        MPL_free(intranode_table);

    if (internode_table_p)
        *internode_table_p = internode_table;
    else
        MPL_free(internode_table);

  fn_exit:
    MPIR_CHKLMEM_FREEALL();
    return mpi_errno;
  fn_fail:
    MPIR_CHKPMEM_REAP();
    goto fn_exit;
}

/* maps rank r in comm_ptr to the rank of the leader for r's node in
   comm_ptr->node_roots_comm and returns this value.

   This function does NOT use mpich error handling.
 */
#undef FUNCNAME
#define FUNCNAME MPIR_Get_internode_rank
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
int MPIR_Get_internode_rank(MPIR_Comm * comm_ptr, int r)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Comm_valid_ptr(comm_ptr, mpi_errno, TRUE);
    MPIR_Assert(mpi_errno == MPI_SUCCESS);
    MPIR_Assert(r < comm_ptr->remote_size);
    MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
    MPIR_Assert(comm_ptr->internode_table != NULL);

    return comm_ptr->internode_table[r];
}

/* maps rank r in comm_ptr to the rank in comm_ptr->node_comm or -1 if r is not
   a member of comm_ptr->node_comm.

   This function does NOT use mpich error handling.
 */
#undef FUNCNAME
#define FUNCNAME MPIR_Get_intranode_rank
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
int MPIR_Get_intranode_rank(MPIR_Comm * comm_ptr, int r)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Comm_valid_ptr(comm_ptr, mpi_errno, TRUE);
    MPIR_Assert(mpi_errno == MPI_SUCCESS);
    MPIR_Assert(r < comm_ptr->remote_size);
    MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
    MPIR_Assert(comm_ptr->intranode_table != NULL);

    /* FIXME this could/should be a list of ranks on the local node, which
     * should take up much less space on a typical thin(ish)-node system. */
    return comm_ptr->intranode_table[r];
}