/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
* (C) 2001 by Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#include "mpiimpl.h"
#include "mpicomm.h"
#include "mpir_info.h" /* MPIR_Info_free */
#include "utlist.h"
#include "uthash.h"
/* This is the utility file for comm that contains the basic comm items
and storage management */
#ifndef MPID_COMM_PREALLOC
#define MPID_COMM_PREALLOC 8
#endif
/* Preallocated comm objects */
/* initialized in initthread.c */
MPIR_Comm MPIR_Comm_builtin[MPIR_COMM_N_BUILTIN] = { {0}
};
MPIR_Comm MPIR_Comm_direct[MPID_COMM_PREALLOC] = { {0}
};
MPIR_Object_alloc_t MPIR_Comm_mem = {
0,
0,
0,
0,
MPIR_COMM,
sizeof(MPIR_Comm),
MPIR_Comm_direct,
MPID_COMM_PREALLOC
};
/* Communicator creation functions */
struct MPIR_Commops *MPIR_Comm_fns = NULL;
struct MPIR_Comm_hint_fn_elt {
char name[MPI_MAX_INFO_KEY];
MPIR_Comm_hint_fn_t fn;
void *state;
UT_hash_handle hh;
};
static struct MPIR_Comm_hint_fn_elt *MPID_hint_fns = NULL;
/* FIXME :
Reusing context ids can lead to a race condition if (as is desirable)
MPI_Comm_free does not include a barrier. Consider the following:
Process A frees the communicator.
Process A creates a new communicator, reusing the just released id
Process B sends a message to A on the old communicator.
Process A receives the message, and believes that it belongs to the
new communicator.
Process B then cancels the message, and frees the communicator.
The likelihood of this happening can be reduced by introducing a gap
between when a context id is released and when it is reused. An alternative
is to use an explicit message (in the implementation of MPI_Comm_free)
to indicate that a communicator is being freed; this will often require
less communication than a barrier in MPI_Comm_free, and will ensure that
no messages are later sent to the same communicator (we may also want to
have a similar check when building fault-tolerant versions of MPI).
*/
/* Zeroes most non-handle fields in a communicator, as well as initializing any
* other special fields, such as a per-object mutex. Also defaults the
* reference count to 1, under the assumption that the caller holds a reference
* to it.
*
* !!! The resulting struct is _not_ ready for communication !!! */
int MPII_Comm_init(MPIR_Comm * comm_p)
{
int mpi_errno = MPI_SUCCESS;
MPIR_Object_set_ref(comm_p, 1);
/* initialize local and remote sizes to -1 to allow other parts of
* the stack to detect errors more easily */
comm_p->local_size = -1;
comm_p->remote_size = -1;
/* Clear many items (empty means to use the default; some of these
* may be overridden within the upper-level communicator initialization) */
comm_p->errhandler = NULL;
comm_p->attributes = NULL;
comm_p->remote_group = NULL;
comm_p->local_group = NULL;
comm_p->topo_fns = NULL;
comm_p->name[0] = '\0';
comm_p->info = NULL;
comm_p->hierarchy_kind = MPIR_COMM_HIERARCHY_KIND__FLAT;
comm_p->node_comm = NULL;
comm_p->node_roots_comm = NULL;
comm_p->intranode_table = NULL;
comm_p->internode_table = NULL;
/* abstractions bleed a bit here... :(*/
comm_p->next_sched_tag = MPIR_FIRST_NBC_TAG;
/* Initialize the revoked flag as false */
comm_p->revoked = 0;
comm_p->mapper_head = NULL;
comm_p->mapper_tail = NULL;
#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ
{
int thr_err;
MPID_Thread_mutex_create(&MPIR_THREAD_POBJ_COMM_MUTEX(comm_p), &thr_err);
MPIR_Assert(thr_err == 0);
}
#endif
/* Fields not set include context_id, remote and local size, and
* kind, since different communicator construction routines need
* different values */
return mpi_errno;
}
/*
Create a communicator structure and perform basic initialization
(mostly clearing fields and updating the reference count).
*/
#undef FUNCNAME
#define FUNCNAME MPIR_Comm_create
#undef FCNAME
#define FCNAME "MPIR_Comm_create"
int MPIR_Comm_create(MPIR_Comm ** newcomm_ptr)
{
int mpi_errno = MPI_SUCCESS;
MPIR_Comm *newptr;
MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_CREATE);
MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_CREATE);
newptr = (MPIR_Comm *) MPIR_Handle_obj_alloc(&MPIR_Comm_mem);
MPIR_ERR_CHKANDJUMP(!newptr, mpi_errno, MPI_ERR_OTHER, "**nomem");
*newcomm_ptr = newptr;
mpi_errno = MPII_Comm_init(newptr);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
/* Insert this new communicator into the list of known communicators.
* Make this conditional on debugger support to match the test in
* MPIR_Comm_release . */
MPII_COMML_REMEMBER(newptr);
fn_fail:
MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_CREATE);
return mpi_errno;
}
/* Create a local intra communicator from the local group of the
specified intercomm. */
/* FIXME this is an alternative constructor that doesn't use MPIR_Comm_create! */
#undef FUNCNAME
#define FUNCNAME MPII_Setup_intercomm_localcomm
#undef FCNAME
#define FCNAME "MPII_Setup_intercomm_localcomm"
int MPII_Setup_intercomm_localcomm(MPIR_Comm * intercomm_ptr)
{
MPIR_Comm *localcomm_ptr;
int mpi_errno = MPI_SUCCESS;
MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_SETUP_INTERCOMM_LOCALCOMM);
MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_SETUP_INTERCOMM_LOCALCOMM);
localcomm_ptr = (MPIR_Comm *) MPIR_Handle_obj_alloc(&MPIR_Comm_mem);
MPIR_ERR_CHKANDJUMP(!localcomm_ptr, mpi_errno, MPI_ERR_OTHER, "**nomem");
/* get sensible default values for most fields (usually zeros) */
mpi_errno = MPII_Comm_init(localcomm_ptr);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
/* use the parent intercomm's recv ctx as the basis for our ctx */
localcomm_ptr->recvcontext_id =
MPIR_CONTEXT_SET_FIELD(IS_LOCALCOMM, intercomm_ptr->recvcontext_id, 1);
localcomm_ptr->context_id = localcomm_ptr->recvcontext_id;
MPL_DBG_MSG_FMT(MPIR_DBG_COMM, TYPICAL,
(MPL_DBG_FDEST,
"setup_intercomm_localcomm ic=%p ic->context_id=%d ic->recvcontext_id=%d lc->recvcontext_id=%d",
intercomm_ptr, intercomm_ptr->context_id, intercomm_ptr->recvcontext_id,
localcomm_ptr->recvcontext_id));
/* Save the kind of the communicator */
localcomm_ptr->comm_kind = MPIR_COMM_KIND__INTRACOMM;
/* Set the sizes and ranks */
localcomm_ptr->remote_size = intercomm_ptr->local_size;
localcomm_ptr->local_size = intercomm_ptr->local_size;
localcomm_ptr->pof2 = intercomm_ptr->pof2;
localcomm_ptr->rank = intercomm_ptr->rank;
MPIR_Comm_map_dup(localcomm_ptr, intercomm_ptr, MPIR_COMM_MAP_DIR__L2L);
/* TODO More advanced version: if the group is available, dup it by
* increasing the reference count instead of recreating it later */
/* FIXME : No local functions for the topology routines */
intercomm_ptr->local_comm = localcomm_ptr;
/* sets up the SMP-aware sub-communicators and tables */
mpi_errno = MPIR_Comm_commit(localcomm_ptr);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
fn_fail:
MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_SETUP_INTERCOMM_LOCALCOMM);
return mpi_errno;
}
#undef FUNCNAME
#define FUNCNAME MPIR_Comm_map_irregular
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
int MPIR_Comm_map_irregular(MPIR_Comm * newcomm, MPIR_Comm * src_comm,
int *src_mapping, int src_mapping_size,
MPIR_Comm_map_dir_t dir, MPIR_Comm_map_t ** map)
{
int mpi_errno = MPI_SUCCESS;
MPIR_Comm_map_t *mapper;
MPIR_CHKPMEM_DECL(3);
MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_MAP_TYPE__IRREGULAR);
MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_MAP_TYPE__IRREGULAR);
MPIR_CHKPMEM_MALLOC(mapper, MPIR_Comm_map_t *, sizeof(MPIR_Comm_map_t), mpi_errno, "mapper",
MPL_MEM_COMM);
mapper->type = MPIR_COMM_MAP_TYPE__IRREGULAR;
mapper->src_comm = src_comm;
mapper->dir = dir;
mapper->src_mapping_size = src_mapping_size;
if (src_mapping) {
mapper->src_mapping = src_mapping;
mapper->free_mapping = 0;
} else {
MPIR_CHKPMEM_MALLOC(mapper->src_mapping, int *,
src_mapping_size * sizeof(int), mpi_errno, "mapper mapping",
MPL_MEM_COMM);
mapper->free_mapping = 1;
}
mapper->next = NULL;
LL_APPEND(newcomm->mapper_head, newcomm->mapper_tail, mapper);
if (map)
*map = mapper;
fn_exit:
MPIR_CHKPMEM_COMMIT();
MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_MAP_TYPE__IRREGULAR);
return mpi_errno;
fn_fail:
MPIR_CHKPMEM_REAP();
goto fn_exit;
}
#undef FUNCNAME
#define FUNCNAME MPIR_Comm_map_dup
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
int MPIR_Comm_map_dup(MPIR_Comm * newcomm, MPIR_Comm * src_comm, MPIR_Comm_map_dir_t dir)
{
int mpi_errno = MPI_SUCCESS;
MPIR_Comm_map_t *mapper;
MPIR_CHKPMEM_DECL(1);
MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_MAP_TYPE__DUP);
MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_MAP_TYPE__DUP);
MPIR_CHKPMEM_MALLOC(mapper, MPIR_Comm_map_t *, sizeof(MPIR_Comm_map_t), mpi_errno, "mapper",
MPL_MEM_COMM);
mapper->type = MPIR_COMM_MAP_TYPE__DUP;
mapper->src_comm = src_comm;
mapper->dir = dir;
mapper->next = NULL;
LL_APPEND(newcomm->mapper_head, newcomm->mapper_tail, mapper);
fn_exit:
MPIR_CHKPMEM_COMMIT();
MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_MAP_TYPE__DUP);
return mpi_errno;
fn_fail:
MPIR_CHKPMEM_REAP();
goto fn_exit;
}
#undef FUNCNAME
#define FUNCNAME MPIR_Comm_map_free
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
int MPIR_Comm_map_free(MPIR_Comm * comm)
{
int mpi_errno = MPI_SUCCESS;
MPIR_Comm_map_t *mapper, *tmp;
MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_MAP_FREE);
MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_MAP_FREE);
for (mapper = comm->mapper_head; mapper;) {
tmp = mapper->next;
if (mapper->type == MPIR_COMM_MAP_TYPE__IRREGULAR && mapper->free_mapping)
MPL_free(mapper->src_mapping);
MPL_free(mapper);
mapper = tmp;
}
comm->mapper_head = NULL;
MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_MAP_FREE);
return mpi_errno;
}
/* Provides a hook for the top level functions to perform some manipulation on a
communicator just before it is given to the application level.
For example, we create sub-communicators for SMP-aware collectives at this
step. */
#undef FUNCNAME
#define FUNCNAME MPIR_Comm_commit
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
int MPIR_Comm_commit(MPIR_Comm * comm)
{
int mpi_errno = MPI_SUCCESS;
int num_local = -1, num_external = -1;
int local_rank = -1, external_rank = -1;
int *local_procs = NULL, *external_procs = NULL;
MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_COMMIT);
MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_COMMIT);
/* It's OK to relax these assertions, but we should do so very
* intentionally. For now this function is the only place that we create
* our hierarchy of communicators */
MPIR_Assert(comm->node_comm == NULL);
MPIR_Assert(comm->node_roots_comm == NULL);
/* Notify device of communicator creation */
if (comm != MPIR_Process.comm_world) {
mpi_errno = MPID_Comm_create_hook(comm);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
MPIR_Comm_map_free(comm);
}
/* Create collectives-specific infrastructure */
mpi_errno = MPIR_Coll_comm_init(comm);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
MPIR_Comm_map_free(comm);
comm->pof2 = MPL_pof2(comm->local_size);
if (comm->comm_kind == MPIR_COMM_KIND__INTRACOMM && !MPIR_CONTEXT_READ_FIELD(SUBCOMM, comm->context_id)) { /*make sure this is not a subcomm */
mpi_errno = MPIR_Find_local_and_external(comm,
&num_local, &local_rank, &local_procs,
&num_external, &external_rank, &external_procs,
&comm->intranode_table, &comm->internode_table);
/* --BEGIN ERROR HANDLING-- */
if (mpi_errno) {
if (MPIR_Err_is_fatal(mpi_errno))
MPIR_ERR_POP(mpi_errno);
/* Non-fatal errors simply mean that this communicator will not have
* any node awareness. Node-aware collectives are an optimization. */
MPL_DBG_MSG_P(MPIR_DBG_COMM, VERBOSE,
"MPIR_Find_local_and_external failed for comm_ptr=%p", comm);
if (comm->intranode_table)
MPL_free(comm->intranode_table);
if (comm->internode_table)
MPL_free(comm->internode_table);
mpi_errno = MPI_SUCCESS;
goto fn_exit;
}
/* --END ERROR HANDLING-- */
/* defensive checks */
MPIR_Assert(num_local > 0);
MPIR_Assert(num_local > 1 || external_rank >= 0);
MPIR_Assert(external_rank < 0 || external_procs != NULL);
/* if the node_roots_comm and comm would be the same size, then creating
* the second communicator is useless and wasteful. */
if (num_external == comm->remote_size) {
MPIR_Assert(num_local == 1);
goto fn_exit;
}
/* we don't need a local comm if this process is the only one on this node */
if (num_local > 1) {
mpi_errno = MPIR_Comm_create(&comm->node_comm);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
comm->node_comm->context_id = comm->context_id + MPIR_CONTEXT_INTRANODE_OFFSET;
comm->node_comm->recvcontext_id = comm->node_comm->context_id;
comm->node_comm->rank = local_rank;
comm->node_comm->comm_kind = MPIR_COMM_KIND__INTRACOMM;
comm->node_comm->hierarchy_kind = MPIR_COMM_HIERARCHY_KIND__NODE;
comm->node_comm->local_comm = NULL;
MPL_DBG_MSG_D(MPIR_DBG_COMM, VERBOSE, "Create node_comm=%p\n", comm->node_comm);
comm->node_comm->local_size = num_local;
comm->node_comm->pof2 = MPL_pof2(comm->node_comm->local_size);
comm->node_comm->remote_size = num_local;
MPIR_Comm_map_irregular(comm->node_comm, comm, local_procs,
num_local, MPIR_COMM_MAP_DIR__L2L, NULL);
/* Notify device of communicator creation */
mpi_errno = MPID_Comm_create_hook(comm->node_comm);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
/* don't call MPIR_Comm_commit here */
/* Create collectives-specific infrastructure */
mpi_errno = MPIR_Coll_comm_init(comm->node_comm);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
MPIR_Comm_map_free(comm->node_comm);
}
/* this process may not be a member of the node_roots_comm */
if (local_rank == 0) {
mpi_errno = MPIR_Comm_create(&comm->node_roots_comm);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
comm->node_roots_comm->context_id = comm->context_id + MPIR_CONTEXT_INTERNODE_OFFSET;
comm->node_roots_comm->recvcontext_id = comm->node_roots_comm->context_id;
comm->node_roots_comm->rank = external_rank;
comm->node_roots_comm->comm_kind = MPIR_COMM_KIND__INTRACOMM;
comm->node_roots_comm->hierarchy_kind = MPIR_COMM_HIERARCHY_KIND__NODE_ROOTS;
comm->node_roots_comm->local_comm = NULL;
MPL_DBG_MSG_D(MPIR_DBG_COMM, VERBOSE, "Create node_roots_comm=%p\n",
comm->node_roots_comm);
comm->node_roots_comm->local_size = num_external;
comm->node_roots_comm->pof2 = MPL_pof2(comm->node_roots_comm->local_size);
comm->node_roots_comm->remote_size = num_external;
MPIR_Comm_map_irregular(comm->node_roots_comm, comm,
external_procs, num_external, MPIR_COMM_MAP_DIR__L2L, NULL);
/* Notify device of communicator creation */
mpi_errno = MPID_Comm_create_hook(comm->node_roots_comm);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
/* don't call MPIR_Comm_commit here */
/* Create collectives-specific infrastructure */
mpi_errno = MPIR_Coll_comm_init(comm->node_roots_comm);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
MPIR_Comm_map_free(comm->node_roots_comm);
}
comm->hierarchy_kind = MPIR_COMM_HIERARCHY_KIND__PARENT;
}
fn_exit:
if (comm == MPIR_Process.comm_world) {
mpi_errno = MPID_Comm_create_hook(comm);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
MPIR_Comm_map_free(comm);
}
if (external_procs != NULL)
MPL_free(external_procs);
if (local_procs != NULL)
MPL_free(local_procs);
MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_COMMIT);
return mpi_errno;
fn_fail:
goto fn_exit;
}
/* Returns true if the given communicator is aware of node topology information,
false otherwise. Such information could be used to implement more efficient
collective communication, for example. */
int MPIR_Comm_is_node_aware(MPIR_Comm * comm)
{
return (comm->hierarchy_kind == MPIR_COMM_HIERARCHY_KIND__PARENT);
}
/* Returns true if the communicator is node-aware and processes in all the nodes
are consecutive. For example, if node 0 contains "0, 1, 2, 3", node 1
contains "4, 5, 6", and node 2 contains "7", we shall return true. */
int MPII_Comm_is_node_consecutive(MPIR_Comm * comm)
{
int i = 0, curr_nodeidx = 0;
int *internode_table = comm->internode_table;
if (!MPIR_Comm_is_node_aware(comm))
return 0;
for (; i < comm->local_size; i++) {
if (internode_table[i] == curr_nodeidx + 1)
curr_nodeidx++;
else if (internode_table[i] != curr_nodeidx)
return 0;
}
return 1;
}
/*
* Copy a communicator, including creating a new context and copying the
* virtual connection tables and clearing the various fields.
* Does *not* copy attributes. If size is < the size of the local group
* in the input communicator, copy only the first size elements.
* If this process is not a member, return a null pointer in outcomm_ptr.
* This is only supported in the case where the communicator is in
* Intracomm (not an Intercomm). Note that this is all that is required
* for cart_create and graph_create.
*
* Used by cart_create, graph_create, and dup_create
*/
#undef FUNCNAME
#define FUNCNAME MPII_Comm_copy
#undef FCNAME
#define FCNAME "MPII_Comm_copy"
int MPII_Comm_copy(MPIR_Comm * comm_ptr, int size, MPIR_Comm ** outcomm_ptr)
{
int mpi_errno = MPI_SUCCESS;
MPIR_Context_id_t new_context_id, new_recvcontext_id;
MPIR_Comm *newcomm_ptr = NULL;
MPIR_Comm_map_t *map = NULL;
MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_COPY);
MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_COPY);
/* Get a new context first. We need this to be collective over the
* input communicator */
/* If there is a context id cache in oldcomm, use it here. Otherwise,
* use the appropriate algorithm to get a new context id. Be careful
* of intercomms here */
if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) {
mpi_errno = MPIR_Get_intercomm_contextid(comm_ptr, &new_context_id, &new_recvcontext_id);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
} else {
mpi_errno = MPIR_Get_contextid_sparse(comm_ptr, &new_context_id, FALSE);
new_recvcontext_id = new_context_id;
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
MPIR_Assert(new_context_id != 0);
}
/* This is the local size, not the remote size, in the case of
* an intercomm */
if (comm_ptr->rank >= size) {
*outcomm_ptr = 0;
/* always free the recvcontext ID, never the "send" ID */
MPIR_Free_contextid(new_recvcontext_id);
goto fn_exit;
}
/* We're left with the processes that will have a non-null communicator.
* Create the object, initialize the data, and return the result */
mpi_errno = MPIR_Comm_create(&newcomm_ptr);
if (mpi_errno)
goto fn_fail;
newcomm_ptr->context_id = new_context_id;
newcomm_ptr->recvcontext_id = new_recvcontext_id;
/* Save the kind of the communicator */
newcomm_ptr->comm_kind = comm_ptr->comm_kind;
newcomm_ptr->local_comm = 0;
/* There are two cases here - size is the same as the old communicator,
* or it is smaller. If the size is the same, we can just add a reference.
* Otherwise, we need to create a new network address mapping. Note that this is the
* test that matches the test on rank above. */
if (size == comm_ptr->local_size) {
/* Duplicate the network address mapping */
if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM)
MPIR_Comm_map_dup(newcomm_ptr, comm_ptr, MPIR_COMM_MAP_DIR__L2L);
else
MPIR_Comm_map_dup(newcomm_ptr, comm_ptr, MPIR_COMM_MAP_DIR__R2R);
} else {
int i;
if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM)
MPIR_Comm_map_irregular(newcomm_ptr, comm_ptr, NULL, size, MPIR_COMM_MAP_DIR__L2L,
&map);
else
MPIR_Comm_map_irregular(newcomm_ptr, comm_ptr, NULL, size, MPIR_COMM_MAP_DIR__R2R,
&map);
for (i = 0; i < size; i++) {
/* For rank i in the new communicator, find the corresponding
* rank in the input communicator */
map->src_mapping[i] = i;
}
}
/* If it is an intercomm, duplicate the local network address references */
if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) {
MPIR_Comm_map_dup(newcomm_ptr, comm_ptr, MPIR_COMM_MAP_DIR__L2L);
}
/* Set the sizes and ranks */
newcomm_ptr->rank = comm_ptr->rank;
if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) {
newcomm_ptr->local_size = comm_ptr->local_size;
newcomm_ptr->remote_size = comm_ptr->remote_size;
newcomm_ptr->is_low_group = comm_ptr->is_low_group;
} else {
newcomm_ptr->local_size = size;
newcomm_ptr->remote_size = size;
}
newcomm_ptr->pof2 = MPL_pof2(newcomm_ptr->local_size);
/* Inherit the error handler (if any) */
MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
newcomm_ptr->errhandler = comm_ptr->errhandler;
if (comm_ptr->errhandler) {
MPIR_Errhandler_add_ref(comm_ptr->errhandler);
}
MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
mpi_errno = MPIR_Comm_commit(newcomm_ptr);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
/* Start with no attributes on this communicator */
newcomm_ptr->attributes = 0;
/* Copy over the info hints from the original communicator. */
mpi_errno = MPIR_Info_dup_impl(comm_ptr->info, &(newcomm_ptr->info));
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
mpi_errno = MPII_Comm_apply_hints(newcomm_ptr, newcomm_ptr->info);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
*outcomm_ptr = newcomm_ptr;
fn_fail:
fn_exit:
MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_COPY);
return mpi_errno;
}
/* Copy a communicator, including copying the virtual connection tables and
* clearing the various fields. Does *not* allocate a context ID or commit the
* communicator. Does *not* copy attributes.
*
* Used by comm_idup.
*/
#undef FUNCNAME
#define FUNCNAME MPII_Comm_copy_data
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
int MPII_Comm_copy_data(MPIR_Comm * comm_ptr, MPIR_Comm ** outcomm_ptr)
{
int mpi_errno = MPI_SUCCESS;
MPIR_Comm *newcomm_ptr = NULL;
MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_COPY_DATA);
MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_COPY_DATA);
mpi_errno = MPIR_Comm_create(&newcomm_ptr);
if (mpi_errno)
goto fn_fail;
/* use a large garbage value to ensure errors are caught more easily */
newcomm_ptr->context_id = 32767;
newcomm_ptr->recvcontext_id = 32767;
/* Save the kind of the communicator */
newcomm_ptr->comm_kind = comm_ptr->comm_kind;
newcomm_ptr->local_comm = 0;
if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM)
MPIR_Comm_map_dup(newcomm_ptr, comm_ptr, MPIR_COMM_MAP_DIR__L2L);
else
MPIR_Comm_map_dup(newcomm_ptr, comm_ptr, MPIR_COMM_MAP_DIR__R2R);
/* If it is an intercomm, duplicate the network address mapping */
if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) {
MPIR_Comm_map_dup(newcomm_ptr, comm_ptr, MPIR_COMM_MAP_DIR__L2L);
}
/* Set the sizes and ranks */
newcomm_ptr->rank = comm_ptr->rank;
newcomm_ptr->local_size = comm_ptr->local_size;
newcomm_ptr->pof2 = comm_ptr->pof2;
newcomm_ptr->remote_size = comm_ptr->remote_size;
newcomm_ptr->is_low_group = comm_ptr->is_low_group; /* only relevant for intercomms */
/* Inherit the error handler (if any) */
MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
newcomm_ptr->errhandler = comm_ptr->errhandler;
if (comm_ptr->errhandler) {
MPIR_Errhandler_add_ref(comm_ptr->errhandler);
}
MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
/* Start with no attributes on this communicator */
newcomm_ptr->attributes = 0;
*outcomm_ptr = newcomm_ptr;
fn_fail:
MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_COPY_DATA);
return mpi_errno;
}
/* Common body between MPIR_Comm_release and MPIR_comm_release_always. This
* helper function frees the actual MPIR_Comm structure and any associated
* storage. It also releases any references to other objects.
* This function should only be called when the communicator's reference count
* has dropped to 0.
*
* !!! This routine should *never* be called outside of MPIR_Comm_release{,_always} !!!
*/
#undef FUNCNAME
#define FUNCNAME MPIR_Comm_delete_internal
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
int MPIR_Comm_delete_internal(MPIR_Comm * comm_ptr)
{
int in_use;
int mpi_errno = MPI_SUCCESS;
MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_COMM_DELETE_INTERNAL);
MPIR_FUNC_TERSE_ENTER(MPID_STATE_COMM_DELETE_INTERNAL);
MPIR_Assert(MPIR_Object_get_ref(comm_ptr) == 0); /* sanity check */
/* Remove the attributes, executing the attribute delete routine.
* Do this only if the attribute functions are defined.
* This must be done first, because if freeing the attributes
* returns an error, the communicator is not freed */
if (MPIR_Process.attr_free && comm_ptr->attributes) {
/* Temporarily add a reference to this communicator because
* the attr_free code requires a valid communicator */
MPIR_Object_add_ref(comm_ptr);
mpi_errno = MPIR_Process.attr_free(comm_ptr->handle, &comm_ptr->attributes);
/* Release the temporary reference added before the call to
* attr_free */
MPIR_Object_release_ref(comm_ptr, &in_use);
}
/* If the attribute delete functions return failure, the
* communicator must not be freed. That is the reason for the
* test on mpi_errno here. */
if (mpi_errno == MPI_SUCCESS) {
/* If this communicator is our parent, and we're disconnecting
* from the parent, mark that fact */
if (MPIR_Process.comm_parent == comm_ptr)
MPIR_Process.comm_parent = NULL;
/* Cleanup collectives-specific infrastructure */
mpi_errno = MPII_Coll_comm_cleanup(comm_ptr);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
/* Notify the device that the communicator is about to be
* destroyed */
mpi_errno = MPID_Comm_free_hook(comm_ptr);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
/* Free info hints */
if (comm_ptr->info != NULL) {
MPIR_Info_free(comm_ptr->info);
}
if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM && comm_ptr->local_comm)
MPIR_Comm_release(comm_ptr->local_comm);
/* Free the local and remote groups, if they exist */
if (comm_ptr->local_group)
MPIR_Group_release(comm_ptr->local_group);
if (comm_ptr->remote_group)
MPIR_Group_release(comm_ptr->remote_group);
/* free the intra/inter-node communicators, if they exist */
if (comm_ptr->node_comm)
MPIR_Comm_release(comm_ptr->node_comm);
if (comm_ptr->node_roots_comm)
MPIR_Comm_release(comm_ptr->node_roots_comm);
if (comm_ptr->intranode_table != NULL)
MPL_free(comm_ptr->intranode_table);
if (comm_ptr->internode_table != NULL)
MPL_free(comm_ptr->internode_table);
/* Free the context value. This should come after freeing the
* intra/inter-node communicators since those free calls won't
* release this context ID and releasing this before then could lead
* to races once we make threading finer grained. */
/* This must be the recvcontext_id (i.e. not the (send)context_id)
* because in the case of intercommunicators the send context ID is
* allocated out of the remote group's bit vector, not ours. */
MPIR_Free_contextid(comm_ptr->recvcontext_id);
#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ
{
int thr_err;
MPID_Thread_mutex_destroy(&MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr), &thr_err);
MPIR_Assert(thr_err == 0);
}
#endif
/* We need to release the error handler */
if (comm_ptr->errhandler &&
!(HANDLE_GET_KIND(comm_ptr->errhandler->handle) == HANDLE_KIND_BUILTIN)) {
int errhInuse;
MPIR_Errhandler_release_ref(comm_ptr->errhandler, &errhInuse);
if (!errhInuse) {
MPIR_Handle_obj_free(&MPIR_Errhandler_mem, comm_ptr->errhandler);
}
}
/* Remove from the list of active communicators if
* we are supporting message-queue debugging. We make this
* conditional on having debugger support since the
* operation is not constant-time */
MPII_COMML_FORGET(comm_ptr);
/* Check for predefined communicators - these should not
* be freed */
if (!(HANDLE_GET_KIND(comm_ptr->handle) == HANDLE_KIND_BUILTIN))
MPIR_Handle_obj_free(&MPIR_Comm_mem, comm_ptr);
} else {
/* If the user attribute free function returns an error,
* then do not free the communicator */
MPIR_Comm_add_ref(comm_ptr);
}
fn_exit:
MPIR_FUNC_TERSE_EXIT(MPID_STATE_COMM_DELETE_INTERNAL);
return mpi_errno;
fn_fail:
goto fn_exit;
}
/* Release a reference to a communicator. If there are no pending
references, delete the communicator and recover all storage and
context ids. This version of the function always manipulates the reference
counts, even for predefined objects. */
#undef FUNCNAME
#define FUNCNAME MPIR_Comm_release_always
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
int MPIR_Comm_release_always(MPIR_Comm * comm_ptr)
{
int mpi_errno = MPI_SUCCESS;
int in_use;
MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_RELEASE_ALWAYS);
MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_RELEASE_ALWAYS);
/* we want to short-circuit any optimization that avoids reference counting
* predefined communicators, such as MPI_COMM_WORLD or MPI_COMM_SELF. */
MPIR_Object_release_ref_always(comm_ptr, &in_use);
if (!in_use) {
mpi_errno = MPIR_Comm_delete_internal(comm_ptr);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
}
fn_exit:
MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_RELEASE_ALWAYS);
return mpi_errno;
fn_fail:
goto fn_exit;
}
/* Apply all known info hints in the specified info chain to the given
* communicator. */
#undef FUNCNAME
#define FUNCNAME MPII_Comm_apply_hints
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
int MPII_Comm_apply_hints(MPIR_Comm * comm_ptr, MPIR_Info * info_ptr)
{
int mpi_errno = MPI_SUCCESS;
MPIR_Info *hint = NULL;
char hint_name[MPI_MAX_INFO_KEY] = { 0 };
struct MPIR_Comm_hint_fn_elt *hint_fn = NULL;
MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_APPLY_HINTS);
MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_APPLY_HINTS);
LL_FOREACH(info_ptr, hint) {
/* Have we hit the default, empty info hint? */
if (hint->key == NULL)
continue;
MPL_strncpy(hint_name, hint->key, MPI_MAX_INFO_KEY);
HASH_FIND_STR(MPID_hint_fns, hint_name, hint_fn);
/* Skip hints that MPICH doesn't recognize. */
if (hint_fn) {
mpi_errno = hint_fn->fn(comm_ptr, hint, hint_fn->state);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
}
}
fn_exit:
MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_APPLY_HINTS);
return mpi_errno;
fn_fail:
goto fn_exit;
}
#undef FUNCNAME
#define FUNCNAME free_hint_handles
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
static int free_hint_handles(void *ignore)
{
int mpi_errno = MPI_SUCCESS;
struct MPIR_Comm_hint_fn_elt *curr_hint = NULL, *tmp = NULL;
MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_FREE_HINT_HANDLES);
MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_FREE_HINT_HANDLES);
if (MPID_hint_fns) {
HASH_ITER(hh, MPID_hint_fns, curr_hint, tmp) {
HASH_DEL(MPID_hint_fns, curr_hint);
MPL_free(curr_hint);
}
}
MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_FREE_HINT_HANDLES);
return mpi_errno;
}
/* The hint logic is stored in a uthash, with hint name as key and
* the function responsible for applying the hint as the value. */
#undef FUNCNAME
#define FUNCNAME MPIR_Comm_register_hint
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
int MPIR_Comm_register_hint(const char *hint_key, MPIR_Comm_hint_fn_t fn, void *state)
{
int mpi_errno = MPI_SUCCESS;
struct MPIR_Comm_hint_fn_elt *hint_elt = NULL;
MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_REGISTER_HINT);
MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_REGISTER_HINT);
if (MPID_hint_fns == NULL) {
MPIR_Add_finalize(free_hint_handles, NULL, MPIR_FINALIZE_CALLBACK_PRIO - 1);
}
hint_elt = MPL_malloc(sizeof(struct MPIR_Comm_hint_fn_elt), MPL_MEM_COMM);
MPL_strncpy(hint_elt->name, hint_key, MPI_MAX_INFO_KEY);
hint_elt->state = state;
hint_elt->fn = fn;
HASH_ADD_STR(MPID_hint_fns, name, hint_elt, MPL_MEM_COMM);
MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_REGISTER_HINT);
return mpi_errno;
}