|
Packit Service |
c5cf8c |
/* -*- Mode: c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
|
Packit Service |
c5cf8c |
/*
|
|
Packit Service |
c5cf8c |
* (C) 2011 by Argonne National Laboratory.
|
|
Packit Service |
c5cf8c |
* See COPYRIGHT in top-level directory.
|
|
Packit Service |
c5cf8c |
*/
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
#ifndef MPIR_NBC_H_INCLUDED
|
|
Packit Service |
c5cf8c |
#define MPIR_NBC_H_INCLUDED
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* This specifies the interface that must be exposed by the ADI in order to
 * support MPI-3 non-blocking collectives.  MPIR_Sched_ routines are all
 * permitted to be inlines.  They are not permitted to be macros.
 *
 * Most (currently all) devices will just use the default implementation that
 * lives in "src/mpid/common/sched" */
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* The device must supply a typedef for MPIR_Sched_t.  MPIR_Sched_t is a handle
 * to the schedule (often a pointer under the hood), not the actual schedule.
 * This makes it easy to cheaply pass the schedule between functions.
 *
 * The device must also define a constant (possibly a macro) for an invalid
 * schedule: MPIR_SCHED_NULL */
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* Context/tag strategy for send/recv ops:
 * -------------------------------
 *
 * Blocking collectives were able to more or less safely separate all
 * communication between different collectives by using a fixed tag per
 * operation.  This prevents some potentially very surprising message matching
 * patterns when two different collectives are posted on the same communicator
 * in rapid succession.  But this strategy probably won't work for NBC because
 * multiple operations of any combination of types can be outstanding at the
 * same time.
 *
 * The MPI-3 draft standard says that all collective ops must be collectively
 * posted in a consistent order w.r.t. other collective operations, including
 * nonblocking collectives.  This means that we can just use a counter,
 * incremented at each collective start, to assign tag values.  We can jump
 * through some hoops to make sure that the blocking collective code is left
 * undisturbed, but it's cleaner to just update it to use the new counter
 * mechanism as well.
 */
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* Handle value that denotes "no schedule"; defined here as a null pointer
 * constant. */
#define MPIR_SCHED_NULL (NULL)
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* Open question: should tag allocation be rolled into Sched_start? Keeping it
|
|
Packit Service |
c5cf8c |
* separate potentially allows more parallelism in the future, but it also
|
|
Packit Service |
c5cf8c |
* pushes more work onto the clients of this interface. */
|
|
Packit Service |
c5cf8c |
int MPIR_Sched_next_tag(MPIR_Comm * comm_ptr, int *tag);
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* the device must provide a typedef for MPIR_Sched_t in mpidpre.h */
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* creates a new opaque schedule object and returns a handle to it in (*sp) */
|
|
Packit Service |
c5cf8c |
int MPIR_Sched_create(MPIR_Sched_t * sp);
|
|
Packit Service |
c5cf8c |
/* clones orig and returns a handle to the new schedule in (*cloned) */
|
|
Packit Service |
c5cf8c |
int MPIR_Sched_clone(MPIR_Sched_t orig, MPIR_Sched_t * cloned);
|
|
Packit Service |
c5cf8c |
/* sets (*sp) to MPIR_SCHED_NULL and gives you back a request pointer in (*req).
|
|
Packit Service |
c5cf8c |
* The caller is giving up ownership of the opaque schedule object.
|
|
Packit Service |
c5cf8c |
*
|
|
Packit Service |
c5cf8c |
* comm should be the primary (user) communicator with which this collective is
|
|
Packit Service |
c5cf8c |
* associated, even if other hidden communicators are used for a subset of the
|
|
Packit Service |
c5cf8c |
* operations. It will be used for error handling and similar operations. */
|
|
Packit Service |
c5cf8c |
int MPIR_Sched_start(MPIR_Sched_t * sp, MPIR_Comm * comm, int tag, MPIR_Request ** req);
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* send and recv take a comm ptr to enable hierarchical collectives */
|
|
Packit Service |
c5cf8c |
int MPIR_Sched_send(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest,
|
|
Packit Service |
c5cf8c |
MPIR_Comm * comm, MPIR_Sched_t s);
|
|
Packit Service |
c5cf8c |
int MPIR_Sched_recv(void *buf, MPI_Aint count, MPI_Datatype datatype, int src, MPIR_Comm * comm,
|
|
Packit Service |
c5cf8c |
MPIR_Sched_t s);
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* just like MPI_Issend, can't complete until the matching recv is posted */
|
|
Packit Service |
c5cf8c |
int MPIR_Sched_ssend(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest,
|
|
Packit Service |
c5cf8c |
MPIR_Comm * comm, MPIR_Sched_t s);
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
int MPIR_Sched_reduce(const void *inbuf, void *inoutbuf, MPI_Aint count, MPI_Datatype datatype,
|
|
Packit Service |
c5cf8c |
MPI_Op op, MPIR_Sched_t s);
|
|
Packit Service |
c5cf8c |
/* packing/unpacking can be accomplished by passing MPI_PACKED as either intype
|
|
Packit Service |
c5cf8c |
* or outtype */
|
|
Packit Service |
c5cf8c |
int MPIR_Sched_copy(const void *inbuf, MPI_Aint incount, MPI_Datatype intype,
|
|
Packit Service |
c5cf8c |
void *outbuf, MPI_Aint outcount, MPI_Datatype outtype, MPIR_Sched_t s);
|
|
Packit Service |
c5cf8c |
/* require that all previously added ops are complete before subsequent ops
|
|
Packit Service |
c5cf8c |
* may begin to execute */
|
|
Packit Service |
c5cf8c |
int MPIR_Sched_barrier(MPIR_Sched_t s);
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* A convenience macro for the extremely common case that "mpi_errno" is the
 * variable used for tracking error state and MPIR_ERR_POP is needed.  This
 * declutters the NBC code substantially.
 *
 * Expands to a single statement that stores the result of
 * MPIR_Sched_barrier(sched_) in the caller's "mpi_errno" and invokes
 * MPIR_ERR_POP(mpi_errno) when that result is nonzero. */
#define MPIR_SCHED_BARRIER(sched_)                      \
    do {                                                \
        mpi_errno = MPIR_Sched_barrier(sched_);         \
        if (mpi_errno) {                                \
            MPIR_ERR_POP(mpi_errno);                    \
        }                                               \
    } while (0)
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* Defers evaluating (*count) until the entry actually begins to execute. This
|
|
Packit Service |
c5cf8c |
* permits algorithms that accumulate/dissipate bytes as rounds progress without
|
|
Packit Service |
c5cf8c |
* excessive (re)calculation of counts for/from other processes.
|
|
Packit Service |
c5cf8c |
*
|
|
Packit Service |
c5cf8c |
* A corresponding _recv_defer function is not currently provided because there
|
|
Packit Service |
c5cf8c |
* is no known use case. The recv count is just an upper bound, not an exact
|
|
Packit Service |
c5cf8c |
* amount to be received, so an oversized recv is used instead of deferral. */
|
|
Packit Service |
c5cf8c |
int MPIR_Sched_send_defer(const void *buf, const MPI_Aint * count, MPI_Datatype datatype, int dest,
|
|
Packit Service |
c5cf8c |
MPIR_Comm * comm, MPIR_Sched_t s);
|
|
Packit Service |
c5cf8c |
/* Just like MPIR_Sched_recv except it populates the given status object with
|
|
Packit Service |
c5cf8c |
* the received count and error information, much like a normal recv. Often
|
|
Packit Service |
c5cf8c |
* useful in conjunction with MPIR_Sched_send_defer. */
|
|
Packit Service |
c5cf8c |
int MPIR_Sched_recv_status(void *buf, MPI_Aint count, MPI_Datatype datatype, int src,
|
|
Packit Service |
c5cf8c |
MPIR_Comm * comm, MPI_Status * status, MPIR_Sched_t s);
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* buffer management, fancy reductions, etc */
|
|
Packit Service |
c5cf8c |
int MPIR_Sched_cb(MPIR_Sched_cb_t * cb_p, void *cb_state, MPIR_Sched_t s);
|
|
Packit Service |
c5cf8c |
int MPIR_Sched_cb2(MPIR_Sched_cb2_t * cb_p, void *cb_state, void *cb_state2, MPIR_Sched_t s);
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* TODO: develop a caching infrastructure for use by the upper level as well,
|
|
Packit Service |
c5cf8c |
* hopefully s.t. uthash can be used somehow */
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* common callback utility functions */
|
|
Packit Service |
c5cf8c |
int MPIR_Sched_cb_free_buf(MPIR_Comm * comm, int tag, void *state);
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* an upgraded version of MPIR_CHKPMEM_MALLOC/_DECL/_REAP/_COMMIT that adds
 * corresponding cleanup callbacks to the given schedule at _COMMIT time */

/* Declares the local bookkeeping used by the other MPIR_SCHED_CHKPMEM_ macros:
 *   mpir_sched_chkpmem_stk_     array of n_ tracked pointers, all NULL
 *   mpir_sched_chkpmem_stk_sp_  number of pointers currently tracked (0)
 *   mpir_sched_chkpmem_stk_sz_  capacity n_, declared via MPIR_AssertDeclValue
 * Use it among the declarations of the enclosing function; n_ is the maximum
 * number of allocations that will be tracked. */
#define MPIR_SCHED_CHKPMEM_DECL(n_)                                 \
    void *(mpir_sched_chkpmem_stk_[n_]) = { NULL };                 \
    int mpir_sched_chkpmem_stk_sp_=0;                               \
    MPIR_AssertDeclValue(const int mpir_sched_chkpmem_stk_sz_,n_)
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* Allocates nbytes_ bytes with MPL_malloc(nbytes_, class_), casts the result to
 * type_ and stores it in pointer_.
 *
 * On success the pointer is pushed onto the bookkeeping stack declared by
 * MPIR_SCHED_CHKPMEM_DECL (asserting that the stack is not already full).
 * If the allocation yields NULL for a request larger than zero bytes, the
 * error is recorded with MPIR_CHKMEM_SETERR(rc_, nbytes_, name_) and stmt_ is
 * executed.  A NULL result for a request of zero bytes or fewer is not
 * treated as an error and nothing is tracked.
 *
 * pointer_ and nbytes_ are evaluated more than once, so pass expressions
 * without side effects. */
#define MPIR_SCHED_CHKPMEM_MALLOC_ORSTMT(pointer_,type_,nbytes_,rc_,name_,class_,stmt_)  \
    do {                                                                                 \
        (pointer_) = (type_)MPL_malloc(nbytes_,class_);                                  \
        if (pointer_) {                                                                  \
            MPIR_Assert(mpir_sched_chkpmem_stk_sp_ < mpir_sched_chkpmem_stk_sz_);        \
            mpir_sched_chkpmem_stk_[mpir_sched_chkpmem_stk_sp_++] = (pointer_);          \
        } else if ((nbytes_) > 0) {                                                      \
            MPIR_CHKMEM_SETERR((rc_),(nbytes_),(name_));                                 \
            stmt_;                                                                       \
        }                                                                                \
    } while (0)
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* Same as MPIR_SCHED_CHKPMEM_MALLOC_ORSTMT with "goto fn_fail" as the failure
 * statement, so the enclosing function must provide an fn_fail label. */
#define MPIR_SCHED_CHKPMEM_MALLOC(pointer_,type_,nbytes_,rc_,name_,class_) \
    MPIR_SCHED_CHKPMEM_MALLOC_ORSTMT(pointer_,type_,nbytes_,rc_,name_,class_,goto fn_fail)
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* Just cleanup, don't add anything to the schedule: pops every pointer tracked
 * on the MPIR_SCHED_CHKPMEM_ bookkeeping stack and releases it with MPL_free,
 * leaving the stack empty.  The sched_ argument is accepted for symmetry with
 * MPIR_SCHED_CHKPMEM_COMMIT but is not used. */
#define MPIR_SCHED_CHKPMEM_REAP(sched_)                                         \
    do {                                                                        \
        while (mpir_sched_chkpmem_stk_sp_ > 0) {                                \
            MPL_free(mpir_sched_chkpmem_stk_[--mpir_sched_chkpmem_stk_sp_]);    \
        }                                                                       \
    } while (0)
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* Hands the tracked allocations over to the schedule: first adds a barrier to
 * sched_ via MPIR_SCHED_BARRIER, then pops every pointer from the
 * MPIR_SCHED_CHKPMEM_ bookkeeping stack and registers MPIR_Sched_cb_free_buf
 * on sched_ with that pointer as the callback state.
 *
 * Requires "mpi_errno" to be in scope; a nonzero result from either call is
 * passed to MPIR_ERR_POP(mpi_errno).
 *
 * The barrier is issued on the macro argument sched_ rather than on a
 * variable that merely happens to be named "s" in the calling function. */
#define MPIR_SCHED_CHKPMEM_COMMIT(sched_)                                                      \
    do {                                                                                       \
        MPIR_SCHED_BARRIER(sched_);                                                            \
        while (mpir_sched_chkpmem_stk_sp_ > 0) {                                               \
            mpi_errno = MPIR_Sched_cb(&MPIR_Sched_cb_free_buf,                                 \
                                      (mpir_sched_chkpmem_stk_[--mpir_sched_chkpmem_stk_sp_]), \
                                      (sched_));                                               \
            if (mpi_errno) {                                                                   \
                MPIR_ERR_POP(mpi_errno);                                                       \
            }                                                                                  \
        }                                                                                      \
    } while (0)
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
#endif /* MPIR_NBC_H_INCLUDED */
|