Blame src/util/thread/mpiu_thread_pobj.h

Packit 0848f5
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
Packit 0848f5
/*
Packit 0848f5
 *  (C) 2001 by Argonne National Laboratory.
Packit 0848f5
 *      See COPYRIGHT in top-level directory.
Packit 0848f5
 */
Packit 0848f5
Packit 0848f5
#if !defined(MPIU_THREAD_POBJ_H_INCLUDED)
Packit 0848f5
#define MPIU_THREAD_POBJ_H_INCLUDED
Packit 0848f5
Packit 0848f5
/* There are multiple locks, one for each (major) object */
Packit 0848f5
Packit 0848f5
/* MT FIXME the following description is almost right, but it needs minor
Packit 0848f5
 * updates and revision to account for the COMPLETION CS and other issues in the
Packit 0848f5
 * request */
Packit 0848f5
/* The fine-grained locking discipline for requests is unfortunately complicated:
Packit 0848f5
 *
Packit 0848f5
 * (1) Raw allocation and deallocation of requests is protected internally by
Packit 0848f5
 * the HANDLEALLOC critical section.  This is currently the same as the HANDLE
Packit 0848f5
 * CS, not sure why we have both...
Packit 0848f5
 *
Packit 0848f5
 * (2) Once allocated, a directly allocated request is initially held exclusively
Packit 0848f5
 * by a single thread.  Direct allocation is common for send requests, but recv
Packit 0848f5
 * requests are usually created differently.
Packit 0848f5
 *
Packit 0848f5
 * (3) Most receive requests are created as the result of a call to FDP_or_AEU
Packit 0848f5
 * or FDU_or_AEP.  Calls to these functions (along with the other receive queue
Packit 0848f5
 * functions) must be inside a MSGQUEUE CS.  This CS protects the queue data
Packit 0848f5
 * structures as well as any fields inside the requests while they are in the
Packit 0848f5
 * queue.  For example, assume a call to FDU_or_AEP, as in MPID_Recv.  If the
Packit 0848f5
 * FDU case hits, the MSGQUEUE CS may be released immediately after the call.
Packit 0848f5
 * If the AEP case hits, however, the MSGQUEUE CS must remain held until any
Packit 0848f5
 * request field manipulation (such as dev.recv_pending_count) is complete.
Packit 0848f5
 *
Packit 0848f5
 * (4) In both the send and receive request cases, there is usually a particular
Packit 0848f5
 * thread in some upper-level code (e.g. MPI_Send) with interest in the
Packit 0848f5
 * completion of the request.  This may or may not be a thread that is also
Packit 0848f5
 * making progress on this request (often not).  The upper level code must not
Packit 0848f5
 * attempt to access any request fields (such as the status) until completion is
Packit 0848f5
 * signalled by the lower layer.
Packit 0848f5
 *
Packit 0848f5
 * (5) Once removed from the receive queue, the request is once again
Packit 0848f5
 * exclusively owned by the dequeuing thread.  From here, the dequeuing thread
Packit 0848f5
 * may do whatever it wants with the request without holding any CS, until it
Packit 0848f5
 * signals the request's completion.  Signalling completion indicates that the
Packit 0848f5
 * thread in the upper layer polling on it may access the rest of the fields in
Packit 0848f5
 * the request.  This completion signalling is lock-free and must be implemented
Packit 0848f5
 * carefully to work correctly in the face of optimizing compilers and CPUs.
Packit 0848f5
 * The upper-level thread now wholly owns the request until it is deallocated.
Packit 0848f5
 *
Packit 0848f5
 * (6) In ch3:nemesis at least, multithreaded access to send requests is managed
Packit 0848f5
 * by the MPIDCOMM (progress engine) CS.  The completion signalling pattern
Packit 0848f5
 * applies here (think MPI_Isend/MPI_Wait).
Packit 0848f5
 *
Packit 0848f5
 * (7) Request cancellation is tricky-ish.  For send cancellation, it is
Packit 0848f5
 * possible that the completion counter is actually *incremented* because a
Packit 0848f5
 * pkt is sent to the recipient asking for remote cancellation.  By asking for
Packit 0848f5
 * cancellation (of any kind of req), the upper layer gives up its exclusive
Packit 0848f5
 * access to the request and must wait for the completion counter to drop to 0
Packit 0848f5
 * before exclusively accessing the request fields.
Packit 0848f5
 *
Packit 0848f5
 * The completion counter is a reference count, much like the object liveness
Packit 0848f5
 * reference count.  However it differs from a normal refcount because of
Packit 0848f5
 * guarantees in the MPI Standard.  Applications must not attempt to complete
Packit 0848f5
 * (wait/test/free) a given request concurrently in two separate threads.  So
Packit 0848f5
 * checking for cc==0 is safe because only one thread is ever allowed to make
Packit 0848f5
 * that check.
Packit 0848f5
 *
Packit 0848f5
 * A non-zero completion count must always be accompanied by a normal reference
Packit 0848f5
 * that is logically held by the progress engine.  Similarly, once the
Packit 0848f5
 * completion counter drops to zero, the progress engine is expected to release
Packit 0848f5
 * its reference.
Packit 0848f5
 */
Packit 0848f5
/* lock ordering: if MPIDCOMM+MSGQUEUE must be acquired at the same time, then
Packit 0848f5
 * the order should be to acquire MPIDCOMM first, then MSGQUEUE.  Release in
Packit 0848f5
 * reverse order. */
Packit 0848f5
Packit 0848f5
/* POBJ locks are all real nonrecursive ops: each macro forwards to the
 * matching MPIUI_THREAD_CS_*_NONRECURSIVE operation, passing the string "POBJ"
 * as the first argument and the caller-supplied mutex as the second. */
#define MPIUI_THREAD_CS_ENTER_POBJ(mutex) MPIUI_THREAD_CS_ENTER_NONRECURSIVE("POBJ", mutex)
#define MPIUI_THREAD_CS_EXIT_POBJ(mutex) MPIUI_THREAD_CS_EXIT_NONRECURSIVE("POBJ", mutex)
#define MPIUI_THREAD_CS_YIELD_POBJ(mutex) MPIUI_THREAD_CS_YIELD_NONRECURSIVE("POBJ", mutex)
Packit 0848f5
Packit 0848f5
/* ALLGRAN locks are all real nonrecursive ops: each macro forwards to the
 * matching MPIUI_THREAD_CS_*_NONRECURSIVE operation, passing the string
 * "ALLGRAN" as the first argument and the caller-supplied mutex as the
 * second. */
#define MPIUI_THREAD_CS_ENTER_ALLGRAN(mutex) MPIUI_THREAD_CS_ENTER_NONRECURSIVE("ALLGRAN", mutex)
#define MPIUI_THREAD_CS_EXIT_ALLGRAN(mutex) MPIUI_THREAD_CS_EXIT_NONRECURSIVE("ALLGRAN", mutex)
#define MPIUI_THREAD_CS_YIELD_ALLGRAN(mutex) MPIUI_THREAD_CS_YIELD_NONRECURSIVE("ALLGRAN", mutex)
Packit 0848f5
Packit 0848f5
/* GLOBAL locks are all NO-OPs in this (per-object) header: the mutex argument
 * is never expanded, and each macro is an empty do/while(0) so that it still
 * behaves as a single statement (e.g. in an unbraced if/else). */
#define MPIUI_THREAD_CS_ENTER_GLOBAL(mutex) do {} while (0)
#define MPIUI_THREAD_CS_EXIT_GLOBAL(mutex) do {} while (0)
#define MPIUI_THREAD_CS_YIELD_GLOBAL(mutex) do {} while (0)
Packit 0848f5
Packit 0848f5
/* define a type for the completion counter */
Packit 0848f5
#include "opa_primitives.h"
Packit 0848f5
Packit 0848f5
/* Completion-counter type.  OPA_int_t comes from opa_primitives.h; the
 * MPIU_cc_* helpers in this file read and write it only through OPA_* calls. */
typedef OPA_int_t MPIU_cc_t;
Packit 0848f5
Packit 0848f5
/* implies no barrier, since this routine should only be used for request
Packit 0848f5
 * initialization */
Packit 0848f5
static inline void MPIU_cc_set(MPIU_cc_t * cc_ptr, int val)
Packit 0848f5
{
Packit 0848f5
    if (val == 0) {
Packit 0848f5
        /* values other than 0 do not enforce any ordering, and therefore do not
Packit 0848f5
         * start a HB arc */
Packit 0848f5
        /* MT FIXME using cc_set in this way is sloppy.  Sometimes the caller
Packit 0848f5
         * really does know that the cc value may cleared, but more likely this
Packit 0848f5
         * is just a hack to avoid the work of figuring out what the cc value
Packit 0848f5
         * currently is and decrementing it instead. */
Packit 0848f5
        /* barrier ensures that any state written before indicating completion is
Packit 0848f5
         * seen by the thread polling on the cc.  If OPA adds store-release
Packit 0848f5
         * semantics, we can convert to that instead. */
Packit 0848f5
        OPA_write_barrier();
Packit 0848f5
        MPL_VG_ANNOTATE_HAPPENS_BEFORE(cc_ptr);
Packit 0848f5
    }
Packit 0848f5
Packit 0848f5
#if defined(MPL_VG_AVAILABLE)
Packit 0848f5
    /* MT subtle: store_int is actually safe to use, but Helgrind/DRD/TSan all
Packit 0848f5
     * view the store/load pair as a race.  Using an atomic operation for the
Packit 0848f5
     * store side makes all three happy.  DRD & TSan also support
Packit 0848f5
     * ANNOTATE_BENIGN_RACE, but Helgrind does not. */
Packit 0848f5
    OPA_swap_int(cc_ptr, val);
Packit 0848f5
#else
Packit 0848f5
    OPA_store_int(cc_ptr, val);
Packit 0848f5
#endif
Packit 0848f5
}
Packit 0848f5
Packit 0848f5
/* Returns nonzero iff the completion counter *cc_ptr currently loads as 0.
 *
 * When the counter is 0, a happens-after annotation and a read barrier are
 * issued after the load and before returning; this is the reader-side
 * counterpart of the write barrier issued by the code that drops the counter
 * to 0.  No barrier is issued when the counter is still nonzero.
 */
ATTRIBUTE((unused))
static MPL_DBG_INLINE_KEYWORD int MPIU_cc_is_complete(MPIU_cc_t * cc_ptr)
{
    int complete;

    complete = (0 == OPA_load_int(cc_ptr));
    if (complete) {
        MPL_VG_ANNOTATE_HAPPENS_AFTER(cc_ptr);
        /* order the cc load before any subsequent reads by the caller */
        OPA_read_barrier();
    }

    return complete;
}
Packit 0848f5
Packit 0848f5
/* Decrements the completion counter pointed to by cc_ptr_.
 *
 *   cc_ptr_     - pointer to the completion counter
 *   incomplete_ - pointer to an int-like lvalue; *(incomplete_) is set to the
 *                 negation of OPA_decr_and_test_int's result, i.e. it is TRUE
 *                 iff the cc is still > 0 after the decr
 *
 * A write barrier and a happens-before annotation are issued before the
 * decrement.  incomplete_ is expanded more than once, so it must not have side
 * effects. */
#define MPIU_cc_decr(cc_ptr_, incomplete_)                      \
    do {                                                        \
        OPA_write_barrier();                                    \
        MPL_VG_ANNOTATE_HAPPENS_BEFORE(cc_ptr_);                \
        *(incomplete_) = !OPA_decr_and_test_int(cc_ptr_);       \
        /* TODO check if this HA is actually necessary */       \
        if (!*(incomplete_)) {                                  \
            MPL_VG_ANNOTATE_HAPPENS_AFTER(cc_ptr_);             \
        }                                                       \
    } while (0)
Packit 0848f5
Packit 0848f5
/* MT FIXME does this need a HB/HA annotation?  This macro is only used for
 * cancel_send right now. */
/* Increments the completion counter pointed to by cc_ptr_.
 *
 *   cc_ptr_         - pointer to the completion counter
 *   was_incomplete_ - pointer to an int-like lvalue; *(was_incomplete_)
 *                     receives the value returned by OPA_fetch_and_incr_int
 *                     (the counter value before the incr), so it is TRUE
 *                     (nonzero) iff the cc != 0 before the incr
 *
 * No barrier is issued by this macro. */
#define MPIU_cc_incr(cc_ptr_, was_incomplete_)                  \
    do {                                                        \
        *(was_incomplete_) = OPA_fetch_and_incr_int(cc_ptr_);   \
    } while (0)
Packit 0848f5
Packit 0848f5
/* Loads the current value of a completion counter.  Unlike the other
 * MPIU_cc_* operations, cc_ is the counter lvalue itself, not a pointer to
 * it; its address is taken here.  No barrier is issued. */
#define MPIU_cc_get(cc_) \
    (OPA_load_int(&(cc_)))
Packit 0848f5
Packit 0848f5
/* "publishes" the obj with handle value (handle_) via the handle pointer
Packit 0848f5
 * (hnd_lval_).  That is, it is a version of the following statement that fixes
Packit 0848f5
 * memory consistency issues:
Packit 0848f5
 *     (hnd_lval_) = (handle_);
Packit 0848f5
 *
Packit 0848f5
 * assumes that the following is always true: typeof(hnd_lval_)==int
Packit 0848f5
 */
Packit 0848f5
/* This could potentially be generalized beyond MPI-handle objects, but we
Packit 0848f5
 * should only take that step after seeing good evidence of its use.  A general
Packit 0848f5
 * macro (that is portable to non-gcc compilers) will need type information to
Packit 0848f5
 * make the appropriate volatile cast. */
Packit 0848f5
/* Ideally _GLOBAL would use this too, but we don't want to count on OPA
Packit 0848f5
 * availability in _GLOBAL mode.  Instead the GLOBAL critical section should be
Packit 0848f5
 * used. */
Packit 0848f5
/*   hnd_lval_ - int lvalue that receives the handle (its address is taken and
 *               cast to volatile int * on the threaded path)
 *   handle_   - handle value to store
 *
 * When MPIR_ThreadInfo.isThreaded is nonzero, a write barrier precedes a
 * volatile store; otherwise this is a plain assignment. */
#define MPIU_OBJ_PUBLISH_HANDLE(hnd_lval_, handle_)                     \
    do {                                                                \
        if (MPIR_ThreadInfo.isThreaded) {                               \
            /* wmb ensures all read-only object field values are seen before the */ \
            /* handle value is seen at the application level */         \
            OPA_write_barrier();                                        \
            /* volatile ensures lval is not speculatively read or written */ \
            *(volatile int *)&(hnd_lval_) = (handle_);                  \
        }                                                               \
        else {                                                          \
            (hnd_lval_) = (handle_);                                    \
        }                                                               \
    } while (0)
Packit 0848f5
Packit 0848f5
#endif /* !defined(MPIU_THREAD_POBJ_H_INCLUDED) */