/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
* (C) 2001 by Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*
*/
#ifndef MPIR_REQUEST_H_INCLUDED
#define MPIR_REQUEST_H_INCLUDED
#include "mpir_process.h"
/*
=== BEGIN_MPI_T_CVAR_INFO_BLOCK ===
categories :
- name : REQUEST
description : A category for request management variables
cvars:
- name : MPIR_CVAR_REQUEST_POLL_FREQ
category : REQUEST
type : int
default : 8
class : device
verbosity : MPI_T_VERBOSITY_USER_BASIC
scope : MPI_T_SCOPE_LOCAL
description : >-
How frequently to poll during completion calls (wait/test), in terms
of the number of requests processed before polling.
- name : MPIR_CVAR_REQUEST_BATCH_SIZE
category : REQUEST
type : int
default : 64
class : device
verbosity : MPI_T_VERBOSITY_USER_BASIC
scope : MPI_T_SCOPE_LOCAL
description : >-
The number of requests to make completion as a batch
in MPI_Waitall and MPI_Testall implementation. A large number
is likely to cause more cache misses.
=== END_MPI_T_CVAR_INFO_BLOCK ===
*/
/* NOTE-R1: MPIR_REQUEST_KIND__MPROBE signifies that this is a request created by
* MPI_Mprobe or MPI_Improbe. Since we use MPI_Request objects as our
* MPI_Message objects, we use this separate kind in order to provide stronger
* error checking. Once a message (backed by a request) is promoted to a real
* request by calling MPI_Mrecv/MPI_Imrecv, we actually modify the kind to be
* MPIR_REQUEST_KIND__RECV in order to keep completion logic as simple as possible. */
/*E
MPIR_Request_kind - Kinds of MPI Requests
Module:
Request-DS
E*/
/* Value stored in the 'kind' field of 'struct MPIR_Request'. The kind also
 * selects which member of the request's union 'u' is meaningful. */
typedef enum MPIR_Request_kind_t {
MPIR_REQUEST_KIND__UNDEFINED,
/* SEND and RECV are the only kinds accepted by
 * MPIR_Request_completion_processing_fastpath */
MPIR_REQUEST_KIND__SEND,
MPIR_REQUEST_KIND__RECV,
/* the two persistent kinds; see MPIR_Request_is_persistent (u.persist) */
MPIR_REQUEST_KIND__PREQUEST_SEND,
MPIR_REQUEST_KIND__PREQUEST_RECV,
/* generalized request (u.ureq) */
MPIR_REQUEST_KIND__GREQUEST,
/* nonblocking collective (u.nbc) */
MPIR_REQUEST_KIND__COLL,
MPIR_REQUEST_KIND__MPROBE, /* see NOTE-R1 */
MPIR_REQUEST_KIND__RMA,
/* NOTE(review): presumably a sentinel marking the end of the kinds declared
 * at this layer -- confirm before relying on its numeric value */
MPIR_REQUEST_KIND__LAST
/* a device may append further enumerators after LAST by defining
 * MPID_REQUEST_KIND_DECL */
#ifdef MPID_REQUEST_KIND_DECL
, MPID_REQUEST_KIND_DECL
#endif
} MPIR_Request_kind_t;
/* This currently defines a single structure type for all requests.
Eventually, we may want a union type, as used in MPICH-1 */
/* Typedefs for Fortran generalized requests.
 * Note that these name function types, not pointer-to-function types; a
 * callback must be declared as a pointer to one of them.
 * NOTE(review): the void * is presumably the user's extra state and the
 * MPI_Fint * arguments the Fortran by-reference parameters (flag/status and
 * ierror) -- confirm against the Fortran binding code. */
typedef void (MPIR_Grequest_f77_cancel_function) (void *, MPI_Fint *, MPI_Fint *);
typedef void (MPIR_Grequest_f77_free_function) (void *, MPI_Fint *);
typedef void (MPIR_Grequest_f77_query_function) (void *, MPI_Fint *, MPI_Fint *);
/* vtable-ish structure holding generalized request function pointers and other
 * state. Saves ~48 bytes in pt2pt requests on many platforms.
 * A request of kind MPIR_REQUEST_KIND__GREQUEST refers to one of these through
 * u.ureq.greq_fns; MPIR_Request_free releases it with MPL_free. */
struct MPIR_Grequest_fns {
/* user callbacks of a generalized request */
MPI_Grequest_cancel_function *cancel_fn;
MPI_Grequest_free_function *free_fn;
MPI_Grequest_query_function *query_fn;
/* extended generalized request callbacks; either may be NULL (see
 * MPIR_Request_has_poll_fn / MPIR_Request_has_wait_fn) */
MPIX_Grequest_poll_function *poll_fn;
MPIX_Grequest_wait_function *wait_fn;
/* opaque user state: passed by value to poll_fn and by address (as a
 * one-element array) to wait_fn */
void *grequest_extra_state;
MPIX_Grequest_class greq_class;
MPIR_Lang_t greq_lang; /* language that defined
* the generalized request */
};
/* Reference-counted object that bundles a set of generalized request
 * callbacks (query/free/cancel plus the extended poll/wait pair). */
typedef struct MPIR_Grequest_class {
MPIR_OBJECT_HEADER; /* adds handle and ref_count fields */
MPI_Grequest_query_function *query_fn;
MPI_Grequest_free_function *free_fn;
MPI_Grequest_cancel_function *cancel_fn;
MPIX_Grequest_poll_function *poll_fn;
MPIX_Grequest_wait_function *wait_fn;
/* NOTE(review): presumably links the classes into a singly linked list --
 * confirm against the code that creates and frees classes */
struct MPIR_Grequest_class *next;
} MPIR_Grequest_class;
/* Copy the status stored in a request into a user-supplied status object.
 *
 *   request_ptr_ - pointer to the request whose 'status' member is copied
 *   status_      - destination MPI_Status pointer, or MPI_STATUS_IGNORE, in
 *                  which case nothing is done
 *
 * The destination's MPI_ERROR field is preserved across the copy.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (safe in an unbraced if/else), and 'status_' is evaluated
 * exactly once.  Invoke it with a trailing semicolon. */
#define MPIR_Request_extract_status(request_ptr_, status_)                  \
    do {                                                                    \
        MPI_Status *status_ptr__ = (status_);                               \
                                                                            \
        if (status_ptr__ != MPI_STATUS_IGNORE) {                            \
            /* According to the MPI 1.1 standard page 22 lines 9-12,        \
             * the MPI_ERROR field may not be modified except by the        \
             * functions in section 3.7.5 which return                      \
             * MPI_ERR_IN_STATUSES (MPI_Wait{all,some} and                  \
             * MPI_Test{all,some}). */                                      \
            int error__ = status_ptr__->MPI_ERROR;                          \
                                                                            \
            *status_ptr__ = (request_ptr_)->status;                         \
            status_ptr__->MPI_ERROR = error__;                              \
        }                                                                   \
    } while (0)
/* Evaluates MPIR_cc_is_complete on the completion counter the request points
 * to through cc_ptr (which need not be the request's own 'cc' member). */
#define MPIR_Request_is_complete(req_) (MPIR_cc_is_complete((req_)->cc_ptr))
/*S
MPIR_Request - Description of the Request data structure
Module:
Request-DS
Notes:
If it is necessary to remember the MPI datatype, this information is
saved within the device-specific fields provided by 'MPID_DEV_REQUEST_DECL'.
Requests come in many flavors, as stored in the 'kind' field. It is
expected that each kind of request will have its own structure type
(e.g., 'MPIR_Request_send_t') that extends the 'MPIR_Request'.
S*/
struct MPIR_Request {
MPIR_OBJECT_HEADER; /* adds handle and ref_count fields */
/* flavor of the request; also selects the meaningful member of 'u' below */
MPIR_Request_kind_t kind;
/* pointer to the completion counter. This is necessary for the
 * case when an operation is described by a list of requests.
 * MPIR_Request_create points it at this request's own 'cc'. */
MPIR_cc_t *cc_ptr;
/* the actual completion counter. Ensure cc and status are in the
 * same cache line, assuming the cache line size is a multiple of
 * 32 bytes and 32-bit integers */
MPIR_cc_t cc;
#ifdef MPICH_THREAD_USE_MDTA
/* Synchronization variable for wait/signal. NULL after creation;
 * MPIR_Request_free signals it and resets it to NULL if it is set. */
MPIR_Thread_sync_t *sync;
#endif
/* completion notification counter: this must be decremented by
 * the request completion routine, when the completion count hits
 * zero. this counter allows us to keep track of the completion
 * of multiple requests in a single place. NULL after creation. */
MPIR_cc_t *completion_notification;
/* A comm is needed to find the proper error handler. NULL after
 * creation; if set, MPIR_Request_free releases it with
 * MPIR_Comm_release once the last reference to the request is gone. */
MPIR_Comm *comm;
/* Status is needed for wait/test/recv */
MPI_Status status;
/* per-kind state; which member is valid is determined by 'kind' */
union {
struct {
/* callbacks and state of a generalized request; released with
 * MPL_free by MPIR_Request_free when non-NULL */
struct MPIR_Grequest_fns *greq_fns;
} ureq; /* kind : MPIR_REQUEST_KIND__GREQUEST */
struct {
/* set to MPIR_ERR_NONE by MPIR_Request_create */
MPIR_Errflag_t errflag;
MPII_Coll_req_t coll;
} nbc; /* kind : MPIR_REQUEST_KIND__COLL */
#if defined HAVE_DEBUGGER_SUPPORT
struct {
struct MPIR_Sendq *dbg_next;
} send; /* kind : MPIR_REQUEST_KIND__SEND */
#endif /* HAVE_DEBUGGER_SUPPORT */
struct {
#if defined HAVE_DEBUGGER_SUPPORT
struct MPIR_Sendq *dbg_next;
#endif /* HAVE_DEBUGGER_SUPPORT */
/* Persistent requests have their own "real" requests; NULL while the
 * persistent request is inactive (see MPIR_Request_is_active) */
struct MPIR_Request *real_request;
} persist; /* kind : MPIR_REQUEST_KIND__PREQUEST_SEND or
* MPIR_REQUEST_KIND__PREQUEST_RECV */
} u;
/* Other, device-specific information */
#ifdef MPID_DEV_REQUEST_DECL
MPID_DEV_REQUEST_DECL
#endif
};
/* NOTE(review): presumably the number of entries in MPIR_Request_direct --
 * confirm against the definition of that array */
#define MPIR_REQUEST_PREALLOC 8
/* Allocator state from which MPIR_Request_create obtains request objects and
 * to which MPIR_Request_free returns them */
extern MPIR_Object_alloc_t MPIR_Request_mem;
/* Preallocated request objects */
extern MPIR_Request MPIR_Request_direct[];
/* Return 1 when the request is of one of the two persistent kinds
 * (PREQUEST_SEND or PREQUEST_RECV) and 0 for every other kind.
 * req_ptr must not be NULL. */
static inline int MPIR_Request_is_persistent(MPIR_Request * req_ptr)
{
    switch (req_ptr->kind) {
        case MPIR_REQUEST_KIND__PREQUEST_SEND:
        case MPIR_REQUEST_KIND__PREQUEST_RECV:
            return 1;
        default:
            return 0;
    }
}
/* Tell whether a request is active.
 *
 * A NULL request is never active.  A non-persistent request is always
 * active.  A persistent request is active only while it has an associated
 * "real" request, i.e. while it is bound to an ongoing communication.
 *
 * Returns 1 if active, 0 otherwise. */
static inline int MPIR_Request_is_active(MPIR_Request * req_ptr)
{
    if (req_ptr == NULL) {
        return 0;
    }
    if (!MPIR_Request_is_persistent(req_ptr)) {
        return 1;
    }
    return (req_ptr->u.persist.real_request != NULL);
}
/* Bit flags describing an array of requests; OPT_ALL is the union of the
 * three individual flags. Note that the flags start at bit 1, bit 0 is unused.
 * NOTE(review): presumably these are the values passed as the trailing
 * property argument of MPIR_Testall_impl / MPIR_Waitall_impl -- confirm. */
#define MPIR_REQUESTS_PROPERTY__NO_NULL (1 << 1)
#define MPIR_REQUESTS_PROPERTY__NO_GREQUESTS (1 << 2)
#define MPIR_REQUESTS_PROPERTY__SEND_RECV_ONLY (1 << 3)
#define MPIR_REQUESTS_PROPERTY__OPT_ALL (MPIR_REQUESTS_PROPERTY__NO_NULL \
| MPIR_REQUESTS_PROPERTY__NO_GREQUESTS \
| MPIR_REQUESTS_PROPERTY__SEND_RECV_ONLY)
/* Allocate a request object of the given kind and put it into its initial
 * state: reference count 1, completion counter 1 (with cc_ptr pointing at the
 * request's own counter), no completion notification, no communicator,
 * MPI_SUCCESS in the status with the cancel bit cleared.  Kind-specific
 * members are reset for SEND and COLL requests, and the device hook
 * MPID_Request_create_hook is invoked last.
 *
 * Returns the new request, or NULL if the allocation failed. */
static inline MPIR_Request *MPIR_Request_create(MPIR_Request_kind_t kind)
{
    MPIR_Request *req = MPIR_Handle_obj_alloc(&MPIR_Request_mem);

    if (req == NULL) {
        /* FIXME: This fails to fail if debugging is turned off */
        MPL_DBG_MSG(MPIR_DBG_REQUEST, TYPICAL, "unable to allocate a request");
        return NULL;
    }

    MPL_DBG_MSG_P(MPIR_DBG_REQUEST, VERBOSE, "allocated request, handle=0x%08x", req->handle);
#ifdef MPICH_DBG_OUTPUT
    /* Sanity check on the freshly allocated handle; abort on mismatch
     * (used in place of MPIR_Assert(HANDLE_GET_MPI_KIND(req->handle) ==
     * MPIR_REQUEST)). */
    if (HANDLE_GET_MPI_KIND(req->handle) != MPIR_REQUEST) {
        int mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL,
                                             FCNAME, __LINE__, MPI_ERR_OTHER,
                                             "**invalid_handle", "**invalid_handle %d",
                                             req->handle);
        MPID_Abort(MPIR_Process.comm_world, mpi_errno, -1, NULL);
    }
#endif

    /* FIXME: This makes request creation expensive.  We need to trim
     * this to the basics, with additional setup for special-purpose
     * requests (think base class and inheritance).  For example, do we
     * really want to set the kind to UNDEFINED?  And should the RMA
     * values be set only for RMA requests? */
    MPIR_Object_set_ref(req, 1);
    req->kind = kind;

    /* completion state: one outstanding operation, tracked locally */
    req->cc_ptr = &req->cc;
    MPIR_cc_set(&req->cc, 1);
    req->completion_notification = NULL;

    req->comm = NULL;
    req->status.MPI_ERROR = MPI_SUCCESS;
    MPIR_STATUS_SET_CANCEL_BIT(req->status, FALSE);
#ifdef MPICH_THREAD_USE_MDTA
    req->sync = NULL;
#endif

    /* kind-specific members of the union */
    if (kind == MPIR_REQUEST_KIND__SEND) {
        MPII_REQUEST_CLEAR_DBG(req);
    } else if (kind == MPIR_REQUEST_KIND__COLL) {
        req->u.nbc.errflag = MPIR_ERR_NONE;
    }

    MPID_Request_create_hook(req);

    return req;
}
/* Take one additional reference on a request; forwards to
 * MPIR_Object_add_ref. */
#define MPIR_Request_add_ref(req_p_) \
do { MPIR_Object_add_ref(req_p_); } while (0)
/* Drop one reference on a request; forwards to MPIR_Object_release_ref.
 * inuse_ is a pointer to an int that receives the result; MPIR_Request_free
 * treats *inuse_ == 0 as "no references left" and destroys the request. */
#define MPIR_Request_release_ref(req_p_, inuse_) \
do { MPIR_Object_release_ref(req_p_, inuse_); } while (0)
/* Return a request that is already complete.
 *
 * With HAVE_DEBUGGER_SUPPORT a fresh request of the given kind is created and
 * its completion counter is set to 0.  MPIR_Request_create can fail, so the
 * counter is only touched when the allocation succeeded; on failure NULL is
 * returned to the caller instead of dereferencing a NULL pointer.
 *
 * Without debugger support the shared request MPIR_Process.lw_req is returned
 * with one additional reference taken; 'kind' is not used on that path.
 *
 * Returns the request, or NULL if a request could not be allocated
 * (debugger-support builds only). */
MPL_STATIC_INLINE_PREFIX MPIR_Request *MPIR_Request_create_complete(MPIR_Request_kind_t kind)
{
    MPIR_Request *req;
#ifdef HAVE_DEBUGGER_SUPPORT
    req = MPIR_Request_create(kind);
    if (req != NULL) {
        MPIR_cc_set(&req->cc, 0);
    }
#else
    req = MPIR_Process.lw_req;
    MPIR_Request_add_ref(req);
#endif
    return req;
}
/* Drop one reference on a request and destroy it when that was the last one.
 *
 * On every call: the reference count is decremented, the device is told via
 * MPID_Request_free_hook, and (MDTA builds) a waiter attached through
 * req->sync is signalled and detached.
 *
 * Only when no reference remains: the communicator reference is released,
 * the generalized-request function table is freed, the device destroy hook
 * runs, and the object goes back to MPIR_Request_mem. */
static inline void MPIR_Request_free(MPIR_Request * req)
{
    int still_referenced;

    MPIR_Request_release_ref(req, &still_referenced);

    /* inform the device that we are decrementing the ref-count on
     * this request */
    MPID_Request_free_hook(req);

#ifdef MPICH_THREAD_USE_MDTA
    /* We signal the possible waiter to complete this request. */
    if (req->sync) {
        MPIR_Thread_sync_signal(req->sync, 0);
        req->sync = NULL;
    }
#endif

    if (still_referenced != 0) {
        return;
    }

    MPL_DBG_MSG_P(MPIR_DBG_REQUEST, VERBOSE, "freeing request, handle=0x%08x", req->handle);
#ifdef MPICH_DBG_OUTPUT
    /* consistency checks: the handle must be a request handle ... */
    if (HANDLE_GET_MPI_KIND(req->handle) != MPIR_REQUEST) {
        int mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL,
                                             FCNAME, __LINE__, MPI_ERR_OTHER,
                                             "**invalid_handle", "**invalid_handle %d",
                                             req->handle);
        MPID_Abort(MPIR_Process.comm_world, mpi_errno, -1, NULL);
    }
    /* ... and the reference count must really have reached zero */
    if (req->ref_count != 0) {
        int mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL,
                                             FCNAME, __LINE__, MPI_ERR_OTHER,
                                             "**invalid_refcount", "**invalid_refcount %d",
                                             req->ref_count);
        MPID_Abort(MPIR_Process.comm_world, mpi_errno, -1, NULL);
    }
#endif

    /* FIXME: We need a better way to handle these so that we do
     * not always need to initialize these fields and check them
     * when we destroy a request */
    /* FIXME: We need a way to call these routines ONLY when the
     * related ref count has become zero. */
    if (req->comm != NULL) {
        MPIR_Comm_release(req->comm);
    }

    if (req->kind == MPIR_REQUEST_KIND__GREQUEST) {
        if (req->u.ureq.greq_fns != NULL) {
            MPL_free(req->u.ureq.greq_fns);
        }
    }

    MPID_Request_destroy_hook(req);
    MPIR_Handle_obj_free(&MPIR_Request_mem, req);
}
#ifdef MPICH_THREAD_USE_MDTA
/* Attach a synchronization object to a request so that a waiter can be
 * signalled when the request is freed.
 *
 * For a persistent request the same object is also attached to the underlying
 * "real" request.  An inactive persistent request has no real request
 * (real_request == NULL), so that case is skipped rather than dereferencing
 * a NULL pointer.
 *
 *   req_ptr - request to attach to; must not be NULL
 *   sync    - synchronization object to store in the request(s) */
MPL_STATIC_INLINE_PREFIX void MPIR_Request_attach_sync(MPIR_Request * req_ptr,
                                                       MPIR_Thread_sync_t * sync)
{
    req_ptr->sync = sync;
    if (MPIR_Request_is_persistent(req_ptr) && req_ptr->u.persist.real_request != NULL) {
        req_ptr->u.persist.real_request->sync = sync;
    }
}
#endif
/* Lightweight counterpart of MPIR_Request_completion_processing.
 *
 * Only requests of kind MPIR_REQUEST_KIND__SEND or MPIR_REQUEST_KIND__RECV
 * are accepted (asserted).  No status object is filled in, on the assumption
 * that performance-critical callers pass MPI_STATUS_IGNORE or
 * MPI_STATUSES_IGNORE anyway.  This routine (or some variation of it) is an
 * unfortunately necessary stunt to get high message rates on key benchmarks
 * for high-end systems.
 *
 *   request     - user-visible handle; set to MPI_REQUEST_NULL on return
 *   request_ptr - the request object; one reference is dropped
 *
 * Returns the MPI_ERROR value recorded in the request's status.
 */
#undef FUNCNAME
#define FUNCNAME MPIR_Request_completion_processing_fastpath
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
MPL_STATIC_INLINE_PREFIX int MPIR_Request_completion_processing_fastpath(MPI_Request * request,
                                                                         MPIR_Request * request_ptr)
{
    int saved_error;

    MPIR_Assert(request_ptr->kind == MPIR_REQUEST_KIND__SEND ||
                request_ptr->kind == MPIR_REQUEST_KIND__RECV);

    /* SEND and RECV complete the same way at this time, except that a send
     * request is first dropped from the send queue */
    switch (request_ptr->kind) {
        case MPIR_REQUEST_KIND__SEND:
            /* FIXME: are Ibsend requests added to the send queue? */
            MPII_SENDQ_FORGET(request_ptr);
            break;
        default:
            break;
    }

    /* read the error before the request may be destroyed */
    saved_error = request_ptr->status.MPI_ERROR;
    MPIR_Request_free(request_ptr);
    *request = MPI_REQUEST_NULL;

    return saved_error;
}
/* General (non-fastpath) completion processing; only declared here.
 * NOTE(review): the int * is presumably an "active" out-flag -- confirm
 * against the definition. */
int MPIR_Request_completion_processing(MPIR_Request *, MPI_Status *, int *);
int MPIR_Request_get_error(MPIR_Request *);
/* Forward declarations of device-layer inline helpers used by
 * MPIR_Request_is_anysrc_mismatched; their definitions must be supplied by
 * the device headers. */
MPL_STATIC_INLINE_PREFIX int MPID_Request_is_anysource(MPIR_Request *);
MPL_STATIC_INLINE_PREFIX int MPID_Comm_AS_enabled(MPIR_Comm *);
/* Control variable consulted by MPIR_Request_is_anysrc_mismatched; when it is
 * zero that check always returns 0. */
extern int MPIR_CVAR_ENABLE_FT;
/* The following routines are ULFM helpers. */
/* This routine checks whether the request is "anysource" while its
 * communicator is not, which usually happens because a process in the
 * communicator has failed.
 *
 * Returns 1 only if fault tolerance is enabled (MPIR_CVAR_ENABLE_FT), the
 * request is still incomplete, the device reports it as an anysource request,
 * and anysource is not enabled on the request's communicator; 0 otherwise.
 * The conditions are tested in that order and evaluation stops at the first
 * one that fails. */
MPL_STATIC_INLINE_PREFIX int MPIR_Request_is_anysrc_mismatched(MPIR_Request * req_ptr)
{
    if (!MPIR_CVAR_ENABLE_FT) {
        return 0;
    }
    if (MPIR_Request_is_complete(req_ptr)) {
        return 0;
    }
    if (!MPID_Request_is_anysource(req_ptr)) {
        return 0;
    }
    return !MPID_Comm_AS_enabled((req_ptr)->comm);
}
/* This routine handles the request when its associated process has failed. */
int MPIR_Request_handle_proc_failed(MPIR_Request * request_ptr);
/* The following routines perform the callouts to the user routines registered
as part of a generalized request. They handle any language binding issues
that are necessary. They are used when completing, freeing, cancelling or
extracting the status from a generalized request. */
int MPIR_Grequest_cancel(MPIR_Request * request_ptr, int complete);
int MPIR_Grequest_query(MPIR_Request * request_ptr);
int MPIR_Grequest_free(MPIR_Request * request_ptr);
void MPIR_Grequest_complete(MPIR_Request * request_ptr);
/* Create a generalized request from the three standard callbacks and the
 * user's extra state; the new request is returned through request_ptr. */
int MPIR_Grequest_start(MPI_Grequest_query_function * query_fn,
MPI_Grequest_free_function * free_fn,
MPI_Grequest_cancel_function * cancel_fn,
void *extra_state, MPIR_Request ** request_ptr);
/* Extended variant that additionally takes poll and wait callbacks.
 * NOTE(review): the unnamed void * and MPIR_Request ** are presumably the
 * extra state and the output request, as in MPIR_Grequest_start -- confirm. */
int MPIX_Grequest_start_impl(MPI_Grequest_query_function *,
MPI_Grequest_free_function *,
MPI_Grequest_cancel_function *,
MPIX_Grequest_poll_function *,
MPIX_Grequest_wait_function *, void *, MPIR_Request **);
/* These routines below are helpers for the Extended generalized requests. */
/* Return 1 if the request is a generalized request that has a function table
 * with a non-NULL poll callback, 0 otherwise.  The function table is only
 * inspected for requests of kind MPIR_REQUEST_KIND__GREQUEST. */
MPL_STATIC_INLINE_PREFIX int MPIR_Request_has_poll_fn(MPIR_Request * request_ptr)
{
    const struct MPIR_Grequest_fns *fns;

    if (request_ptr->kind != MPIR_REQUEST_KIND__GREQUEST) {
        return 0;
    }
    fns = request_ptr->u.ureq.greq_fns;
    return (fns != NULL && fns->poll_fn != NULL);
}
/* Return 1 if the request is a generalized request that has a function table
 * with a non-NULL wait callback, 0 otherwise.  The function table is only
 * inspected for requests of kind MPIR_REQUEST_KIND__GREQUEST. */
MPL_STATIC_INLINE_PREFIX int MPIR_Request_has_wait_fn(MPIR_Request * request_ptr)
{
    const struct MPIR_Grequest_fns *fns;

    if (request_ptr->kind != MPIR_REQUEST_KIND__GREQUEST) {
        return 0;
    }
    fns = request_ptr->u.ureq.greq_fns;
    return (fns != NULL && fns->wait_fn != NULL);
}
/* Invoke the wait callback of a generalized request.
 *
 * The callback is called with a count of 1, the address of the request's
 * extra-state pointer (serving as a one-element array of states), a third
 * argument of 0, and the caller's status.  The caller must have verified that
 * a wait callback exists (see MPIR_Request_has_wait_fn).
 *
 * Returns whatever the callback returns. */
MPL_STATIC_INLINE_PREFIX int MPIR_Grequest_wait(MPIR_Request * request_ptr, MPI_Status * status)
{
    struct MPIR_Grequest_fns *fns = request_ptr->u.ureq.greq_fns;

    return (fns->wait_fn) (1, &fns->grequest_extra_state, 0, status);
}
/* Invoke the poll callback of a generalized request, passing the request's
 * extra state and the caller's status.  The caller must have verified that a
 * poll callback exists (see MPIR_Request_has_poll_fn).
 *
 * Returns whatever the callback returns. */
MPL_STATIC_INLINE_PREFIX int MPIR_Grequest_poll(MPIR_Request * request_ptr, MPI_Status * status)
{
    struct MPIR_Grequest_fns *fns = request_ptr->u.ureq.greq_fns;

    return (fns->poll_fn) (fns->grequest_extra_state, status);
}
/* Completion routines operating on request objects (MPIR_Request *).
 * The Testall/Waitall variants take a trailing integer that is presumably a
 * combination of MPIR_REQUESTS_PROPERTY__* flags -- TODO confirm; note that the
 * parameter is spelled 'requests_property' in one prototype and
 * 'request_properties' in the other. */
int MPIR_Test_impl(MPIR_Request * request, int *flag, MPI_Status * status);
int MPIR_Testall_impl(int count, MPIR_Request * request_ptrs[], int *flag,
MPI_Status array_of_statuses[], int requests_property);
int MPIR_Testany_impl(int count, MPIR_Request * request_ptrs[],
int *indx, int *flag, MPI_Status * status);
int MPIR_Testsome_impl(int incount, MPIR_Request * request_ptrs[],
int *outcount, int array_of_indices[], MPI_Status array_of_statuses[]);
int MPIR_Wait_impl(MPIR_Request * request_ptr, MPI_Status * status);
int MPIR_Waitall_impl(int count, MPIR_Request * request_ptrs[], MPI_Status array_of_statuses[],
int request_properties);
int MPIR_Waitany_impl(int count, MPIR_Request * request_ptrs[], int *indx, MPI_Status * status);
int MPIR_Waitsome_impl(int incount, MPIR_Request * request_ptrs[],
int *outcount, int array_of_indices[], MPI_Status array_of_statuses[]);
/* Completion routines operating on user-visible handles (MPI_Request) */
int MPIR_Test(MPI_Request * request, int *flag, MPI_Status * status);
int MPIR_Testall(int count, MPI_Request array_of_requests[], int *flag,
MPI_Status array_of_statuses[]);
int MPIR_Wait(MPI_Request * request, MPI_Status * status);
int MPIR_Waitall(int count, MPI_Request array_of_requests[], MPI_Status array_of_statuses[]);
#endif /* MPIR_REQUEST_H_INCLUDED */