/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ /* * (C) 2001 by Argonne National Laboratory. * See COPYRIGHT in top-level directory. * */ #ifndef MPIR_REQUEST_H_INCLUDED #define MPIR_REQUEST_H_INCLUDED #include "mpir_process.h" /* === BEGIN_MPI_T_CVAR_INFO_BLOCK === categories : - name : REQUEST description : A category for requests mangement variables cvars: - name : MPIR_CVAR_REQUEST_POLL_FREQ category : REQUEST type : int default : 8 class : device verbosity : MPI_T_VERBOSITY_USER_BASIC scope : MPI_T_SCOPE_LOCAL description : >- How frequent to poll during completion calls (wait/test) in terms of number of processed requests before polling. - name : MPIR_CVAR_REQUEST_BATCH_SIZE category : REQUEST type : int default : 64 class : device verbosity : MPI_T_VERBOSITY_USER_BASIC scope : MPI_T_SCOPE_LOCAL description : >- The number of requests to make completion as a batch in MPI_Waitall and MPI_Testall implementation. A large number is likely to cause more cache misses. === END_MPI_T_CVAR_INFO_BLOCK === */ /* NOTE-R1: MPIR_REQUEST_KIND__MPROBE signifies that this is a request created by * MPI_Mprobe or MPI_Improbe. Since we use MPI_Request objects as our * MPI_Message objects, we use this separate kind in order to provide stronger * error checking. Once a message (backed by a request) is promoted to a real * request by calling MPI_Mrecv/MPI_Imrecv, we actually modify the kind to be * MPIR_REQUEST_KIND__RECV in order to keep completion logic as simple as possible. */ /*E MPIR_Request_kind - Kinds of MPI Requests Module: Request-DS E*/ typedef enum MPIR_Request_kind_t { MPIR_REQUEST_KIND__UNDEFINED, MPIR_REQUEST_KIND__SEND, MPIR_REQUEST_KIND__RECV, MPIR_REQUEST_KIND__PREQUEST_SEND, MPIR_REQUEST_KIND__PREQUEST_RECV, MPIR_REQUEST_KIND__GREQUEST, MPIR_REQUEST_KIND__COLL, MPIR_REQUEST_KIND__MPROBE, /* see NOTE-R1 */ MPIR_REQUEST_KIND__RMA, MPIR_REQUEST_KIND__LAST #ifdef MPID_REQUEST_KIND_DECL , MPID_REQUEST_KIND_DECL #endif } MPIR_Request_kind_t; /* This currently defines a single structure type for all requests. Eventually, we may want a union type, as used in MPICH-1 */ /* Typedefs for Fortran generalized requests */ typedef void (MPIR_Grequest_f77_cancel_function) (void *, MPI_Fint *, MPI_Fint *); typedef void (MPIR_Grequest_f77_free_function) (void *, MPI_Fint *); typedef void (MPIR_Grequest_f77_query_function) (void *, MPI_Fint *, MPI_Fint *); /* vtable-ish structure holding generalized request function pointers and other * state. Saves ~48 bytes in pt2pt requests on many platforms. */ struct MPIR_Grequest_fns { MPI_Grequest_cancel_function *cancel_fn; MPI_Grequest_free_function *free_fn; MPI_Grequest_query_function *query_fn; MPIX_Grequest_poll_function *poll_fn; MPIX_Grequest_wait_function *wait_fn; void *grequest_extra_state; MPIX_Grequest_class greq_class; MPIR_Lang_t greq_lang; /* language that defined * the generalize req */ }; typedef struct MPIR_Grequest_class { MPIR_OBJECT_HEADER; /* adds handle and ref_count fields */ MPI_Grequest_query_function *query_fn; MPI_Grequest_free_function *free_fn; MPI_Grequest_cancel_function *cancel_fn; MPIX_Grequest_poll_function *poll_fn; MPIX_Grequest_wait_function *wait_fn; struct MPIR_Grequest_class *next; } MPIR_Grequest_class; #define MPIR_Request_extract_status(request_ptr_, status_) \ { \ if ((status_) != MPI_STATUS_IGNORE) \ { \ int error__; \ \ /* According to the MPI 1.1 standard page 22 lines 9-12, \ * the MPI_ERROR field may not be modified except by the \ * functions in section 3.7.5 which return \ * MPI_ERR_IN_STATUSES (MPI_Wait{all,some} and \ * MPI_Test{all,some}). */ \ error__ = (status_)->MPI_ERROR; \ *(status_) = (request_ptr_)->status; \ (status_)->MPI_ERROR = error__; \ } \ } #define MPIR_Request_is_complete(req_) (MPIR_cc_is_complete((req_)->cc_ptr)) /*S MPIR_Request - Description of the Request data structure Module: Request-DS Notes: If it is necessary to remember the MPI datatype, this information is saved within the device-specific fields provided by 'MPID_DEV_REQUEST_DECL'. Requests come in many flavors, as stored in the 'kind' field. It is expected that each kind of request will have its own structure type (e.g., 'MPIR_Request_send_t') that extends the 'MPIR_Request'. S*/ struct MPIR_Request { MPIR_OBJECT_HEADER; /* adds handle and ref_count fields */ MPIR_Request_kind_t kind; /* pointer to the completion counter. This is necessary for the * case when an operation is described by a list of requests */ MPIR_cc_t *cc_ptr; /* the actual completion counter. Ensure cc and status are in the * same cache line, assuming the cache line size is a multiple of * 32 bytes and 32-bit integers */ MPIR_cc_t cc; #ifdef MPICH_THREAD_USE_MDTA /* Synchronization variable for wait/signal */ MPIR_Thread_sync_t *sync; #endif /* completion notification counter: this must be decremented by * the request completion routine, when the completion count hits * zero. this counter allows us to keep track of the completion * of multiple requests in a single place. */ MPIR_cc_t *completion_notification; /* A comm is needed to find the proper error handler */ MPIR_Comm *comm; /* Status is needed for wait/test/recv */ MPI_Status status; union { struct { struct MPIR_Grequest_fns *greq_fns; } ureq; /* kind : MPIR_REQUEST_KIND__GREQUEST */ struct { MPIR_Errflag_t errflag; MPII_Coll_req_t coll; } nbc; /* kind : MPIR_REQUEST_KIND__COLL */ #if defined HAVE_DEBUGGER_SUPPORT struct { struct MPIR_Sendq *dbg_next; } send; /* kind : MPID_REQUEST_SEND */ #endif /* HAVE_DEBUGGER_SUPPORT */ struct { #if defined HAVE_DEBUGGER_SUPPORT struct MPIR_Sendq *dbg_next; #endif /* HAVE_DEBUGGER_SUPPORT */ /* Persistent requests have their own "real" requests */ struct MPIR_Request *real_request; } persist; /* kind : MPID_PREQUEST_SEND or MPID_PREQUEST_RECV */ } u; /* Other, device-specific information */ #ifdef MPID_DEV_REQUEST_DECL MPID_DEV_REQUEST_DECL #endif }; #define MPIR_REQUEST_PREALLOC 8 extern MPIR_Object_alloc_t MPIR_Request_mem; /* Preallocated request objects */ extern MPIR_Request MPIR_Request_direct[]; static inline int MPIR_Request_is_persistent(MPIR_Request * req_ptr) { return (req_ptr->kind == MPIR_REQUEST_KIND__PREQUEST_SEND || req_ptr->kind == MPIR_REQUEST_KIND__PREQUEST_RECV); } /* Return whether a request is active. * A persistent request and the handle to it are "inactive" * if the request is not associated with any ongoing communication. * A handle is "active" if it is neither null nor "inactive". */ static inline int MPIR_Request_is_active(MPIR_Request * req_ptr) { if (req_ptr == NULL) return 0; else return (!MPIR_Request_is_persistent(req_ptr) || (req_ptr)->u.persist.real_request != NULL); } #define MPIR_REQUESTS_PROPERTY__NO_NULL (1 << 1) #define MPIR_REQUESTS_PROPERTY__NO_GREQUESTS (1 << 2) #define MPIR_REQUESTS_PROPERTY__SEND_RECV_ONLY (1 << 3) #define MPIR_REQUESTS_PROPERTY__OPT_ALL (MPIR_REQUESTS_PROPERTY__NO_NULL \ | MPIR_REQUESTS_PROPERTY__NO_GREQUESTS \ | MPIR_REQUESTS_PROPERTY__SEND_RECV_ONLY) static inline MPIR_Request *MPIR_Request_create(MPIR_Request_kind_t kind) { MPIR_Request *req; req = MPIR_Handle_obj_alloc(&MPIR_Request_mem); if (req != NULL) { MPL_DBG_MSG_P(MPIR_DBG_REQUEST, VERBOSE, "allocated request, handle=0x%08x", req->handle); #ifdef MPICH_DBG_OUTPUT /*MPIR_Assert(HANDLE_GET_MPI_KIND(req->handle) == MPIR_REQUEST); */ if (HANDLE_GET_MPI_KIND(req->handle) != MPIR_REQUEST) { int mpi_errno; mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**invalid_handle", "**invalid_handle %d", req->handle); MPID_Abort(MPIR_Process.comm_world, mpi_errno, -1, NULL); } #endif /* FIXME: This makes request creation expensive. We need to * trim this to the basics, with additional setup for * special-purpose requests (think base class and * inheritance). For example, do we really* want to set the * kind to UNDEFINED? And should the RMA values be set only * for RMA requests? */ MPIR_Object_set_ref(req, 1); req->kind = kind; MPIR_cc_set(&req->cc, 1); req->cc_ptr = &req->cc; req->completion_notification = NULL; req->status.MPI_ERROR = MPI_SUCCESS; MPIR_STATUS_SET_CANCEL_BIT(req->status, FALSE); req->comm = NULL; #ifdef MPICH_THREAD_USE_MDTA req->sync = NULL; #endif switch (kind) { case MPIR_REQUEST_KIND__SEND: MPII_REQUEST_CLEAR_DBG(req); break; case MPIR_REQUEST_KIND__COLL: req->u.nbc.errflag = MPIR_ERR_NONE; break; default: break; } MPID_Request_create_hook(req); } else { /* FIXME: This fails to fail if debugging is turned off */ MPL_DBG_MSG(MPIR_DBG_REQUEST, TYPICAL, "unable to allocate a request"); } return req; } #define MPIR_Request_add_ref(req_p_) \ do { MPIR_Object_add_ref(req_p_); } while (0) #define MPIR_Request_release_ref(req_p_, inuse_) \ do { MPIR_Object_release_ref(req_p_, inuse_); } while (0) MPL_STATIC_INLINE_PREFIX MPIR_Request *MPIR_Request_create_complete(MPIR_Request_kind_t kind) { MPIR_Request *req; #ifdef HAVE_DEBUGGER_SUPPORT req = MPIR_Request_create(kind); MPIR_cc_set(&req->cc, 0); #else req = MPIR_Process.lw_req; MPIR_Request_add_ref(req); #endif return req; } static inline void MPIR_Request_free(MPIR_Request * req) { int inuse; MPIR_Request_release_ref(req, &inuse); /* inform the device that we are decrementing the ref-count on * this request */ MPID_Request_free_hook(req); #ifdef MPICH_THREAD_USE_MDTA /* We signal the possible waiter to complete this request. */ if (req->sync) { MPIR_Thread_sync_signal(req->sync, 0); req->sync = NULL; } #endif if (inuse == 0) { MPL_DBG_MSG_P(MPIR_DBG_REQUEST, VERBOSE, "freeing request, handle=0x%08x", req->handle); #ifdef MPICH_DBG_OUTPUT if (HANDLE_GET_MPI_KIND(req->handle) != MPIR_REQUEST) { int mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**invalid_handle", "**invalid_handle %d", req->handle); MPID_Abort(MPIR_Process.comm_world, mpi_errno, -1, NULL); } if (req->ref_count != 0) { int mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**invalid_refcount", "**invalid_refcount %d", req->ref_count); MPID_Abort(MPIR_Process.comm_world, mpi_errno, -1, NULL); } #endif /* FIXME: We need a better way to handle these so that we do * not always need to initialize these fields and check them * when we destroy a request */ /* FIXME: We need a way to call these routines ONLY when the * related ref count has become zero. */ if (req->comm != NULL) { MPIR_Comm_release(req->comm); } if (req->kind == MPIR_REQUEST_KIND__GREQUEST && req->u.ureq.greq_fns != NULL) { MPL_free(req->u.ureq.greq_fns); } MPID_Request_destroy_hook(req); MPIR_Handle_obj_free(&MPIR_Request_mem, req); } } #ifdef MPICH_THREAD_USE_MDTA MPL_STATIC_INLINE_PREFIX void MPIR_Request_attach_sync(MPIR_Request * req_ptr, MPIR_Thread_sync_t * sync) { req_ptr->sync = sync; if (MPIR_Request_is_persistent(req_ptr)) { req_ptr->u.persist.real_request->sync = sync; } } #endif /* The "fastpath" version of MPIR_Request_completion_processing. It only handles * MPIR_REQUEST_KIND__SEND and MPIR_REQUEST_KIND__RECV kinds, and it does not attempt to * deal with status structures under the assumption that bleeding fast code will * pass either MPI_STATUS_IGNORE or MPI_STATUSES_IGNORE as appropriate. This * routine (or some a variation of it) is an unfortunately necessary stunt to * get high message rates on key benchmarks for high-end systems. */ #undef FUNCNAME #define FUNCNAME MPIR_Request_completion_processing_fastpath #undef FCNAME #define FCNAME MPL_QUOTE(FUNCNAME) MPL_STATIC_INLINE_PREFIX int MPIR_Request_completion_processing_fastpath(MPI_Request * request, MPIR_Request * request_ptr) { int mpi_errno = MPI_SUCCESS; MPIR_Assert(request_ptr->kind == MPIR_REQUEST_KIND__SEND || request_ptr->kind == MPIR_REQUEST_KIND__RECV); if (request_ptr->kind == MPIR_REQUEST_KIND__SEND) { /* FIXME: are Ibsend requests added to the send queue? */ MPII_SENDQ_FORGET(request_ptr); } /* the completion path for SEND and RECV is the same at this time, modulo * the SENDQ hook above */ mpi_errno = request_ptr->status.MPI_ERROR; MPIR_Request_free(request_ptr); *request = MPI_REQUEST_NULL; return mpi_errno; } int MPIR_Request_completion_processing(MPIR_Request *, MPI_Status *, int *); int MPIR_Request_get_error(MPIR_Request *); MPL_STATIC_INLINE_PREFIX int MPID_Request_is_anysource(MPIR_Request *); MPL_STATIC_INLINE_PREFIX int MPID_Comm_AS_enabled(MPIR_Comm *); extern int MPIR_CVAR_ENABLE_FT; /* The following routines are ULFM helpers. */ /* This routine check if the request is "anysource" but the communicator is not, * which happens usually due to a failure of a process in the communicator. */ MPL_STATIC_INLINE_PREFIX int MPIR_Request_is_anysrc_mismatched(MPIR_Request * req_ptr) { return (MPIR_CVAR_ENABLE_FT && !MPIR_Request_is_complete(req_ptr) && MPID_Request_is_anysource(req_ptr) && !MPID_Comm_AS_enabled((req_ptr)->comm)); } /* This routine handle the request when its associated process failed. */ int MPIR_Request_handle_proc_failed(MPIR_Request * request_ptr); /* The following routines perform the callouts to the user routines registered as part of a generalized request. They handle any language binding issues that are necessary. They are used when completing, freeing, cancelling or extracting the status from a generalized request. */ int MPIR_Grequest_cancel(MPIR_Request * request_ptr, int complete); int MPIR_Grequest_query(MPIR_Request * request_ptr); int MPIR_Grequest_free(MPIR_Request * request_ptr); void MPIR_Grequest_complete(MPIR_Request * request_ptr); int MPIR_Grequest_start(MPI_Grequest_query_function * query_fn, MPI_Grequest_free_function * free_fn, MPI_Grequest_cancel_function * cancel_fn, void *extra_state, MPIR_Request ** request_ptr); int MPIX_Grequest_start_impl(MPI_Grequest_query_function *, MPI_Grequest_free_function *, MPI_Grequest_cancel_function *, MPIX_Grequest_poll_function *, MPIX_Grequest_wait_function *, void *, MPIR_Request **); /* These routines below are helpers for the Extended generalized requests. */ MPL_STATIC_INLINE_PREFIX int MPIR_Request_has_poll_fn(MPIR_Request * request_ptr) { return (request_ptr->kind == MPIR_REQUEST_KIND__GREQUEST && request_ptr->u.ureq.greq_fns != NULL && request_ptr->u.ureq.greq_fns->poll_fn != NULL); } MPL_STATIC_INLINE_PREFIX int MPIR_Request_has_wait_fn(MPIR_Request * request_ptr) { return (request_ptr->kind == MPIR_REQUEST_KIND__GREQUEST && request_ptr->u.ureq.greq_fns != NULL && request_ptr->u.ureq.greq_fns->wait_fn != NULL); } MPL_STATIC_INLINE_PREFIX int MPIR_Grequest_wait(MPIR_Request * request_ptr, MPI_Status * status) { return (request_ptr->u.ureq.greq_fns->wait_fn) (1, &request_ptr->u.ureq.greq_fns-> grequest_extra_state, 0, status); } MPL_STATIC_INLINE_PREFIX int MPIR_Grequest_poll(MPIR_Request * request_ptr, MPI_Status * status) { return (request_ptr->u.ureq.greq_fns->poll_fn) (request_ptr->u.ureq. greq_fns->grequest_extra_state, status); } int MPIR_Test_impl(MPIR_Request * request, int *flag, MPI_Status * status); int MPIR_Testall_impl(int count, MPIR_Request * request_ptrs[], int *flag, MPI_Status array_of_statuses[], int requests_property); int MPIR_Testany_impl(int count, MPIR_Request * request_ptrs[], int *indx, int *flag, MPI_Status * status); int MPIR_Testsome_impl(int incount, MPIR_Request * request_ptrs[], int *outcount, int array_of_indices[], MPI_Status array_of_statuses[]); int MPIR_Wait_impl(MPIR_Request * request_ptr, MPI_Status * status); int MPIR_Waitall_impl(int count, MPIR_Request * request_ptrs[], MPI_Status array_of_statuses[], int request_properties); int MPIR_Waitany_impl(int count, MPIR_Request * request_ptrs[], int *indx, MPI_Status * status); int MPIR_Waitsome_impl(int incount, MPIR_Request * request_ptrs[], int *outcount, int array_of_indices[], MPI_Status array_of_statuses[]); int MPIR_Test(MPI_Request * request, int *flag, MPI_Status * status); int MPIR_Testall(int count, MPI_Request array_of_requests[], int *flag, MPI_Status array_of_statuses[]); int MPIR_Wait(MPI_Request * request, MPI_Status * status); int MPIR_Waitall(int count, MPI_Request array_of_requests[], MPI_Status array_of_statuses[]); #endif /* MPIR_REQUEST_H_INCLUDED */