/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
* (C) 2006 by Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*
* Portions of this code were written by Intel Corporation.
* Copyright (C) 2011-2016 Intel Corporation. Intel provides this material
* to Argonne National Laboratory subject to Software Grant and Corporate
* Contributor License Agreement dated February 8, 2012.
*/
#ifndef CH4R_RMA_H_INCLUDED
#define CH4R_RMA_H_INCLUDED
#include "ch4_impl.h"
extern MPIR_T_pvar_timer_t PVAR_TIMER_rma_amhdr_set ATTRIBUTE((unused));
#undef FUNCNAME
#define FUNCNAME MPIDI_do_put
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
static inline int MPIDI_do_put(const void *origin_addr,
int origin_count,
MPI_Datatype origin_datatype,
int target_rank,
MPI_Aint target_disp,
int target_count,
MPI_Datatype target_datatype, MPIR_Win * win,
MPIR_Request ** sreq_ptr)
{
int mpi_errno = MPI_SUCCESS, n_iov, c;
MPIR_Request *sreq = NULL;
MPIDI_CH4U_put_msg_t am_hdr;
uint64_t offset;
size_t data_sz;
MPI_Aint last, num_iov;
MPIR_Segment *segment_ptr;
struct iovec *dt_iov, am_iov[2];
size_t am_hdr_max_size;
#ifndef MPIDI_CH4_DIRECT_NETMOD
int is_local;
#endif
MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_DO_PUT);
MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_DO_PUT);
#ifndef MPIDI_CH4_DIRECT_NETMOD
is_local = MPIDI_CH4_rank_is_local(target_rank, win->comm_ptr);
#endif
MPIDI_CH4U_RMA_OP_CHECK_SYNC(target_rank, win);
if (target_rank == MPI_PROC_NULL)
goto fn_exit;
MPIDI_Datatype_check_size(origin_datatype, origin_count, data_sz);
if (data_sz == 0)
goto fn_exit;
if (target_rank == win->comm_ptr->rank) {
offset = win->disp_unit * target_disp;
mpi_errno = MPIR_Localcopy(origin_addr,
origin_count,
origin_datatype,
(char *) win->base + offset, target_count, target_datatype);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
goto fn_exit;
}
/* Only create request when issuing is not completed.
* We initialize two ref_count for progress engine and request-based OP,
* then put needs to free the second ref_count.*/
sreq = MPIDI_CH4I_am_request_create(MPIR_REQUEST_KIND__RMA, 2);
MPIR_ERR_CHKANDSTMT(sreq == NULL, mpi_errno, MPIX_ERR_NOREQ, goto fn_fail, "**nomemreq");
MPIDI_CH4U_REQUEST(sreq, req->preq.win_ptr) = win;
MPIR_cc_incr(sreq->cc_ptr, &c);
MPIR_T_PVAR_TIMER_START(RMA, rma_amhdr_set);
am_hdr.src_rank = win->comm_ptr->rank;
am_hdr.target_disp = target_disp;
am_hdr.count = target_count;
am_hdr.datatype = target_datatype;
am_hdr.preq_ptr = (uint64_t) sreq;
am_hdr.win_id = MPIDI_CH4U_WIN(win, win_id);
/* Increase local and remote completion counters and set the local completion
* counter in request, thus it can be decreased at request completion. */
MPIDI_win_cmpl_cnts_incr(win, target_rank, &sreq->completion_notification);
MPIDI_CH4U_REQUEST(sreq, rank) = target_rank;
if (HANDLE_GET_KIND(target_datatype) == HANDLE_KIND_BUILTIN) {
am_hdr.n_iov = 0;
MPIR_T_PVAR_TIMER_END(RMA, rma_amhdr_set);
MPIDI_CH4U_REQUEST(sreq, req->preq.dt_iov) = NULL;
#ifndef MPIDI_CH4_DIRECT_NETMOD
if (is_local)
mpi_errno = MPIDI_SHM_am_isend(target_rank, win->comm_ptr, MPIDI_CH4U_PUT_REQ,
&am_hdr, sizeof(am_hdr), origin_addr,
origin_count, origin_datatype, sreq);
else
#endif
{
mpi_errno = MPIDI_NM_am_isend(target_rank, win->comm_ptr, MPIDI_CH4U_PUT_REQ,
&am_hdr, sizeof(am_hdr), origin_addr,
origin_count, origin_datatype, sreq);
}
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
goto fn_exit;
}
segment_ptr = MPIR_Segment_alloc();
MPIR_Assert(segment_ptr);
MPIR_Segment_init(NULL, target_count, target_datatype, segment_ptr);
last = data_sz;
MPIR_Segment_count_contig_blocks(segment_ptr, 0, &last, &num_iov);
n_iov = (int) num_iov;
MPIR_Assert(n_iov > 0);
am_hdr.n_iov = n_iov;
dt_iov = (struct iovec *) MPL_malloc(n_iov * sizeof(struct iovec), MPL_MEM_BUFFER);
MPIR_Assert(dt_iov);
last = data_sz;
MPIR_Segment_pack_vector(segment_ptr, 0, &last, dt_iov, &n_iov);
MPIR_Assert(last == (MPI_Aint) data_sz);
MPL_free(segment_ptr);
am_iov[0].iov_base = &am_hdr;
am_iov[0].iov_len = sizeof(am_hdr);
am_iov[1].iov_base = dt_iov;
am_iov[1].iov_len = sizeof(struct iovec) * am_hdr.n_iov;
MPIR_T_PVAR_TIMER_END(RMA, rma_amhdr_set);
MPIDI_CH4U_REQUEST(sreq, req->preq.dt_iov) = dt_iov;
#ifndef MPIDI_CH4_DIRECT_NETMOD
am_hdr_max_size = is_local ? MPIDI_SHM_am_hdr_max_sz() : MPIDI_NM_am_hdr_max_sz();
#else
am_hdr_max_size = MPIDI_NM_am_hdr_max_sz();
#endif
if ((am_iov[0].iov_len + am_iov[1].iov_len) <= am_hdr_max_size) {
#ifndef MPIDI_CH4_DIRECT_NETMOD
if (is_local)
mpi_errno = MPIDI_SHM_am_isendv(target_rank, win->comm_ptr, MPIDI_CH4U_PUT_REQ,
&am_iov[0], 2, origin_addr, origin_count,
origin_datatype, sreq);
else
#endif
{
mpi_errno = MPIDI_NM_am_isendv(target_rank, win->comm_ptr, MPIDI_CH4U_PUT_REQ,
&am_iov[0], 2, origin_addr, origin_count,
origin_datatype, sreq);
}
} else {
MPIDI_CH4U_REQUEST(sreq, req->preq.origin_addr) = (void *) origin_addr;
MPIDI_CH4U_REQUEST(sreq, req->preq.origin_count) = origin_count;
MPIDI_CH4U_REQUEST(sreq, req->preq.origin_datatype) = origin_datatype;
MPIR_Datatype_add_ref_if_not_builtin(origin_datatype);
#ifndef MPIDI_CH4_DIRECT_NETMOD
if (is_local)
mpi_errno = MPIDI_SHM_am_isend(target_rank, win->comm_ptr, MPIDI_CH4U_PUT_IOV_REQ,
&am_hdr, sizeof(am_hdr), am_iov[1].iov_base,
am_iov[1].iov_len, MPI_BYTE, sreq);
else
#endif
{
mpi_errno = MPIDI_NM_am_isend(target_rank, win->comm_ptr, MPIDI_CH4U_PUT_IOV_REQ,
&am_hdr, sizeof(am_hdr), am_iov[1].iov_base,
am_iov[1].iov_len, MPI_BYTE, sreq);
}
}
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
fn_exit:
*sreq_ptr = sreq;
MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_DO_PUT);
return mpi_errno;
fn_fail:
goto fn_exit;
}
#undef FUNCNAME
#define FUNCNAME MPIDI_do_get
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
static inline int MPIDI_do_get(void *origin_addr,
int origin_count,
MPI_Datatype origin_datatype,
int target_rank,
MPI_Aint target_disp,
int target_count,
MPI_Datatype target_datatype, MPIR_Win * win,
MPIR_Request ** sreq_ptr)
{
int mpi_errno = MPI_SUCCESS, n_iov, c;
size_t offset;
MPIR_Request *sreq = NULL;
MPIDI_CH4U_get_req_msg_t am_hdr;
size_t data_sz;
MPI_Aint last, num_iov;
MPIR_Segment *segment_ptr;
struct iovec *dt_iov;
#ifndef MPIDI_CH4_DIRECT_NETMOD
int is_local;
#endif
MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_DO_GET);
MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_DO_GET);
#ifndef MPIDI_CH4_DIRECT_NETMOD
is_local = MPIDI_CH4_rank_is_local(target_rank, win->comm_ptr);
#endif
MPIDI_CH4U_RMA_OP_CHECK_SYNC(target_rank, win);
if (target_rank == MPI_PROC_NULL)
goto fn_exit;
MPIDI_Datatype_check_size(origin_datatype, origin_count, data_sz);
if (data_sz == 0)
goto fn_exit;
if (target_rank == win->comm_ptr->rank) {
offset = win->disp_unit * target_disp;
mpi_errno = MPIR_Localcopy((char *) win->base + offset,
target_count,
target_datatype, origin_addr, origin_count, origin_datatype);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
goto fn_exit;
}
/* Only create request when issuing is not completed.
* We initialize two ref_count for progress engine and request-based OP,
* then get needs to free the second ref_count.*/
sreq = MPIDI_CH4I_am_request_create(MPIR_REQUEST_KIND__RMA, 2);
MPIR_ERR_CHKANDSTMT(sreq == NULL, mpi_errno, MPIX_ERR_NOREQ, goto fn_fail, "**nomemreq");
MPIDI_CH4U_REQUEST(sreq, req->greq.win_ptr) = win;
MPIDI_CH4U_REQUEST(sreq, req->greq.addr) = (uint64_t) ((char *) origin_addr);
MPIDI_CH4U_REQUEST(sreq, req->greq.count) = origin_count;
MPIDI_CH4U_REQUEST(sreq, req->greq.datatype) = origin_datatype;
MPIDI_CH4U_REQUEST(sreq, rank) = target_rank;
MPIR_cc_incr(sreq->cc_ptr, &c);
MPIR_T_PVAR_TIMER_START(RMA, rma_amhdr_set);
am_hdr.target_disp = target_disp;
am_hdr.count = target_count;
am_hdr.datatype = target_datatype;
am_hdr.greq_ptr = (uint64_t) sreq;
am_hdr.win_id = MPIDI_CH4U_WIN(win, win_id);
am_hdr.src_rank = win->comm_ptr->rank;
/* Increase local and remote completion counters and set the local completion
* counter in request, thus it can be decreased at request completion. */
MPIDI_win_cmpl_cnts_incr(win, target_rank, &sreq->completion_notification);
if (HANDLE_GET_KIND(target_datatype) == HANDLE_KIND_BUILTIN) {
am_hdr.n_iov = 0;
MPIR_T_PVAR_TIMER_END(RMA, rma_amhdr_set);
MPIDI_CH4U_REQUEST(sreq, req->greq.dt_iov) = NULL;
#ifndef MPIDI_CH4_DIRECT_NETMOD
if (is_local)
mpi_errno = MPIDI_SHM_am_isend(target_rank, win->comm_ptr,
MPIDI_CH4U_GET_REQ, &am_hdr, sizeof(am_hdr),
NULL, 0, MPI_DATATYPE_NULL, sreq);
else
#endif
{
mpi_errno = MPIDI_NM_am_isend(target_rank, win->comm_ptr,
MPIDI_CH4U_GET_REQ, &am_hdr, sizeof(am_hdr),
NULL, 0, MPI_DATATYPE_NULL, sreq);
}
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
goto fn_exit;
}
segment_ptr = MPIR_Segment_alloc();
MPIR_Assert(segment_ptr);
MPIR_Segment_init(NULL, target_count, target_datatype, segment_ptr);
last = data_sz;
MPIR_Segment_count_contig_blocks(segment_ptr, 0, &last, &num_iov);
n_iov = (int) num_iov;
MPIR_Assert(n_iov > 0);
am_hdr.n_iov = n_iov;
dt_iov = (struct iovec *) MPL_malloc(n_iov * sizeof(struct iovec), MPL_MEM_BUFFER);
MPIR_Assert(dt_iov);
last = data_sz;
MPIR_Segment_pack_vector(segment_ptr, 0, &last, dt_iov, &n_iov);
MPIR_Assert(last == (MPI_Aint) data_sz);
MPL_free(segment_ptr);
MPIR_T_PVAR_TIMER_END(RMA, rma_amhdr_set);
MPIDI_CH4U_REQUEST(sreq, req->greq.dt_iov) = dt_iov;
#ifndef MPIDI_CH4_DIRECT_NETMOD
if (is_local)
mpi_errno = MPIDI_SHM_am_isend(target_rank, win->comm_ptr, MPIDI_CH4U_GET_REQ,
&am_hdr, sizeof(am_hdr), dt_iov,
sizeof(struct iovec) * am_hdr.n_iov, MPI_BYTE, sreq);
else
#endif
{
mpi_errno = MPIDI_NM_am_isend(target_rank, win->comm_ptr, MPIDI_CH4U_GET_REQ,
&am_hdr, sizeof(am_hdr), dt_iov,
sizeof(struct iovec) * am_hdr.n_iov, MPI_BYTE, sreq);
}
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
fn_exit:
*sreq_ptr = sreq;
MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_DO_GET);
return mpi_errno;
fn_fail:
goto fn_exit;
}
#undef FUNCNAME
#define FUNCNAME MPIDI_do_accumulate
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
MPL_STATIC_INLINE_PREFIX int MPIDI_do_accumulate(const void *origin_addr,
int origin_count,
MPI_Datatype origin_datatype,
int target_rank,
MPI_Aint target_disp,
int target_count,
MPI_Datatype target_datatype,
MPI_Op op, MPIR_Win * win,
MPIR_Request ** sreq_ptr)
{
int mpi_errno = MPI_SUCCESS, c, n_iov;
MPIR_Request *sreq = NULL;
size_t basic_type_size;
MPIDI_CH4U_acc_req_msg_t am_hdr;
uint64_t data_sz, target_data_sz;
MPI_Aint last, num_iov;
MPIR_Segment *segment_ptr;
struct iovec *dt_iov, am_iov[2];
MPIR_Datatype *dt_ptr;
int am_hdr_max_sz;
#ifndef MPIDI_CH4_DIRECT_NETMOD
int is_local;
#endif
MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_DO_ACCUMULATE);
MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_DO_ACCUMULATE);
#ifndef MPIDI_CH4_DIRECT_NETMOD
is_local = MPIDI_CH4_rank_is_local(target_rank, win->comm_ptr);
#endif
MPIDI_CH4U_RMA_OP_CHECK_SYNC(target_rank, win);
if (target_rank == MPI_PROC_NULL)
goto fn_exit;
MPIDI_Datatype_get_size_dt_ptr(origin_count, origin_datatype, data_sz, dt_ptr);
MPIDI_Datatype_check_size(target_datatype, target_count, target_data_sz);
if (data_sz == 0 || target_data_sz == 0) {
goto fn_exit;
}
/* Only create request when issuing is not completed.
* We initialize two ref_count for progress engine and request-based OP,
* then acc needs to free the second ref_count.*/
sreq = MPIDI_CH4I_am_request_create(MPIR_REQUEST_KIND__RMA, 2);
MPIR_ERR_CHKANDSTMT(sreq == NULL, mpi_errno, MPIX_ERR_NOREQ, goto fn_fail, "**nomemreq");
MPIDI_CH4U_REQUEST(sreq, req->areq.win_ptr) = win;
MPIR_cc_incr(sreq->cc_ptr, &c);
MPIR_T_PVAR_TIMER_START(RMA, rma_amhdr_set);
am_hdr.req_ptr = (uint64_t) sreq;
am_hdr.origin_count = origin_count;
if (HANDLE_GET_KIND(origin_datatype) == HANDLE_KIND_BUILTIN) {
am_hdr.origin_datatype = origin_datatype;
} else {
am_hdr.origin_datatype = (dt_ptr) ? dt_ptr->basic_type : MPI_DATATYPE_NULL;
MPIR_Datatype_get_size_macro(am_hdr.origin_datatype, basic_type_size);
am_hdr.origin_count = (basic_type_size > 0) ? data_sz / basic_type_size : 0;
}
am_hdr.target_count = target_count;
am_hdr.target_datatype = target_datatype;
am_hdr.target_disp = target_disp;
am_hdr.op = op;
am_hdr.win_id = MPIDI_CH4U_WIN(win, win_id);
am_hdr.src_rank = win->comm_ptr->rank;
/* Increase local and remote completion counters and set the local completion
* counter in request, thus it can be decreased at request completion. */
MPIDI_win_cmpl_cnts_incr(win, target_rank, &sreq->completion_notification);
/* Increase remote completion counter for acc. */
MPIDI_win_remote_acc_cmpl_cnt_incr(win, target_rank);
MPIDI_CH4U_REQUEST(sreq, rank) = target_rank;
MPIDI_CH4U_REQUEST(sreq, req->areq.data_sz) = data_sz;
if (HANDLE_GET_KIND(target_datatype) == HANDLE_KIND_BUILTIN) {
am_hdr.n_iov = 0;
MPIR_T_PVAR_TIMER_END(RMA, rma_amhdr_set);
MPIDI_CH4U_REQUEST(sreq, req->areq.dt_iov) = NULL;
#ifndef MPIDI_CH4_DIRECT_NETMOD
if (is_local)
mpi_errno = MPIDI_SHM_am_isend(target_rank, win->comm_ptr, MPIDI_CH4U_ACC_REQ,
&am_hdr, sizeof(am_hdr), origin_addr,
(op == MPI_NO_OP) ? 0 : origin_count, origin_datatype,
sreq);
else
#endif
{
mpi_errno = MPIDI_NM_am_isend(target_rank, win->comm_ptr, MPIDI_CH4U_ACC_REQ,
&am_hdr, sizeof(am_hdr), origin_addr,
(op == MPI_NO_OP) ? 0 : origin_count, origin_datatype,
sreq);
}
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
goto fn_exit;
}
MPIDI_Datatype_get_size_dt_ptr(target_count, target_datatype, data_sz, dt_ptr);
am_hdr.target_datatype = dt_ptr->basic_type;
am_hdr.target_count = dt_ptr->n_builtin_elements;
segment_ptr = MPIR_Segment_alloc();
MPIR_Assert(segment_ptr);
MPIR_Segment_init(NULL, target_count, target_datatype, segment_ptr);
last = data_sz;
MPIR_Segment_count_contig_blocks(segment_ptr, 0, &last, &num_iov);
n_iov = (int) num_iov;
MPIR_Assert(n_iov > 0);
am_hdr.n_iov = n_iov;
dt_iov = (struct iovec *) MPL_malloc(n_iov * sizeof(struct iovec), MPL_MEM_BUFFER);
MPIR_Assert(dt_iov);
last = data_sz;
MPIR_Segment_pack_vector(segment_ptr, 0, &last, dt_iov, &n_iov);
MPIR_Assert(last == (MPI_Aint) data_sz);
MPL_free(segment_ptr);
am_iov[0].iov_base = &am_hdr;
am_iov[0].iov_len = sizeof(am_hdr);
am_iov[1].iov_base = dt_iov;
am_iov[1].iov_len = sizeof(struct iovec) * am_hdr.n_iov;
MPIR_T_PVAR_TIMER_END(RMA, rma_amhdr_set);
MPIDI_CH4U_REQUEST(sreq, req->areq.dt_iov) = dt_iov;
#ifndef MPIDI_CH4_DIRECT_NETMOD
am_hdr_max_sz = is_local ? MPIDI_SHM_am_hdr_max_sz() : MPIDI_NM_am_hdr_max_sz();
#else
am_hdr_max_sz = MPIDI_NM_am_hdr_max_sz();
#endif
if ((am_iov[0].iov_len + am_iov[1].iov_len) <= am_hdr_max_sz) {
#ifndef MPIDI_CH4_DIRECT_NETMOD
if (is_local)
mpi_errno = MPIDI_SHM_am_isendv(target_rank, win->comm_ptr, MPIDI_CH4U_ACC_REQ,
&am_iov[0], 2, origin_addr,
(op == MPI_NO_OP) ? 0 : origin_count, origin_datatype,
sreq);
else
#endif
{
mpi_errno = MPIDI_NM_am_isendv(target_rank, win->comm_ptr, MPIDI_CH4U_ACC_REQ,
&am_iov[0], 2, origin_addr,
(op == MPI_NO_OP) ? 0 : origin_count, origin_datatype,
sreq);
}
} else {
MPIDI_CH4U_REQUEST(sreq, req->areq.origin_addr) = (void *) origin_addr;
MPIDI_CH4U_REQUEST(sreq, req->areq.origin_count) = origin_count;
MPIDI_CH4U_REQUEST(sreq, req->areq.origin_datatype) = origin_datatype;
MPIR_Datatype_add_ref_if_not_builtin(origin_datatype);
#ifndef MPIDI_CH4_DIRECT_NETMOD
if (is_local)
mpi_errno = MPIDI_SHM_am_isend(target_rank, win->comm_ptr, MPIDI_CH4U_ACC_IOV_REQ,
&am_hdr, sizeof(am_hdr), am_iov[1].iov_base,
am_iov[1].iov_len, MPI_BYTE, sreq);
else
#endif
{
mpi_errno = MPIDI_NM_am_isend(target_rank, win->comm_ptr, MPIDI_CH4U_ACC_IOV_REQ,
&am_hdr, sizeof(am_hdr), am_iov[1].iov_base,
am_iov[1].iov_len, MPI_BYTE, sreq);
}
}
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
fn_exit:
*sreq_ptr = sreq;
MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_DO_ACCUMULATE);
return mpi_errno;
fn_fail:
goto fn_exit;
}
#undef FUNCNAME
#define FUNCNAME MPIDI_do_get_accumulate
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
MPL_STATIC_INLINE_PREFIX int MPIDI_do_get_accumulate(const void *origin_addr,
int origin_count,
MPI_Datatype origin_datatype,
void *result_addr,
int result_count,
MPI_Datatype result_datatype,
int target_rank,
MPI_Aint target_disp,
int target_count,
MPI_Datatype target_datatype,
MPI_Op op, MPIR_Win * win,
MPIR_Request ** sreq_ptr)
{
int mpi_errno = MPI_SUCCESS, c, n_iov;
MPIR_Request *sreq = NULL;
size_t basic_type_size;
MPIDI_CH4U_get_acc_req_msg_t am_hdr;
uint64_t data_sz, result_data_sz, target_data_sz;
MPI_Aint last, num_iov;
MPIR_Segment *segment_ptr;
struct iovec *dt_iov, am_iov[2];
MPIR_Datatype *dt_ptr;
int am_hdr_max_sz;
#ifndef MPIDI_CH4_DIRECT_NETMOD
int is_local;
#endif
MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_DO_GET_ACCUMULATE);
MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_DO_GET_ACCUMULATE);
#ifndef MPIDI_CH4_DIRECT_NETMOD
is_local = MPIDI_CH4_rank_is_local(target_rank, win->comm_ptr);
#endif
MPIDI_CH4U_RMA_OP_CHECK_SYNC(target_rank, win);
if (target_rank == MPI_PROC_NULL)
goto fn_exit;
MPIDI_Datatype_get_size_dt_ptr(origin_count, origin_datatype, data_sz, dt_ptr);
MPIDI_Datatype_check_size(target_datatype, target_count, target_data_sz);
MPIDI_Datatype_check_size(result_datatype, result_count, result_data_sz);
if (target_data_sz == 0 || (data_sz == 0 && result_data_sz == 0)) {
goto fn_exit;
}
/* Only create request when issuing is not completed.
* We initialize two ref_count for progress engine and request-based OP,
* then get_acc needs to free the second ref_count.*/
sreq = MPIDI_CH4I_am_request_create(MPIR_REQUEST_KIND__RMA, 2);
MPIR_ERR_CHKANDSTMT(sreq == NULL, mpi_errno, MPIX_ERR_NOREQ, goto fn_fail, "**nomemreq");
MPIDI_CH4U_REQUEST(sreq, req->areq.win_ptr) = win;
MPIDI_CH4U_REQUEST(sreq, req->areq.result_addr) = result_addr;
MPIDI_CH4U_REQUEST(sreq, req->areq.result_count) = result_count;
MPIDI_CH4U_REQUEST(sreq, req->areq.result_datatype) = result_datatype;
MPIR_Datatype_add_ref_if_not_builtin(result_datatype);
MPIR_cc_incr(sreq->cc_ptr, &c);
/* TODO: have common routine for accumulate/get_accumulate */
MPIR_T_PVAR_TIMER_START(RMA, rma_amhdr_set);
am_hdr.req_ptr = (uint64_t) sreq;
am_hdr.origin_count = origin_count;
if (HANDLE_GET_KIND(origin_datatype) == HANDLE_KIND_BUILTIN) {
am_hdr.origin_datatype = origin_datatype;
} else {
am_hdr.origin_datatype = (dt_ptr) ? dt_ptr->basic_type : MPI_DATATYPE_NULL;
MPIR_Datatype_get_size_macro(am_hdr.origin_datatype, basic_type_size);
am_hdr.origin_count = (basic_type_size > 0) ? data_sz / basic_type_size : 0;
}
am_hdr.target_count = target_count;
am_hdr.target_datatype = target_datatype;
am_hdr.target_disp = target_disp;
am_hdr.op = op;
am_hdr.win_id = MPIDI_CH4U_WIN(win, win_id);
am_hdr.src_rank = win->comm_ptr->rank;
am_hdr.result_data_sz = result_data_sz;
/* Increase local and remote completion counters and set the local completion
* counter in request, thus it can be decreased at request completion. */
MPIDI_win_cmpl_cnts_incr(win, target_rank, &sreq->completion_notification);
/* Increase remote completion counter for acc. */
MPIDI_win_remote_acc_cmpl_cnt_incr(win, target_rank);
MPIDI_CH4U_REQUEST(sreq, rank) = target_rank;
MPIDI_CH4U_REQUEST(sreq, req->areq.data_sz) = data_sz;
if (HANDLE_GET_KIND(target_datatype) == HANDLE_KIND_BUILTIN) {
am_hdr.n_iov = 0;
MPIR_T_PVAR_TIMER_END(RMA, rma_amhdr_set);
MPIDI_CH4U_REQUEST(sreq, req->areq.dt_iov) = NULL;
#ifndef MPIDI_CH4_DIRECT_NETMOD
if (is_local)
mpi_errno = MPIDI_SHM_am_isend(target_rank, win->comm_ptr, MPIDI_CH4U_GET_ACC_REQ,
&am_hdr, sizeof(am_hdr), origin_addr,
(op == MPI_NO_OP) ? 0 : origin_count, origin_datatype,
sreq);
else
#endif
{
mpi_errno = MPIDI_NM_am_isend(target_rank, win->comm_ptr, MPIDI_CH4U_GET_ACC_REQ,
&am_hdr, sizeof(am_hdr), origin_addr,
(op == MPI_NO_OP) ? 0 : origin_count, origin_datatype,
sreq);
}
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
goto fn_exit;
}
MPIDI_Datatype_get_size_dt_ptr(target_count, target_datatype, data_sz, dt_ptr);
am_hdr.target_datatype = dt_ptr->basic_type;
am_hdr.target_count = dt_ptr->n_builtin_elements;
segment_ptr = MPIR_Segment_alloc();
MPIR_Assert(segment_ptr);
MPIR_Segment_init(NULL, target_count, target_datatype, segment_ptr);
last = data_sz;
MPIR_Segment_count_contig_blocks(segment_ptr, 0, &last, &num_iov);
n_iov = (int) num_iov;
MPIR_Assert(n_iov > 0);
am_hdr.n_iov = n_iov;
dt_iov = (struct iovec *) MPL_malloc(n_iov * sizeof(struct iovec), MPL_MEM_BUFFER);
MPIR_Assert(dt_iov);
last = data_sz;
MPIR_Segment_pack_vector(segment_ptr, 0, &last, dt_iov, &n_iov);
MPIR_Assert(last == (MPI_Aint) data_sz);
MPL_free(segment_ptr);
am_iov[0].iov_base = &am_hdr;
am_iov[0].iov_len = sizeof(am_hdr);
am_iov[1].iov_base = dt_iov;
am_iov[1].iov_len = sizeof(struct iovec) * am_hdr.n_iov;
MPIR_T_PVAR_TIMER_END(RMA, rma_amhdr_set);
MPIDI_CH4U_REQUEST(sreq, req->areq.dt_iov) = dt_iov;
#ifndef MPIDI_CH4_DIRECT_NETMOD
am_hdr_max_sz = is_local ? MPIDI_SHM_am_hdr_max_sz() : MPIDI_NM_am_hdr_max_sz();
#else
am_hdr_max_sz = MPIDI_NM_am_hdr_max_sz();
#endif
if ((am_iov[0].iov_len + am_iov[1].iov_len) <= am_hdr_max_sz) {
#ifndef MPIDI_CH4_DIRECT_NETMOD
if (is_local)
mpi_errno = MPIDI_SHM_am_isendv(target_rank, win->comm_ptr, MPIDI_CH4U_GET_ACC_REQ,
&am_iov[0], 2, origin_addr,
(op == MPI_NO_OP) ? 0 : origin_count, origin_datatype,
sreq);
else
#endif
{
mpi_errno = MPIDI_NM_am_isendv(target_rank, win->comm_ptr, MPIDI_CH4U_GET_ACC_REQ,
&am_iov[0], 2, origin_addr,
(op == MPI_NO_OP) ? 0 : origin_count, origin_datatype,
sreq);
}
} else {
MPIDI_CH4U_REQUEST(sreq, req->areq.origin_addr) = (void *) origin_addr;
MPIDI_CH4U_REQUEST(sreq, req->areq.origin_count) = origin_count;
MPIDI_CH4U_REQUEST(sreq, req->areq.origin_datatype) = origin_datatype;
MPIR_Datatype_add_ref_if_not_builtin(origin_datatype);
#ifndef MPIDI_CH4_DIRECT_NETMOD
if (is_local)
mpi_errno = MPIDI_SHM_am_isend(target_rank, win->comm_ptr, MPIDI_CH4U_GET_ACC_IOV_REQ,
&am_hdr, sizeof(am_hdr), am_iov[1].iov_base,
am_iov[1].iov_len, MPI_BYTE, sreq);
else
#endif
{
mpi_errno = MPIDI_NM_am_isend(target_rank, win->comm_ptr, MPIDI_CH4U_GET_ACC_IOV_REQ,
&am_hdr, sizeof(am_hdr), am_iov[1].iov_base,
am_iov[1].iov_len, MPI_BYTE, sreq);
}
}
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
fn_exit:
*sreq_ptr = sreq;
MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_DO_GET_ACCUMULATE);
return mpi_errno;
fn_fail:
goto fn_exit;
}
#undef FUNCNAME
#define FUNCNAME MPIDI_CH4U_mpi_put
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
MPL_STATIC_INLINE_PREFIX int MPIDI_CH4U_mpi_put(const void *origin_addr,
int origin_count,
MPI_Datatype origin_datatype,
int target_rank,
MPI_Aint target_disp,
int target_count, MPI_Datatype target_datatype,
MPIR_Win * win)
{
int mpi_errno = MPI_SUCCESS;
MPIR_Request *sreq = NULL;
MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH4U_MPI_PUT);
MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH4U_MPI_PUT);
mpi_errno = MPIDI_do_put(origin_addr, origin_count, origin_datatype,
target_rank, target_disp, target_count, target_datatype, win, &sreq);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
fn_exit:
/* always release my ref, only request-ops track. */
if (sreq != NULL)
MPIR_Request_free(sreq);
MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH4U_MPI_PUT);
return mpi_errno;
fn_fail:
goto fn_exit;
}
#undef FUNCNAME
#define FUNCNAME MPIDI_CH4U_mpi_rput
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
MPL_STATIC_INLINE_PREFIX int MPIDI_CH4U_mpi_rput(const void *origin_addr,
int origin_count,
MPI_Datatype origin_datatype,
int target_rank,
MPI_Aint target_disp,
int target_count,
MPI_Datatype target_datatype,
MPIR_Win * win, MPIR_Request ** request)
{
int mpi_errno = MPI_SUCCESS;
MPIR_Request *sreq = NULL;
MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH4U_MPI_RPUT);
MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH4U_MPI_RPUT);
mpi_errno = MPIDI_do_put(origin_addr, origin_count, origin_datatype,
target_rank, target_disp, target_count, target_datatype, win, &sreq);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
if (sreq == NULL) {
/* create a completed request for user if issuing is completed immediately. */
sreq = MPIR_Request_create(MPIR_REQUEST_KIND__RMA);
MPIR_ERR_CHKANDSTMT((sreq) == NULL, mpi_errno, MPIX_ERR_NOREQ, goto fn_fail, "**nomemreq");
MPIR_Request_add_ref(sreq);
MPID_Request_complete(sreq);
}
fn_exit:
*request = sreq;
MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH4U_MPI_RPUT);
return mpi_errno;
fn_fail:
goto fn_exit;
}
#undef FUNCNAME
#define FUNCNAME MPIDI_CH4U_mpi_get
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
MPL_STATIC_INLINE_PREFIX int MPIDI_CH4U_mpi_get(void *origin_addr,
int origin_count,
MPI_Datatype origin_datatype,
int target_rank,
MPI_Aint target_disp,
int target_count, MPI_Datatype target_datatype,
MPIR_Win * win)
{
int mpi_errno = MPI_SUCCESS;
MPIR_Request *sreq = NULL;
MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH4U_MPI_GET);
MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH4U_MPI_GET);
mpi_errno = MPIDI_do_get(origin_addr, origin_count, origin_datatype,
target_rank, target_disp, target_count, target_datatype, win, &sreq);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
fn_exit:
/* always release my ref, only request-ops track. */
if (sreq != NULL)
MPIR_Request_free(sreq);
MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH4U_MPI_GET);
return mpi_errno;
fn_fail:
goto fn_exit;
}
#undef FUNCNAME
#define FUNCNAME MPIDI_CH4U_mpi_rget
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
MPL_STATIC_INLINE_PREFIX int MPIDI_CH4U_mpi_rget(void *origin_addr,
int origin_count,
MPI_Datatype origin_datatype,
int target_rank,
MPI_Aint target_disp,
int target_count,
MPI_Datatype target_datatype,
MPIR_Win * win, MPIR_Request ** request)
{
int mpi_errno = MPI_SUCCESS;
MPIR_Request *sreq = NULL;
MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH4U_MPI_RGET);
MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH4U_MPI_RGET);
mpi_errno = MPIDI_do_get(origin_addr, origin_count, origin_datatype,
target_rank, target_disp, target_count, target_datatype, win, &sreq);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
if (sreq == NULL) {
/* create a completed request for user if issuing is completed immediately. */
sreq = MPIR_Request_create(MPIR_REQUEST_KIND__RMA);
MPIR_ERR_CHKANDSTMT((sreq) == NULL, mpi_errno, MPIX_ERR_NOREQ, goto fn_fail, "**nomemreq");
MPIR_Request_add_ref(sreq);
MPID_Request_complete(sreq);
}
fn_exit:
*request = sreq;
MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH4U_MPI_RGET);
return mpi_errno;
fn_fail:
goto fn_exit;
}
#undef FUNCNAME
#define FUNCNAME MPIDI_CH4U_mpi_raccumulate
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
MPL_STATIC_INLINE_PREFIX int MPIDI_CH4U_mpi_raccumulate(const void *origin_addr,
int origin_count,
MPI_Datatype origin_datatype,
int target_rank,
MPI_Aint target_disp,
int target_count,
MPI_Datatype target_datatype,
MPI_Op op, MPIR_Win * win,
MPIR_Request ** request)
{
int mpi_errno = MPI_SUCCESS;
MPIR_Request *sreq = NULL;
MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH4U_MPI_RACCUMULATE);
MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH4U_MPI_RACCUMULATE);
mpi_errno = MPIDI_do_accumulate(origin_addr, origin_count, origin_datatype,
target_rank, target_disp, target_count, target_datatype, op,
win, &sreq);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
if (sreq == NULL) {
/* create a completed request for user if issuing is completed immediately. */
sreq = MPIR_Request_create(MPIR_REQUEST_KIND__RMA);
MPIR_ERR_CHKANDSTMT((sreq) == NULL, mpi_errno, MPIX_ERR_NOREQ, goto fn_fail, "**nomemreq");
MPIR_Request_add_ref(sreq);
MPID_Request_complete(sreq);
}
fn_exit:
*request = sreq;
MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH4U_MPI_RACCUMULATE);
return mpi_errno;
fn_fail:
goto fn_exit;
}
#undef FUNCNAME
#define FUNCNAME MPIDI_CH4U_mpi_accumulate
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
MPL_STATIC_INLINE_PREFIX int MPIDI_CH4U_mpi_accumulate(const void *origin_addr,
int origin_count,
MPI_Datatype origin_datatype,
int target_rank,
MPI_Aint target_disp,
int target_count,
MPI_Datatype target_datatype, MPI_Op op,
MPIR_Win * win)
{
int mpi_errno = MPI_SUCCESS;
MPIR_Request *sreq = NULL;
MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH4U_MPI_ACCUMULATE);
MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH4U_MPI_ACCUMULATE);
mpi_errno = MPIDI_do_accumulate(origin_addr, origin_count, origin_datatype,
target_rank, target_disp, target_count, target_datatype, op,
win, &sreq);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
fn_exit:
/* always release my ref, only request-ops track. */
if (sreq != NULL)
MPIR_Request_free(sreq);
MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH4U_MPI_ACCUMULATE);
return mpi_errno;
fn_fail:
goto fn_exit;
}
#undef FUNCNAME
#define FUNCNAME MPIDI_CH4U_mpi_rget_accumulate
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
MPL_STATIC_INLINE_PREFIX int MPIDI_CH4U_mpi_rget_accumulate(const void *origin_addr,
int origin_count,
MPI_Datatype origin_datatype,
void *result_addr,
int result_count,
MPI_Datatype result_datatype,
int target_rank,
MPI_Aint target_disp,
int target_count,
MPI_Datatype target_datatype,
MPI_Op op, MPIR_Win * win,
MPIR_Request ** request)
{
int mpi_errno = MPI_SUCCESS;
MPIR_Request *sreq = NULL;
MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH4U_MPI_RGET_ACCUMULATE);
MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH4U_MPI_RGET_ACCUMULATE);
mpi_errno = MPIDI_do_get_accumulate(origin_addr, origin_count, origin_datatype,
result_addr, result_count, result_datatype,
target_rank, target_disp, target_count, target_datatype, op,
win, &sreq);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
if (sreq == NULL) {
/* create a completed request for user if issuing is completed immediately. */
sreq = MPIR_Request_create(MPIR_REQUEST_KIND__RMA);
MPIR_ERR_CHKANDSTMT((sreq) == NULL, mpi_errno, MPIX_ERR_NOREQ, goto fn_fail, "**nomemreq");
MPIR_Request_add_ref(sreq);
MPID_Request_complete(sreq);
}
fn_exit:
*request = sreq;
MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH4U_MPI_RGET_ACCUMULATE);
return mpi_errno;
fn_fail:
goto fn_exit;
}
#undef FUNCNAME
#define FUNCNAME MPIDI_CH4U_mpi_get_accumulate
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
MPL_STATIC_INLINE_PREFIX int MPIDI_CH4U_mpi_get_accumulate(const void *origin_addr,
int origin_count,
MPI_Datatype origin_datatype,
void *result_addr,
int result_count,
MPI_Datatype result_datatype,
int target_rank,
MPI_Aint target_disp,
int target_count,
MPI_Datatype target_datatype,
MPI_Op op, MPIR_Win * win)
{
int mpi_errno = MPI_SUCCESS;
MPIR_Request *sreq = NULL;
MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH4U_MPI_GET_ACCUMULATE);
MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH4U_MPI_GET_ACCUMULATE);
mpi_errno = MPIDI_do_get_accumulate(origin_addr, origin_count, origin_datatype,
result_addr, result_count, result_datatype,
target_rank, target_disp, target_count, target_datatype, op,
win, &sreq);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
fn_exit:
/* always release my ref, only request-ops track. */
if (sreq != NULL)
MPIR_Request_free(sreq);
MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH4U_MPI_GET_ACCUMULATE);
return mpi_errno;
fn_fail:
goto fn_exit;
}
#undef FUNCNAME
#define FUNCNAME MPIDI_CH4U_mpi_compare_and_swap
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
MPL_STATIC_INLINE_PREFIX int MPIDI_CH4U_mpi_compare_and_swap(const void *origin_addr,
const void *compare_addr,
void *result_addr,
MPI_Datatype datatype,
int target_rank,
MPI_Aint target_disp, MPIR_Win * win)
{
int mpi_errno = MPI_SUCCESS, c;
MPIR_Request *sreq = NULL;
MPIDI_CH4U_cswap_req_msg_t am_hdr;
size_t data_sz;
void *p_data;
MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH4U_MPI_COMPARE_AND_SWAP);
MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH4U_MPI_COMPARE_AND_SWAP);
MPIDI_CH4U_RMA_OP_CHECK_SYNC(target_rank, win);
if (target_rank == MPI_PROC_NULL)
goto fn_exit;
MPIDI_Datatype_check_size(datatype, 1, data_sz);
if (data_sz == 0)
goto fn_exit;
p_data = MPL_malloc(data_sz * 2, MPL_MEM_BUFFER);
MPIR_Assert(p_data);
MPIR_Memcpy(p_data, (char *) origin_addr, data_sz);
MPIR_Memcpy((char *) p_data + data_sz, (char *) compare_addr, data_sz);
sreq = MPIDI_CH4I_am_request_create(MPIR_REQUEST_KIND__RMA, 1);
MPIR_ERR_CHKANDSTMT(sreq == NULL, mpi_errno, MPIX_ERR_NOREQ, goto fn_fail, "**nomemreq");
MPIDI_CH4U_REQUEST(sreq, req->creq.win_ptr) = win;
MPIDI_CH4U_REQUEST(sreq, req->creq.addr) = (uint64_t) ((char *) result_addr);
MPIDI_CH4U_REQUEST(sreq, req->creq.datatype) = datatype;
MPIDI_CH4U_REQUEST(sreq, req->creq.result_addr) = result_addr;
MPIDI_CH4U_REQUEST(sreq, req->creq.data) = p_data;
MPIDI_CH4U_REQUEST(sreq, rank) = target_rank;
MPIR_cc_incr(sreq->cc_ptr, &c);
MPIR_T_PVAR_TIMER_START(RMA, rma_amhdr_set);
am_hdr.target_disp = target_disp;
am_hdr.datatype = datatype;
am_hdr.req_ptr = (uint64_t) sreq;
am_hdr.win_id = MPIDI_CH4U_WIN(win, win_id);
am_hdr.src_rank = win->comm_ptr->rank;
MPIR_T_PVAR_TIMER_END(RMA, rma_amhdr_set);
MPIDI_win_cmpl_cnts_incr(win, target_rank, &sreq->completion_notification);
/* Increase remote completion counter for acc. */
MPIDI_win_remote_acc_cmpl_cnt_incr(win, target_rank);
#ifndef MPIDI_CH4_DIRECT_NETMOD
if (MPIDI_CH4_rank_is_local(target_rank, win->comm_ptr))
mpi_errno = MPIDI_SHM_am_isend(target_rank, win->comm_ptr, MPIDI_CH4U_CSWAP_REQ,
&am_hdr, sizeof(am_hdr), (char *) p_data, 2, datatype, sreq);
else
#endif
{
mpi_errno = MPIDI_NM_am_isend(target_rank, win->comm_ptr, MPIDI_CH4U_CSWAP_REQ,
&am_hdr, sizeof(am_hdr), (char *) p_data, 2, datatype, sreq);
}
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
fn_exit:
MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH4U_MPI_COMPARE_AND_SWAP);
return mpi_errno;
fn_fail:
goto fn_exit;
}
#undef FUNCNAME
#define FUNCNAME MPIDI_CH4U_mpi_fetch_and_op
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
MPL_STATIC_INLINE_PREFIX int MPIDI_CH4U_mpi_fetch_and_op(const void *origin_addr,
void *result_addr,
MPI_Datatype datatype,
int target_rank,
MPI_Aint target_disp, MPI_Op op,
MPIR_Win * win)
{
int mpi_errno = MPI_SUCCESS;
MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH4U_MPI_FETCH_AND_OP);
MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH4U_MPI_FETCH_AND_OP);
mpi_errno = MPIDI_CH4U_mpi_get_accumulate(origin_addr, 1, datatype,
result_addr, 1, datatype,
target_rank, target_disp, 1, datatype, op, win);
if (mpi_errno)
MPIR_ERR_POP(mpi_errno);
fn_exit:
MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH4U_MPI_FETCH_AND_OP);
return mpi_errno;
fn_fail:
goto fn_exit;
}
#endif /* CH4R_RMA_H_INCLUDED */