/**
 * Copyright (C) Mellanox Technologies Ltd. 2016-2020.  ALL RIGHTS RESERVED.
 *
 * See file LICENSE for terms.
 */

#ifndef UCT_DC_EP_H
#define UCT_DC_EP_H

#include <uct/api/uct.h>
#include <ucs/datastruct/arbiter.h>
#include <ucs/sys/compiler_def.h>

#include "dc_mlx5.h"

#define UCT_DC_MLX5_EP_NO_DCI ((uint8_t)-1)


enum {
    /* Indicates that an FC grant has been requested but not yet received.
     * Flush will not complete until the outgoing grant request is acked.
     * This flag is needed to avoid the following cases:
     * 1) A grant arrives for a recently deleted ep.
     * 2) QP resources are available, but there are still pending requests. */
    UCT_DC_MLX5_EP_FC_FLAG_WAIT_FOR_GRANT = UCS_BIT(0)
};

struct uct_dc_mlx5_ep {
    /*
     * Interpretation depends on the value of 'flags':
     * INVALID   - 'list' is added to iface->tx.gc_list.
     * Otherwise - 'super' and 'arb_group' are used.
     */
    union {
        struct {
            uct_base_ep_t         super;
            ucs_arbiter_group_t   arb_group;
        };
        ucs_list_link_t           list;
    };

    uint8_t                       dci;
    uint8_t                       flags;
    uint16_t                      atomic_mr_offset;
    uct_rc_fc_t                   fc;
    uct_ib_mlx5_base_av_t         av;
};
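
/*
 * Illustrative sketch (not part of the API) of how the union above is
 * interpreted: a valid ep participates in arbitration through 'arb_group',
 * while an invalidated ep is kept on the garbage-collection list through
 * 'list'. The field names are the ones declared in this header.
 *
 *   if (ep->flags & UCT_DC_MLX5_EP_FLAG_VALID) {
 *       ucs_arbiter_group_schedule(waitq, &ep->arb_group);
 *   } else {
 *       ucs_list_add_tail(&iface->tx.gc_list, &ep->list);
 *   }
 */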

typedef struct {
    uct_dc_mlx5_ep_t                    super;
    struct mlx5_grh_av                  grh_av;
} uct_dc_mlx5_grh_ep_t;

typedef struct {
    uct_pending_req_priv_arb_t arb;
    uct_dc_mlx5_ep_t           *ep;
} uct_dc_mlx5_pending_req_priv_t;


UCS_CLASS_DECLARE(uct_dc_mlx5_ep_t, uct_dc_mlx5_iface_t *, const uct_dc_mlx5_iface_addr_t *,
                  uct_ib_mlx5_base_av_t *);

UCS_CLASS_DECLARE(uct_dc_mlx5_grh_ep_t, uct_dc_mlx5_iface_t *,
                  const uct_dc_mlx5_iface_addr_t *,
                  uct_ib_mlx5_base_av_t *, struct mlx5_grh_av *);


ucs_status_t uct_dc_mlx5_ep_put_short(uct_ep_h tl_ep, const void *payload,
                                      unsigned length, uint64_t remote_addr,
                                      uct_rkey_t rkey);

ssize_t uct_dc_mlx5_ep_put_bcopy(uct_ep_h tl_ep, uct_pack_callback_t pack_cb,
                                 void *arg, uint64_t remote_addr, uct_rkey_t rkey);

ucs_status_t uct_dc_mlx5_ep_put_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt,
                                      uint64_t remote_addr, uct_rkey_t rkey,
                                      uct_completion_t *comp);

ucs_status_t uct_dc_mlx5_ep_get_bcopy(uct_ep_h tl_ep,
                                      uct_unpack_callback_t unpack_cb,
                                      void *arg, size_t length,
                                      uint64_t remote_addr, uct_rkey_t rkey,
                                      uct_completion_t *comp);

ucs_status_t uct_dc_mlx5_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt,
                                      uint64_t remote_addr, uct_rkey_t rkey,
                                      uct_completion_t *comp);

ucs_status_t uct_dc_mlx5_ep_am_short(uct_ep_h tl_ep, uint8_t id, uint64_t hdr,
                                     const void *buffer, unsigned length);

ssize_t uct_dc_mlx5_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id,
                                uct_pack_callback_t pack_cb, void *arg,
                                unsigned flags);

ucs_status_t uct_dc_mlx5_ep_am_zcopy(uct_ep_h tl_ep, uint8_t id, const void *header,
                                     unsigned header_length, const uct_iov_t *iov,
                                     size_t iovcnt, unsigned flags,
                                     uct_completion_t *comp);
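
/*
 * Usage sketch (hypothetical caller): like other UCT senders, these calls
 * may return UCS_ERR_NO_RESOURCE when no dci or FC credits are available;
 * the request can then be queued with uct_dc_mlx5_ep_pending_add():
 *
 *   ucs_status_t status = uct_dc_mlx5_ep_am_short(ep, id, hdr, buf, len);
 *   if (status == UCS_ERR_NO_RESOURCE) {
 *       status = uct_dc_mlx5_ep_pending_add(ep, &req, 0);
 *   }
 */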

ucs_status_t uct_dc_mlx5_ep_atomic_cswap64(uct_ep_h tl_ep, uint64_t compare, uint64_t swap,
                                           uint64_t remote_addr, uct_rkey_t rkey,
                                           uint64_t *result, uct_completion_t *comp);

ucs_status_t uct_dc_mlx5_ep_atomic_cswap32(uct_ep_h tl_ep, uint32_t compare, uint32_t swap,
                                           uint64_t remote_addr, uct_rkey_t rkey,
                                           uint32_t *result, uct_completion_t *comp);

ucs_status_t uct_dc_mlx5_ep_atomic64_post(uct_ep_h ep, unsigned opcode, uint64_t value,
                                          uint64_t remote_addr, uct_rkey_t rkey);

ucs_status_t uct_dc_mlx5_ep_atomic32_post(uct_ep_h ep, unsigned opcode, uint32_t value,
                                          uint64_t remote_addr, uct_rkey_t rkey);

ucs_status_t uct_dc_mlx5_ep_atomic64_fetch(uct_ep_h ep, uct_atomic_op_t opcode,
                                           uint64_t value, uint64_t *result,
                                           uint64_t remote_addr, uct_rkey_t rkey,
                                           uct_completion_t *comp);

ucs_status_t uct_dc_mlx5_ep_atomic32_fetch(uct_ep_h ep, uct_atomic_op_t opcode,
                                           uint32_t value, uint32_t *result,
                                           uint64_t remote_addr, uct_rkey_t rkey,
                                           uct_completion_t *comp);
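
/*
 * Usage sketch (hypothetical caller): a fetching atomic writes the previous
 * value into 'result', which must stay valid until 'comp' completes:
 *
 *   uint64_t result;
 *   ucs_status_t status = uct_dc_mlx5_ep_atomic64_fetch(ep, UCT_ATOMIC_OP_ADD,
 *                                                       1, &result, raddr,
 *                                                       rkey, &comp);
 */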

#if IBV_HW_TM
ucs_status_t uct_dc_mlx5_ep_tag_eager_short(uct_ep_h tl_ep, uct_tag_t tag,
                                            const void *data, size_t length);

ssize_t uct_dc_mlx5_ep_tag_eager_bcopy(uct_ep_h tl_ep, uct_tag_t tag,
                                       uint64_t imm,
                                       uct_pack_callback_t pack_cb,
                                       void *arg, unsigned flags);

ucs_status_t uct_dc_mlx5_ep_tag_eager_zcopy(uct_ep_h tl_ep, uct_tag_t tag,
                                            uint64_t imm, const uct_iov_t *iov,
                                            size_t iovcnt, unsigned flags,
                                            uct_completion_t *comp);

ucs_status_ptr_t uct_dc_mlx5_ep_tag_rndv_zcopy(uct_ep_h tl_ep, uct_tag_t tag,
                                               const void *header,
                                               unsigned header_length,
                                               const uct_iov_t *iov,
                                               size_t iovcnt, unsigned flags,
                                               uct_completion_t *comp);

ucs_status_t uct_dc_mlx5_ep_tag_rndv_request(uct_ep_h tl_ep, uct_tag_t tag,
                                             const void* header,
                                             unsigned header_length,
                                             unsigned flags);

ucs_status_t uct_dc_mlx5_iface_tag_recv_zcopy(uct_iface_h tl_iface,
                                              uct_tag_t tag,
                                              uct_tag_t tag_mask,
                                              const uct_iov_t *iov,
                                              size_t iovcnt,
                                              uct_tag_context_t *ctx);

ucs_status_t uct_dc_mlx5_iface_tag_recv_cancel(uct_iface_h tl_iface,
                                               uct_tag_context_t *ctx,
                                               int force);
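
/*
 * Tag-matching usage sketch (hypothetical caller; compiled only when
 * IBV_HW_TM is set). A receive is posted with a tag/mask pair and a context
 * whose callbacks report matching and completion; a still-unmatched receive
 * can later be cancelled:
 *
 *   status = uct_dc_mlx5_iface_tag_recv_zcopy(iface, tag, tag_mask,
 *                                             iov, iovcnt, &ctx);
 *   ...
 *   uct_dc_mlx5_iface_tag_recv_cancel(iface, &ctx, 1);
 */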
#endif

ucs_status_t uct_dc_mlx5_ep_fence(uct_ep_h tl_ep, unsigned flags);

ucs_status_t uct_dc_mlx5_ep_flush(uct_ep_h tl_ep, unsigned flags, uct_completion_t *comp);

ucs_status_t uct_dc_mlx5_ep_fc_ctrl(uct_ep_t *tl_ep, unsigned op,
                                    uct_rc_fc_request_t *req);

ucs_arbiter_cb_result_t
uct_dc_mlx5_iface_dci_do_pending_wait(ucs_arbiter_t *arbiter,
                                      ucs_arbiter_elem_t *elem,
                                      void *arg);

ucs_arbiter_cb_result_t
uct_dc_mlx5_iface_dci_do_dcs_pending_tx(ucs_arbiter_t *arbiter,
                                        ucs_arbiter_elem_t *elem,
                                        void *arg);

ucs_arbiter_cb_result_t
uct_dc_mlx5_iface_dci_do_rand_pending_tx(ucs_arbiter_t *arbiter,
                                         ucs_arbiter_elem_t *elem,
                                         void *arg);

ucs_status_t uct_dc_mlx5_ep_pending_add(uct_ep_h tl_ep, uct_pending_req_t *r,
                                        unsigned flags);
void uct_dc_mlx5_ep_pending_purge(uct_ep_h tl_ep, uct_pending_purge_callback_t cb, void *arg);

void uct_dc_mlx5_ep_pending_common(uct_dc_mlx5_iface_t *iface,
                                   uct_dc_mlx5_ep_t *ep, uct_pending_req_t *r,
                                   unsigned flags, int push_to_head);

void uct_dc_mlx5_ep_cleanup(uct_ep_h tl_ep, ucs_class_t *cls);

void uct_dc_mlx5_ep_release(uct_dc_mlx5_ep_t *ep);

static UCS_F_ALWAYS_INLINE uct_dc_mlx5_pending_req_priv_t *
uct_dc_mlx5_pending_req_priv(uct_pending_req_t *req)
{
    return (uct_dc_mlx5_pending_req_priv_t *)&(req)->priv;
}
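
/*
 * Sketch (assuming 'req' is a uct_pending_req_t being queued on 'ep'): the
 * request's private area is reinterpreted as the struct above, so the owner
 * ep can be recorded there before the request is added to an arbiter group:
 *
 *   uct_dc_mlx5_pending_req_priv(req)->ep = ep;
 */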

static UCS_F_ALWAYS_INLINE int uct_dc_mlx5_iface_is_dci_rand(uct_dc_mlx5_iface_t *iface)
{
    return iface->tx.policy == UCT_DC_TX_POLICY_RAND;
}

static UCS_F_ALWAYS_INLINE ucs_arbiter_group_t*
uct_dc_mlx5_ep_rand_arb_group(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep)
{
    ucs_assert(uct_dc_mlx5_iface_is_dci_rand(iface) &&
               (ep->dci != UCT_DC_MLX5_EP_NO_DCI));
    /* With the random DCI policy, a dci is always assigned to the ep */
    return &iface->tx.dcis[ep->dci].arb_group;
}

static UCS_F_ALWAYS_INLINE ucs_arbiter_group_t*
uct_dc_mlx5_ep_arb_group(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep)
{
    return (uct_dc_mlx5_iface_is_dci_rand(iface)) ?
            uct_dc_mlx5_ep_rand_arb_group(iface, ep) : &ep->arb_group;
}

static UCS_F_ALWAYS_INLINE void
uct_dc_mlx5_iface_dci_sched_tx(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep)
{
    if (uct_dc_mlx5_iface_is_dci_rand(iface)) {
        ucs_arbiter_group_schedule(uct_dc_mlx5_iface_tx_waitq(iface),
                                   uct_dc_mlx5_ep_rand_arb_group(iface, ep));
    } else if (uct_dc_mlx5_iface_dci_has_tx_resources(iface, ep->dci)) {
        ucs_arbiter_group_schedule(uct_dc_mlx5_iface_tx_waitq(iface),
                                   &ep->arb_group);
    }
}

static UCS_F_ALWAYS_INLINE uct_dc_mlx5_ep_t *
uct_dc_mlx5_ep_from_dci(uct_dc_mlx5_iface_t *iface, uint8_t dci)
{
    /* Can be used with dcs* policies only; with the rand policy every dci
     * may be used by many eps */
    ucs_assert(!uct_dc_mlx5_iface_is_dci_rand(iface));
    return iface->tx.dcis[dci].ep;
}

static UCS_F_ALWAYS_INLINE void
uct_dc_mlx5_ep_clear_fc_grant_flag(uct_dc_mlx5_iface_t *iface,
                                   uct_dc_mlx5_ep_t *ep)
{
    ucs_assert((ep->fc.flags & UCT_DC_MLX5_EP_FC_FLAG_WAIT_FOR_GRANT) &&
               iface->tx.fc_grants);
    ep->fc.flags &= ~UCT_DC_MLX5_EP_FC_FLAG_WAIT_FOR_GRANT;
    --iface->tx.fc_grants;
}

enum uct_dc_mlx5_ep_flags {
    UCT_DC_MLX5_EP_FLAG_TX_WAIT  = UCS_BIT(0), /* ep is in the tx_wait state. See
                                                  description of the dcs+quota dci
                                                  selection policy above */
    UCT_DC_MLX5_EP_FLAG_GRH      = UCS_BIT(1), /* ep has GRH address. Used by
                                                  dc_mlx5 endpoint */
    UCT_DC_MLX5_EP_FLAG_VALID    = UCS_BIT(2)  /* ep is a valid endpoint */
};


void uct_dc_mlx5_ep_handle_failure(uct_dc_mlx5_ep_t *ep, void *arg,
                                   ucs_status_t status);

static UCS_F_ALWAYS_INLINE ucs_status_t
uct_dc_mlx5_ep_basic_init(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep)
{
    ucs_arbiter_group_init(&ep->arb_group);

    if (uct_dc_mlx5_iface_is_dci_rand(iface)) {
        /* coverity[dont_call] */
        ep->dci = rand_r(&iface->tx.rand_seed) % iface->tx.ndci;
    } else {
        ep->dci = UCT_DC_MLX5_EP_NO_DCI;
    }

    /* valid = 1, global = 0, tx_wait = 0 */
    ep->flags = UCT_DC_MLX5_EP_FLAG_VALID;

    return uct_rc_fc_init(&ep->fc, iface->super.super.config.fc_wnd_size
                          UCS_STATS_ARG(ep->super.stats));
}

static UCS_F_ALWAYS_INLINE int
uct_dc_mlx5_iface_dci_can_alloc(uct_dc_mlx5_iface_t *iface)
{
    return iface->tx.stack_top < iface->tx.ndci;
}
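
/*
 * dcs stack discipline sketch (illustration only, assuming ndci == 2):
 *
 *   stack_top == 0     both dcis free, can_alloc
 *   alloc          ->  ep->dci = dcis_stack[0]; stack_top = 1
 *   alloc          ->  ep->dci = dcis_stack[1]; stack_top = 2, !can_alloc
 *   release(dci)   ->  dcis_stack[--stack_top] = dci
 */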

static UCS_F_ALWAYS_INLINE void
uct_dc_mlx5_iface_progress_pending(uct_dc_mlx5_iface_t *iface)
{
    do {
        /*
         * A pending op on the tx_waitq can complete with UCS_OK status
         * without actually sending anything on the dci. In that case,
         * pending ops on the dci_waitq may never be scheduled.
         *
         * So we keep progressing pending ops while the dci_waitq is not
         * empty and it is still possible to allocate a dci.
         * NOTE: with the rand dci allocation policy, the dci_waitq is
         * always empty.
         */
        if (uct_dc_mlx5_iface_dci_can_alloc(iface) &&
            !uct_dc_mlx5_iface_is_dci_rand(iface)) {
            ucs_arbiter_dispatch(uct_dc_mlx5_iface_dci_waitq(iface), 1,
                                 uct_dc_mlx5_iface_dci_do_pending_wait, NULL);
        }
        ucs_arbiter_dispatch(uct_dc_mlx5_iface_tx_waitq(iface), 1,
                             iface->tx.pend_cb, NULL);

    } while (ucs_unlikely(!ucs_arbiter_is_empty(uct_dc_mlx5_iface_dci_waitq(iface)) &&
                           uct_dc_mlx5_iface_dci_can_alloc(iface)));
}

static inline int uct_dc_mlx5_iface_dci_ep_can_send(uct_dc_mlx5_ep_t *ep)
{
    uct_dc_mlx5_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_dc_mlx5_iface_t);
    return (!(ep->flags & UCT_DC_MLX5_EP_FLAG_TX_WAIT)) &&
           uct_rc_fc_has_resources(&iface->super.super, &ep->fc) &&
           uct_dc_mlx5_iface_dci_has_tx_resources(iface, ep->dci);
}

static UCS_F_ALWAYS_INLINE
void uct_dc_mlx5_iface_schedule_dci_alloc(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep)
{
    /* If the FC window is empty, the group will be scheduled when
     * a grant is received */
    if (uct_rc_fc_has_resources(&iface->super.super, &ep->fc)) {
        ucs_arbiter_group_schedule(uct_dc_mlx5_iface_dci_waitq(iface), &ep->arb_group);
    }
}

static inline void uct_dc_mlx5_iface_dci_put(uct_dc_mlx5_iface_t *iface, uint8_t dci)
{
    uct_dc_mlx5_ep_t *ep;

    if (uct_dc_mlx5_iface_is_dci_rand(iface)) {
        return;
    }

    ep = uct_dc_mlx5_ep_from_dci(iface, dci);

    ucs_assert(iface->tx.stack_top > 0);

    if (uct_dc_mlx5_iface_dci_has_outstanding(iface, dci)) {
        if (iface->tx.policy == UCT_DC_TX_POLICY_DCS_QUOTA) {
            /* In the tx_wait state: if no eps are waiting for dci
             * allocation, the ep goes back to the normal state.
             */
            if (ep->flags & UCT_DC_MLX5_EP_FLAG_TX_WAIT) {
                if (!ucs_arbiter_is_empty(uct_dc_mlx5_iface_dci_waitq(iface))) {
                    return;
                }
                ep->flags &= ~UCT_DC_MLX5_EP_FLAG_TX_WAIT;
            }
        }
        ucs_arbiter_group_schedule(uct_dc_mlx5_iface_tx_waitq(iface), &ep->arb_group);
        return;
    }
    iface->tx.stack_top--;
    iface->tx.dcis_stack[iface->tx.stack_top] = dci;
#if UCS_ENABLE_ASSERT
    iface->tx.dcis[dci].flags = 0;
#endif

    if (ucs_unlikely(ep == NULL)) {
        return;
    }

    ucs_assert(uct_dc_mlx5_ep_from_dci(iface, dci)->dci != UCT_DC_MLX5_EP_NO_DCI);
    ep->dci    = UCT_DC_MLX5_EP_NO_DCI;
    ep->flags &= ~UCT_DC_MLX5_EP_FLAG_TX_WAIT;
    iface->tx.dcis[dci].ep = NULL;

    /* It is possible that the dci is released while the ep still has
     * scheduled pending ops; move the group to the 'wait for dci alloc'
     * state.
     */
    ucs_arbiter_group_desched(uct_dc_mlx5_iface_tx_waitq(iface), &ep->arb_group);
    uct_dc_mlx5_iface_schedule_dci_alloc(iface, ep);
}
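
/*
 * Call-site sketch (hypothetical TX completion path): once send completions
 * are reaped and the txqp has no more outstanding work, returning the dci
 * may hand it over to the next waiting ep:
 *
 *   uct_dc_mlx5_iface_dci_put(iface, dci);
 *   uct_dc_mlx5_iface_progress_pending(iface);
 */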

static inline void uct_dc_mlx5_iface_dci_alloc(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep)
{
    /* Take the first available dci from the stack.
     * There is no need to check the txqp because a free
     * dci must have resources to transmit.
     */
    ucs_assert(!uct_dc_mlx5_iface_is_dci_rand(iface));
    ep->dci = iface->tx.dcis_stack[iface->tx.stack_top];
    ucs_assert(ep->dci < iface->tx.ndci);
    ucs_assert(uct_dc_mlx5_ep_from_dci(iface, ep->dci) == NULL);
    ucs_assert(iface->tx.dcis[ep->dci].flags == 0);
    iface->tx.dcis[ep->dci].ep = ep;
    iface->tx.stack_top++;
}

static inline void uct_dc_mlx5_iface_dci_free(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep)
{
    uint8_t dci;

    if (uct_dc_mlx5_iface_is_dci_rand(iface)) {
        return;
    }

    dci = ep->dci;

    ucs_assert(dci != UCT_DC_MLX5_EP_NO_DCI);
    ucs_assert(iface->tx.stack_top > 0);

    if (uct_dc_mlx5_iface_dci_has_outstanding(iface, dci)) {
        return;
    }

    iface->tx.stack_top--;
    iface->tx.dcis_stack[iface->tx.stack_top] = dci;
    iface->tx.dcis[dci].ep                    = NULL;
#if UCS_ENABLE_ASSERT
    iface->tx.dcis[dci].flags                 = 0;
#endif

    ep->dci    = UCT_DC_MLX5_EP_NO_DCI;
    ep->flags &= ~UCT_DC_MLX5_EP_FLAG_TX_WAIT;
}

static inline ucs_status_t uct_dc_mlx5_iface_dci_get(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep)
{
    uct_rc_txqp_t *txqp;
    int16_t available;

    ucs_assert(!iface->super.super.config.tx_moderation);

    if (uct_dc_mlx5_iface_is_dci_rand(iface)) {
        if (uct_dc_mlx5_iface_dci_has_tx_resources(iface, ep->dci)) {
            return UCS_OK;
        } else {
            UCS_STATS_UPDATE_COUNTER(iface->tx.dcis[ep->dci].txqp.stats,
                                     UCT_RC_TXQP_STAT_QP_FULL, 1);
            goto out_no_res;
        }
    }

    if (ep->dci != UCT_DC_MLX5_EP_NO_DCI) {
        /* dci is already assigned - keep using it */
        if ((iface->tx.policy == UCT_DC_TX_POLICY_DCS_QUOTA) &&
            (ep->flags & UCT_DC_MLX5_EP_FLAG_TX_WAIT)) {
            goto out_no_res;
        }

        /* If the dci has sent more than its quota and there are eps waiting
         * for dci allocation, the ep goes into the tx_wait state.
         */
        txqp      = &iface->tx.dcis[ep->dci].txqp;
        available = uct_rc_txqp_available(txqp);
        if ((iface->tx.policy == UCT_DC_TX_POLICY_DCS_QUOTA) &&
            (available <= iface->tx.available_quota) &&
            !ucs_arbiter_is_empty(uct_dc_mlx5_iface_dci_waitq(iface)))
        {
            ep->flags |= UCT_DC_MLX5_EP_FLAG_TX_WAIT;
            goto out_no_res;
        }

        if (available <= 0) {
            UCS_STATS_UPDATE_COUNTER(txqp->stats, UCT_RC_TXQP_STAT_QP_FULL, 1);
            goto out_no_res;
        }

        return UCS_OK;
    }

    /* Do not allocate a dci if there are no TX descriptor resources;
     * otherwise this dci may never be released. */
    if (uct_dc_mlx5_iface_dci_can_alloc(iface) &&
        uct_dc_mlx5_iface_has_tx_resources(iface)) {
        uct_dc_mlx5_iface_dci_alloc(iface, ep);
        return UCS_OK;
    }

out_no_res:
    /* We will have to wait until someone releases a dci */
    UCS_STATS_UPDATE_COUNTER(ep->super.stats, UCT_EP_STAT_NO_RES, 1);
    return UCS_ERR_NO_RESOURCE;
}

static UCS_F_ALWAYS_INLINE int uct_dc_mlx5_ep_fc_wait_for_grant(uct_dc_mlx5_ep_t *ep)
{
    return ep->fc.flags & UCT_DC_MLX5_EP_FC_FLAG_WAIT_FOR_GRANT;
}
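
/*
 * Flush gating sketch (consistent with the WAIT_FOR_GRANT flag comment at
 * the top of this file; a real flush also checks outstanding sends):
 *
 *   if (uct_dc_mlx5_ep_fc_wait_for_grant(ep)) {
 *       return UCS_INPROGRESS;
 *   }
 */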

ucs_status_t uct_dc_mlx5_ep_check_fc(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep);

static inline struct mlx5_grh_av *uct_dc_mlx5_ep_get_grh(uct_dc_mlx5_ep_t *ep)
{
   return (ep->flags & UCT_DC_MLX5_EP_FLAG_GRH) ?
          &(ucs_derived_of(ep, uct_dc_mlx5_grh_ep_t)->grh_av) : NULL;
}


#define UCT_DC_MLX5_TXQP_DECL(_txqp, _txwq) \
    uct_rc_txqp_t *_txqp; \
    uct_ib_mlx5_txwq_t *_txwq;


#define UCT_DC_MLX5_CHECK_RES(_iface, _ep) \
    { \
        ucs_status_t status = uct_dc_mlx5_iface_dci_get(_iface, _ep); \
        if (ucs_unlikely(status != UCS_OK)) { \
            return status; \
        } \
    }


#define UCT_DC_CHECK_RES_PTR(_iface, _ep) \
    { \
        ucs_status_t status = uct_dc_mlx5_iface_dci_get(_iface, _ep); \
        if (ucs_unlikely(status != UCS_OK)) { \
            return UCS_STATUS_PTR(status); \
        } \
    }


/* First, check whether we have an FC window. If the hard threshold is
 * reached, a credit request will be sent by "fc_ctrl" as a separate message.
 * TX resources are checked after FC, because the FC credit request may
 * consume the last available TX resources. */
#define UCT_DC_CHECK_RES_AND_FC(_iface, _ep) \
    { \
        if (ucs_unlikely((_ep)->fc.fc_wnd <= \
                         (_iface)->super.super.config.fc_hard_thresh)) { \
            ucs_status_t status = uct_dc_mlx5_ep_check_fc(_iface, _ep); \
            if (ucs_unlikely(status != UCS_OK)) { \
                if (((_ep)->dci != UCT_DC_MLX5_EP_NO_DCI) && \
                    !uct_dc_mlx5_iface_is_dci_rand(_iface)) { \
                    ucs_assertv_always(uct_dc_mlx5_iface_dci_has_outstanding(_iface, (_ep)->dci), \
                                       "iface (%p) ep (%p) dci leak detected: dci=%d", \
                                       _iface, _ep, (_ep)->dci); \
                } \
                return status; \
            } \
        } \
        UCT_DC_MLX5_CHECK_RES(_iface, _ep) \
    }
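
/*
 * Macro usage sketch (hypothetical sender modeled on the functions declared
 * above): FC and dci resources are checked up front, after which the
 * declared txqp/txwq pair can be bound to the ep's dci:
 *
 *   ucs_status_t uct_dc_mlx5_ep_foo(uct_ep_h tl_ep, ...)
 *   {
 *       uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface,
 *                                                   uct_dc_mlx5_iface_t);
 *       uct_dc_mlx5_ep_t    *ep    = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t);
 *       UCT_DC_MLX5_TXQP_DECL(txqp, txwq);
 *
 *       UCT_DC_CHECK_RES_AND_FC(iface, ep);
 *       ...
 *   }
 */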


#endif