/**
* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/
#include "ib_mlx5.h"
#include "ib_mlx5.inl"
#include "ib_mlx5_log.h"
#include <uct/ib/mlx5/exp/ib_exp.h>
#include <uct/ib/base/ib_verbs.h>
#include <uct/ib/base/ib_device.h>
#include <ucs/arch/bitops.h>
#include <ucs/debug/log.h>
#include <ucs/sys/compiler.h>
#include <ucs/sys/sys.h>
#include <string.h>
static const char *uct_ib_mlx5_mmio_modes[] = {
    [UCT_IB_MLX5_MMIO_MODE_BF_POST]    = "bf_post",
    [UCT_IB_MLX5_MMIO_MODE_BF_POST_MT] = "bf_post_mt",
    [UCT_IB_MLX5_MMIO_MODE_DB]         = "db",
    [UCT_IB_MLX5_MMIO_MODE_AUTO]       = "auto",
    [UCT_IB_MLX5_MMIO_MODE_LAST]       = NULL
};
ucs_config_field_t uct_ib_mlx5_iface_config_table[] = {
#if HAVE_IBV_DM
{"DM_SIZE", "2k",
"Device Memory segment size (0 - disabled)",
ucs_offsetof(uct_ib_mlx5_iface_config_t, dm.seg_len), UCS_CONFIG_TYPE_MEMUNITS},
{"DM_COUNT", "1",
"Device Memory segments count (0 - disabled)",
ucs_offsetof(uct_ib_mlx5_iface_config_t, dm.count), UCS_CONFIG_TYPE_UINT},
#endif
{"MMIO_MODE", "auto",
"How to write to MMIO register when posting sends on a QP. One of the following:\n"
" bf_post - BlueFlame post, write the WQE fully to MMIO register.\n"
" bf_post_mt - Thread-safe BlueFlame, same as bf_post but same MMIO register can be used\n"
" by multiple threads.\n"
" db - Doorbell mode, write only 8 bytes to MMIO register, followed by a memory\n"
" store fence, which makes sure the doorbell goes out on the bus.\n"
" auto - Select best according to worker thread mode.",
ucs_offsetof(uct_ib_mlx5_iface_config_t, mmio_mode),
UCS_CONFIG_TYPE_ENUM(uct_ib_mlx5_mmio_modes)},
{NULL}
};
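/*
 * Create a completion queue. When mlx5dv supports selecting the CQE size,
 * request 128-byte CQEs if more than 32 bytes of inline receive data were
 * requested (64-byte CQEs otherwise), and report half of the chosen CQE size
 * back through *inl. Otherwise fall back to the generic verbs CQ creation.
 */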
ucs_status_t uct_ib_mlx5_create_cq(struct ibv_context *context, int cqe,
struct ibv_comp_channel *channel,
int comp_vector, int ignore_overrun,
size_t *inl, struct ibv_cq **cq_p)
{
#if HAVE_DECL_MLX5DV_CQ_INIT_ATTR_MASK_CQE_SIZE
struct ibv_cq *cq;
struct ibv_cq_init_attr_ex cq_attr = {};
struct mlx5dv_cq_init_attr dv_attr = {};
cq_attr.cqe = cqe;
cq_attr.channel = channel;
cq_attr.comp_vector = comp_vector;
if (ignore_overrun) {
cq_attr.comp_mask = IBV_CQ_INIT_ATTR_MASK_FLAGS;
cq_attr.flags = IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN;
}
dv_attr.comp_mask = MLX5DV_CQ_INIT_ATTR_MASK_CQE_SIZE;
dv_attr.cqe_size = uct_ib_get_cqe_size(*inl > 32 ? 128 : 64);
cq = ibv_cq_ex_to_cq(mlx5dv_create_cq(context, &cq_attr, &dv_attr));
if (!cq) {
ucs_error("mlx5dv_create_cq(cqe=%d) failed: %m", cqe);
return UCS_ERR_IO_ERROR;
}
*cq_p = cq;
*inl = dv_attr.cqe_size / 2;
return UCS_OK;
#else
return uct_ib_verbs_create_cq(context, cqe, channel, comp_vector,
ignore_overrun, inl, cq_p);
#endif
}
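/*
 * Fill the mlx5 CQ descriptor from a verbs CQ using the direct-verbs object
 * API: buffer, length, CQ number, UAR, doorbell record and CQE size. The CQ
 * is set to ignore overruns, and every CQE is pre-marked as HW-owned so that
 * polling an empty CQ returns immediately.
 */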
ucs_status_t uct_ib_mlx5_get_cq(struct ibv_cq *cq, uct_ib_mlx5_cq_t *mlx5_cq)
{
uct_ib_mlx5dv_cq_t dcq = {};
uct_ib_mlx5dv_t obj = {};
struct mlx5_cqe64 *cqe;
unsigned cqe_size;
ucs_status_t status;
int ret, i;
obj.dv.cq.in = cq;
obj.dv.cq.out = &dcq.dv;
status = uct_ib_mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ);
if (status != UCS_OK) {
return UCS_ERR_IO_ERROR;
}
mlx5_cq->cq_buf = dcq.dv.buf;
mlx5_cq->cq_ci = 0;
mlx5_cq->cq_sn = 0;
mlx5_cq->cq_length = dcq.dv.cqe_cnt;
mlx5_cq->cq_num = dcq.dv.cqn;
#if HAVE_STRUCT_MLX5DV_CQ_CQ_UAR
mlx5_cq->uar = dcq.dv.cq_uar;
#else
/* coverity[var_deref_model] */
mlx5_cq->uar = uct_dv_get_info_uar0(dcq.dv.uar);
#endif
mlx5_cq->dbrec = dcq.dv.dbrec;
cqe_size = dcq.dv.cqe_size;
    /* Move the buffer pointer forward for 128-byte CQEs, so that polling gets
     * a pointer to the second 64 bytes, which hold the completion information.
     */
mlx5_cq->cq_buf = UCS_PTR_BYTE_OFFSET(mlx5_cq->cq_buf,
cqe_size - sizeof(struct mlx5_cqe64));
ret = ibv_exp_cq_ignore_overrun(cq);
if (ret != 0) {
ucs_error("Failed to modify send CQ to ignore overrun: %s", strerror(ret));
return UCS_ERR_UNSUPPORTED;
}
mlx5_cq->cqe_size_log = ucs_ilog2(cqe_size);
ucs_assert_always((1ul << mlx5_cq->cqe_size_log) == cqe_size);
    /* Set the owner bit in all CQEs, so that every CQE appears to be in HW
     * ownership. This way, CQ polling functions return immediately when no
     * CQE is ready, and there is no need to check the opcode for the
     * MLX5_CQE_INVALID value. */
for (i = 0; i < mlx5_cq->cq_length; ++i) {
cqe = uct_ib_mlx5_get_cqe(mlx5_cq, i);
cqe->op_own |= MLX5_CQE_OWNER_MASK;
}
return UCS_OK;
}
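/*
 * Resource domains are per-worker shared objects managed through
 * uct_worker_tl_data_get()/put(). Depending on the verbs flavor they wrap
 * either an experimental resource domain (ibv_exp_res_domain) or a thread
 * domain plus parent protection domain, so the provider can relax its
 * internal locking according to the worker thread mode.
 */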
static int
uct_ib_mlx5_res_domain_cmp(uct_ib_mlx5_res_domain_t *res_domain,
uct_ib_md_t *md, uct_priv_worker_t *worker)
{
#if HAVE_IBV_EXP_RES_DOMAIN
return res_domain->ibv_domain->context == md->dev.ibv_context;
#elif HAVE_DECL_IBV_ALLOC_TD
return res_domain->pd->context == md->dev.ibv_context;
#else
return 1;
#endif
}
static ucs_status_t
uct_ib_mlx5_res_domain_init(uct_ib_mlx5_res_domain_t *res_domain,
uct_ib_md_t *md, uct_priv_worker_t *worker)
{
#if HAVE_IBV_EXP_RES_DOMAIN
struct ibv_exp_res_domain_init_attr attr;
attr.comp_mask = IBV_EXP_RES_DOMAIN_THREAD_MODEL |
IBV_EXP_RES_DOMAIN_MSG_MODEL;
attr.msg_model = IBV_EXP_MSG_LOW_LATENCY;
switch (worker->thread_mode) {
case UCS_THREAD_MODE_SINGLE:
attr.thread_model = IBV_EXP_THREAD_SINGLE;
break;
case UCS_THREAD_MODE_SERIALIZED:
attr.thread_model = IBV_EXP_THREAD_UNSAFE;
break;
default:
attr.thread_model = IBV_EXP_THREAD_SAFE;
break;
}
res_domain->ibv_domain = ibv_exp_create_res_domain(md->dev.ibv_context, &attr);
if (res_domain->ibv_domain == NULL) {
ucs_error("ibv_exp_create_res_domain() on %s failed: %m",
uct_ib_device_name(&md->dev));
return UCS_ERR_IO_ERROR;
}
#elif HAVE_DECL_IBV_ALLOC_TD
struct ibv_parent_domain_init_attr attr;
struct ibv_td_init_attr td_attr;
if (worker->thread_mode == UCS_THREAD_MODE_MULTI) {
td_attr.comp_mask = 0;
res_domain->td = ibv_alloc_td(md->dev.ibv_context, &td_attr);
if (res_domain->td == NULL) {
ucs_error("ibv_alloc_td() on %s failed: %m",
uct_ib_device_name(&md->dev));
return UCS_ERR_IO_ERROR;
}
} else {
res_domain->td = NULL;
res_domain->pd = md->pd;
return UCS_OK;
}
attr.td = res_domain->td;
attr.pd = md->pd;
attr.comp_mask = 0;
res_domain->pd = ibv_alloc_parent_domain(md->dev.ibv_context, &attr);
if (res_domain->pd == NULL) {
ucs_error("ibv_alloc_parent_domain() on %s failed: %m",
uct_ib_device_name(&md->dev));
ibv_dealloc_td(res_domain->td);
return UCS_ERR_IO_ERROR;
}
#endif
return UCS_OK;
}
static void uct_ib_mlx5_res_domain_cleanup(uct_ib_mlx5_res_domain_t *res_domain)
{
#if HAVE_IBV_EXP_RES_DOMAIN
struct ibv_exp_destroy_res_domain_attr attr;
int ret;
attr.comp_mask = 0;
ret = ibv_exp_destroy_res_domain(res_domain->ibv_domain->context,
res_domain->ibv_domain, &attr);
if (ret != 0) {
ucs_warn("ibv_exp_destroy_res_domain() failed: %m");
}
#elif HAVE_DECL_IBV_ALLOC_TD
int ret;
if (res_domain->td != NULL) {
ret = ibv_dealloc_pd(res_domain->pd);
if (ret != 0) {
ucs_warn("ibv_dealloc_pd() failed: %m");
return;
}
ret = ibv_dealloc_td(res_domain->td);
if (ret != 0) {
ucs_warn("ibv_dealloc_td() failed: %m");
}
}
#endif
}
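/* Attach a shared per-worker resource domain to a verbs QP object. */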
ucs_status_t uct_ib_mlx5_iface_get_res_domain(uct_ib_iface_t *iface,
uct_ib_mlx5_qp_t *qp)
{
qp->verbs.rd = uct_worker_tl_data_get(iface->super.worker,
UCT_IB_MLX5_RES_DOMAIN_KEY,
uct_ib_mlx5_res_domain_t,
uct_ib_mlx5_res_domain_cmp,
uct_ib_mlx5_res_domain_init,
uct_ib_iface_md(iface),
iface->super.worker);
if (UCS_PTR_IS_ERR(qp->verbs.rd)) {
return UCS_PTR_STATUS(qp->verbs.rd);
}
qp->type = UCT_IB_MLX5_OBJ_TYPE_VERBS;
return UCS_OK;
}
void uct_ib_mlx5_iface_put_res_domain(uct_ib_mlx5_qp_t *qp)
{
if (qp->type == UCT_IB_MLX5_OBJ_TYPE_VERBS) {
uct_worker_tl_data_put(qp->verbs.rd, uct_ib_mlx5_res_domain_cleanup);
}
}
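/*
 * Create a verbs QP for the interface: fill the mlx5 and experimental QP
 * attributes, create the QP through the common IB code, and cache its QP
 * number.
 */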
ucs_status_t uct_ib_mlx5_iface_create_qp(uct_ib_iface_t *iface,
uct_ib_mlx5_qp_t *qp,
uct_ib_qp_attr_t *attr)
{
ucs_status_t status;
status = uct_ib_mlx5_iface_fill_attr(iface, qp, attr);
if (status != UCS_OK) {
return status;
}
uct_ib_exp_qp_fill_attr(iface, attr);
status = uct_ib_iface_create_qp(iface, attr, &qp->verbs.qp);
if (status != UCS_OK) {
return status;
}
qp->qp_num = qp->verbs.qp->qp_num;
return UCS_OK;
}
#if !HAVE_DEVX
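/*
 * Detect whether the device accepts a compact (non-extended) UD address
 * vector: build an AH towards this interface's own address and test the
 * MLX5_EXTENDED_UD_AV flag that the driver writes into the AV.
 */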
ucs_status_t uct_ib_mlx5_get_compact_av(uct_ib_iface_t *iface, int *compact_av)
{
struct mlx5_wqe_av mlx5_av;
struct ibv_ah *ah;
uct_ib_address_t *ib_addr;
ucs_status_t status;
struct ibv_ah_attr ah_attr;
/* coverity[result_independent_of_operands] */
ib_addr = ucs_alloca((size_t)iface->addr_size);
status = uct_ib_iface_get_device_address(&iface->super.super,
(uct_device_addr_t*)ib_addr);
if (status != UCS_OK) {
return status;
}
uct_ib_iface_fill_ah_attr_from_addr(iface, ib_addr, &ah_attr);
ah_attr.is_global = iface->config.force_global_addr;
status = uct_ib_iface_create_ah(iface, &ah_attr, &ah);
if (status != UCS_OK) {
return status;
}
uct_ib_mlx5_get_av(ah, &mlx5_av);
    /* Check the MLX5_EXTENDED_UD_AV flag written by the driver into the AV;
     * if the flag is not present, the device supports a compact address vector. */
*compact_av = !(mlx5_av_base(&mlx5_av)->dqp_dct & UCT_IB_MLX5_EXTENDED_UD_AV);
return UCS_OK;
}
#endif
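/*
 * Slow path for a CQE that failed the ownership/opcode fast check: requestor
 * and responder errors advance the consumer index and go through the error
 * reporting path; any other CQE is left to be handled by the next poll.
 */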
void uct_ib_mlx5_check_completion(uct_ib_iface_t *iface, uct_ib_mlx5_cq_t *cq,
struct mlx5_cqe64 *cqe)
{
ucs_status_t status;
switch (cqe->op_own >> 4) {
case MLX5_CQE_REQ_ERR:
        /* update cq_ci before invoking the error callback, since it may poll the CQ */
UCS_STATIC_ASSERT(MLX5_CQE_REQ_ERR & (UCT_IB_MLX5_CQE_OP_OWN_ERR_MASK >> 4));
++cq->cq_ci;
status = uct_ib_mlx5_completion_with_err(iface, (void*)cqe, NULL,
UCS_LOG_LEVEL_DEBUG);
iface->ops->handle_failure(iface, cqe, status);
return;
case MLX5_CQE_RESP_ERR:
/* Local side failure - treat as fatal */
UCS_STATIC_ASSERT(MLX5_CQE_RESP_ERR & (UCT_IB_MLX5_CQE_OP_OWN_ERR_MASK >> 4));
++cq->cq_ci;
uct_ib_mlx5_completion_with_err(iface, (void*)cqe, NULL,
UCS_LOG_LEVEL_FATAL);
return;
default:
        /* The CQE might still be under HW update. Skip it now; it will be
         * handled on the next poll. */
return;
}
}
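/*
 * MMIO registers are per-worker shared objects; two register addresses that
 * differ only in the UCT_IB_MLX5_BF_REG_SIZE bit (i.e. the two BlueFlame
 * buffers of the same UAR) are considered the same register.
 */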
static int uct_ib_mlx5_mmio_cmp(uct_ib_mlx5_mmio_reg_t *reg, uintptr_t addr,
unsigned bf_size)
{
return (reg->addr.uint & ~UCT_IB_MLX5_BF_REG_SIZE) ==
(addr & ~UCT_IB_MLX5_BF_REG_SIZE);
}
static ucs_status_t uct_ib_mlx5_mmio_init(uct_ib_mlx5_mmio_reg_t *reg,
uintptr_t addr,
uct_ib_mlx5_mmio_mode_t mmio_mode)
{
reg->addr.uint = addr;
reg->mode = mmio_mode;
return UCS_OK;
}
static void uct_ib_mlx5_mmio_cleanup(uct_ib_mlx5_mmio_reg_t *reg)
{
}
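/* DEVX UARs are per-worker shared objects, keyed by the device context. */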
int uct_ib_mlx5_devx_uar_cmp(uct_ib_mlx5_devx_uar_t *uar,
uct_ib_mlx5_md_t *md,
uct_ib_mlx5_mmio_mode_t mmio_mode)
{
return uar->ctx == md->super.dev.ibv_context;
}
ucs_status_t uct_ib_mlx5_devx_uar_init(uct_ib_mlx5_devx_uar_t *uar,
uct_ib_mlx5_md_t *md,
uct_ib_mlx5_mmio_mode_t mmio_mode)
{
#if HAVE_DEVX
uar->uar = mlx5dv_devx_alloc_uar(md->super.dev.ibv_context, 0);
if (uar->uar == NULL) {
ucs_error("mlx5dv_devx_alloc_uar() failed: %m");
return UCS_ERR_NO_MEMORY;
}
uar->super.addr.ptr = uar->uar->reg_addr;
uar->super.mode = mmio_mode;
uar->ctx = md->super.dev.ibv_context;
return UCS_OK;
#else
return UCS_ERR_UNSUPPORTED;
#endif
}
void uct_ib_mlx5_devx_uar_cleanup(uct_ib_mlx5_devx_uar_t *uar)
{
#if HAVE_DEVX
mlx5dv_devx_free_uar(uar->uar);
#endif
}
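/*
 * DEVX path: allocate (or reuse) a per-worker UAR via mlx5dv_devx_alloc_uar()
 * and attach it to the TX work queue as its MMIO register.
 */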
ucs_status_t uct_ib_mlx5_txwq_init_devx(uct_priv_worker_t *worker,
uct_ib_mlx5_md_t *md,
uct_ib_mlx5_txwq_t *txwq,
uct_ib_mlx5_mmio_mode_t mode)
{
uct_ib_mlx5_devx_uar_t *uar;
uar = uct_worker_tl_data_get(worker,
UCT_IB_MLX5_DEVX_UAR_KEY,
uct_ib_mlx5_devx_uar_t,
uct_ib_mlx5_devx_uar_cmp,
uct_ib_mlx5_devx_uar_init,
md, mode);
if (UCS_PTR_IS_ERR(uar)) {
return UCS_PTR_STATUS(uar);
}
txwq->reg = &uar->super;
txwq->super.type = UCT_IB_MLX5_OBJ_TYPE_DEVX;
return UCS_OK;
}
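/* Reset the TX work queue software state and zero the WQE buffer. */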
void uct_ib_mlx5_txwq_reset(uct_ib_mlx5_txwq_t *txwq)
{
txwq->curr = txwq->qstart;
txwq->sw_pi = 0;
txwq->prev_sw_pi = UINT16_MAX;
#if UCS_ENABLE_ASSERT
txwq->hw_ci = 0xFFFF;
#endif
uct_ib_fence_info_init(&txwq->fi);
memset(txwq->qstart, 0, UCS_PTR_BYTE_DIFF(txwq->qstart, txwq->qend));
}
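/*
 * Initialize a TX work queue from a verbs QP: extract the SQ layout through
 * direct verbs, validate the WQE stride, queue length and BlueFlame size,
 * select the MMIO write mode according to the configuration and worker
 * thread mode, attach a shared MMIO register, and compute the usable WQE
 * budget (bb_max).
 */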
ucs_status_t uct_ib_mlx5_txwq_init(uct_priv_worker_t *worker,
uct_ib_mlx5_mmio_mode_t cfg_mmio_mode,
uct_ib_mlx5_txwq_t *txwq,
struct ibv_qp *verbs_qp)
{
uct_ib_mlx5_mmio_mode_t mmio_mode;
uct_ib_mlx5dv_qp_t qp_info = {};
uct_ib_mlx5dv_t obj = {};
ucs_status_t status;
obj.dv.qp.in = verbs_qp;
obj.dv.qp.out = &qp_info.dv;
status = uct_ib_mlx5dv_init_obj(&obj, MLX5DV_OBJ_QP);
if (status != UCS_OK) {
return UCS_ERR_IO_ERROR;
}
if ((qp_info.dv.sq.stride != MLX5_SEND_WQE_BB) || !ucs_is_pow2(qp_info.dv.sq.wqe_cnt) ||
((qp_info.dv.bf.size != 0) && (qp_info.dv.bf.size != UCT_IB_MLX5_BF_REG_SIZE)))
{
ucs_error("mlx5 device parameters not suitable for transport "
"bf.size(%d) %d, sq.stride(%d) %d, wqe_cnt %d",
UCT_IB_MLX5_BF_REG_SIZE, qp_info.dv.bf.size,
MLX5_SEND_WQE_BB, qp_info.dv.sq.stride, qp_info.dv.sq.wqe_cnt);
return UCS_ERR_IO_ERROR;
}
if (cfg_mmio_mode != UCT_IB_MLX5_MMIO_MODE_AUTO) {
mmio_mode = cfg_mmio_mode;
} else if (qp_info.dv.bf.size > 0) {
if (worker->thread_mode == UCS_THREAD_MODE_SINGLE) {
mmio_mode = UCT_IB_MLX5_MMIO_MODE_BF_POST;
} else if (worker->thread_mode == UCS_THREAD_MODE_SERIALIZED) {
mmio_mode = UCT_IB_MLX5_MMIO_MODE_BF_POST_MT;
} else {
ucs_error("unsupported thread mode for mlx5: %d", worker->thread_mode);
return UCS_ERR_UNSUPPORTED;
}
} else {
mmio_mode = UCT_IB_MLX5_MMIO_MODE_DB;
}
ucs_debug("tx wq %d bytes [bb=%d, nwqe=%d] mmio_mode %s",
qp_info.dv.sq.stride * qp_info.dv.sq.wqe_cnt,
qp_info.dv.sq.stride, qp_info.dv.sq.wqe_cnt,
uct_ib_mlx5_mmio_modes[mmio_mode]);
txwq->qstart = qp_info.dv.sq.buf;
txwq->qend = UCS_PTR_BYTE_OFFSET(qp_info.dv.sq.buf,
qp_info.dv.sq.stride * qp_info.dv.sq.wqe_cnt);
txwq->reg = uct_worker_tl_data_get(worker,
UCT_IB_MLX5_WORKER_BF_KEY,
uct_ib_mlx5_mmio_reg_t,
uct_ib_mlx5_mmio_cmp,
uct_ib_mlx5_mmio_init,
(uintptr_t)qp_info.dv.bf.reg,
mmio_mode);
if (UCS_PTR_IS_ERR(txwq->reg)) {
return UCS_PTR_STATUS(txwq->reg);
}
/* cppcheck-suppress autoVariables */
txwq->dbrec = &qp_info.dv.dbrec[MLX5_SND_DBR];
    /* Reserve 2 * UCT_IB_MLX5_MAX_BB building blocks (BBs) because:
     * - on completion we only get the index of the last WQE, and we do not
     *   know how many BBs it occupies (though no more than the maximum);
     * - on send we only check that at least one BB is available; the exact
     *   number of BBs is known only when the WQE is actually built.
     */
txwq->bb_max = qp_info.dv.sq.wqe_cnt - 2 * UCT_IB_MLX5_MAX_BB;
ucs_assert_always(txwq->bb_max > 0);
uct_ib_mlx5_txwq_reset(txwq);
return UCS_OK;
}
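/*
 * Release the TX work queue resources according to how it was created:
 * DEVX UAR, or verbs resource domain plus shared MMIO register.
 */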
void uct_ib_mlx5_txwq_cleanup(uct_ib_mlx5_txwq_t* txwq)
{
uct_ib_mlx5_devx_uar_t *uar = ucs_derived_of(txwq->reg,
uct_ib_mlx5_devx_uar_t);
switch (txwq->super.type) {
case UCT_IB_MLX5_OBJ_TYPE_DEVX:
uct_worker_tl_data_put(uar, uct_ib_mlx5_devx_uar_cleanup);
break;
case UCT_IB_MLX5_OBJ_TYPE_VERBS:
uct_ib_mlx5_iface_put_res_domain(&txwq->super);
uct_worker_tl_data_put(txwq->reg, uct_ib_mlx5_mmio_cleanup);
break;
case UCT_IB_MLX5_OBJ_TYPE_LAST:
if (txwq->reg != NULL) {
uct_worker_tl_data_put(txwq->reg, uct_ib_mlx5_mmio_cleanup);
}
}
}
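/*
 * Extract the RQ layout of a verbs QP into rxwq: validate that the stride
 * matches a single data segment and that the length is a power of 2, then
 * zero the receive WQEs.
 */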
ucs_status_t uct_ib_mlx5_get_rxwq(struct ibv_qp *verbs_qp, uct_ib_mlx5_rxwq_t *rxwq)
{
uct_ib_mlx5dv_qp_t qp_info = {};
uct_ib_mlx5dv_t obj = {};
ucs_status_t status;
obj.dv.qp.in = verbs_qp;
obj.dv.qp.out = &qp_info.dv;
status = uct_ib_mlx5dv_init_obj(&obj, MLX5DV_OBJ_QP);
if (status != UCS_OK) {
return UCS_ERR_IO_ERROR;
}
if (!ucs_is_pow2(qp_info.dv.rq.wqe_cnt) ||
qp_info.dv.rq.stride != sizeof(struct mlx5_wqe_data_seg)) {
ucs_error("mlx5 rx wq [count=%d stride=%d] has invalid parameters",
qp_info.dv.rq.wqe_cnt,
qp_info.dv.rq.stride);
return UCS_ERR_IO_ERROR;
}
rxwq->wqes = qp_info.dv.rq.buf;
rxwq->rq_wqe_counter = 0;
rxwq->cq_wqe_counter = 0;
rxwq->mask = qp_info.dv.rq.wqe_cnt - 1;
/* cppcheck-suppress autoVariables */
rxwq->dbrec = &qp_info.dv.dbrec[MLX5_RCV_DBR];
memset(rxwq->wqes, 0, qp_info.dv.rq.wqe_cnt * sizeof(struct mlx5_wqe_data_seg));
return UCS_OK;
}
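/*
 * Initialize the mlx5 SRQ descriptor from a verbs SRQ: validate the head,
 * stride and length reported by direct verbs, then pre-fill the WQE ring.
 */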
ucs_status_t uct_ib_mlx5_srq_init(uct_ib_mlx5_srq_t *srq, struct ibv_srq *verbs_srq,
size_t sg_byte_count, int sge_num)
{
uct_ib_mlx5dv_srq_t srq_info = {};
uct_ib_mlx5dv_t obj = {};
ucs_status_t status;
uint16_t stride;
obj.dv.srq.in = verbs_srq;
obj.dv.srq.out = &srq_info.dv;
status = uct_ib_mlx5dv_init_obj(&obj, MLX5DV_OBJ_SRQ);
if (status != UCS_OK) {
return status;
}
if (srq_info.dv.head != 0) {
ucs_error("SRQ head is not 0 (%d)", srq_info.dv.head);
return UCS_ERR_NO_DEVICE;
}
stride = uct_ib_mlx5_srq_stride(sge_num);
if (srq_info.dv.stride != stride) {
ucs_error("SRQ stride is not %u (%d), sgenum %d",
stride, srq_info.dv.stride, sge_num);
return UCS_ERR_NO_DEVICE;
}
if (!ucs_is_pow2(srq_info.dv.tail + 1)) {
ucs_error("SRQ length is not power of 2 (%d)", srq_info.dv.tail + 1);
return UCS_ERR_NO_DEVICE;
}
srq->buf = srq_info.dv.buf;
srq->db = srq_info.dv.dbrec;
uct_ib_mlx5_srq_buff_init(srq, srq_info.dv.head, srq_info.dv.tail,
sg_byte_count, sge_num);
return UCS_OK;
}
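/*
 * Pre-link the SRQ WQE ring from head to tail and set the byte count of
 * every data segment.
 */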
void uct_ib_mlx5_srq_buff_init(uct_ib_mlx5_srq_t *srq, uint32_t head,
uint32_t tail, size_t sg_byte_count, int sge_num)
{
uct_ib_mlx5_srq_seg_t *seg;
unsigned i, j;
srq->free_idx = tail;
srq->ready_idx = UINT16_MAX;
srq->sw_pi = UINT16_MAX;
srq->mask = tail;
srq->tail = tail;
srq->stride = uct_ib_mlx5_srq_stride(sge_num);
for (i = head; i <= tail; ++i) {
seg = uct_ib_mlx5_srq_get_wqe(srq, i);
seg->srq.next_wqe_index = htons((i + 1) & tail);
seg->srq.ptr_mask = 0;
seg->srq.free = 0;
seg->srq.desc = NULL;
seg->srq.strides = sge_num;
for (j = 0; j < sge_num; ++j) {
seg->dptr[j].byte_count = htonl(sg_byte_count);
}
}
}
void uct_ib_mlx5_srq_cleanup(uct_ib_mlx5_srq_t *srq, struct ibv_srq *verbs_srq)
{
uct_ib_mlx5dv_srq_t srq_info = {};
uct_ib_mlx5dv_t obj = {};
ucs_status_t status;
if (srq->type != UCT_IB_MLX5_OBJ_TYPE_VERBS) {
return;
}
    /* Verify that the mlx5 driver has not modified the SRQ tail */
obj.dv.srq.in = verbs_srq;
obj.dv.srq.out = &srq_info.dv;
status = uct_ib_mlx5dv_init_obj(&obj, MLX5DV_OBJ_SRQ);
ucs_assert_always(status == UCS_OK);
ucs_assertv_always(srq->tail == srq_info.dv.tail, "srq->tail=%d srq_info.tail=%d",
srq->tail, srq_info.dv.tail);
}