/**
* Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/
#include "ib_mlx5_ifc.h"
#include <uct/ib/mlx5/ib_mlx5.h>
#include <ucs/arch/bitops.h>
#if HAVE_DECL_MLX5DV_INIT_OBJ
/**
 * Query mlx5 direct-verbs information for the objects selected by @a type.
 *
 * Calls mlx5dv_init_obj() on obj->dv. When built with HAVE_IBV_EXP_DM and
 * @a type includes MLX5DV_OBJ_DM, additionally queries device-memory
 * information through uct_ib_mlx5_get_dm_info().
 *
 * @param [in,out] obj   DV object descriptor holding the in/out pointers.
 * @param [in]     type  Bitmask of MLX5DV_OBJ_* object types to query.
 *
 * @return UCS_OK on success, UCS_ERR_NO_DEVICE if any of the queries
 *         returned non-zero.
 */
ucs_status_t uct_ib_mlx5dv_init_obj(uct_ib_mlx5dv_t *obj, uint64_t type)
{
    int ret;

    ret = mlx5dv_init_obj(&obj->dv, type);
#if HAVE_IBV_EXP_DM
    if (!ret && (type & MLX5DV_OBJ_DM)) {
        ret = uct_ib_mlx5_get_dm_info(obj->dv_dm.in, obj->dv_dm.out);
    }
#endif
    if (ret != 0) {
        /* uint64_t is not 'unsigned long' on every ABI, so widen explicitly
         * rather than relying on "%lx" matching the argument type */
        ucs_error("DV failed to get mlx5 information. Type %llx.",
                  (unsigned long long)type);
        return UCS_ERR_NO_DEVICE;
    }

    return UCS_OK;
}
#endif
#if HAVE_DEVX
/**
 * Create an RC QP through the DEVX interface and move it from RESET to INIT.
 *
 * @param [in]     iface  IB interface; supplies the memory domain, the worker
 *                        (for the shared UAR) and the default QP attributes.
 * @param [out]    qp     QP object to fill: DEVX object, QP number, doorbell
 *                        record and, only when @a tx is given, the work-queue
 *                        buffer and its umem registration.
 * @param [out]    tx     Optional send work queue to initialize. When NULL, no
 *                        work-queue buffer is allocated, the QP is created
 *                        with the no_sq bit set and the UAR reference is
 *                        released before returning.
 * @param [in,out] attr   QP attributes. Filled by uct_ib_iface_fill_attr();
 *                        on success cap.max_send_wr / cap.max_recv_wr are
 *                        overwritten with the computed queue sizes.
 *
 * @return UCS_OK on success. On failure an error status is returned and every
 *         resource acquired by this function is released.
 */
ucs_status_t uct_ib_mlx5_devx_create_qp(uct_ib_iface_t *iface,
                                        uct_ib_mlx5_qp_t *qp,
                                        uct_ib_mlx5_txwq_t *tx,
                                        uct_ib_qp_attr_t *attr)
{
    uct_ib_mlx5_md_t *md = ucs_derived_of(iface->super.md, uct_ib_mlx5_md_t);
    uct_ib_device_t *dev = &md->super.dev;
    char in[UCT_IB_MLX5DV_ST_SZ_BYTES(create_qp_in)] = {};
    char out[UCT_IB_MLX5DV_ST_SZ_BYTES(create_qp_out)] = {};
    char in_2init[UCT_IB_MLX5DV_ST_SZ_BYTES(rst2init_qp_in)] = {};
    char out_2init[UCT_IB_MLX5DV_ST_SZ_BYTES(rst2init_qp_out)] = {};
    ucs_status_t status = UCS_ERR_NO_MEMORY;
    struct mlx5dv_pd dvpd = {};
    struct mlx5dv_cq dvscq = {};
    struct mlx5dv_cq dvrcq = {};
    struct mlx5dv_srq dvsrq = {};
    struct mlx5dv_obj dv = {};
    uct_ib_mlx5_devx_uar_t *uar;
    int max_tx, max_rx, len_tx, len;
    int wqe_size;
    int dvflags;
    void *qpc;
    int ret;

    uct_ib_iface_fill_attr(iface, attr);

    /* Get (or create) the per-worker UAR object */
    uar = uct_worker_tl_data_get(iface->super.worker,
                                 UCT_IB_MLX5_DEVX_UAR_KEY,
                                 uct_ib_mlx5_devx_uar_t,
                                 uct_ib_mlx5_devx_uar_cmp,
                                 uct_ib_mlx5_devx_uar_init,
                                 md, UCT_IB_MLX5_MMIO_MODE_BF_POST);
    if (UCS_PTR_IS_ERR(uar)) {
        status = UCS_PTR_STATUS(uar);
        goto err;
    }

    /* Largest WQE: control + UMR control + mkey context + KLM area (at least
     * 64 bytes) + the bigger of the SGE list and the aligned inline data */
    wqe_size = sizeof(struct mlx5_wqe_ctrl_seg) +
               sizeof(struct mlx5_wqe_umr_ctrl_seg) +
               sizeof(struct mlx5_wqe_mkey_context_seg) +
               ucs_max(sizeof(struct mlx5_wqe_umr_klm_seg), 64) +
               ucs_max(attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg),
                       ucs_align_up(sizeof(struct mlx5_wqe_inl_data_seg) +
                                    attr->cap.max_inline_data, 16));
    len_tx   = ucs_roundup_pow2_or0(attr->cap.max_send_wr * wqe_size);
    max_tx   = len_tx / MLX5_SEND_WQE_BB;
    max_rx   = ucs_roundup_pow2_or0(attr->cap.max_recv_wr);
    /* The send queue occupies the first len_tx bytes of the buffer (see
     * tx->qend below); the remainder is sized from max_rx */
    len      = len_tx + max_rx * UCT_IB_MLX5_MAX_BB * UCT_IB_MLX5_WQE_SEG_SIZE;

    if (tx != NULL) {
        ret = ucs_posix_memalign(&qp->devx.wq_buf, ucs_get_page_size(), len,
                                 "qp umem");
        if (ret != 0) {
            ucs_error("failed to allocate QP buffer of %d bytes: %m", len);
            goto err_uar;
        }

        qp->devx.mem = mlx5dv_devx_umem_reg(dev->ibv_context, qp->devx.wq_buf,
                                            len, 0);
        if (!qp->devx.mem) {
            ucs_error("mlx5dv_devx_umem_reg() failed: %m");
            goto err_free_buf;
        }
    } else {
        /* No send queue requested: nothing to allocate or register */
        qp->devx.wq_buf = qp->devx.mem = NULL;
    }

    qp->devx.dbrec = uct_ib_mlx5_get_dbrec(md);
    if (!qp->devx.dbrec) {
        goto err_free_mem;
    }

    /* Resolve the PD, send CQ and (if a verbs SRQ is given) SRQ numbers */
    dv.pd.in  = attr->ibv.pd;
    dv.pd.out = &dvpd;
    dv.cq.in  = attr->ibv.send_cq;
    dv.cq.out = &dvscq;
    dvflags   = MLX5DV_OBJ_PD | MLX5DV_OBJ_CQ;

    if (attr->srq) {
        dv.srq.in       = attr->srq;
        dvflags        |= MLX5DV_OBJ_SRQ;
        dv.srq.out      = &dvsrq;
        dvsrq.comp_mask = MLX5DV_SRQ_MASK_SRQN;
    } else {
        /* No verbs SRQ object: use the SRQ number supplied by the caller */
        dvsrq.srqn = attr->srq_num;
    }

    /* Every failure from here on is a device/command error */
    status = UCS_ERR_IO_ERROR;

    /* A failed query would leave the object numbers zeroed and the QP would
     * be created against the wrong resources, so do not ignore the result.
     * The return value carries the error code, hence it is logged directly. */
    ret = mlx5dv_init_obj(&dv, dvflags);
    if (ret != 0) {
        ucs_error("mlx5dv_init_obj(0x%x) failed: %d", (unsigned)dvflags, ret);
        goto err_free_db;
    }

    /* Second query, this time for the receive CQ number */
    dv.cq.in  = attr->ibv.recv_cq;
    dv.cq.out = &dvrcq;
    ret = mlx5dv_init_obj(&dv, MLX5DV_OBJ_CQ);
    if (ret != 0) {
        ucs_error("mlx5dv_init_obj(0x%x) failed: %d", (unsigned)MLX5DV_OBJ_CQ,
                  ret);
        goto err_free_db;
    }

    /* Build the CREATE_QP command */
    UCT_IB_MLX5DV_SET(create_qp_in, in, opcode, UCT_IB_MLX5_CMD_OP_CREATE_QP);
    qpc = UCT_IB_MLX5DV_ADDR_OF(create_qp_in, in, qpc);
    UCT_IB_MLX5DV_SET(qpc, qpc, st, UCT_IB_MLX5_QPC_ST_RC);
    UCT_IB_MLX5DV_SET(qpc, qpc, pm_state, UCT_IB_MLX5_QPC_PM_STATE_MIGRATED);
    UCT_IB_MLX5DV_SET(qpc, qpc, pd, dvpd.pdn);
    UCT_IB_MLX5DV_SET(qpc, qpc, uar_page, uar->uar->page_id);
    UCT_IB_MLX5DV_SET(qpc, qpc, rq_type, !!dvsrq.srqn);
    UCT_IB_MLX5DV_SET(qpc, qpc, srqn_rmpn_xrqn, dvsrq.srqn);
    UCT_IB_MLX5DV_SET(qpc, qpc, cqn_snd, dvscq.cqn);
    UCT_IB_MLX5DV_SET(qpc, qpc, cqn_rcv, dvrcq.cqn);
    UCT_IB_MLX5DV_SET(qpc, qpc, log_sq_size, ucs_ilog2_or0(max_tx));
    UCT_IB_MLX5DV_SET(qpc, qpc, log_rq_size, ucs_ilog2_or0(max_rx));
    UCT_IB_MLX5DV_SET(qpc, qpc, cs_req, UCT_IB_MLX5_QPC_CS_REQ_UP_TO_64B);
    UCT_IB_MLX5DV_SET(qpc, qpc, cs_res,
                      uct_ib_mlx5_qpc_cs_res(attr->max_inl_resp));
    UCT_IB_MLX5DV_SET64(qpc, qpc, dbr_addr, qp->devx.dbrec->offset);
    UCT_IB_MLX5DV_SET(qpc, qpc, dbr_umem_id, qp->devx.dbrec->mem_id);

    if (qp->devx.mem == NULL) {
        /* No work-queue buffer: create without a send queue and point the
         * command at the memory domain's zero_mem umem */
        UCT_IB_MLX5DV_SET(qpc, qpc, no_sq, true);
        UCT_IB_MLX5DV_SET(qpc, qpc, offload_type, true);
        UCT_IB_MLX5DV_SET(create_qp_in, in, wq_umem_id, md->zero_mem->umem_id);
    } else {
        UCT_IB_MLX5DV_SET(create_qp_in, in, wq_umem_id, qp->devx.mem->umem_id);
    }

    qp->devx.obj = mlx5dv_devx_obj_create(dev->ibv_context, in, sizeof(in),
                                          out, sizeof(out));
    if (!qp->devx.obj) {
        ucs_error("mlx5dv_devx_obj_create(QP) failed, syndrome %x: %m",
                  UCT_IB_MLX5DV_GET(create_qp_out, out, syndrome));
        goto err_free_db;
    }

    qp->qp_num = UCT_IB_MLX5DV_GET(create_qp_out, out, qpn);

    /* Build and issue the RST2INIT command for the new QP */
    qpc = UCT_IB_MLX5DV_ADDR_OF(rst2init_qp_in, in_2init, qpc);
    UCT_IB_MLX5DV_SET(rst2init_qp_in, in_2init, opcode,
                      UCT_IB_MLX5_CMD_OP_RST2INIT_QP);
    UCT_IB_MLX5DV_SET(rst2init_qp_in, in_2init, qpn, qp->qp_num);
    UCT_IB_MLX5DV_SET(qpc, qpc, pm_state, UCT_IB_MLX5_QPC_PM_STATE_MIGRATED);
    UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port);
    UCT_IB_MLX5DV_SET(qpc, qpc, rwe, true);

    ret = mlx5dv_devx_obj_modify(qp->devx.obj, in_2init, sizeof(in_2init),
                                 out_2init, sizeof(out_2init));
    if (ret) {
        ucs_error("mlx5dv_devx_obj_modify(2INIT_QP) failed, syndrome %x: %m",
                  UCT_IB_MLX5DV_GET(rst2init_qp_out, out_2init, syndrome));
        goto err_free;
    }

    qp->type              = UCT_IB_MLX5_OBJ_TYPE_DEVX;
    attr->cap.max_send_wr = max_tx;
    attr->cap.max_recv_wr = max_rx;

    if (tx != NULL) {
        /* The send work queue keeps using the UAR, so its reference is kept */
        tx->reg    = &uar->super;
        tx->qstart = qp->devx.wq_buf;
        tx->qend   = UCS_PTR_BYTE_OFFSET(qp->devx.wq_buf, len_tx);
        tx->dbrec  = &qp->devx.dbrec->db[MLX5_SND_DBR];
        tx->bb_max = max_tx - 2 * UCT_IB_MLX5_MAX_BB;
        uct_ib_mlx5_txwq_reset(tx);
    } else {
        /* Nothing retains the UAR when there is no send queue */
        uct_worker_tl_data_put(uar, uct_ib_mlx5_devx_uar_cleanup);
    }

    return UCS_OK;

    /* Unwind in reverse order of acquisition */
err_free:
    mlx5dv_devx_obj_destroy(qp->devx.obj);
err_free_db:
    uct_ib_mlx5_put_dbrec(qp->devx.dbrec);
err_free_mem:
    if (qp->devx.mem != NULL) {
        mlx5dv_devx_umem_dereg(qp->devx.mem);
    }
err_free_buf:
    ucs_free(qp->devx.wq_buf);
err_uar:
    uct_worker_tl_data_put(uar, uct_ib_mlx5_devx_uar_cleanup);
err:
    return status;
}
/**
 * Send a DEVX modify command for a QP, using the call that matches the QP's
 * object type: mlx5dv_devx_qp_modify() for verbs QPs and
 * mlx5dv_devx_obj_modify() for DEVX QPs.
 *
 * @param [in]  qp      QP to modify.
 * @param [in]  in      Command input buffer.
 * @param [in]  inlen   Size of @a in, in bytes.
 * @param [out] out     Command output buffer.
 * @param [in]  outlen  Size of @a out, in bytes.
 *
 * @return UCS_OK on success, UCS_ERR_IO_ERROR if the command failed,
 *         UCS_ERR_UNSUPPORTED for UCT_IB_MLX5_OBJ_TYPE_LAST.
 */
ucs_status_t uct_ib_mlx5_devx_modify_qp(uct_ib_mlx5_qp_t *qp,
                                        const void *in, size_t inlen,
                                        void *out, size_t outlen)
{
    int rc;

    switch (qp->type) {
    case UCT_IB_MLX5_OBJ_TYPE_VERBS:
        rc = mlx5dv_devx_qp_modify(qp->verbs.qp, in, inlen, out, outlen);
        if (rc == 0) {
            return UCS_OK;
        }

        ucs_error("mlx5dv_devx_qp_modify(%x) failed, syndrome %x: %m",
                  UCT_IB_MLX5DV_GET(modify_qp_in, in, opcode),
                  UCT_IB_MLX5DV_GET(modify_qp_out, out, syndrome));
        return UCS_ERR_IO_ERROR;

    case UCT_IB_MLX5_OBJ_TYPE_DEVX:
        rc = mlx5dv_devx_obj_modify(qp->devx.obj, in, inlen, out, outlen);
        if (rc == 0) {
            return UCS_OK;
        }

        ucs_error("mlx5dv_devx_obj_modify(%x) failed, syndrome %x: %m",
                  UCT_IB_MLX5DV_GET(modify_qp_in, in, opcode),
                  UCT_IB_MLX5DV_GET(modify_qp_out, out, syndrome));
        return UCS_ERR_IO_ERROR;

    case UCT_IB_MLX5_OBJ_TYPE_LAST:
        return UCS_ERR_UNSUPPORTED;
    }

    /* Reached only when qp->type matches none of the cases above */
    return UCS_OK;
}
/**
 * Move a QP to the error or reset state with a DEVX command.
 *
 * @param [in] qp     QP to modify.
 * @param [in] state  Target state; only IBV_QPS_ERR and IBV_QPS_RESET are
 *                    handled.
 *
 * @return UCS_ERR_UNSUPPORTED for any other @a state, otherwise the status
 *         returned by uct_ib_mlx5_devx_modify_qp().
 */
ucs_status_t uct_ib_mlx5_devx_modify_qp_state(uct_ib_mlx5_qp_t *qp,
                                              enum ibv_qp_state state)
{
    char cmd_in[UCT_IB_MLX5DV_ST_SZ_BYTES(modify_qp_in)]   = {};
    char cmd_out[UCT_IB_MLX5DV_ST_SZ_BYTES(modify_qp_out)] = {};
    unsigned cmd_op;

    /* Map the requested state to the command opcode */
    if (state == IBV_QPS_ERR) {
        cmd_op = UCT_IB_MLX5_CMD_OP_2ERR_QP;
    } else if (state == IBV_QPS_RESET) {
        cmd_op = UCT_IB_MLX5_CMD_OP_2RST_QP;
    } else {
        return UCS_ERR_UNSUPPORTED;
    }

    UCT_IB_MLX5DV_SET(modify_qp_in, cmd_in, opcode, cmd_op);
    UCT_IB_MLX5DV_SET(modify_qp_in, cmd_in, qpn, qp->qp_num);

    return uct_ib_mlx5_devx_modify_qp(qp, cmd_in, sizeof(cmd_in),
                                      cmd_out, sizeof(cmd_out));
}
/**
 * Destroy a DEVX QP and release what it holds, in this order: the DEVX
 * object, the doorbell record, the umem registration (if any) and the
 * work-queue buffer.
 *
 * A failure to destroy the DEVX object is logged; the remaining resources
 * are released regardless.
 *
 * @param [in] qp  QP to destroy.
 */
void uct_ib_mlx5_devx_destroy_qp(uct_ib_mlx5_qp_t *qp)
{
    if (mlx5dv_devx_obj_destroy(qp->devx.obj) != 0) {
        ucs_error("mlx5dv_devx_obj_destroy(QP) failed: %m");
    }

    uct_ib_mlx5_put_dbrec(qp->devx.dbrec);

    /* The umem is NULL when the QP was created without a work-queue buffer */
    if (qp->devx.mem != NULL) {
        mlx5dv_devx_umem_dereg(qp->devx.mem);
    }

    ucs_free(qp->devx.wq_buf);
}
#endif
/**
 * Arm a completion queue: store the arm command in the CQ doorbell record and
 * then write it, together with the CQ number, to the doorbell location in the
 * UAR.
 *
 * @param [in] cq         CQ to arm; cq_sn, cq_ci, cq_num, dbrec and uar are used.
 * @param [in] solicited  Non-zero selects MLX5_CQ_DB_REQ_NOT_SOL, zero selects
 *                        MLX5_CQ_DB_REQ_NOT.
 *
 * @return Always UCS_OK.
 */
ucs_status_t uct_ib_mlx5dv_arm_cq(uct_ib_mlx5_cq_t *cq, int solicited)
{
uint64_t doorbell, sn_ci_cmd;
uint32_t sn, ci, cmd;
/* Only the low 2 bits of the sequence number and the low 24 bits of the
 * consumer index are used */
sn = cq->cq_sn & 3;
ci = cq->cq_ci & 0xffffff;
cmd = solicited ? MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT;
/* 32-bit arm word: sequence number at bit 28, OR'ed with command and index */
sn_ci_cmd = (sn << 28) | cmd | ci;
/* Doorbell record gets the big-endian arm word */
cq->dbrec[UCT_IB_MLX5_CQ_ARM_DB] = htobe32(sn_ci_cmd);
/* NOTE(review): presumably orders the doorbell-record store before the UAR
 * write below; the fence helper is defined outside this file - confirm */
ucs_memory_cpu_fence();
/* 64-bit doorbell: arm word in the upper half, CQ number in the lower half */
doorbell = (sn_ci_cmd << 32) | cq->cq_num;
/* Single 64-bit big-endian store at offset MLX5_CQ_DOORBELL from the UAR base */
*(uint64_t *)((uint8_t *)cq->uar + MLX5_CQ_DOORBELL) = htobe64(doorbell);
/* NOTE(review): presumably flushes the store towards the device - confirm */
ucs_memory_bus_store_fence();
return UCS_OK;
}
#if HAVE_DECL_MLX5DV_OBJ_AH
void uct_ib_mlx5_get_av(struct ibv_ah *ah, struct mlx5_wqe_av *av)
{
struct mlx5dv_obj dv;
struct mlx5dv_ah dah;
dv.ah.in = ah;
dv.ah.out = &dah;
mlx5dv_init_obj(&dv, MLX5DV_OBJ_AH);
*av = *(dah.av);
av->dqp_dct |= UCT_IB_MLX5_EXTENDED_UD_AV;
}
#elif !HAVE_INFINIBAND_MLX5_HW_H
/**
 * Fallback compiled when MLX5DV_OBJ_AH is not declared and
 * HAVE_INFINIBAND_MLX5_HW_H is not set: the address vector cannot be
 * obtained, so the call is reported through ucs_bug(). Neither @a ah nor
 * @a av is accessed.
 */
void uct_ib_mlx5_get_av(struct ibv_ah *ah, struct mlx5_wqe_av *av)
{
ucs_bug("MLX5DV_OBJ_AH not supported");
}
#endif
#if HAVE_DEVX
/**
 * Report whether the interface's device has UCT_IB_DEVICE_FLAG_AV set.
 *
 * @param [in]  iface       IB interface whose device flags are inspected.
 * @param [out] compact_av  Set to 1 if the flag is set, 0 otherwise.
 *
 * @return Always UCS_OK.
 */
ucs_status_t uct_ib_mlx5_get_compact_av(uct_ib_iface_t *iface, int *compact_av)
{
    const uct_ib_device_t *ib_dev = uct_ib_iface_device(iface);

    *compact_av = (ib_dev->flags & UCT_IB_DEVICE_FLAG_AV) != 0;

    return UCS_OK;
}
#endif