/**
* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/
#include "ib_mlx5_log.h"
#include <uct/ib/base/ib_device.h>
#include <string.h>
static void uct_ib_mlx5_wqe_dump(uct_ib_iface_t *iface, void *wqe, void *qstart,
void *qend, int max_sge, int dump_qp,
uct_log_data_dump_func_t packet_dump_cb,
char *buffer, size_t max, uct_ib_log_sge_t *log_sge);
static const char *uct_ib_mlx5_cqe_err_opcode(uct_ib_mlx5_err_cqe_t *ecqe)
{
uint8_t wqe_err_opcode = ntohl(ecqe->s_wqe_opcode_qpn) >> 24;
switch (ecqe->op_own >> 4) {
case MLX5_CQE_REQ_ERR:
switch (wqe_err_opcode) {
case MLX5_OPCODE_RDMA_WRITE_IMM:
case MLX5_OPCODE_RDMA_WRITE:
return "RDMA_WRITE";
case MLX5_OPCODE_SEND_IMM:
case MLX5_OPCODE_SEND:
case MLX5_OPCODE_SEND_INVAL:
return "SEND";
case MLX5_OPCODE_RDMA_READ:
return "RDMA_READ";
case MLX5_OPCODE_ATOMIC_CS:
return "CSWAP";
case MLX5_OPCODE_ATOMIC_FA:
return "FETCH_ADD";
case MLX5_OPCODE_ATOMIC_MASKED_CS:
return "MASKED_CSWAP";
case MLX5_OPCODE_ATOMIC_MASKED_FA:
return "MASKED_FETCH_ADD";
default:
return "";
}
case MLX5_CQE_RESP_ERR:
return "RECV";
default:
return "";
}
}
ucs_status_t uct_ib_mlx5_completion_with_err(uct_ib_iface_t *iface,
uct_ib_mlx5_err_cqe_t *ecqe,
uct_ib_mlx5_txwq_t *txwq,
ucs_log_level_t log_level)
{
ucs_status_t status = UCS_ERR_IO_ERROR;
char err_info[256] = {};
char wqe_info[256] = {};
uint16_t wqe_index;
uint32_t qp_num;
void *wqe;
wqe_index = ntohs(ecqe->wqe_counter);
qp_num = ntohl(ecqe->s_wqe_opcode_qpn) & UCS_MASK(UCT_IB_QPN_ORDER);
if (txwq != NULL) {
wqe_index %= UCS_PTR_BYTE_DIFF(txwq->qstart, txwq->qend) / MLX5_SEND_WQE_BB;
}
if (ecqe->syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR) {
ucs_trace("QP 0x%x wqe[%d] is flushed", qp_num, wqe_index);
return status;
}
switch (ecqe->syndrome) {
case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
snprintf(err_info, sizeof(err_info), "Local length");
break;
case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
snprintf(err_info, sizeof(err_info), "Local QP operation");
break;
case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
snprintf(err_info, sizeof(err_info), "Local protection");
break;
case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
snprintf(err_info, sizeof(err_info), "WR flushed because QP in error state");
break;
case MLX5_CQE_SYNDROME_MW_BIND_ERR:
snprintf(err_info, sizeof(err_info), "Memory window bind");
break;
case MLX5_CQE_SYNDROME_BAD_RESP_ERR:
snprintf(err_info, sizeof(err_info), "Bad response");
break;
case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR:
snprintf(err_info, sizeof(err_info), "Local access");
break;
case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
snprintf(err_info, sizeof(err_info), "Invalid request");
break;
case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
snprintf(err_info, sizeof(err_info), "Remote access");
break;
case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
snprintf(err_info, sizeof(err_info), "Remote QP");
break;
case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
snprintf(err_info, sizeof(err_info), "Transport retry count exceeded");
status = UCS_ERR_ENDPOINT_TIMEOUT;
break;
case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
snprintf(err_info, sizeof(err_info), "Receive-no-ready retry count exceeded");
status = UCS_ERR_ENDPOINT_TIMEOUT;
break;
case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
snprintf(err_info, sizeof(err_info), "Remote side aborted");
status = UCS_ERR_ENDPOINT_TIMEOUT;
break;
default:
snprintf(err_info, sizeof(err_info), "Generic");
break;
}
if ((txwq != NULL) && ((ecqe->op_own >> 4) == MLX5_CQE_REQ_ERR)) {
wqe = UCS_PTR_BYTE_OFFSET(txwq->qstart, MLX5_SEND_WQE_BB * wqe_index);
uct_ib_mlx5_wqe_dump(iface, wqe, txwq->qstart, txwq->qend, INT_MAX, 0,
NULL, wqe_info, sizeof(wqe_info) - 1, NULL);
} else {
snprintf(wqe_info, sizeof(wqe_info) - 1, "opcode %s",
uct_ib_mlx5_cqe_err_opcode(ecqe));
}
ucs_log(log_level,
"%s on "UCT_IB_IFACE_FMT"/%s (synd 0x%x vend 0x%x hw_synd %d/%d)\n"
"%s QP 0x%x wqe[%d]: %s",
err_info, UCT_IB_IFACE_ARG(iface),
uct_ib_iface_is_roce(iface) ? "RoCE" : "IB",
ecqe->syndrome, ecqe->vendor_err_synd, ecqe->hw_synd_type >> 4,
ecqe->hw_err_synd, uct_ib_qp_type_str(iface->config.qp_type),
qp_num, wqe_index, wqe_info);
return status;
}
static unsigned uct_ib_mlx5_parse_dseg(void **dseg_p, void *qstart, void *qend,
struct ibv_sge *sg_list, int *index,
int *is_inline)
{
struct mlx5_wqe_data_seg *dpseg;
struct mlx5_wqe_inl_data_seg *inl;
struct ibv_sge *sg = &sg_list[*index];
int byte_count;
void *addr;
int ds;
if (*dseg_p == qend) {
*dseg_p = qstart;
}
inl = *dseg_p;
if (inl->byte_count & htonl(MLX5_INLINE_SEG)) {
addr = inl + 1;
sg->addr = (uintptr_t)addr;
sg->lkey = 0;
byte_count = ntohl(inl->byte_count) & ~MLX5_INLINE_SEG;
if (UCS_PTR_BYTE_OFFSET(addr, byte_count) > qend) {
sg->length = UCS_PTR_BYTE_DIFF(addr, qend);
(sg + 1)->addr = (uintptr_t)qstart;
(sg + 1)->lkey = 0;
(sg + 1)->length = byte_count - sg->length;
++(*index);
} else {
sg->length = byte_count;
}
*is_inline = 1;
ds = ucs_div_round_up(sizeof(*inl) + byte_count,
UCT_IB_MLX5_WQE_SEG_SIZE);
++(*index);
} else {
dpseg = *dseg_p;
sg->addr = be64toh(dpseg->addr);
sg->length = ntohl(dpseg->byte_count);
sg->lkey = ntohl(dpseg->lkey);
*is_inline = 0;
ds = 1;
++(*index);
}
*dseg_p = UCS_PTR_BYTE_OFFSET(*dseg_p, ds * UCT_IB_MLX5_WQE_SEG_SIZE);
if (*dseg_p >= qend) {
*dseg_p = UCS_PTR_BYTE_OFFSET(*dseg_p, -UCS_PTR_BYTE_DIFF(qstart, qend));
}
return ds;
}
static uint64_t network_to_host(void *ptr, int size)
{
if (size == 4) {
return ntohl(*(uint32_t*)ptr);
} else if (size == 8) {
return be64toh(*(uint64_t*)ptr);
} else {
return *(uint64_t*)ptr;
}
}
static size_t uct_ib_mlx5_dump_dgram(char *buf, size_t max, void *seg, int is_eth)
{
struct mlx5_wqe_datagram_seg *dgseg = seg;
struct mlx5_base_av *base_av;
struct mlx5_grh_av *grh_av;
char gid_buf[32];
int sgid_index;
char *p, *endp;
p = buf;
endp = buf + max - 1;
base_av = mlx5_av_base(&dgseg->av);
snprintf(p, endp - p, " [rqpn 0x%x",
ntohl(base_av->dqp_dct & ~UCT_IB_MLX5_EXTENDED_UD_AV));
p += strlen(p);
if (!is_eth) {
snprintf(p, endp - p, " rlid %d", ntohs(base_av->rlid));
p += strlen(p);
}
if (mlx5_av_base(&dgseg->av)->dqp_dct & UCT_IB_MLX5_EXTENDED_UD_AV) {
grh_av = mlx5_av_grh(&dgseg->av);
if (is_eth || (grh_av->grh_gid_fl & UCT_IB_MLX5_AV_GRH_PRESENT)) {
if (is_eth) {
snprintf(p, endp - p, " rmac %02x:%02x:%02x:%02x:%02x:%02x",
grh_av->rmac[0], grh_av->rmac[1], grh_av->rmac[2],
grh_av->rmac[3], grh_av->rmac[4], grh_av->rmac[5]);
p += strlen(p);
}
sgid_index = (htonl(grh_av->grh_gid_fl) >> 20) & UCS_MASK(8);
snprintf(p, endp - p, " sgix %d dgid %s tc %d]", sgid_index,
inet_ntop(AF_INET6, grh_av->rgid, gid_buf, sizeof(gid_buf)),
grh_av->tclass);
} else {
snprintf(p, endp - p, "]");
}
return UCT_IB_MLX5_AV_FULL_SIZE;
} else {
snprintf(p, endp - p, "]");
return UCT_IB_MLX5_AV_BASE_SIZE;
}
}
static int uct_ib_mlx5_is_qp_require_av_seg(int qp_type)
{
if (qp_type == IBV_QPT_UD) {
return 1;
}
#if HAVE_TL_DC
if (qp_type == UCT_IB_QPT_DCI) {
return 1;
}
#endif
return 0;
}
static void uct_ib_mlx5_wqe_dump(uct_ib_iface_t *iface, void *wqe, void *qstart,
void *qend, int max_sge, int dump_qp,
uct_log_data_dump_func_t packet_dump_cb,
char *buffer, size_t max, uct_ib_log_sge_t *log_sge)
{
static uct_ib_opcode_t opcodes[] = {
[MLX5_OPCODE_NOP] = { "NOP", 0 },
[MLX5_OPCODE_RDMA_WRITE] = { "RDMA_WRITE", UCT_IB_OPCODE_FLAG_HAS_RADDR },
[MLX5_OPCODE_RDMA_READ] = { "RDMA_READ", UCT_IB_OPCODE_FLAG_HAS_RADDR },
[MLX5_OPCODE_SEND] = { "SEND", 0 },
[MLX5_OPCODE_SEND_IMM] = { "SEND_IMM", 0 },
[MLX5_OPCODE_ATOMIC_CS] = { "CSWAP", UCT_IB_OPCODE_FLAG_HAS_RADDR|UCT_IB_OPCODE_FLAG_HAS_ATOMIC },
[MLX5_OPCODE_ATOMIC_FA] = { "FETCH_ADD", UCT_IB_OPCODE_FLAG_HAS_RADDR|UCT_IB_OPCODE_FLAG_HAS_ATOMIC },
[MLX5_OPCODE_ATOMIC_MASKED_CS] = { "MASKED_CSWAP",
UCT_IB_OPCODE_FLAG_HAS_RADDR|UCT_IB_OPCODE_FLAG_HAS_EXT_ATOMIC },
[MLX5_OPCODE_ATOMIC_MASKED_FA] = { "MASKED_FETCH_ADD",
UCT_IB_OPCODE_FLAG_HAS_RADDR|UCT_IB_OPCODE_FLAG_HAS_EXT_ATOMIC },
};
struct mlx5_wqe_ctrl_seg *ctrl = wqe;
uint8_t opcode = ctrl->opmod_idx_opcode >> 24;
uint8_t opmod = ctrl->opmod_idx_opcode & 0xff;
uint32_t qp_num = ntohl(ctrl->qpn_ds) >> 8;
int ds = ctrl->qpn_ds >> 24;
uct_ib_opcode_t *op = &opcodes[opcode];
char *s = buffer;
char *ends = buffer + max;
struct ibv_sge sg_list[16];
uint64_t inline_bitmap;
int i, is_inline, is_eth;
size_t dg_size;
void *seg;
/* QP and WQE index */
if (dump_qp) {
snprintf(s, ends - s, "QP 0x%x [%03ld] ", qp_num,
UCS_PTR_BYTE_DIFF(qstart, wqe) / MLX5_SEND_WQE_BB);
s += strlen(s);
}
/* Opcode and flags */
uct_ib_log_dump_opcode(op,
ctrl->fm_ce_se & MLX5_WQE_CTRL_CQ_UPDATE,
ctrl->fm_ce_se & MLX5_WQE_CTRL_FENCE,
ctrl->fm_ce_se & (1 << 1),
s, ends - s);
s += strlen(s);
/* Additional segments */
--ds;
seg = ctrl + 1;
if (seg == qend) {
seg = qstart;
}
if (uct_ib_mlx5_is_qp_require_av_seg(iface->config.qp_type)) {
is_eth = uct_ib_iface_is_roce(iface);
dg_size = uct_ib_mlx5_dump_dgram(s, ends - s, seg, is_eth);
s += strlen(s);
seg = (char *)seg + dg_size;
ds -= ucs_div_round_up(dg_size, UCT_IB_MLX5_WQE_SEG_SIZE);
}
if (seg == qend) {
seg = qstart;
}
/* Remote address segment */
if (op->flags & UCT_IB_OPCODE_FLAG_HAS_RADDR) {
struct mlx5_wqe_raddr_seg *rseg = seg;
uct_ib_log_dump_remote_addr(be64toh(rseg->raddr), ntohl(rseg->rkey), s, ends - s);
s += strlen(s);
--ds;
seg = rseg + 1;
if (seg == qend) {
seg = qstart;
}
}
/* Atomic segment */
if (op->flags & UCT_IB_OPCODE_FLAG_HAS_ATOMIC) {
struct mlx5_wqe_atomic_seg *atomic = seg;
if (opcode == MLX5_OPCODE_ATOMIC_FA) {
uct_ib_log_dump_atomic_fadd(be64toh(atomic->swap_add), s, ends - s);
} else if (opcode == MLX5_OPCODE_ATOMIC_CS) {
uct_ib_log_dump_atomic_cswap(be64toh(atomic->compare),
be64toh(atomic->swap_add), s, ends - s);
}
s += strlen(s);
--ds;
seg = atomic + 1;
if (seg == qend) {
seg = qstart;
}
}
/* Extended atomic segment */
if (op->flags & UCT_IB_OPCODE_FLAG_HAS_EXT_ATOMIC) {
uint64_t add, boundary, compare, swap, compare_mask, swap_mask;
int size = 1 << ((opmod & 7) + 2);
if (opcode == MLX5_OPCODE_ATOMIC_MASKED_FA) {
add = network_to_host(seg, size);
boundary = network_to_host(UCS_PTR_BYTE_OFFSET(seg, size), size);
seg = UCS_PTR_BYTE_OFFSET(seg,
ucs_align_up_pow2(size * 2,
UCT_IB_MLX5_WQE_SEG_SIZE));
ds -= ucs_div_round_up(2 * size, UCT_IB_MLX5_WQE_SEG_SIZE);
uct_ib_log_dump_atomic_masked_fadd(size, add, boundary, s, ends - s);
} else if (opcode == MLX5_OPCODE_ATOMIC_MASKED_CS) {
swap = network_to_host(seg, size);
compare = network_to_host(UCS_PTR_BYTE_OFFSET(seg, size), size);
seg = UCS_PTR_BYTE_OFFSET(seg, size * 2);
if (seg == qend) {
seg = qstart;
}
swap_mask = network_to_host(seg, size);
compare_mask = network_to_host(UCS_PTR_BYTE_OFFSET(seg, size), size);
seg = UCS_PTR_BYTE_OFFSET(seg, size * 2);
if (seg == qend) {
seg = qstart;
}
ucs_assert(((size * 4) % UCT_IB_MLX5_WQE_SEG_SIZE) == 0);
ds -= size * 4;
uct_ib_log_dump_atomic_masked_cswap(size, compare, compare_mask, swap,
swap_mask, s, ends - s);
}
s += strlen(s);
}
/* Data segments*/
if (log_sge == NULL) {
i = 0;
inline_bitmap = 0;
while ((ds > 0) && (i < sizeof(sg_list) / sizeof(sg_list[0]))) {
ds -= uct_ib_mlx5_parse_dseg(&seg, qstart, qend, sg_list, &i, &is_inline);
if (is_inline) {
inline_bitmap |= UCS_BIT(i-1);
}
s += strlen(s);
}
}
uct_ib_log_dump_sg_list(iface, UCT_AM_TRACE_TYPE_SEND,
log_sge ? log_sge->sg_list : sg_list,
log_sge ? log_sge->num_sge : ucs_min(i, max_sge),
log_sge ? log_sge->inline_bitmap : inline_bitmap,
packet_dump_cb, s, ends - s);
}
void __uct_ib_mlx5_log_tx(const char *file, int line, const char *function,
uct_ib_iface_t *iface, void *wqe, void *qstart,
void *qend, int max_sge, uct_ib_log_sge_t *log_sge,
uct_log_data_dump_func_t packet_dump_cb)
{
char buf[256] = {0};
uct_ib_mlx5_wqe_dump(iface, wqe, qstart, qend, max_sge, 1, packet_dump_cb,
buf, sizeof(buf) - 1, log_sge);
uct_log_data(file, line, function, buf);
}
void uct_ib_mlx5_cqe_dump(const char *file, int line, const char *function, struct mlx5_cqe64 *cqe)
{
char buf[256] = {0};
snprintf(buf, sizeof(buf) - 1,
"CQE(op_own 0x%x) qp 0x%x sqp 0x%x slid %d bytes %d wqe_idx %d ",
(unsigned)cqe->op_own,
(unsigned)(ntohl(cqe->sop_drop_qpn) & UCS_MASK(UCT_IB_QPN_ORDER)),
(unsigned)(ntohl(cqe->flags_rqpn) & UCS_MASK(UCT_IB_QPN_ORDER)),
(unsigned)ntohs(cqe->slid),
(unsigned)ntohl(cqe->byte_cnt),
(unsigned)ntohs(cqe->wqe_counter));
uct_log_data(file, line, function, buf);
}
void __uct_ib_mlx5_log_rx(const char *file, int line, const char *function,
uct_ib_iface_t *iface, struct mlx5_cqe64 *cqe,
void *data, uct_log_data_dump_func_t packet_dump_cb)
{
char buf[256] = {0};
size_t length;
length = ntohl(cqe->byte_cnt);
if (iface->config.qp_type == IBV_QPT_UD) {
length -= UCT_IB_GRH_LEN;
data = UCS_PTR_BYTE_OFFSET(data, UCT_IB_GRH_LEN);
}
uct_ib_log_dump_recv_completion(iface,
ntohl(cqe->sop_drop_qpn) & UCS_MASK(UCT_IB_QPN_ORDER),
ntohl(cqe->flags_rqpn) & UCS_MASK(UCT_IB_QPN_ORDER),
ntohs(cqe->slid),
data, length,
packet_dump_cb, buf, sizeof(buf) - 1);
uct_log_data(file, line, function, buf);
}