// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause

// Authors: Bernard Metzler <bmt@zurich.ibm.com>
// Copyright (c) 2008-2019, IBM Corporation

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <sys/mman.h>
#include <net/if.h>
#include <pthread.h>
#include <stdatomic.h>
#include <assert.h>

#include "siw_abi.h"
#include "siw.h"

static const int siw_debug;
static void siw_free_context(struct ibv_context *ibv_ctx);

/*
 * Query device attributes from the kernel and format the packed 64-bit
 * firmware version as "major.minor.sub_minor" into attr->fw_ver.
 *
 * Returns 0 on success, or the error from ibv_cmd_query_device().
 */
static int siw_query_device(struct ibv_context *ctx,
			    struct ibv_device_attr *attr)
{
	struct ibv_query_device cmd;
	uint64_t raw_fw_ver;
	unsigned int major, minor, sub_minor;
	int rv;

	memset(&cmd, 0, sizeof(cmd));

	rv = ibv_cmd_query_device(ctx, attr, &raw_fw_ver, &cmd, sizeof(cmd));
	if (rv)
		return rv;

	/* Version components are packed as 16-bit fields inside raw_fw_ver. */
	major = (raw_fw_ver >> 32) & 0xffff;
	minor = (raw_fw_ver >> 16) & 0xffff;
	sub_minor = raw_fw_ver & 0xffff;

	/* %u, not %d: the extracted fields are unsigned. */
	snprintf(attr->fw_ver, sizeof(attr->fw_ver), "%u.%u.%u", major, minor,
		 sub_minor);

	return 0;
}

static int siw_query_port(struct ibv_context *ctx, uint8_t port,
			  struct ibv_port_attr *attr)
{
	struct ibv_query_port cmd;

	memset(&cmd, 0, sizeof(cmd));

	return ibv_cmd_query_port(ctx, port, attr, &cmd, sizeof(cmd));
}

static int siw_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
			int attr_mask, struct ibv_qp_init_attr *init_attr)
{
	struct ibv_query_qp cmd;

	memset(&cmd, 0, sizeof(cmd));

	return ibv_cmd_query_qp(qp, attr, attr_mask, init_attr, &cmd,
				sizeof(cmd));
}

static struct ibv_pd *siw_alloc_pd(struct ibv_context *ctx)
{
	struct ibv_alloc_pd cmd;
	struct ib_uverbs_alloc_pd_resp resp;
	struct ibv_pd *pd;

	memset(&cmd, 0, sizeof(cmd));

	pd = calloc(1, sizeof(*pd));
	if (!pd)
		return NULL;

	if (ibv_cmd_alloc_pd(ctx, pd, &cmd, sizeof(cmd), &resp, sizeof(resp))) {
		free(pd);
		return NULL;
	}
	return pd;
}

/* Deallocate a PD; the memory is released only if the kernel agrees. */
static int siw_free_pd(struct ibv_pd *pd)
{
	int ret = ibv_cmd_dealloc_pd(pd);

	if (!ret)
		free(pd);

	return ret;
}

/*
 * Register a memory region [addr, addr + len) with the given access rights.
 * Returns the embedded ibv_mr of the new siw_mr, or NULL on failure.
 */
static struct ibv_mr *siw_reg_mr(struct ibv_pd *pd, void *addr, size_t len,
				 uint64_t hca_va, int access)
{
	struct siw_cmd_reg_mr cmd = {};
	struct siw_cmd_reg_mr_resp resp = {};
	struct siw_mr *mr = calloc(1, sizeof(*mr));

	if (!mr)
		return NULL;

	if (ibv_cmd_reg_mr(pd, addr, len, hca_va, access, &mr->base_mr,
			   &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp,
			   sizeof(resp))) {
		free(mr);
		return NULL;
	}
	return &mr->base_mr.ibv_mr;
}

/* Deregister an MR; free the containing siw_mr only on kernel success. */
static int siw_dereg_mr(struct verbs_mr *base_mr)
{
	int ret = ibv_cmd_dereg_mr(base_mr);

	if (!ret)
		free(mr_base2siw(base_mr));

	return ret;
}

/*
 * Create a completion queue. The kernel returns an mmap key through which
 * the CQE array (followed by a siw_cq_ctrl block holding the notification
 * flags) is mapped into user space.
 *
 * Returns the embedded base CQ, or NULL on failure.
 */
static struct ibv_cq *siw_create_cq(struct ibv_context *ctx, int num_cqe,
				    struct ibv_comp_channel *channel,
				    int comp_vector)
{
	struct siw_cmd_create_cq cmd = {};
	struct siw_cmd_create_cq_resp resp = {};
	struct siw_cq *cq;
	int cq_size, rv;

	cq = calloc(1, sizeof(*cq));
	if (!cq)
		return NULL;

	rv = ibv_cmd_create_cq(ctx, num_cqe, channel, comp_vector, &cq->base_cq,
			       &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp,
			       sizeof(resp));
	if (rv) {
		if (siw_debug)
			printf("libsiw: CQ creation failed: %d\n", rv);
		free(cq);
		return NULL;
	}
	if (resp.cq_key == SIW_INVAL_UOBJ_KEY) {
		if (siw_debug)
			printf("libsiw: prepare CQ mapping failed\n");
		goto fail;
	}
	pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE);
	cq->id = resp.cq_id;
	cq->num_cqe = resp.num_cqe;

	/* CQE array plus trailing control block for notification state. */
	cq_size = resp.num_cqe * sizeof(struct siw_cqe) +
		  sizeof(struct siw_cq_ctrl);

	cq->queue = mmap(NULL, cq_size, PROT_READ | PROT_WRITE,
			 MAP_SHARED, ctx->cmd_fd, resp.cq_key);
	if (cq->queue == MAP_FAILED) {
		if (siw_debug)
			printf("libsiw: CQ mapping failed: %d", errno);
		cq->queue = NULL;
		goto fail_lock;
	}
	cq->ctrl = (struct siw_cq_ctrl *)&cq->queue[cq->num_cqe];
	cq->ctrl->flags = SIW_NOTIFY_NOT;

	return &cq->base_cq;

fail_lock:
	/* The spinlock was initialized above; release its resources. */
	pthread_spin_destroy(&cq->lock);
fail:
	ibv_cmd_destroy_cq(&cq->base_cq);
	free(cq);

	return NULL;
}

/* Resizing CQs is not supported by the siw provider. */
static int siw_resize_cq(struct ibv_cq *base_cq, int num_cqe)
{
	(void)base_cq;
	(void)num_cqe;
	return -EOPNOTSUPP;
}

/*
 * Destroy a CQ: unmap the CQE array, ask the kernel to remove the object,
 * then release the user-space bookkeeping.
 *
 * Returns 0 on success or the error from ibv_cmd_destroy_cq().
 */
static int siw_destroy_cq(struct ibv_cq *base_cq)
{
	struct siw_cq *cq = cq_base2siw(base_cq);
	int rv;

	/*
	 * Take the lock for real. The previous
	 * assert(pthread_spin_trylock(...)) lost its locking side effect
	 * when built with NDEBUG and asserted the inverse condition
	 * (trylock returns 0 on success).
	 */
	pthread_spin_lock(&cq->lock);

	if (cq->queue)
		munmap(cq->queue, cq->num_cqe * sizeof(struct siw_cqe) +
					  sizeof(struct siw_cq_ctrl));

	rv = ibv_cmd_destroy_cq(base_cq);
	if (rv) {
		pthread_spin_unlock(&cq->lock);
		return rv;
	}
	/* Destroying a locked spinlock is undefined; unlock first. */
	pthread_spin_unlock(&cq->lock);
	pthread_spin_destroy(&cq->lock);

	free(cq);

	return 0;
}

/*
 * Create a shared receive queue. The kernel returns an mmap key through
 * which the RQE array is mapped into user space.
 *
 * Returns the embedded base SRQ, or NULL on failure.
 */
static struct ibv_srq *siw_create_srq(struct ibv_pd *pd,
				      struct ibv_srq_init_attr *attr)
{
	struct siw_cmd_create_srq cmd = {};
	struct siw_cmd_create_srq_resp resp = {};
	struct ibv_context *ctx = pd->context;
	struct siw_srq *srq;
	int rv, rq_size;

	srq = calloc(1, sizeof(*srq));
	if (!srq)
		return NULL;

	rv = ibv_cmd_create_srq(pd, &srq->base_srq, attr, &cmd.ibv_cmd,
				sizeof(cmd), &resp.ibv_resp, sizeof(resp));
	if (rv) {
		if (siw_debug)
			printf("libsiw: creating SRQ failed\n");
		free(srq);
		return NULL;
	}
	if (resp.srq_key == SIW_INVAL_UOBJ_KEY) {
		if (siw_debug)
			printf("libsiw: prepare SRQ mapping failed\n");
		goto fail;
	}
	pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE);
	rq_size = resp.num_rqe * sizeof(struct siw_rqe);
	srq->num_rqe = resp.num_rqe;

	srq->recvq = mmap(NULL, rq_size, PROT_READ | PROT_WRITE,
			  MAP_SHARED, ctx->cmd_fd, resp.srq_key);
	if (srq->recvq == MAP_FAILED) {
		if (siw_debug)
			printf("libsiw: SRQ mapping failed: %d", errno);
		srq->recvq = NULL;
		goto fail_lock;
	}
	return &srq->base_srq;

fail_lock:
	/* The spinlock was initialized above; release its resources. */
	pthread_spin_destroy(&srq->lock);
fail:
	ibv_cmd_destroy_srq(&srq->base_srq);
	free(srq);

	return NULL;
}

/* Modify SRQ attributes while holding off concurrent receive posting. */
static int siw_modify_srq(struct ibv_srq *base_srq, struct ibv_srq_attr *attr,
			  int attr_mask)
{
	struct siw_srq *srq = srq_base2siw(base_srq);
	struct ibv_modify_srq cmd = {};
	int ret;

	pthread_spin_lock(&srq->lock);
	ret = ibv_cmd_modify_srq(base_srq, attr, attr_mask, &cmd, sizeof(cmd));
	pthread_spin_unlock(&srq->lock);

	return ret;
}

/*
 * Destroy an SRQ: remove the kernel object, unmap the receive queue and
 * free the user-space bookkeeping.
 *
 * Returns 0 on success or the error from ibv_cmd_destroy_srq().
 */
static int siw_destroy_srq(struct ibv_srq *base_srq)
{
	struct siw_srq *srq = srq_base2siw(base_srq);
	int rv;

	/*
	 * Take the lock for real. The previous
	 * assert(pthread_spin_trylock(...)) lost its locking side effect
	 * when built with NDEBUG and asserted the inverse condition
	 * (trylock returns 0 on success).
	 */
	pthread_spin_lock(&srq->lock);

	rv = ibv_cmd_destroy_srq(base_srq);
	if (rv) {
		pthread_spin_unlock(&srq->lock);
		return rv;
	}
	if (srq->recvq)
		munmap(srq->recvq, srq->num_rqe * sizeof(struct siw_rqe));

	/* Destroying a locked spinlock is undefined; unlock first. */
	pthread_spin_unlock(&srq->lock);
	pthread_spin_destroy(&srq->lock);

	free(srq);

	return 0;
}

static struct ibv_qp *siw_create_qp(struct ibv_pd *pd,
				    struct ibv_qp_init_attr *attr)
{
	struct siw_cmd_create_qp cmd = {};
	struct siw_cmd_create_qp_resp resp = {};
	struct siw_qp *qp;
	struct ibv_context *base_ctx = pd->context;
	int sq_size, rq_size, rv;

	memset(&cmd, 0, sizeof(cmd));
	memset(&resp, 0, sizeof(resp));

	qp = calloc(1, sizeof(*qp));
	if (!qp)
		return NULL;

	rv = ibv_cmd_create_qp(pd, &qp->base_qp, attr, &cmd.ibv_cmd,
			       sizeof(cmd), &resp.ibv_resp, sizeof(resp));

	if (rv) {
		if (siw_debug)
			printf("libsiw: QP creation failed\n");
		free(qp);
		return NULL;
	}
	if (resp.sq_key == SIW_INVAL_UOBJ_KEY ||
	    resp.rq_key == SIW_INVAL_UOBJ_KEY) {
		if (siw_debug)
			printf("libsiw: prepare QP mapping failed\n");
		goto fail;
	}
	qp->id = resp.qp_id;
	qp->num_sqe = resp.num_sqe;
	qp->num_rqe = resp.num_rqe;
	qp->sq_sig_all = attr->sq_sig_all;

	/* Init doorbell request structure */
	qp->db_req.hdr.command = IB_USER_VERBS_CMD_POST_SEND;
	qp->db_req.hdr.in_words = sizeof(qp->db_req) / 4;
	qp->db_req.hdr.out_words = sizeof(qp->db_resp) / 4;
	qp->db_req.response = (uintptr_t)&qp->db_resp;
	qp->db_req.wr_count = 0;
	qp->db_req.sge_count = 0;
	qp->db_req.wqe_size = sizeof(struct ibv_send_wr);

	pthread_spin_init(&qp->sq_lock, PTHREAD_PROCESS_PRIVATE);
	pthread_spin_init(&qp->rq_lock, PTHREAD_PROCESS_PRIVATE);

	sq_size = resp.num_sqe * sizeof(struct siw_sqe);

	qp->sendq = mmap(NULL, sq_size, PROT_READ | PROT_WRITE,
			 MAP_SHARED, base_ctx->cmd_fd, resp.sq_key);

	if (qp->sendq == MAP_FAILED) {
		if (siw_debug)
			printf("libsiw: SQ mapping failed: %d", errno);

		qp->sendq = NULL;
		goto fail;
	}
	if (attr->srq) {
		qp->srq = srq_base2siw(attr->srq);
	} else {
		rq_size = resp.num_rqe * sizeof(struct siw_rqe);

		qp->recvq = mmap(NULL, rq_size, PROT_READ | PROT_WRITE,
				 MAP_SHARED, base_ctx->cmd_fd, resp.rq_key);

		if (qp->recvq == MAP_FAILED) {
			if (siw_debug)
				printf("libsiw: RQ mapping failed: %d\n",
				       resp.num_rqe);
			qp->recvq = NULL;
			goto fail;
		}
	}
	qp->db_req.qp_handle = qp->base_qp.handle;

	return &qp->base_qp;
fail:
	ibv_cmd_destroy_qp(&qp->base_qp);

	if (qp->sendq)
		munmap(qp->sendq, qp->num_sqe * sizeof(struct siw_sqe));
	if (qp->recvq)
		munmap(qp->recvq, qp->num_rqe * sizeof(struct siw_rqe));

	free(qp);

	return NULL;
}

static int siw_modify_qp(struct ibv_qp *base_qp, struct ibv_qp_attr *attr,
			 int attr_mask)
{
	struct ibv_modify_qp cmd;
	struct siw_qp *qp = qp_base2siw(base_qp);
	int rv;

	memset(&cmd, 0, sizeof(cmd));

	pthread_spin_lock(&qp->sq_lock);
	pthread_spin_lock(&qp->rq_lock);

	rv = ibv_cmd_modify_qp(base_qp, attr, attr_mask, &cmd, sizeof(cmd));

	pthread_spin_unlock(&qp->rq_lock);
	pthread_spin_unlock(&qp->sq_lock);

	return rv;
}

/*
 * Destroy a QP: unmap both work queues, remove the kernel object and free
 * the user-space bookkeeping.
 *
 * Returns 0 on success or the error from ibv_cmd_destroy_qp().
 */
static int siw_destroy_qp(struct ibv_qp *base_qp)
{
	struct siw_qp *qp = qp_base2siw(base_qp);
	int rv;

	/*
	 * Take both locks for real. The previous
	 * assert(pthread_spin_trylock(...)) calls lost their locking side
	 * effect when built with NDEBUG and asserted the inverse condition
	 * (trylock returns 0 on success).
	 */
	pthread_spin_lock(&qp->sq_lock);
	pthread_spin_lock(&qp->rq_lock);

	if (qp->sendq)
		munmap(qp->sendq, qp->num_sqe * sizeof(struct siw_sqe));
	if (qp->recvq)
		munmap(qp->recvq, qp->num_rqe * sizeof(struct siw_rqe));

	rv = ibv_cmd_destroy_qp(base_qp);
	if (rv) {
		pthread_spin_unlock(&qp->rq_lock);
		pthread_spin_unlock(&qp->sq_lock);
		return rv;
	}
	/* Destroying a locked spinlock is undefined; unlock first. */
	pthread_spin_unlock(&qp->rq_lock);
	pthread_spin_unlock(&qp->sq_lock);
	pthread_spin_destroy(&qp->rq_lock);
	pthread_spin_destroy(&qp->sq_lock);

	free(qp);

	return 0;
}

/* Address handles are not supported by the siw provider. */
static struct ibv_ah *siw_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
{
	(void)pd;
	(void)attr;
	return NULL;
}

/* Address handles are not supported by the siw provider. */
static int siw_destroy_ah(struct ibv_ah *ah)
{
	(void)ah;
	return -EOPNOTSUPP;
}

/*
 * Report asynchronous events. Error events are logged with the affected
 * object's id; benign state-change events are silently ignored.
 */
static void siw_async_event(struct ibv_context *ctx,
			    struct ibv_async_event *event)
{
	switch (event->event_type) {
	case IBV_EVENT_CQ_ERR:
		printf("libsiw: CQ[%d] event: error\n",
		       cq_base2siw(event->element.cq)->id);
		break;

	case IBV_EVENT_QP_FATAL:
		printf("libsiw: QP[%d] event: fatal error\n",
		       qp_base2siw(event->element.qp)->id);
		break;

	case IBV_EVENT_QP_REQ_ERR:
		printf("libsiw: QP[%d] event: request error\n",
		       qp_base2siw(event->element.qp)->id);
		break;

	case IBV_EVENT_QP_ACCESS_ERR:
		printf("libsiw: QP[%d] event: access error\n",
		       qp_base2siw(event->element.qp)->id);
		break;

	case IBV_EVENT_SQ_DRAINED:
	case IBV_EVENT_COMM_EST:
	case IBV_EVENT_QP_LAST_WQE_REACHED:
		/* Expected state transitions; nothing to report. */
		break;

	default:
		break;
	}
}

/*
 * Arm the CQ for completion notification by atomically publishing the
 * requested arming state in the shared control block; the kernel reads it
 * on its completion path. Always succeeds.
 */
static int siw_notify_cq(struct ibv_cq *ibcq, int solicited)
{
	struct siw_cq *cq = cq_base2siw(ibcq);
	uint32_t armed = solicited ?
		SIW_NOTIFY_SOLICITED :
		(SIW_NOTIFY_SOLICITED | SIW_NOTIFY_NEXT_COMPLETION);

	atomic_store((_Atomic(uint32_t) *)&cq->ctrl->flags, armed);

	return 0;
}

/*
 * Translation table from ibverbs send opcodes to siw opcodes.
 *
 * NOTE(review): lookups index this array directly with the IBV_WR_* value
 * (see push_send_wqe()), so entry order must match the IBV_WR_* enum
 * values; the .base field documents the index each entry is meant to
 * occupy. Entries mapped to SIW_NUM_OPCODES + 1 mark opcodes the provider
 * does not support and are rejected at post time.
 */
static const struct {
	enum ibv_wr_opcode base;
	enum siw_opcode siw;
} map_send_opcode[IBV_WR_DRIVER1 + 1] = {
	{ IBV_WR_RDMA_WRITE, SIW_OP_WRITE},
	{ IBV_WR_RDMA_WRITE_WITH_IMM, SIW_NUM_OPCODES + 1 },
	{ IBV_WR_SEND, SIW_OP_SEND },
	{ IBV_WR_SEND_WITH_IMM, SIW_NUM_OPCODES + 1 },
	{ IBV_WR_RDMA_READ, SIW_OP_READ },
	{ IBV_WR_ATOMIC_CMP_AND_SWP, SIW_NUM_OPCODES + 1 },
	{ IBV_WR_ATOMIC_FETCH_AND_ADD, SIW_NUM_OPCODES + 1 },
	{ IBV_WR_LOCAL_INV, SIW_NUM_OPCODES + 1 },
	{ IBV_WR_BIND_MW, SIW_NUM_OPCODES + 1 },
	{ IBV_WR_SEND_WITH_INV, SIW_OP_SEND_REMOTE_INV },
	{ IBV_WR_TSO, SIW_NUM_OPCODES + 1 },
	{ IBV_WR_DRIVER1, SIW_NUM_OPCODES + 1 }
};

/*
 * Translate IBV_SEND_* flag bits into their SIW_WQE_* counterparts.
 * SIW_WQE_VALID is always set; the caller publishes it atomically last.
 */
static inline uint16_t map_send_flags(int ibv_flags)
{
	uint16_t wqe_flags = SIW_WQE_VALID;

	wqe_flags |= (ibv_flags & IBV_SEND_SIGNALED) ? SIW_WQE_SIGNALLED : 0;
	wqe_flags |= (ibv_flags & IBV_SEND_SOLICITED) ? SIW_WQE_SOLICITED : 0;
	wqe_flags |= (ibv_flags & IBV_SEND_INLINE) ? SIW_WQE_INLINE : 0;
	wqe_flags |= (ibv_flags & IBV_SEND_FENCE) ? SIW_WQE_READ_FENCE : 0;

	return wqe_flags;
}

/*
 * Translate one ibverbs send work request into a siw SQE located in the
 * mmapped send queue. The SQE flags (containing SIW_WQE_VALID) are stored
 * atomically as the very last step, so the kernel never observes a
 * partially written SQE.
 *
 * Returns 0 on success; -EINVAL for an unsupported opcode, too many SGEs,
 * or inline data exceeding SIW_MAX_INLINE.
 */
static inline int push_send_wqe(struct ibv_send_wr *base_wr,
				struct siw_sqe *siw_sqe, int sig_all)
{
	uint32_t flags = map_send_flags(base_wr->send_flags);
	atomic_ushort *fp = (atomic_ushort *)&siw_sqe->flags;

	siw_sqe->id = base_wr->wr_id;
	siw_sqe->num_sge = base_wr->num_sge;
	/* rdma union members copied unconditionally — presumably ignored by
	 * the kernel for non-RDMA opcodes; TODO confirm. */
	siw_sqe->raddr = base_wr->wr.rdma.remote_addr;
	siw_sqe->rkey = base_wr->wr.rdma.rkey;

	/* Unsupported opcodes were mapped to SIW_NUM_OPCODES + 1. */
	siw_sqe->opcode = map_send_opcode[base_wr->opcode].siw;
	if (siw_sqe->opcode > SIW_NUM_OPCODES) {
		if (siw_debug)
			printf("libsiw: opcode %d unsupported\n",
			       base_wr->opcode);
		return -EINVAL;
	}
	if (sig_all)
		flags |= SIW_WQE_SIGNALLED;

	if (flags & SIW_WQE_INLINE) {
		/* Inline payload is copied directly into the SQE starting at
		 * sge[1]; sge[0].length records the total byte count. */
		char *data = (char *)&siw_sqe->sge[1];
		int bytes = 0, i = 0;

		/* Allow more than SIW_MAX_SGE, since content copied here */
		while (i < base_wr->num_sge) {
			bytes += base_wr->sg_list[i].length;
			if (bytes > (int)SIW_MAX_INLINE) {
				if (siw_debug)
					printf("libsiw: inline data: %d:%d\n",
					       bytes, (int)SIW_MAX_INLINE);
				return -EINVAL;
			}
			memcpy(data,
			       (void *)(uintptr_t)base_wr->sg_list[i].addr,
			       base_wr->sg_list[i].length);
			data += base_wr->sg_list[i++].length;
		}
		siw_sqe->sge[0].length = bytes;

	} else {
		if (siw_sqe->num_sge > SIW_MAX_SGE)
			return -EINVAL;

		/* this assumes same layout of siw and base SGE */
		memcpy(siw_sqe->sge, base_wr->sg_list,
		       siw_sqe->num_sge * sizeof(struct ibv_sge));
	}
	/* Publish the completed SQE to the kernel. */
	atomic_store(fp, flags);

	return 0;
}

/*
 * Post a chain of send work requests. Each WR is written into the next
 * free slot of the mmapped SQ ring; afterwards a doorbell (an empty
 * POST_SEND syscall, see siw_db()) may be issued to kick SQ processing.
 *
 * Returns 0 on success; on error, *bad_wr points at the failing WR and a
 * negative errno is returned (-ENOMEM on SQ overflow, -EINVAL from
 * push_send_wqe()).
 */
static int siw_post_send(struct ibv_qp *base_qp, struct ibv_send_wr *wr,
			 struct ibv_send_wr **bad_wr)
{
	struct siw_qp *qp = qp_base2siw(base_qp);
	uint32_t sq_put;
	atomic_ushort *fp;
	int new_sqe = 0, rv = 0;

	*bad_wr = NULL;

	pthread_spin_lock(&qp->sq_lock);

	sq_put = qp->sq_put;

	/*
	 * Push all current work requests into mmapped SQ
	 */
	while (wr) {
		uint32_t idx = sq_put % qp->num_sqe;
		struct siw_sqe *sqe = &qp->sendq[idx];
		uint16_t sqe_flags;

		/* A slot still flagged VALID is owned by the kernel. */
		fp = (atomic_ushort *)&sqe->flags;
		sqe_flags = atomic_load(fp);

		if (!(sqe_flags & SIW_WQE_VALID)) {
			rv = push_send_wqe(wr, sqe, qp->sq_sig_all);
			if (rv) {
				*bad_wr = wr;
				break;
			}
			new_sqe++;
		} else {
			if (siw_debug)
				printf("libsiw: QP[%d]: SQ overflow, idx %d\n",
				       qp->id, idx);
			rv = -ENOMEM;
			*bad_wr = wr;
			break;
		}
		sq_put++;
		wr = wr->next;
	}
	if (new_sqe) {
		/*
		 * If last WQE pushed before position where current post_send
		 * started is idle, we assume SQ is not being actively
		 * processed. Only then, the doorbell call will be issued.
		 * This may significantly reduce unnecessary doorbell calls
		 * on a busy SQ. We also always ring the doorbell, if the
		 * complete SQ was re-written during current post_send.
		 */
		if (new_sqe < qp->num_sqe) {
			uint32_t old_idx = (qp->sq_put - 1) % qp->num_sqe;
			struct siw_sqe *old_sqe = &qp->sendq[old_idx];

			fp = (atomic_ushort *)&old_sqe->flags;
			if (!(atomic_load(fp) & SIW_WQE_VALID))
				rv = siw_db(qp);
		} else {
			rv = siw_db(qp);
		}
		if (rv)
			*bad_wr = wr;

		/* Ring pointer advances only when something was pushed. */
		qp->sq_put = sq_put;
	}
	pthread_spin_unlock(&qp->sq_lock);

	return rv;
}

/*
 * Translate one ibverbs receive work request into a siw RQE in the mmapped
 * receive queue. SIW_WQE_VALID is stored atomically last, so the kernel
 * never sees a partially written RQE.
 *
 * Returns 0 on success, -EINVAL for zero or too many SGEs.
 */
static inline int push_recv_wqe(struct ibv_recv_wr *base_wr,
				struct siw_rqe *siw_rqe)
{
	atomic_ushort *fp = (atomic_ushort *)&siw_rqe->flags;
	int num_sge = base_wr->num_sge;

	siw_rqe->id = base_wr->wr_id;
	siw_rqe->num_sge = num_sge;

	if (num_sge == 1) {
		/* Common single-SGE case: copy fields directly. */
		siw_rqe->sge[0].laddr = base_wr->sg_list[0].addr;
		siw_rqe->sge[0].length = base_wr->sg_list[0].length;
		siw_rqe->sge[0].lkey = base_wr->sg_list[0].lkey;
	} else if (num_sge >= 1 && num_sge <= SIW_MAX_SGE) {
		/* this assumes same layout of siw and base SGE */
		memcpy(siw_rqe->sge, base_wr->sg_list,
		       sizeof(struct ibv_sge) * num_sge);
	} else {
		return -EINVAL;
	}
	atomic_store(fp, SIW_WQE_VALID);

	return 0;
}

/*
 * Post a chain of receive work requests into the mmapped RQ ring.
 * On error, *bad_wr points at the failing WR and a negative errno is
 * returned (-ENOMEM on RQ overflow, -EINVAL for a malformed WR).
 */
static int siw_post_recv(struct ibv_qp *base_qp, struct ibv_recv_wr *wr,
			 struct ibv_recv_wr **bad_wr)
{
	struct siw_qp *qp = qp_base2siw(base_qp);
	uint32_t next_put;
	int rv = 0;

	pthread_spin_lock(&qp->rq_lock);

	for (next_put = qp->rq_put; wr; wr = wr->next, next_put++) {
		int slot = next_put % qp->num_rqe;
		struct siw_rqe *rqe = &qp->recvq[slot];
		atomic_ushort *fp = (atomic_ushort *)&rqe->flags;

		/* A slot still flagged VALID is owned by the kernel. */
		if (atomic_load(fp) & SIW_WQE_VALID) {
			if (siw_debug)
				printf("libsiw: QP[%d]: RQ overflow, idx %d\n",
				       qp->id, slot);
			rv = -ENOMEM;
			*bad_wr = wr;
			break;
		}
		if (push_recv_wqe(wr, rqe)) {
			*bad_wr = wr;
			rv = -EINVAL;
			break;
		}
	}
	qp->rq_put = next_put;

	pthread_spin_unlock(&qp->rq_lock);

	return rv;
}

/*
 * Post a chain of receive work requests into the mmapped SRQ ring.
 * On error, *bad_wr points at the failing WR and a negative errno is
 * returned (-ENOMEM on SRQ overflow, -EINVAL for a malformed WR).
 */
static int siw_post_srq_recv(struct ibv_srq *base_srq, struct ibv_recv_wr *wr,
			     struct ibv_recv_wr **bad_wr)
{
	struct siw_srq *srq = srq_base2siw(base_srq);
	uint32_t next_put;
	int rv = 0;

	pthread_spin_lock(&srq->lock);

	for (next_put = srq->rq_put; wr; wr = wr->next, next_put++) {
		int slot = next_put % srq->num_rqe;
		struct siw_rqe *rqe = &srq->recvq[slot];
		atomic_ushort *fp = (atomic_ushort *)&rqe->flags;

		/* A slot still flagged VALID is owned by the kernel. */
		if (atomic_load(fp) & SIW_WQE_VALID) {
			if (siw_debug)
				printf("libsiw: SRQ[%p]: SRQ overflow\n", srq);
			rv = -ENOMEM;
			*bad_wr = wr;
			break;
		}
		if (push_recv_wqe(wr, rqe)) {
			*bad_wr = wr;
			rv = -EINVAL;
			break;
		}
	}
	srq->rq_put = next_put;

	pthread_spin_unlock(&srq->lock);

	return rv;
}

/*
 * Translation table from siw completion opcodes to ibverbs WC opcodes.
 *
 * NOTE(review): copy_cqe() indexes this array directly with cqe->opcode,
 * so entry order must match the enum siw_opcode values; the .siw field
 * documents the index each entry is meant to occupy.
 */
static const struct {
	enum siw_opcode siw;
	enum ibv_wc_opcode base;
} map_cqe_opcode[SIW_NUM_OPCODES] = {
	{ SIW_OP_WRITE, IBV_WC_RDMA_WRITE },
	{ SIW_OP_READ, IBV_WC_RDMA_READ },
	{ SIW_OP_READ_LOCAL_INV, IBV_WC_RDMA_READ },
	{ SIW_OP_SEND, IBV_WC_SEND },
	{ SIW_OP_SEND_WITH_IMM, IBV_WC_SEND },
	{ SIW_OP_SEND_REMOTE_INV, IBV_WC_SEND },
	{ SIW_OP_FETCH_AND_ADD, IBV_WC_FETCH_ADD },
	{ SIW_OP_COMP_AND_SWAP, IBV_WC_COMP_SWAP },
	{ SIW_OP_RECEIVE, IBV_WC_RECV }
};

/*
 * Translation table from siw completion status to ibverbs WC status.
 *
 * NOTE(review): copy_cqe() indexes this array directly with cqe->status,
 * so entry order must match the enum siw_wc_status values; the .siw field
 * documents the index each entry is meant to occupy.
 */
static const struct {
	enum siw_wc_status siw;
	enum ibv_wc_status base;
} map_cqe_status[SIW_NUM_WC_STATUS] = {
	{ SIW_WC_SUCCESS, IBV_WC_SUCCESS },
	{ SIW_WC_LOC_LEN_ERR, IBV_WC_LOC_LEN_ERR },
	{ SIW_WC_LOC_PROT_ERR, IBV_WC_LOC_PROT_ERR },
	{ SIW_WC_LOC_QP_OP_ERR, IBV_WC_LOC_QP_OP_ERR },
	{ SIW_WC_WR_FLUSH_ERR, IBV_WC_WR_FLUSH_ERR },
	{ SIW_WC_BAD_RESP_ERR, IBV_WC_BAD_RESP_ERR },
	{ SIW_WC_LOC_ACCESS_ERR, IBV_WC_LOC_ACCESS_ERR },
	{ SIW_WC_REM_ACCESS_ERR, IBV_WC_REM_ACCESS_ERR },
	{ SIW_WC_REM_INV_REQ_ERR, IBV_WC_REM_INV_REQ_ERR },
	{ SIW_WC_GENERAL_ERR, IBV_WC_GENERAL_ERR }
};

/* Translate one siw CQE into the ibverbs work completion format. */
static inline void copy_cqe(struct siw_cqe *cqe, struct ibv_wc *wc)
{
	wc->wr_id = cqe->id;
	wc->byte_len = cqe->bytes;
	wc->opcode = map_cqe_opcode[cqe->opcode].base;
	wc->status = map_cqe_status[cqe->status].base;
	wc->qp_num = (uint32_t)cqe->qp_id;
	wc->vendor_err = 0;

	/* No immediate data supported yet */
	wc->wc_flags = 0;
	wc->imm_data = 0;
}

static int siw_poll_cq(struct ibv_cq *ibcq, int num_entries, struct ibv_wc *wc)
{
	struct siw_cq *cq = cq_base2siw(ibcq);
	int new = 0;

	pthread_spin_lock(&cq->lock);

	for (; num_entries--; wc++) {
		struct siw_cqe *cqe = &cq->queue[cq->cq_get % cq->num_cqe];
		atomic_uchar *fp = (atomic_uchar *)&cqe->flags;

		if (atomic_load(fp) & SIW_WQE_VALID) {
			copy_cqe(cqe, wc);
			atomic_store(fp, 0);
			cq->cq_get++;
			new++;
		} else
			break;
	}
	pthread_spin_unlock(&cq->lock);

	return new;
}

/* Dispatch table hooking this provider's verbs into libibverbs. */
static const struct verbs_context_ops siw_context_ops = {
	.alloc_pd = siw_alloc_pd,
	.async_event = siw_async_event,
	.create_ah = siw_create_ah,
	.create_cq = siw_create_cq,
	.create_qp = siw_create_qp,
	.create_srq = siw_create_srq,
	.dealloc_pd = siw_free_pd,
	.dereg_mr = siw_dereg_mr,
	.destroy_ah = siw_destroy_ah,
	.destroy_cq = siw_destroy_cq,
	.destroy_qp = siw_destroy_qp,
	.destroy_srq = siw_destroy_srq,
	.free_context = siw_free_context,
	.modify_qp = siw_modify_qp,
	.modify_srq = siw_modify_srq,
	.poll_cq = siw_poll_cq,
	.post_recv = siw_post_recv,
	.post_send = siw_post_send,
	.post_srq_recv = siw_post_srq_recv,
	.query_device = siw_query_device,
	.query_port = siw_query_port,
	.query_qp = siw_query_qp,
	.reg_mr = siw_reg_mr,
	.req_notify_cq = siw_notify_cq,
	.resize_cq = siw_resize_cq,
};

/*
 * Allocate and initialize a device context: register with the kernel,
 * install the provider's verbs dispatch table and record the device id.
 *
 * Returns the embedded verbs_context, or NULL on failure.
 */
static struct verbs_context *siw_alloc_context(struct ibv_device *base_dev,
					       int fd, void *pdata)
{
	struct siw_context *ctx;
	struct ibv_get_context cmd = {};
	struct siw_cmd_alloc_context_resp resp = {};

	ctx = verbs_init_and_alloc_context(base_dev, fd, ctx, base_ctx,
					   RDMA_DRIVER_SIW);
	if (!ctx)
		return NULL;

	if (ibv_cmd_get_context(&ctx->base_ctx, &cmd, sizeof(cmd),
				&resp.ibv_resp, sizeof(resp))) {
		/* Undo verbs bookkeeping before dropping the context. */
		verbs_uninit_context(&ctx->base_ctx);
		free(ctx);

		return NULL;
	}
	verbs_set_ops(&ctx->base_ctx, &siw_context_ops);
	ctx->dev_id = resp.dev_id;

	return &ctx->base_ctx;
}

/* Tear down verbs bookkeeping, then release the context container. */
static void siw_free_context(struct ibv_context *ibv_ctx)
{
	struct siw_context *sctx = ctx_ibv2siw(ibv_ctx);

	verbs_uninit_context(&sctx->base_ctx);
	free(sctx);
}

/* Allocate the provider's device wrapper, zero-initialized. */
static struct verbs_device *siw_device_alloc(struct verbs_sysfs_dev *unused)
{
	struct siw_device *sdev = calloc(1, sizeof(*sdev));

	return sdev ? &sdev->base_dev : NULL;
}

/* vdev is embedded in struct siw_device; free the whole container. */
static void siw_device_free(struct verbs_device *vdev)
{
	free(container_of(vdev, struct siw_device, base_dev));
}

/* Match table: bind this provider to any device using the SIW kernel driver. */
static const struct verbs_match_ent rnic_table[] = {
	VERBS_DRIVER_ID(RDMA_DRIVER_SIW),
	{},
};

/* Provider registration: device lifecycle hooks and supported ABI range. */
static const struct verbs_device_ops siw_dev_ops = {
	.name = "siw",
	.match_min_abi_version = SIW_ABI_VERSION,
	.match_max_abi_version = SIW_ABI_VERSION,
	.match_table = rnic_table,
	.alloc_device = siw_device_alloc,
	.uninit_device = siw_device_free,
	.alloc_context = siw_alloc_context,
};

PROVIDER_DRIVER(siw, siw_dev_ops);