/*
 * Copyright (c) 2001-2020 Mellanox Technologies, Ltd. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef RING_SIMPLE_H
#define RING_SIMPLE_H

#include "ring_slave.h"

#include "vma/dev/gro_mgr.h"
#include "vma/dev/net_device_table_mgr.h"

struct cq_moderation_info {
	uint32_t period;
	uint32_t count;
	uint64_t packets;
	uint64_t bytes;
	uint64_t prev_packets;
	uint64_t prev_bytes;
	uint32_t missed_rounds;
};
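/* Illustrative sketch of how a moderation round might consume these fields
 * (an assumption, not the actual implementation): compute per-interval rates
 * from the running counters, snapshot them, and reprogram the CQ through
 * ring_simple::modify_cq_moderation() when traffic warrants it:
 *
 *   uint64_t interval_packets = info.packets - info.prev_packets;
 *   uint64_t interval_bytes   = info.bytes - info.prev_bytes;
 *   info.prev_packets = info.packets;
 *   info.prev_bytes   = info.bytes;
 *   if (interval_packets == 0) {
 *       info.missed_rounds++;     // idle interval, keep current period/count
 *   } else {
 *       info.missed_rounds = 0;
 *       // derive a new period/count from the observed rate here
 *   }
 */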

/**
 * @class ring_simple
 *
 * Object that manages the QP and CQ operations.
 * This object is used for both Rx and Tx at the same time.
 *
 */
class ring_simple : public ring_slave
{
public:
	ring_simple(int if_index, ring* parent, ring_type_t type);
	virtual ~ring_simple();

	virtual int		request_notification(cq_type_t cq_type, uint64_t poll_sn);
	virtual int		poll_and_process_element_rx(uint64_t* p_cq_poll_sn, void* pv_fd_ready_array = NULL);
	virtual void		adapt_cq_moderation();
	virtual bool		reclaim_recv_buffers(descq_t *rx_reuse);
	virtual bool		reclaim_recv_buffers(mem_buf_desc_t* rx_reuse_lst);
	bool			reclaim_recv_buffers_no_lock(mem_buf_desc_t* rx_reuse_lst); // No locks
	virtual int		reclaim_recv_single_buffer(mem_buf_desc_t* rx_reuse); // No locks
	virtual int 		socketxtreme_poll(struct vma_completion_t *vma_completions, unsigned int ncompletions, int flags);
	virtual int		drain_and_proccess();
	virtual int		wait_for_notification_and_process_element(int cq_channel_fd, uint64_t* p_cq_poll_sn, void* pv_fd_ready_array = NULL);
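	/* A typical Rx flow with this API (an illustrative sketch, not taken
	 * from this file): poll first, then arm the CQ and block on the
	 * completion channel fd. CQT_RX and channel_fd are assumptions here;
	 * the cq_type_t enumerator and the Rx channel fd come from elsewhere
	 * in the codebase.
	 *
	 *   uint64_t poll_sn = 0;
	 *   if (ring->poll_and_process_element_rx(&poll_sn) <= 0) {
	 *       ring->request_notification(CQT_RX, poll_sn);  // arm the Rx CQ
	 *       ring->wait_for_notification_and_process_element(channel_fd, &poll_sn);
	 *   }
	 */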
	// Tx completion handling at the qp_mgr level just re-lists the desc+data buffers in the free lists
	void			mem_buf_desc_completion_with_error_tx(mem_buf_desc_t* p_tx_wc_buf_desc); // Assume locked...
	void			mem_buf_desc_completion_with_error_rx(mem_buf_desc_t* p_rx_wc_buf_desc); // Assume locked...
	void			mem_buf_desc_return_to_owner_tx(mem_buf_desc_t* p_mem_buf_desc);
	void			mem_buf_desc_return_to_owner_rx(mem_buf_desc_t* p_mem_buf_desc, void* pv_fd_ready_array = NULL);
	inline int		send_buffer(vma_ibv_send_wr* p_send_wqe, vma_wr_tx_packet_attr attr);
	virtual bool		is_up();
	void			start_active_qp_mgr();
	void			stop_active_qp_mgr();
	virtual mem_buf_desc_t*	mem_buf_tx_get(ring_user_id_t id, bool b_block, int n_num_mem_bufs = 1);
	virtual int		mem_buf_tx_release(mem_buf_desc_t* p_mem_buf_desc_list, bool b_accounting, bool trylock = false);
	virtual void		send_ring_buffer(ring_user_id_t id, vma_ibv_send_wr* p_send_wqe, vma_wr_tx_packet_attr attr);
	virtual void		send_lwip_buffer(ring_user_id_t id, vma_ibv_send_wr* p_send_wqe, vma_wr_tx_packet_attr attr);
	virtual void		mem_buf_desc_return_single_to_owner_tx(mem_buf_desc_t* p_mem_buf_desc);
	virtual bool 		get_hw_dummy_send_support(ring_user_id_t id, vma_ibv_send_wr* p_send_wqe);
	inline void 		convert_hw_time_to_system_time(uint64_t hwtime, struct timespec* systime) { m_p_ib_ctx->convert_hw_time_to_system_time(hwtime, systime); }
	inline uint32_t		get_qpn() const { return (m_p_l2_addr ? ((IPoIB_addr *)m_p_l2_addr)->get_qpn() : 0); }
	virtual uint32_t	get_underly_qpn() { return m_p_qp_mgr->get_underly_qpn(); }
	virtual int		modify_ratelimit(struct vma_rate_limit_t &rate_limit);
	virtual int		get_tx_channel_fd() const { return m_p_tx_comp_event_channel ? m_p_tx_comp_event_channel->fd : -1; }
	virtual uint32_t	get_max_inline_data();
#ifdef DEFINED_TSO
	virtual uint32_t	get_max_send_sge(void);
	virtual uint32_t	get_max_payload_sz(void);
	virtual uint16_t	get_max_header_sz(void);
	virtual uint32_t	get_tx_lkey(ring_user_id_t id) { NOT_IN_USE(id); return m_tx_lkey; }
	virtual bool		is_tso(void);
#endif /* DEFINED_TSO */

	struct ibv_comp_channel* get_tx_comp_event_channel() { return m_p_tx_comp_event_channel; }
	int			get_ring_descriptors(vma_mlx_hw_device_data &data);
	void			modify_cq_moderation(uint32_t period, uint32_t count);
	int			ack_and_arm_cq(cq_type_t cq_type);
	friend class cq_mgr;
	friend class cq_mgr_mlx5;
	friend class qp_mgr;
	friend class qp_mgr_eth_mlx5;
	friend class rfs;
	friend class rfs_uc;
	friend class rfs_uc_tcp_gro;
	friend class rfs_mc;
	friend class ring_bond;

protected:
	virtual qp_mgr*		create_qp_mgr(const ib_ctx_handler* ib_ctx, uint8_t port_num, struct ibv_comp_channel* p_rx_comp_event_channel) = 0;
	void			create_resources();
	virtual void		init_tx_buffers(uint32_t count);
	virtual void		inc_cq_moderation_stats(size_t sz_data);
#ifdef DEFINED_TSO
	void			set_tx_num_wr(int32_t num_wr) { m_tx_num_wr = m_tx_num_wr_free = num_wr; }
#endif /* DEFINED_TSO */
	uint32_t		get_tx_num_wr() { return m_tx_num_wr; }
	uint32_t		get_mtu() { return m_mtu; }

	ib_ctx_handler*		m_p_ib_ctx;
	qp_mgr*			m_p_qp_mgr;
	struct cq_moderation_info m_cq_moderation_info;
	cq_mgr*			m_p_cq_mgr_rx;
	cq_mgr*			m_p_cq_mgr_tx;
private:
	bool is_socketxtreme(void) { return m_socketxtreme.active; }

	void put_ec(struct ring_ec *ec)
	{
		m_socketxtreme.lock_ec_list.lock();
		list_add_tail(&ec->list, &m_socketxtreme.ec_list);
		m_socketxtreme.lock_ec_list.unlock();
	}

	void del_ec(struct ring_ec *ec)
	{
		m_socketxtreme.lock_ec_list.lock();
		list_del_init(&ec->list);
		ec->clear();
		m_socketxtreme.lock_ec_list.unlock();
	}

	inline ring_ec* get_ec(void)
	{
		struct ring_ec *ec = NULL;

		m_socketxtreme.lock_ec_list.lock();
		if (!list_empty(&m_socketxtreme.ec_list)) {
			ec = list_entry(m_socketxtreme.ec_list.next, struct ring_ec, list);
			list_del_init(&ec->list);
		}
		m_socketxtreme.lock_ec_list.unlock();
		return ec;
	}
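
	/* Illustrative producer/consumer flow for the helpers above (an
	 * assumption based on the names in this file): the Rx path queues one
	 * event per socket with put_ec(), and socketxtreme_poll() later drains
	 * the queue:
	 *
	 *   struct ring_ec *ec;
	 *   while ((ec = get_ec()) != NULL) {
	 *       // copy the event payload into the caller's vma_completion_t array
	 *   }
	 */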

	struct vma_completion_t *get_comp(void)
	{
		return m_socketxtreme.completion;
	}

	struct {
		/* Queue of event completion elements.
		 * This queue stores events related to different sockinfo objects
		 * (sockets). In the current implementation every sockinfo (socket)
		 * can have a single event in this queue.
		 */
		struct list_head         ec_list;

		/* Thread-safety lock for get/put operations on the queue */
		lock_spin                lock_ec_list;

		/* This completion is used to process events directly, without
		 * storing them in the queue of event completion elements
		 */
		struct vma_completion_t* completion;

		/* This flag is enabled once a socketxtreme_poll() call is made */
		bool                     active;
	} m_socketxtreme;

	inline void		send_status_handler(int ret, vma_ibv_send_wr* p_send_wqe);
	inline mem_buf_desc_t*	get_tx_buffers(uint32_t n_num_mem_bufs);
	inline int		put_tx_buffers(mem_buf_desc_t* buff_list);
	inline int		put_tx_single_buffer(mem_buf_desc_t* buff);
	inline void		return_to_global_pool();
	bool			is_available_qp_wr(bool b_block);
	void			save_l2_address(const L2_address* p_l2_addr) { delete_l2_address(); m_p_l2_addr = p_l2_addr->clone(); }
	void			delete_l2_address() { if (m_p_l2_addr) delete m_p_l2_addr; m_p_l2_addr = NULL; }

	lock_mutex		m_lock_ring_tx_buf_wait;
	uint32_t		m_tx_num_bufs;
	uint32_t		m_tx_num_wr;
	int32_t			m_tx_num_wr_free;
	bool			m_b_qp_tx_first_flushed_completion_handled;
	uint32_t		m_missing_buf_ref_count;
	uint32_t		m_tx_lkey; // the registered memory lkey of the buffer pool for this specific device
	gro_mgr			m_gro_mgr;
	bool			m_up;
	struct ibv_comp_channel* m_p_rx_comp_event_channel;
	struct ibv_comp_channel* m_p_tx_comp_event_channel;
	L2_address*		m_p_l2_addr;
	uint32_t		m_mtu;

#ifdef DEFINED_TSO
	struct {
		/* Maximum length of TCP payload for TSO */
		uint32_t max_payload_sz;

		/* Maximum length of header for TSO */
		uint16_t max_header_sz;
	} m_tso;
#endif /* DEFINED_TSO */
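
	/* Illustrative use of the TSO caps above (an assumption, not the actual
	 * send path): a segment qualifies for TSO only when its header and
	 * payload sizes fit the device limits; hdr_len and payload_len are
	 * hypothetical names:
	 *
	 *   bool tso_ok = is_tso() &&
	 *                 hdr_len <= m_tso.max_header_sz &&
	 *                 payload_len <= m_tso.max_payload_sz;
	 */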
};

class ring_eth : public ring_simple
{
public:
	ring_eth(int if_index,
			ring* parent = NULL, ring_type_t type = RING_ETH, bool call_create_res = true):
		ring_simple(if_index, parent, type) {
		net_device_val_eth* p_ndev =
				dynamic_cast<net_device_val_eth *>(g_p_net_device_table_mgr->get_net_device_val(m_parent->get_if_index()));
		if (p_ndev) {
			m_partition = p_ndev->get_vlan();

			/* Do resource initialization for
			 * ring_eth_direct and ring_eth_cb inside their
			 * constructors because they use their own
			 * create_qp_mgr() methods
			 */
			if (call_create_res) {
				create_resources();
			}
		}
	}
protected:
	virtual qp_mgr* create_qp_mgr(const ib_ctx_handler* ib_ctx, uint8_t port_num, struct ibv_comp_channel* p_rx_comp_event_channel);
};
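
/* Minimal usage sketch (assumes a valid if_index and an initialized
 * g_p_net_device_table_mgr; not taken from this file):
 *
 *   ring_eth *p_ring = new ring_eth(if_index);  // also creates QP/CQ resources
 *   ...
 *   delete p_ring;
 */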

class ring_ib : public ring_simple
{
public:
	ring_ib(int if_index,
			ring* parent = NULL):
		ring_simple(if_index, parent, RING_IB) {
		net_device_val_ib* p_ndev =
				dynamic_cast<net_device_val_ib *>(g_p_net_device_table_mgr->get_net_device_val(m_parent->get_if_index()));
		if (p_ndev) {
			m_partition = p_ndev->get_pkey();
			create_resources();
		}
	}
protected:
	virtual qp_mgr* create_qp_mgr(const ib_ctx_handler* ib_ctx, uint8_t port_num, struct ibv_comp_channel* p_rx_comp_event_channel);
};

#endif //RING_SIMPLE_H