/*
 * Copyright (c) 2001-2020 Mellanox Technologies, Ltd. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials
 *   provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef RING_SIMPLE_H
#define RING_SIMPLE_H

#include "ring_slave.h"
#include "vma/dev/gro_mgr.h"
#include "vma/dev/net_device_table_mgr.h"

struct cq_moderation_info {
	uint32_t period;
	uint32_t count;
	uint64_t packets;
	uint64_t bytes;
	uint64_t prev_packets;
	uint64_t prev_bytes;
	uint32_t missed_rounds;
};

/**
 * @class ring_simple
 *
 * Object that manages the QP and CQ operation.
 * This object is used for Rx & Tx at the same time.
 */
class ring_simple : public ring_slave
{
public:
	ring_simple(int if_index, ring* parent, ring_type_t type);
	virtual ~ring_simple();

	virtual int request_notification(cq_type_t cq_type, uint64_t poll_sn);
	virtual int poll_and_process_element_rx(uint64_t* p_cq_poll_sn, void* pv_fd_ready_array = NULL);
	virtual void adapt_cq_moderation();
	virtual bool reclaim_recv_buffers(descq_t *rx_reuse);
	virtual bool reclaim_recv_buffers(mem_buf_desc_t* rx_reuse_lst);
	bool reclaim_recv_buffers_no_lock(mem_buf_desc_t* rx_reuse_lst); // No locks
	virtual int reclaim_recv_single_buffer(mem_buf_desc_t* rx_reuse); // No locks
	virtual int socketxtreme_poll(struct vma_completion_t *vma_completions, unsigned int ncompletions, int flags);
	virtual int drain_and_proccess();
	virtual int wait_for_notification_and_process_element(int cq_channel_fd, uint64_t* p_cq_poll_sn, void* pv_fd_ready_array = NULL);
	// Tx completion handling at the qp_mgr level is just re-listing the desc+data buffer in the free lists
	void mem_buf_desc_completion_with_error_tx(mem_buf_desc_t* p_tx_wc_buf_desc); // Assume locked...
	void mem_buf_desc_completion_with_error_rx(mem_buf_desc_t* p_rx_wc_buf_desc); // Assume locked...
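
	/* Illustrative sketch only, not part of this class: one plausible way the
	 * cq_moderation_info counters above can drive an adaptive moderation
	 * policy such as adapt_cq_moderation(). The policy and the helper name
	 * example_adapt_moderation are hypothetical; only the struct fields and
	 * modify_cq_moderation() come from this header.
	 *
	 * @code
	 * static void example_adapt_moderation(cq_moderation_info &mi, ring_simple &r)
	 * {
	 *     // Packets seen since the previous adaptation round
	 *     uint64_t delta = mi.packets - mi.prev_packets;
	 *     mi.prev_packets = mi.packets;
	 *     mi.prev_bytes   = mi.bytes;
	 *
	 *     if (delta == 0) {
	 *         // Idle round: record it and fall back to event-per-packet
	 *         // behavior so latency stays low.
	 *         mi.missed_rounds++;
	 *         r.modify_cq_moderation(0, 1);
	 *     } else {
	 *         // Busy round: keep the configured (period, count) pair so
	 *         // several completions are coalesced per interrupt.
	 *         mi.missed_rounds = 0;
	 *         r.modify_cq_moderation(mi.period, mi.count);
	 *     }
	 * }
	 * @endcode
	 */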
	void mem_buf_desc_return_to_owner_tx(mem_buf_desc_t* p_mem_buf_desc);
	void mem_buf_desc_return_to_owner_rx(mem_buf_desc_t* p_mem_buf_desc, void* pv_fd_ready_array = NULL);
	inline int send_buffer(vma_ibv_send_wr* p_send_wqe, vma_wr_tx_packet_attr attr);

	virtual bool is_up();
	void start_active_qp_mgr();
	void stop_active_qp_mgr();

	virtual mem_buf_desc_t* mem_buf_tx_get(ring_user_id_t id, bool b_block, int n_num_mem_bufs = 1);
	virtual int mem_buf_tx_release(mem_buf_desc_t* p_mem_buf_desc_list, bool b_accounting, bool trylock = false);
	virtual void send_ring_buffer(ring_user_id_t id, vma_ibv_send_wr* p_send_wqe, vma_wr_tx_packet_attr attr);
	virtual void send_lwip_buffer(ring_user_id_t id, vma_ibv_send_wr* p_send_wqe, vma_wr_tx_packet_attr attr);
	virtual void mem_buf_desc_return_single_to_owner_tx(mem_buf_desc_t* p_mem_buf_desc);
	virtual bool get_hw_dummy_send_support(ring_user_id_t id, vma_ibv_send_wr* p_send_wqe);

	inline void convert_hw_time_to_system_time(uint64_t hwtime, struct timespec* systime) { m_p_ib_ctx->convert_hw_time_to_system_time(hwtime, systime); }
	inline uint32_t get_qpn() const { return (m_p_l2_addr ? ((IPoIB_addr *)m_p_l2_addr)->get_qpn() : 0); }
	virtual uint32_t get_underly_qpn() { return m_p_qp_mgr->get_underly_qpn(); }
	virtual int modify_ratelimit(struct vma_rate_limit_t &rate_limit);
	virtual int get_tx_channel_fd() const { return m_p_tx_comp_event_channel ? m_p_tx_comp_event_channel->fd : -1; };
	virtual uint32_t get_max_inline_data();
#ifdef DEFINED_TSO
	virtual uint32_t get_max_send_sge(void);
	virtual uint32_t get_max_payload_sz(void);
	virtual uint16_t get_max_header_sz(void);
	virtual uint32_t get_tx_lkey(ring_user_id_t id) { NOT_IN_USE(id); return m_tx_lkey; }
	virtual bool is_tso(void);
#endif /* DEFINED_TSO */
	struct ibv_comp_channel* get_tx_comp_event_channel() { return m_p_tx_comp_event_channel; }
	int get_ring_descriptors(vma_mlx_hw_device_data &data);
	void modify_cq_moderation(uint32_t period, uint32_t count);
	int ack_and_arm_cq(cq_type_t cq_type);

	friend class cq_mgr;
	friend class cq_mgr_mlx5;
	friend class qp_mgr;
	friend class qp_mgr_eth_mlx5;
	friend class rfs;
	friend class rfs_uc;
	friend class rfs_uc_tcp_gro;
	friend class rfs_mc;
	friend class ring_bond;

protected:
	virtual qp_mgr* create_qp_mgr(const ib_ctx_handler* ib_ctx, uint8_t port_num, struct ibv_comp_channel* p_rx_comp_event_channel) = 0;
	void create_resources();
	virtual void init_tx_buffers(uint32_t count);
	virtual void inc_cq_moderation_stats(size_t sz_data);
#ifdef DEFINED_TSO
	void set_tx_num_wr(int32_t num_wr) { m_tx_num_wr = m_tx_num_wr_free = num_wr; }
#endif /* DEFINED_TSO */
	uint32_t get_tx_num_wr() { return m_tx_num_wr; }
	uint32_t get_mtu() { return m_mtu; }

	ib_ctx_handler* m_p_ib_ctx;
	qp_mgr* m_p_qp_mgr;
	struct cq_moderation_info m_cq_moderation_info;
	cq_mgr* m_p_cq_mgr_rx;
	cq_mgr* m_p_cq_mgr_tx;

private:
	bool is_socketxtreme(void) { return m_socketxtreme.active; }

	void put_ec(struct ring_ec *ec)
	{
		m_socketxtreme.lock_ec_list.lock();
		list_add_tail(&ec->list, &m_socketxtreme.ec_list);
		m_socketxtreme.lock_ec_list.unlock();
	}

	void del_ec(struct ring_ec *ec)
	{
		m_socketxtreme.lock_ec_list.lock();
		list_del_init(&ec->list);
		ec->clear();
		m_socketxtreme.lock_ec_list.unlock();
	}

	inline ring_ec* get_ec(void)
	{
		struct ring_ec *ec = NULL;

		m_socketxtreme.lock_ec_list.lock();
		if (!list_empty(&m_socketxtreme.ec_list)) {
			ec = list_entry(m_socketxtreme.ec_list.next, struct ring_ec, list);
			list_del_init(&ec->list);
		}
		m_socketxtreme.lock_ec_list.unlock();
		return ec;
	}

	struct vma_completion_t* get_comp(void) { return m_socketxtreme.completion; }
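
	/* Illustrative sketch only, not part of this class: the intended
	 * producer/consumer pattern around the m_socketxtreme member below.
	 * get_ec()/put_ec() are private, so in practice this logic runs from
	 * one of the friend classes listed above (e.g. cq_mgr). The helper
	 * name example_drain_ec_list is hypothetical, and the assumption that
	 * ring_ec carries a vma_completion_t payload named 'completion' is
	 * not stated in this header.
	 *
	 * @code
	 * // Producer (Rx event path): queue a ready event completion element.
	 * //     ring->put_ec(ec);
	 *
	 * // Consumer (socketxtreme_poll() path): drain queued elements; each
	 * // get_ec() takes the same lock_ec_list spinlock as put_ec()/del_ec().
	 * void example_drain_ec_list(ring_simple &r)
	 * {
	 *     struct ring_ec *ec;
	 *     while ((ec = r.get_ec()) != NULL) {
	 *         // ... copy ec->completion into the user's array of
	 *         // vma_completion_t entries passed to socketxtreme_poll()
	 *     }
	 * }
	 * @endcode
	 */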
	struct {
		/* Queue of event completion elements.
		 * This queue stores events related to different sockinfo (sockets).
		 * In the current implementation every sockinfo (socket) can have a
		 * single event in this queue. */
		struct list_head ec_list;

		/* Lock protecting get/put operations on the queue */
		lock_spin lock_ec_list;

		/* This completion is introduced to process events directly w/o
		 * storing them in the queue of event completion elements */
		struct vma_completion_t* completion;

		/* This flag is enabled once a socketxtreme_poll() call is done */
		bool active;
	} m_socketxtreme;

	inline void send_status_handler(int ret, vma_ibv_send_wr* p_send_wqe);
	inline mem_buf_desc_t* get_tx_buffers(uint32_t n_num_mem_bufs);
	inline int put_tx_buffers(mem_buf_desc_t* buff_list);
	inline int put_tx_single_buffer(mem_buf_desc_t* buff);
	inline void return_to_global_pool();
	bool is_available_qp_wr(bool b_block);
	void save_l2_address(const L2_address* p_l2_addr) { delete_l2_address(); m_p_l2_addr = p_l2_addr->clone(); };
	void delete_l2_address() { if (m_p_l2_addr) delete m_p_l2_addr; m_p_l2_addr = NULL; };

	lock_mutex m_lock_ring_tx_buf_wait;
	uint32_t m_tx_num_bufs;
	uint32_t m_tx_num_wr;
	int32_t m_tx_num_wr_free;
	bool m_b_qp_tx_first_flushed_completion_handled;
	uint32_t m_missing_buf_ref_count;
	uint32_t m_tx_lkey; // the registered memory lkey for a given specific device, for the buffer pool use
	gro_mgr m_gro_mgr;
	bool m_up;
	struct ibv_comp_channel* m_p_rx_comp_event_channel;
	struct ibv_comp_channel* m_p_tx_comp_event_channel;
	L2_address* m_p_l2_addr;
	uint32_t m_mtu;
#ifdef DEFINED_TSO
	struct {
		/* Maximum length of TCP payload for TSO */
		uint32_t max_payload_sz;
		/* Maximum length of header for TSO */
		uint16_t max_header_sz;
	} m_tso;
#endif /* DEFINED_TSO */
};

class ring_eth : public ring_simple
{
public:
	ring_eth(int if_index, ring* parent = NULL, ring_type_t type = RING_ETH, bool call_create_res = true):
		ring_simple(if_index, parent, type)
	{
		net_device_val_eth* p_ndev = dynamic_cast<net_device_val_eth*>(g_p_net_device_table_mgr->get_net_device_val(m_parent->get_if_index()));
		if (p_ndev) {
			m_partition = p_ndev->get_vlan();

			/* Resource initialization for ring_eth_direct and ring_eth_cb
			 * is done inside their respective constructors because they
			 * use their own create_qp_mgr() methods */
			if (call_create_res) {
				create_resources();
			}
		}
	}

protected:
	virtual qp_mgr* create_qp_mgr(const ib_ctx_handler* ib_ctx, uint8_t port_num, struct ibv_comp_channel* p_rx_comp_event_channel);
};

class ring_ib : public ring_simple
{
public:
	ring_ib(int if_index, ring* parent = NULL):
		ring_simple(if_index, parent, RING_IB)
	{
		net_device_val_ib* p_ndev = dynamic_cast<net_device_val_ib*>(g_p_net_device_table_mgr->get_net_device_val(m_parent->get_if_index()));
		if (p_ndev) {
			m_partition = p_ndev->get_pkey();
			create_resources();
		}
	}

protected:
	virtual qp_mgr* create_qp_mgr(const ib_ctx_handler* ib_ctx, uint8_t port_num, struct ibv_comp_channel* p_rx_comp_event_channel);
};

#endif // RING_SIMPLE_H
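
/* Illustrative usage sketch only, not part of this header's API: how the
 * concrete ring classes above might be instantiated. The variable names are
 * hypothetical; the call_create_res behavior is taken from the comment in
 * the ring_eth constructor.
 *
 * @code
 * // Plain Ethernet ring: the VLAN is read from the net_device and the
 * // QP/CQ resources are created via create_resources() -> create_qp_mgr()
 * // inside the constructor.
 * ring_eth *p_ring = new ring_eth(if_index, p_parent_ring);
 *
 * // Subclasses such as ring_eth_direct or ring_eth_cb construct with
 * // call_create_res = false and call create_resources() themselves, since
 * // they supply their own create_qp_mgr() implementation.
 * @endcode
 */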