Blame src/vma/dev/net_device_val.cpp

Packit 6d2c1b
/*
Packit 6d2c1b
 * Copyright (c) 2001-2020 Mellanox Technologies, Ltd. All rights reserved.
Packit 6d2c1b
 *
Packit 6d2c1b
 * This software is available to you under a choice of one of two
Packit 6d2c1b
 * licenses.  You may choose to be licensed under the terms of the GNU
Packit 6d2c1b
 * General Public License (GPL) Version 2, available from the file
Packit 6d2c1b
 * COPYING in the main directory of this source tree, or the
Packit 6d2c1b
 * BSD license below:
Packit 6d2c1b
 *
Packit 6d2c1b
 *     Redistribution and use in source and binary forms, with or
Packit 6d2c1b
 *     without modification, are permitted provided that the following
Packit 6d2c1b
 *     conditions are met:
Packit 6d2c1b
 *
Packit 6d2c1b
 *      - Redistributions of source code must retain the above
Packit 6d2c1b
 *        copyright notice, this list of conditions and the following
Packit 6d2c1b
 *        disclaimer.
Packit 6d2c1b
 *
Packit 6d2c1b
 *      - Redistributions in binary form must reproduce the above
Packit 6d2c1b
 *        copyright notice, this list of conditions and the following
Packit 6d2c1b
 *        disclaimer in the documentation and/or other materials
Packit 6d2c1b
 *        provided with the distribution.
Packit 6d2c1b
 *
Packit 6d2c1b
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
Packit 6d2c1b
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
Packit 6d2c1b
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
Packit 6d2c1b
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
Packit 6d2c1b
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
Packit 6d2c1b
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
Packit 6d2c1b
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
Packit 6d2c1b
 * SOFTWARE.
Packit 6d2c1b
 */
Packit 6d2c1b
Packit 6d2c1b
Packit 6d2c1b
Packit 6d2c1b
#include <string.h>
Packit 6d2c1b
#include <ifaddrs.h>
Packit 6d2c1b
#include <sys/epoll.h>
Packit 6d2c1b
#include <linux/if_infiniband.h>
Packit 6d2c1b
#include <linux/if_ether.h>
Packit 6d2c1b
#include <linux/rtnetlink.h>
Packit 6d2c1b
#include <linux/netlink.h>
Packit 6d2c1b
#include <linux/if_tun.h>
Packit 6d2c1b
#include <sys/epoll.h>
Packit 6d2c1b
Packit 6d2c1b
#include "utils/bullseye.h"
Packit 6d2c1b
#include "vma/util/if.h"
Packit 6d2c1b
#include "vma/dev/net_device_val.h"
Packit 6d2c1b
#include "vma/util/vtypes.h"
Packit 6d2c1b
#include "vma/util/utils.h"
Packit 6d2c1b
#include "vma/util/valgrind.h"
Packit 6d2c1b
#include "vma/event/event_handler_manager.h"
Packit 6d2c1b
#include "vma/proto/L2_address.h"
Packit 6d2c1b
#include "vma/dev/ib_ctx_handler_collection.h"
Packit 6d2c1b
#include "vma/dev/ring_tap.h"
Packit 6d2c1b
#include "vma/dev/ring_simple.h"
Packit 6d2c1b
#include "vma/dev/ring_eth_cb.h"
Packit 6d2c1b
#include "vma/dev/ring_eth_direct.h"
Packit 6d2c1b
#include "vma/dev/ring_slave.h"
Packit 6d2c1b
#include "vma/dev/ring_bond.h"
Packit 6d2c1b
#include "vma/sock/sock-redirect.h"
Packit 6d2c1b
#include "vma/dev/net_device_table_mgr.h"
Packit 6d2c1b
#include "vma/proto/neighbour_table_mgr.h"
Packit 6d2c1b
#include "ring_profile.h"
Packit 6d2c1b
Packit 6d2c1b
#ifdef HAVE_LIBNL3
Packit 6d2c1b
#include <netlink/route/link/vlan.h>
Packit 6d2c1b
#endif
Packit 6d2c1b
Packit 6d2c1b
#define MODULE_NAME             "ndv"
Packit 6d2c1b
Packit 6d2c1b
#define nd_logpanic           __log_panic
Packit 6d2c1b
#define nd_logerr             __log_err
Packit 6d2c1b
#define nd_logwarn            __log_warn
Packit 6d2c1b
#define nd_loginfo            __log_info
Packit 6d2c1b
#define nd_logdbg             __log_info_dbg
Packit 6d2c1b
#define nd_logfunc            __log_info_func
Packit 6d2c1b
#define nd_logfuncall         __log_info_funcall
Packit 6d2c1b
Packit 6d2c1b
/*
 * Default construction: per-interface allocation logic, profile key 0,
 * user id key 0 and an empty memory descriptor. init() then derives the
 * printable string and the hash from these values.
 */
ring_alloc_logic_attr::ring_alloc_logic_attr()
	: m_ring_alloc_logic(RING_LOGIC_PER_INTERFACE)
	, m_ring_profile_key(0)
	, m_user_id_key(0)
{
	m_mem_desc.iov_len = 0;
	m_mem_desc.iov_base = NULL;

	init();
}
Packit 6d2c1b
Packit 6d2c1b
/*
 * Construct with a caller-selected allocation logic. The profile key and
 * user id key start at 0 and the memory descriptor starts empty; init()
 * then derives the printable string and the hash.
 */
ring_alloc_logic_attr::ring_alloc_logic_attr(ring_logic_t ring_logic)
	: m_ring_alloc_logic(ring_logic)
	, m_ring_profile_key(0)
	, m_user_id_key(0)
{
	m_mem_desc.iov_len = 0;
	m_mem_desc.iov_base = NULL;

	init();
}
Packit 6d2c1b
Packit 6d2c1b
/*
 * Copy construction: every field, including the already computed hash, is
 * taken from 'other'; the printable string is copied rather than rebuilt,
 * so init() is intentionally not called here.
 */
ring_alloc_logic_attr::ring_alloc_logic_attr(const ring_alloc_logic_attr &other)
	: m_hash(other.m_hash)
	, m_ring_alloc_logic(other.m_ring_alloc_logic)
	, m_ring_profile_key(other.m_ring_profile_key)
	, m_user_id_key(other.m_user_id_key)
	, m_mem_desc(other.m_mem_desc)
{
	/* Bounded copy of the description text; always NUL-terminated. */
	snprintf(m_str, RING_ALLOC_STR_SIZE, "%s", other.m_str);
}
Packit 6d2c1b
Packit 6d2c1b
void ring_alloc_logic_attr::init()
Packit 6d2c1b
{
Packit 6d2c1b
	size_t h = 5381;
Packit 6d2c1b
	int c;
Packit 6d2c1b
	char buff[RING_ALLOC_STR_SIZE];
Packit 6d2c1b
Packit 6d2c1b
	snprintf(m_str, RING_ALLOC_STR_SIZE,
Packit 6d2c1b
		 "allocation logic %d profile %d key %ld user address %p "
Packit 6d2c1b
		 "user length %zd", m_ring_alloc_logic, m_ring_profile_key,
Packit 6d2c1b
		 m_user_id_key, m_mem_desc.iov_base, m_mem_desc.iov_len);
Packit 6d2c1b
	snprintf(buff, RING_ALLOC_STR_SIZE, "%d%d%ld%p%zd", m_ring_alloc_logic,
Packit 6d2c1b
		 m_ring_profile_key, m_user_id_key, m_mem_desc.iov_base,
Packit 6d2c1b
		 m_mem_desc.iov_len);
Packit 6d2c1b
	const char* chr = buff;
Packit 6d2c1b
	while ((c = *chr++))
Packit 6d2c1b
		h = ((h << 5) + h) + c; /* m_hash * 33 + c */
Packit 6d2c1b
	m_hash = h;
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
/* Update the allocation logic; string and hash are rebuilt only on change. */
void ring_alloc_logic_attr::set_ring_alloc_logic(ring_logic_t logic)
{
	if (m_ring_alloc_logic == logic) {
		return;
	}

	m_ring_alloc_logic = logic;
	init();
}
Packit 6d2c1b
Packit 6d2c1b
/* Update the ring profile key; string and hash are rebuilt only on change. */
void ring_alloc_logic_attr::set_ring_profile_key(vma_ring_profile_key profile)
{
	if (m_ring_profile_key == profile) {
		return;
	}

	m_ring_profile_key = profile;
	init();
}
Packit 6d2c1b
Packit 6d2c1b
void ring_alloc_logic_attr::set_memory_descriptor(iovec &mem_desc)
Packit 6d2c1b
{
Packit 6d2c1b
	if (m_mem_desc.iov_base != mem_desc.iov_base ||
Packit 6d2c1b
	    m_mem_desc.iov_len != mem_desc.iov_len) {
Packit 6d2c1b
		m_mem_desc = mem_desc;
Packit 6d2c1b
		init();
Packit 6d2c1b
	}
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
void ring_alloc_logic_attr::set_user_id_key(uint64_t user_id_key)
Packit 6d2c1b
{
Packit 6d2c1b
	if (m_user_id_key != user_id_key) {
Packit 6d2c1b
		m_user_id_key = user_id_key;
Packit 6d2c1b
		init();
Packit 6d2c1b
	}
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
/**
 * Build a net_device_val from a netlink link message.
 *
 * Steps performed:
 *  1. reset all members to defaults (state INVALID);
 *  2. read type/index/flags from the ifinfomsg header and MTU, link index,
 *     name, L2 address and broadcast address from the IFLA_* attributes;
 *  3. collect the interface IP addresses (an interface without any address
 *     is left INVALID);
 *  4. classify the device as bond / netvsc / plain and verify that a QP can
 *     be created for it (otherwise the interface is skipped, still INVALID);
 *  5. derive the final state (RUNNING / UP / DOWN) from the interface flags.
 *
 * @param desc description holding the netlink message (desc->nl_msg);
 *             a NULL pointer is logged and leaves the object INVALID.
 */
net_device_val::net_device_val(struct net_device_val_desc *desc) : m_lock("net_device_val lock")
{
	bool valid = false;
	ib_ctx_handler* ib_ctx;
	struct nlmsghdr *nl_msg = NULL;
	struct ifinfomsg *nl_msgdata = NULL;
	int nl_attrlen;
	struct rtattr *nl_attr;

	m_if_idx = 0;
	m_if_link = 0;
	m_type = 0;
	m_flags = 0;
	m_mtu = 0;
	m_state = INVALID;
	m_p_L2_addr = NULL;
	m_p_br_addr = NULL;
	m_bond = NO_BOND;
	m_if_active = 0;
	m_bond_xmit_hash_policy = XHP_LAYER_2;
	m_bond_fail_over_mac = 0;
	m_transport_type = VMA_TRANSPORT_UNKNOWN;

	if (NULL == desc) {
		nd_logerr("Invalid net_device_val name=%s", "NA");
		m_state = INVALID;
		return;
	}

	nl_msg = desc->nl_msg;
	nl_msgdata = (struct ifinfomsg *)NLMSG_DATA(nl_msg);

	nl_attr = (struct rtattr *)IFLA_RTA(nl_msgdata);
	nl_attrlen = IFLA_PAYLOAD(nl_msg);

	/* Fixed part of the link message */
	set_type(nl_msgdata->ifi_type);
	set_if_idx(nl_msgdata->ifi_index);
	set_flags(nl_msgdata->ifi_flags);

	/* Variable part: walk the IFLA_* attributes */
	while (RTA_OK(nl_attr, nl_attrlen)) {
		char *nl_attrdata = (char *)RTA_DATA(nl_attr);
		size_t nl_attrpayload = RTA_PAYLOAD(nl_attr);

		switch (nl_attr->rta_type) {
		case IFLA_MTU:
			set_mtu(*(int32_t *)nl_attrdata);
			break;
		case IFLA_LINK:
			set_if_link(*(int32_t *)nl_attrdata);
			break;
		case IFLA_IFNAME:
			set_ifname(nl_attrdata);
			break;
		case IFLA_ADDRESS:
			set_l2_if_addr((uint8_t *)nl_attrdata, nl_attrpayload);
			break;
		case IFLA_BROADCAST:
			set_l2_bc_addr((uint8_t *)nl_attrdata, nl_attrpayload);
			break;
		default:
			break;
		}
		nl_attr = RTA_NEXT(nl_attr, nl_attrlen);
	}

	/* Valid interface should have at least one IP address */
	set_ip_array();
	if (m_ip.empty()) {
		return;
	}

	/* Identify device type */
	if ((get_flags() & IFF_MASTER) || check_device_exist(get_ifname_link(), BOND_DEVICE_FILE)) {
		verify_bonding_mode();
	} else if (check_netvsc_device_exist(get_ifname_link())) {
		m_bond = NETVSC;
	} else {
		m_bond = NO_BOND;
	}

	set_str();

	nd_logdbg("Check interface '%s' (index=%d addr=%d.%d.%d.%d flags=%X)",
			get_ifname(), get_if_idx(), NIPQUAD(get_local_addr()), get_flags());

	/* 'valid' is still false here; each device kind has its own QP check */
	ib_ctx = g_p_ib_ctx_handler_collection->get_ib_ctx(get_ifname_link());
	switch (m_bond) {
	case NETVSC:
		if (get_type() == ARPHRD_ETHER) {
			char slave_ifname[IFNAMSIZ] = {0};
			unsigned int slave_flags = 0;
			/* valid = true; uncomment it is valid flow to operate w/o SRIOV */
			if (get_netvsc_slave(get_ifname_link(), slave_ifname, slave_flags)) {
				valid = verify_qp_creation(slave_ifname, IBV_QPT_RAW_PACKET);
			}
		}
		break;
	case LAG_8023ad:
	case ACTIVE_BACKUP:
		// this is a bond interface (or a vlan/alias over bond), find the slaves
		valid = verify_bond_ipoib_or_eth_qp_creation();
		break;
	default:
		valid = (bool)(ib_ctx && verify_ipoib_or_eth_qp_creation(get_ifname_link()));
		break;
	}

	if (!valid) {
		nd_logdbg("Skip interface '%s'", get_ifname());
		return;
	}

	if (safe_mce_sys().mtu != 0 && (int)safe_mce_sys().mtu != get_mtu()) {
		/* The two sentences are separate literals: keep the space between them */
		nd_logwarn("Mismatch between interface %s MTU=%d and VMA_MTU=%d. "
				"Make sure VMA_MTU and all offloaded interfaces MTUs match.",
				get_ifname(), get_mtu(), (int)safe_mce_sys().mtu);
	}

	/* Set interface state after all verifications */
	if (m_flags & IFF_RUNNING) {
		m_state = RUNNING;
	}
	else {
		if (m_flags & IFF_UP) {
			m_state = UP;
		}
		else {
			m_state = DOWN;
		}
	}

	nd_logdbg("Use interface '%s'", get_ifname());
	if (ib_ctx) {
		nd_logdbg("%s ==> %s port %d (%s)",
				get_ifname(),
				ib_ctx->get_ibname(), get_port_from_ifname(get_ifname_link()),
				(ib_ctx->is_active(get_port_from_ifname(get_ifname_link())) ? "Up" : "Down"));
	} else {
		nd_logdbg("%s ==> none",
				get_ifname());
	}
}
Packit 6d2c1b
Packit 6d2c1b
/**
 * Release everything owned by this device under m_lock: the rings and
 * their allocation keys, the redirection keys, both L2 address objects,
 * the slave descriptors and the IP descriptors.
 */
net_device_val::~net_device_val()
{
	auto_unlocker lock(m_lock);

	/*
	 * NOTE(review): the iterator must stay named 'ring_iter' - THE_RING
	 * appears to be a macro that refers to it; confirm in the header.
	 */
	while (!m_h_ring_map.empty()) {
		rings_hash_map_t::iterator ring_iter = m_h_ring_map.begin();

		delete THE_RING;
		/* The key is freed only after the entry has left the map */
		resource_allocation_key *owned_key = ring_iter->first;
		m_h_ring_map.erase(ring_iter);
		delete owned_key;
	}

	for (;;) {
		rings_key_redirection_hash_map_t::iterator redirect_iter =
			m_h_ring_key_redirection_map.begin();
		if (redirect_iter == m_h_ring_key_redirection_map.end()) {
			break;
		}
		delete redirect_iter->second.first;
		m_h_ring_key_redirection_map.erase(redirect_iter);
	}

	/* delete on a NULL pointer is a no-op, so no guard is needed */
	delete m_p_br_addr;
	m_p_br_addr = NULL;

	delete m_p_L2_addr;
	m_p_L2_addr = NULL;

	for (size_t idx = 0; idx < m_slaves.size(); ++idx) {
		delete m_slaves[idx];
	}
	m_slaves.clear();

	for (size_t idx = 0; idx < m_ip.size(); ++idx) {
		delete m_ip[idx];
	}
	m_ip.clear();
}
Packit 6d2c1b
Packit 6d2c1b
void net_device_val::set_ip_array()
Packit 6d2c1b
{
Packit 6d2c1b
	int rc = 0;
Packit 6d2c1b
	int fd = -1;
Packit 6d2c1b
	struct {
Packit 6d2c1b
		struct nlmsghdr hdr;
Packit 6d2c1b
		struct ifaddrmsg addrmsg;
Packit 6d2c1b
	} nl_req;
Packit 6d2c1b
	struct nlmsghdr *nl_msg;
Packit 6d2c1b
	int nl_msglen = 0;
Packit 6d2c1b
	char nl_res[8096];
Packit 6d2c1b
	static int _seq = 0;
Packit 6d2c1b
Packit 6d2c1b
	/* Set up the netlink socket */
Packit 6d2c1b
	fd = orig_os_api.socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
Packit 6d2c1b
	if (fd < 0) {
Packit 6d2c1b
		nd_logerr("netlink socket() creation");
Packit 6d2c1b
		return;
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	/* Prepare RTM_GETADDR request */
Packit 6d2c1b
	memset(&nl_req, 0, sizeof(nl_req));
Packit 6d2c1b
	nl_req.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
Packit 6d2c1b
	nl_req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
Packit 6d2c1b
	nl_req.hdr.nlmsg_type = RTM_GETADDR;
Packit 6d2c1b
	nl_req.hdr.nlmsg_seq = _seq++;
Packit 6d2c1b
	nl_req.hdr.nlmsg_pid = getpid();
Packit 6d2c1b
	nl_req.addrmsg.ifa_family = AF_INET;
Packit 6d2c1b
	nl_req.addrmsg.ifa_index = m_if_idx;
Packit 6d2c1b
Packit 6d2c1b
	/* Send the netlink request */
Packit 6d2c1b
	rc = orig_os_api.send(fd, &nl_req, nl_req.hdr.nlmsg_len, 0);
Packit 6d2c1b
	if (rc < 0) {
Packit 6d2c1b
		nd_logerr("netlink send() operation");
Packit 6d2c1b
		goto ret;
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	do {
Packit 6d2c1b
		/* Receive the netlink reply */
Packit 6d2c1b
		rc = orig_os_api.recv(fd, nl_res, sizeof(nl_res), 0);
Packit 6d2c1b
		if (rc < 0) {
Packit 6d2c1b
			nd_logerr("netlink recv() operation");
Packit 6d2c1b
			goto ret;
Packit 6d2c1b
		}
Packit 6d2c1b
Packit 6d2c1b
		nl_msg = (struct nlmsghdr *)nl_res;
Packit 6d2c1b
		nl_msglen = rc;
Packit 6d2c1b
		while (NLMSG_OK(nl_msg, (size_t)nl_msglen) && (nl_msg->nlmsg_type != NLMSG_ERROR)) {
Packit 6d2c1b
			int nl_attrlen;
Packit 6d2c1b
			struct ifaddrmsg *nl_msgdata;
Packit 6d2c1b
			struct rtattr *nl_attr;
Packit 6d2c1b
			ip_data_t* p_val = NULL;
Packit 6d2c1b
Packit 6d2c1b
			nl_msgdata = (struct ifaddrmsg *)NLMSG_DATA(nl_msg);
Packit 6d2c1b
Packit 6d2c1b
			/* Process just specific if index */
Packit 6d2c1b
			if ((int)nl_msgdata->ifa_index == m_if_idx) {
Packit 6d2c1b
				nl_attr = (struct rtattr *)IFA_RTA(nl_msgdata);
Packit 6d2c1b
				nl_attrlen = IFA_PAYLOAD(nl_msg);
Packit 6d2c1b
Packit 6d2c1b
				p_val = new ip_data_t;
Packit 6d2c1b
				p_val->flags = nl_msgdata->ifa_flags;
Packit 6d2c1b
				memset(&p_val->netmask, 0, sizeof(in_addr_t));
Packit 6d2c1b
				p_val->netmask = prefix_to_netmask(nl_msgdata->ifa_prefixlen);
Packit 6d2c1b
				while (RTA_OK(nl_attr, nl_attrlen)) {
Packit 6d2c1b
					char *nl_attrdata = (char *)RTA_DATA(nl_attr);
Packit 6d2c1b
Packit 6d2c1b
					switch (nl_attr->rta_type) {
Packit 6d2c1b
					case IFA_ADDRESS:
Packit 6d2c1b
						memset(&p_val->local_addr, 0, sizeof(in_addr_t));
Packit 6d2c1b
						memcpy(&p_val->local_addr, (in_addr_t *)nl_attrdata, sizeof(in_addr_t));
Packit 6d2c1b
						break;
Packit 6d2c1b
					default:
Packit 6d2c1b
						break;
Packit 6d2c1b
					}
Packit 6d2c1b
					nl_attr = RTA_NEXT(nl_attr, nl_attrlen);
Packit 6d2c1b
				}
Packit 6d2c1b
Packit 6d2c1b
				m_ip.push_back(p_val);
Packit 6d2c1b
			}
Packit 6d2c1b
Packit 6d2c1b
			/* Check if it is the last message */
Packit 6d2c1b
			if(nl_msg->nlmsg_type == NLMSG_DONE) {
Packit 6d2c1b
				goto ret;
Packit 6d2c1b
			}
Packit 6d2c1b
			nl_msg = NLMSG_NEXT(nl_msg, nl_msglen);
Packit 6d2c1b
		}
Packit 6d2c1b
	} while (1);
Packit 6d2c1b
Packit 6d2c1b
ret:
Packit 6d2c1b
	orig_os_api.close(fd);
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
void net_device_val::set_str()
Packit 6d2c1b
{
Packit 6d2c1b
	char str_x[BUFF_SIZE] = {0};
Packit 6d2c1b
Packit 6d2c1b
	m_str[0] = '\0';
Packit 6d2c1b
Packit 6d2c1b
	str_x[0] = '\0';
Packit 6d2c1b
	sprintf(str_x, "%d:", m_if_idx);
Packit 6d2c1b
	strcat(m_str, str_x);
Packit 6d2c1b
Packit 6d2c1b
	str_x[0] = '\0';
Packit 6d2c1b
	if (!strcmp(get_ifname(), get_ifname_link())) {
Packit 6d2c1b
		sprintf(str_x, " %s:", get_ifname());
Packit 6d2c1b
	} else {
Packit 6d2c1b
		sprintf(str_x, " %s@%s:", get_ifname(), get_ifname_link());
Packit 6d2c1b
	}
Packit 6d2c1b
	strcat(m_str, str_x);
Packit 6d2c1b
Packit 6d2c1b
	str_x[0] = '\0';
Packit 6d2c1b
	sprintf(str_x, " <%s%s%s%s%s%s%s%s%s%s%s>:",
Packit 6d2c1b
			(m_flags & IFF_UP        ? "UP," : ""),
Packit 6d2c1b
			(m_flags & IFF_RUNNING   ? "RUNNING," : ""),
Packit 6d2c1b
			(m_flags & IFF_NOARP     ? "NO_ARP," : ""),
Packit 6d2c1b
			(m_flags & IFF_LOOPBACK  ? "LOOPBACK," : ""),
Packit 6d2c1b
			(m_flags & IFF_BROADCAST ? "BROADCAST," : ""),
Packit 6d2c1b
			(m_flags & IFF_MULTICAST ? "MULTICAST," : ""),
Packit 6d2c1b
			(m_flags & IFF_MASTER    ? "MASTER," : ""),
Packit 6d2c1b
			(m_flags & IFF_SLAVE     ? "SLAVE," : ""),
Packit 6d2c1b
			(m_flags & IFF_LOWER_UP  ? "LOWER_UP," : ""),
Packit 6d2c1b
			(m_flags & IFF_DEBUG     ? "DEBUG," : ""),
Packit 6d2c1b
			(m_flags & IFF_PROMISC   ? "PROMISC," : ""));
Packit 6d2c1b
	strcat(m_str, str_x);
Packit 6d2c1b
Packit 6d2c1b
	str_x[0] = '\0';
Packit 6d2c1b
	sprintf(str_x, " mtu %d", m_mtu);
Packit 6d2c1b
	strcat(m_str, str_x);
Packit 6d2c1b
Packit 6d2c1b
	str_x[0] = '\0';
Packit 6d2c1b
	switch (m_type) {
Packit 6d2c1b
	case ARPHRD_LOOPBACK:
Packit 6d2c1b
		sprintf(str_x, " type %s", "loopback");
Packit 6d2c1b
		break;
Packit 6d2c1b
	case ARPHRD_ETHER:
Packit 6d2c1b
		sprintf(str_x, " type %s", "ether");
Packit 6d2c1b
		break;
Packit 6d2c1b
	case ARPHRD_INFINIBAND:
Packit 6d2c1b
		sprintf(str_x, " type %s", "infiniband");
Packit 6d2c1b
		break;
Packit 6d2c1b
	default:
Packit 6d2c1b
		sprintf(str_x, " type %s", "unknown");
Packit 6d2c1b
		break;
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	str_x[0] = '\0';
Packit 6d2c1b
	switch (m_bond) {
Packit 6d2c1b
	case NETVSC:
Packit 6d2c1b
		sprintf(str_x, " (%s)", "netvsc");
Packit 6d2c1b
		break;
Packit 6d2c1b
	case LAG_8023ad:
Packit 6d2c1b
		sprintf(str_x, " (%s)", "lag 8023ad");
Packit 6d2c1b
		break;
Packit 6d2c1b
	case ACTIVE_BACKUP:
Packit 6d2c1b
		sprintf(str_x, " (%s)", "active backup");
Packit 6d2c1b
		break;
Packit 6d2c1b
	default:
Packit 6d2c1b
		sprintf(str_x, " (%s)", "normal");
Packit 6d2c1b
		break;
Packit 6d2c1b
	}
Packit 6d2c1b
	strcat(m_str, str_x);
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
void net_device_val::print_val()
Packit 6d2c1b
{
Packit 6d2c1b
	size_t i = 0;
Packit 6d2c1b
	rings_hash_map_t::iterator ring_iter;
Packit 6d2c1b
Packit 6d2c1b
	set_str();
Packit 6d2c1b
	nd_logdbg("%s", m_str);
Packit 6d2c1b
Packit 6d2c1b
	nd_logdbg("  ip list: %s", (m_ip.empty() ? "empty " : ""));
Packit 6d2c1b
	for (i = 0; i < m_ip.size(); i++) {
Packit 6d2c1b
		nd_logdbg("    inet: %d.%d.%d.%d netmask: %d.%d.%d.%d flags: 0x%X",
Packit 6d2c1b
				NIPQUAD(m_ip[i]->local_addr), NIPQUAD(m_ip[i]->netmask), m_ip[i]->flags);
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	nd_logdbg("  slave list: %s", (m_slaves.empty() ? "empty " : ""));
Packit 6d2c1b
	for (i = 0; i < m_slaves.size(); i++) {
Packit 6d2c1b
		char if_name[IFNAMSIZ] = {0};
Packit 6d2c1b
Packit 6d2c1b
		if_name[0] = '\0';
Packit 6d2c1b
		if_indextoname(m_slaves[i]->if_index, if_name);
Packit 6d2c1b
		nd_logdbg("    %d: %s: %s active: %d",
Packit 6d2c1b
				m_slaves[i]->if_index, if_name, m_slaves[i]->p_L2_addr->to_str().c_str(), m_slaves[i]->active);
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	nd_logdbg("  ring list: %s", (m_h_ring_map.empty() ? "empty " : ""));
Packit 6d2c1b
	for (ring_iter = m_h_ring_map.begin(); ring_iter != m_h_ring_map.end(); ring_iter++) {
Packit 6d2c1b
		ring *cur_ring = ring_iter->second.first;
Packit 6d2c1b
		NOT_IN_USE(cur_ring); // Suppress --enable-opt-log=high warning
Packit 6d2c1b
		nd_logdbg("    %d: 0x%X: parent 0x%X ref %d",
Packit 6d2c1b
				cur_ring->get_if_index(), cur_ring, cur_ring->get_parent(), ring_iter->second.second);
Packit 6d2c1b
	}
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
void net_device_val::set_slave_array()
Packit 6d2c1b
{
Packit 6d2c1b
	char active_slave[IFNAMSIZ] = {0}; // gather the slave data (only for active-backup)-
Packit 6d2c1b
Packit 6d2c1b
	nd_logdbg("");
Packit 6d2c1b
Packit 6d2c1b
	if (m_bond == NETVSC) {
Packit 6d2c1b
		slave_data_t* s = NULL;
Packit 6d2c1b
		unsigned int slave_flags = 0;
Packit 6d2c1b
		if (get_netvsc_slave(get_ifname_link(), active_slave, slave_flags)) {
Packit 6d2c1b
			if ((slave_flags & IFF_UP) &&
Packit 6d2c1b
					verify_qp_creation(active_slave, IBV_QPT_RAW_PACKET)) {
Packit 6d2c1b
				s = new slave_data_t(if_nametoindex(active_slave));
Packit 6d2c1b
				m_slaves.push_back(s);
Packit 6d2c1b
			}
Packit 6d2c1b
		}
Packit 6d2c1b
	} else if (m_bond == NO_BOND) {
Packit 6d2c1b
		slave_data_t* s = new slave_data_t(if_nametoindex(get_ifname()));
Packit 6d2c1b
		m_slaves.push_back(s);
Packit 6d2c1b
	} else {
Packit 6d2c1b
		// bond device
Packit 6d2c1b
Packit 6d2c1b
		// get list of all slave devices
Packit 6d2c1b
		char slaves_list[IFNAMSIZ * MAX_SLAVES] = {0};
Packit 6d2c1b
		if (get_bond_slaves_name_list(get_ifname_link(), slaves_list, sizeof(slaves_list))) {
Packit 6d2c1b
			char* slave = strtok(slaves_list, " ");
Packit 6d2c1b
			while (slave) {
Packit 6d2c1b
				char* p = strchr(slave, '\n');
Packit 6d2c1b
				if (p) *p = '\0'; // Remove the tailing 'new line" char
Packit 6d2c1b
Packit 6d2c1b
				slave_data_t* s = new slave_data_t(if_nametoindex(slave));
Packit 6d2c1b
				m_slaves.push_back(s);
Packit 6d2c1b
				slave = strtok(NULL, " ");
Packit 6d2c1b
			}
Packit 6d2c1b
		}
Packit 6d2c1b
Packit 6d2c1b
		// find the active slave
Packit 6d2c1b
		if (get_bond_active_slave_name(get_ifname_link(), active_slave, sizeof(active_slave))) {
Packit 6d2c1b
			m_if_active = if_nametoindex(active_slave);
Packit 6d2c1b
			nd_logdbg("found the active slave: %d: '%s'", m_if_active, active_slave);
Packit 6d2c1b
		}
Packit 6d2c1b
		else {
Packit 6d2c1b
			nd_logdbg("failed to find the active slave, Moving to LAG state");
Packit 6d2c1b
		}
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	bool up_and_active_slaves[m_slaves.size()];
Packit 6d2c1b
Packit 6d2c1b
	memset(up_and_active_slaves, 0, sizeof(up_and_active_slaves));
Packit 6d2c1b
Packit 6d2c1b
	if (m_bond == LAG_8023ad) {
Packit 6d2c1b
		get_up_and_active_slaves(up_and_active_slaves, m_slaves.size());
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	for (uint16_t i = 0; i < m_slaves.size(); i++) {
Packit 6d2c1b
		char if_name[IFNAMSIZ] = {0};
Packit 6d2c1b
		char base_ifname[IFNAMSIZ];
Packit 6d2c1b
Packit 6d2c1b
		if (!if_indextoname(m_slaves[i]->if_index, if_name)) {
Packit 6d2c1b
			nd_logerr("Can not find interface name by index=%d", m_slaves[i]->if_index);
Packit 6d2c1b
			continue;
Packit 6d2c1b
		}
Packit 6d2c1b
		get_base_interface_name((const char*)if_name, base_ifname, sizeof(base_ifname));
Packit 6d2c1b
Packit 6d2c1b
		// Save L2 address
Packit 6d2c1b
		m_slaves[i]->p_L2_addr = create_L2_address(if_name);
Packit 6d2c1b
		m_slaves[i]->active = false;
Packit 6d2c1b
Packit 6d2c1b
		if (m_bond == ACTIVE_BACKUP && m_if_active == m_slaves[i]->if_index) {
Packit 6d2c1b
			m_slaves[i]->active = true;
Packit 6d2c1b
		}
Packit 6d2c1b
Packit 6d2c1b
		if (m_bond == LAG_8023ad) {
Packit 6d2c1b
			if (up_and_active_slaves[i]) {
Packit 6d2c1b
				m_slaves[i]->active = true;
Packit 6d2c1b
			}
Packit 6d2c1b
		}
Packit 6d2c1b
Packit 6d2c1b
		if (m_bond == NETVSC) {
Packit 6d2c1b
			m_slaves[i]->active = true;
Packit 6d2c1b
		}
Packit 6d2c1b
Packit 6d2c1b
		if (m_bond == NO_BOND) {
Packit 6d2c1b
			m_slaves[i]->active = true;
Packit 6d2c1b
		}
Packit 6d2c1b
Packit 6d2c1b
		m_slaves[i]->p_ib_ctx = g_p_ib_ctx_handler_collection->get_ib_ctx(base_ifname);
Packit 6d2c1b
		m_slaves[i]->port_num = get_port_from_ifname(base_ifname);
Packit 6d2c1b
		if (m_slaves[i]->port_num < 1) {
Packit 6d2c1b
			nd_logdbg("Error: port %d ==> ifname=%s base_ifname=%s",
Packit 6d2c1b
					m_slaves[i]->port_num, if_name, base_ifname);
Packit 6d2c1b
		}
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	if (m_slaves.empty() && NETVSC != m_bond) {
Packit 6d2c1b
		m_state = INVALID;
Packit 6d2c1b
		nd_logpanic("No slave found.");
Packit 6d2c1b
	}
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
/**
 * Look up a slave descriptor by interface index, under m_lock.
 *
 * @param if_index interface index to search for.
 * @return the first slave whose if_index matches, or NULL when none does.
 */
const slave_data_t* net_device_val::get_slave(int if_index)
{
	auto_unlocker lock(m_lock);

	for (size_t pos = 0; pos < m_slaves.size(); ++pos) {
		slave_data_t *candidate = m_slaves[pos];
		if (candidate->if_index != if_index) {
			continue;
		}
		return candidate;
	}

	return NULL;
}
Packit 6d2c1b
Packit 6d2c1b
void net_device_val::verify_bonding_mode()
Packit 6d2c1b
{
Packit 6d2c1b
	// this is a bond interface, lets get its mode.
Packit 6d2c1b
	char bond_mode_file_content[FILENAME_MAX];
Packit 6d2c1b
	char bond_failover_mac_file_content[FILENAME_MAX];
Packit 6d2c1b
	char bond_mode_param_file[FILENAME_MAX];
Packit 6d2c1b
	char bond_failover_mac_param_file[FILENAME_MAX];
Packit 6d2c1b
	char bond_xmit_hash_policy_file_content[FILENAME_MAX];
Packit 6d2c1b
	char bond_xmit_hash_policy_param_file[FILENAME_MAX];
Packit 6d2c1b
Packit 6d2c1b
	memset(bond_mode_file_content, 0, FILENAME_MAX);
Packit 6d2c1b
	sprintf(bond_mode_param_file, BONDING_MODE_PARAM_FILE, get_ifname_link());
Packit 6d2c1b
	sprintf(bond_failover_mac_param_file, BONDING_FAILOVER_MAC_PARAM_FILE, get_ifname_link());
Packit 6d2c1b
Packit 6d2c1b
	if (priv_safe_read_file(bond_mode_param_file, bond_mode_file_content, FILENAME_MAX) > 0) {
Packit 6d2c1b
		char *bond_mode = NULL;
Packit 6d2c1b
		bond_mode = strtok(bond_mode_file_content, " ");
Packit 6d2c1b
		if (bond_mode) {
Packit 6d2c1b
			if (!strcmp(bond_mode, "active-backup")) {
Packit 6d2c1b
				m_bond = ACTIVE_BACKUP;
Packit 6d2c1b
			} else if (strstr(bond_mode, "802.3ad")) {
Packit 6d2c1b
				m_bond = LAG_8023ad;
Packit 6d2c1b
			}
Packit 6d2c1b
			if (priv_safe_read_file(bond_failover_mac_param_file, bond_failover_mac_file_content, FILENAME_MAX) > 0) {
Packit 6d2c1b
				if(strstr(bond_failover_mac_file_content, "0")){
Packit 6d2c1b
					m_bond_fail_over_mac = 0;
Packit 6d2c1b
				} else if(strstr(bond_failover_mac_file_content, "1")){
Packit 6d2c1b
					m_bond_fail_over_mac = 1;
Packit 6d2c1b
				} else if(strstr(bond_failover_mac_file_content, "2")){
Packit 6d2c1b
					m_bond_fail_over_mac = 2;
Packit 6d2c1b
				}
Packit 6d2c1b
			}
Packit 6d2c1b
		}
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	memset(bond_xmit_hash_policy_file_content, 0, FILENAME_MAX);
Packit 6d2c1b
	sprintf(bond_xmit_hash_policy_param_file, BONDING_XMIT_HASH_POLICY_PARAM_FILE, get_ifname_link());
Packit 6d2c1b
	if (priv_safe_try_read_file(bond_xmit_hash_policy_param_file, bond_xmit_hash_policy_file_content, FILENAME_MAX) > 0) {
Packit 6d2c1b
		char *bond_xhp = NULL;
Packit 6d2c1b
		char *saveptr = NULL;
Packit 6d2c1b
Packit 6d2c1b
		bond_xhp = strtok_r(bond_xmit_hash_policy_file_content, " ", &saveptr);
Packit 6d2c1b
		if (NULL == bond_xhp) {
Packit 6d2c1b
			nd_logdbg("could not parse bond xmit hash policy, staying with default (L2)\n");
Packit 6d2c1b
		} else {
Packit 6d2c1b
			bond_xhp = strtok_r(NULL, " ", &saveptr);
Packit 6d2c1b
			if (bond_xhp) {
Packit 6d2c1b
				m_bond_xmit_hash_policy = (bond_xmit_hash_policy)strtol(bond_xhp, NULL , 10);
Packit 6d2c1b
				if (m_bond_xmit_hash_policy < XHP_LAYER_2 || m_bond_xmit_hash_policy > XHP_ENCAP_3_4) {
Packit 6d2c1b
					vlog_printf(VLOG_WARNING,"VMA does not support xmit hash policy = %d\n", m_bond_xmit_hash_policy);
Packit 6d2c1b
					m_bond_xmit_hash_policy = XHP_LAYER_2;
Packit 6d2c1b
				}
Packit 6d2c1b
			}
Packit 6d2c1b
			nd_logdbg("got bond xmit hash policy = %d\n", m_bond_xmit_hash_policy);
Packit 6d2c1b
		}
Packit 6d2c1b
	} else {
Packit 6d2c1b
		nd_logdbg("could not read bond xmit hash policy, staying with default (L2)\n");
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	if (m_bond == NO_BOND || m_bond_fail_over_mac > 1) {
Packit 6d2c1b
		vlog_printf(VLOG_WARNING,"******************************************************************************\n");
Packit 6d2c1b
		vlog_printf(VLOG_WARNING,"VMA doesn't support current bonding configuration of %s.\n", get_ifname_link());
Packit 6d2c1b
		vlog_printf(VLOG_WARNING,"The only supported bonding mode is \"802.3ad 4(#4)\" or \"active-backup(#1)\"\n");
Packit 6d2c1b
		vlog_printf(VLOG_WARNING,"with \"fail_over_mac=1\" or \"fail_over_mac=0\".\n");
Packit 6d2c1b
		vlog_printf(VLOG_WARNING,"The effect of working in unsupported bonding mode is undefined.\n");
Packit 6d2c1b
		vlog_printf(VLOG_WARNING,"Read more about Bonding in the VMA's User Manual\n");
Packit 6d2c1b
		vlog_printf(VLOG_WARNING,"******************************************************************************\n");
Packit 6d2c1b
	}
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
/**
Packit 6d2c1b
 * only for active-backup bond
Packit 6d2c1b
 */
Packit 6d2c1b
bool net_device_val::update_active_backup_slaves()
Packit 6d2c1b
{
Packit 6d2c1b
	// update the active slave
Packit 6d2c1b
	// /sys/class/net/bond0/bonding/active_slave
Packit 6d2c1b
	char active_slave[IFNAMSIZ*MAX_SLAVES] = {0};
Packit 6d2c1b
	int if_active_slave = 0;
Packit 6d2c1b
Packit 6d2c1b
	if (!get_bond_active_slave_name(get_ifname_link(), active_slave, IFNAMSIZ)) {
Packit 6d2c1b
		nd_logdbg("failed to find the active slave!");
Packit 6d2c1b
		return 0;
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	//nothing changed
Packit 6d2c1b
	if_active_slave = if_nametoindex(active_slave);
Packit 6d2c1b
	if (m_if_active == if_active_slave) {
Packit 6d2c1b
		return 0;
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	m_p_L2_addr = create_L2_address(get_ifname());
Packit 6d2c1b
	bool found_active_slave = false;
Packit 6d2c1b
	for (size_t i = 0; i < m_slaves.size(); i++) {
Packit 6d2c1b
		if (if_active_slave == m_slaves[i]->if_index) {
Packit 6d2c1b
			m_slaves[i]->active = true;
Packit 6d2c1b
			found_active_slave = true;
Packit 6d2c1b
			nd_logdbg("Slave changed old=%d new=%d", m_if_active, if_active_slave);
Packit 6d2c1b
			m_if_active = if_active_slave;
Packit 6d2c1b
		} else {
Packit 6d2c1b
			m_slaves[i]->active = false;
Packit 6d2c1b
		}
Packit 6d2c1b
	}
Packit 6d2c1b
	if (!found_active_slave) {
Packit 6d2c1b
		nd_logdbg("Failed to locate new active slave details");
Packit 6d2c1b
		return 0;
Packit 6d2c1b
	}
Packit 6d2c1b
	// restart rings
Packit 6d2c1b
	rings_hash_map_t::iterator ring_iter;
Packit 6d2c1b
	for (ring_iter = m_h_ring_map.begin(); ring_iter != m_h_ring_map.end(); ring_iter++) {
Packit 6d2c1b
		THE_RING->restart();
Packit 6d2c1b
	}
Packit 6d2c1b
	return 1;
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
/*
Packit 6d2c1b
 * this function assume m_slaves[i]->if_name and m_slaves.size() are already set.
Packit 6d2c1b
 */
Packit 6d2c1b
bool net_device_val::get_up_and_active_slaves(bool* up_and_active_slaves, size_t size)
Packit 6d2c1b
{
Packit 6d2c1b
	bool up_slaves[m_slaves.size()];
Packit 6d2c1b
	int num_up = 0;
Packit 6d2c1b
	bool active_slaves[m_slaves.size()];
Packit 6d2c1b
	int num_up_and_active = 0;
Packit 6d2c1b
	size_t i = 0;
Packit 6d2c1b
Packit 6d2c1b
	if (size != m_slaves.size()) {
Packit 6d2c1b
		nd_logwarn("programmer error! array size is not correct");
Packit 6d2c1b
		return false;
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	/* get slaves operstate and active state */
Packit 6d2c1b
	for (i = 0; i < m_slaves.size(); i++) {
Packit 6d2c1b
		char oper_state[5] = {0};
Packit 6d2c1b
		char slave_state[10] = {0};
Packit 6d2c1b
		char if_name[IFNAMSIZ] = {0};
Packit 6d2c1b
Packit 6d2c1b
		if (!if_indextoname(m_slaves[i]->if_index, if_name)) {
Packit 6d2c1b
			nd_logerr("Can not find interface name by index=%d", m_slaves[i]->if_index);
Packit 6d2c1b
			continue;
Packit 6d2c1b
		}
Packit 6d2c1b
Packit 6d2c1b
		// get interface operstate
Packit 6d2c1b
		get_interface_oper_state(if_name, oper_state, sizeof(oper_state));
Packit 6d2c1b
		if (strstr(oper_state, "up")) {
Packit 6d2c1b
			num_up++;
Packit 6d2c1b
			up_slaves[i] = true;
Packit 6d2c1b
		} else {
Packit 6d2c1b
			up_slaves[i] = false;
Packit 6d2c1b
		}
Packit 6d2c1b
Packit 6d2c1b
		active_slaves[i] = true;
Packit 6d2c1b
		// get slave state
Packit 6d2c1b
		if (get_bond_slave_state(if_name, slave_state, sizeof(slave_state))){
Packit 6d2c1b
			if (!strstr(slave_state, "active"))
Packit 6d2c1b
				active_slaves[i] = false;
Packit 6d2c1b
		}
Packit 6d2c1b
Packit 6d2c1b
		if (active_slaves[i] && up_slaves[i]) {
Packit 6d2c1b
			up_and_active_slaves[i] = true;
Packit 6d2c1b
			num_up_and_active++;
Packit 6d2c1b
		} else {
Packit 6d2c1b
			up_and_active_slaves[i] = false;
Packit 6d2c1b
		}
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	/* make sure at least one up interface is active */
Packit 6d2c1b
	if (!num_up_and_active && num_up) {
Packit 6d2c1b
		for (i = 0; i < m_slaves.size(); i++) {
Packit 6d2c1b
			if (up_slaves[i]) {
Packit 6d2c1b
				up_and_active_slaves[i] = true;
Packit 6d2c1b
				break;
Packit 6d2c1b
			}
Packit 6d2c1b
		}
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	return true;
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
bool net_device_val::update_active_slaves()
Packit 6d2c1b
{
Packit 6d2c1b
	bool changed = false;
Packit 6d2c1b
	bool up_and_active_slaves[m_slaves.size()];
Packit 6d2c1b
	size_t i = 0;
Packit 6d2c1b
Packit 6d2c1b
	memset(&up_and_active_slaves, 0, m_slaves.size() * sizeof(bool));
Packit 6d2c1b
	get_up_and_active_slaves(up_and_active_slaves, m_slaves.size());
Packit 6d2c1b
Packit 6d2c1b
	/* compare to current status and prepare for restart */
Packit 6d2c1b
	for (i = 0; i< m_slaves.size(); i++) {
Packit 6d2c1b
		if (up_and_active_slaves[i]) {
Packit 6d2c1b
			//slave came up
Packit 6d2c1b
			if (!m_slaves[i]->active) {
Packit 6d2c1b
				nd_logdbg("slave %d is up ", m_slaves[i]->if_index);
Packit 6d2c1b
				m_slaves[i]->active = true;
Packit 6d2c1b
				changed = true;
Packit 6d2c1b
			}
Packit 6d2c1b
		}
Packit 6d2c1b
		else {
Packit 6d2c1b
			//slave went down
Packit 6d2c1b
			if (m_slaves[i]->active) {
Packit 6d2c1b
				nd_logdbg("slave %d is down ", m_slaves[i]->if_index);
Packit 6d2c1b
				m_slaves[i]->active = false;
Packit 6d2c1b
				changed = true;
Packit 6d2c1b
			}
Packit 6d2c1b
		}
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	/* restart if status changed */
Packit 6d2c1b
	if (changed) {
Packit 6d2c1b
		m_p_L2_addr = create_L2_address(get_ifname());
Packit 6d2c1b
		// restart rings
Packit 6d2c1b
		rings_hash_map_t::iterator ring_iter;
Packit 6d2c1b
		for (ring_iter = m_h_ring_map.begin(); ring_iter != m_h_ring_map.end(); ring_iter++) {
Packit 6d2c1b
			THE_RING->restart();
Packit 6d2c1b
		}
Packit 6d2c1b
		return 1;
Packit 6d2c1b
	}
Packit 6d2c1b
	return 0;
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
void net_device_val::update_netvsc_slaves(int if_index, int if_flags)
Packit 6d2c1b
{
Packit 6d2c1b
	slave_data_t* s = NULL;
Packit 6d2c1b
	bool found = false;
Packit 6d2c1b
	ib_ctx_handler *ib_ctx = NULL, *up_ib_ctx = NULL;
Packit 6d2c1b
	char if_name[IFNAMSIZ] = {0};
Packit 6d2c1b
Packit 6d2c1b
	m_lock.lock();
Packit 6d2c1b
Packit 6d2c1b
	if (if_indextoname(if_index, if_name) && (if_flags & IFF_UP) && (if_flags & IFF_RUNNING)) {
Packit 6d2c1b
		nd_logdbg("slave %d is up", if_index);
Packit 6d2c1b
Packit 6d2c1b
		g_p_ib_ctx_handler_collection->update_tbl(if_name);
Packit 6d2c1b
		if ((up_ib_ctx = g_p_ib_ctx_handler_collection->get_ib_ctx(if_name))) {
Packit 6d2c1b
			s = new slave_data_t(if_index);
Packit 6d2c1b
			s->active = true;
Packit 6d2c1b
			s->p_ib_ctx = up_ib_ctx;
Packit 6d2c1b
			s->p_L2_addr = create_L2_address(if_name);
Packit 6d2c1b
			s->port_num = get_port_from_ifname(if_name);
Packit 6d2c1b
			m_slaves.push_back(s);
Packit 6d2c1b
Packit 6d2c1b
			up_ib_ctx->set_ctx_time_converter_status(g_p_net_device_table_mgr->get_ctx_time_conversion_mode());
Packit 6d2c1b
			g_buffer_pool_rx->register_memory(s->p_ib_ctx);
Packit 6d2c1b
			g_buffer_pool_tx->register_memory(s->p_ib_ctx);
Packit 6d2c1b
			found = true;
Packit 6d2c1b
		}
Packit 6d2c1b
	} else {
Packit 6d2c1b
		if (!m_slaves.empty()) {
Packit 6d2c1b
			s = m_slaves.back();
Packit 6d2c1b
			m_slaves.pop_back();
Packit 6d2c1b
Packit 6d2c1b
			nd_logdbg("slave %d is down ", s->if_index);
Packit 6d2c1b
Packit 6d2c1b
			ib_ctx = s->p_ib_ctx;
Packit 6d2c1b
			delete s;
Packit 6d2c1b
			found = true;
Packit 6d2c1b
		}
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	m_lock.unlock();
Packit 6d2c1b
Packit 6d2c1b
	if (!found) {
Packit 6d2c1b
		nd_logdbg("Unable to detect any changes for interface %d. ignoring", if_index);
Packit 6d2c1b
		return;
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	/* restart if status changed */
Packit 6d2c1b
	m_p_L2_addr = create_L2_address(get_ifname());
Packit 6d2c1b
	// restart rings
Packit 6d2c1b
	rings_hash_map_t::iterator ring_iter;
Packit 6d2c1b
	for (ring_iter = m_h_ring_map.begin(); ring_iter != m_h_ring_map.end(); ring_iter++) {
Packit 6d2c1b
		THE_RING->restart();
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	if (ib_ctx) {
Packit 6d2c1b
		g_p_ib_ctx_handler_collection->del_ib_ctx(ib_ctx);
Packit 6d2c1b
	}
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
std::string net_device_val::to_str()
Packit 6d2c1b
{
Packit 6d2c1b
	return std::string("Net Device: " + m_name);
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
/*
 * Get a ring for the given allocation key, creating it on first use, and
 * take one reference on it.
 *
 * The key is first passed through ring_key_redirection_reserve(). When no ring
 * exists for the resulting key, a ring is created, stored in m_h_ring_map under
 * a private copy of the key, and its RX channel fds are added to the global
 * ring epoll fd.
 *
 * @param key  allocation key requested by the caller
 * @return the ring, or NULL when a new ring could not be created.
 */
ring* net_device_val::reserve_ring(resource_allocation_key *key)
{
	nd_logfunc("");
	auto_unlocker lock(m_lock);
	key = ring_key_redirection_reserve(key);
	ring* the_ring = NULL;
	rings_hash_map_t::iterator ring_iter = m_h_ring_map.find(key);

	if (m_h_ring_map.end() == ring_iter) {
		nd_logdbg("Creating new RING for %s", key->to_str());
		// copy key since we keep pointer and socket can die so map will lose pointer
		resource_allocation_key *new_key = new resource_allocation_key(*key);
		the_ring = create_ring(new_key);
		if (!the_ring) {
			// the copy never made it into the map, so nobody else will free it
			delete new_key;
			return NULL;
		}
		m_h_ring_map[new_key] = std::make_pair(the_ring, 0); // each ring is born with ref_count = 0
		ring_iter = m_h_ring_map.find(new_key);
		epoll_event ev = {0, {0}};
		int num_ring_rx_fds = the_ring->get_num_resources();
		int *ring_rx_fds_array = the_ring->get_rx_channel_fds();
		ev.events = EPOLLIN;
		for (int i = 0; i < num_ring_rx_fds; i++) {
			int cq_ch_fd = ring_rx_fds_array[i];
			ev.data.fd = cq_ch_fd;
			BULLSEYE_EXCLUDE_BLOCK_START
			if (unlikely( orig_os_api.epoll_ctl(g_p_net_device_table_mgr->global_ring_epfd_get(),
					EPOLL_CTL_ADD, cq_ch_fd, &ev))) {
				nd_logerr("Failed to add RING notification fd to global_table_mgr_epfd (errno=%d %m)", errno);
			}
			BULLSEYE_EXCLUDE_BLOCK_END
		}

		g_p_net_device_table_mgr->global_ring_wakeup();
	}
	// now we are sure the ring is in the map

	ADD_RING_REF_CNT;
	the_ring = GET_THE_RING(key);

	// pointers are printed with %p; an integer conversion would mismatch the
	// argument type
	nd_logdbg("%p: if_index %d parent %p ref %d key %s",
			the_ring, the_ring->get_if_index(),
			the_ring->get_parent(), RING_REF_CNT, key->to_str());

	return the_ring;
}
Packit 6d2c1b
Packit 6d2c1b
/*
 * Drop one reference on the ring that serves the given allocation key.
 *
 * When the reference count reaches zero, the ring's RX channel fds are removed
 * from the global ring epoll fd, the key redirection is released, and the ring
 * together with its map key is deleted.
 *
 * @param key  allocation key previously passed to reserve_ring()
 * @return true when a ring was found for the (redirected) key, false otherwise.
 */
bool net_device_val::release_ring(resource_allocation_key *key)
{
	nd_logfunc("");

	resource_allocation_key *red_key;

	auto_unlocker lock(m_lock);
	red_key = get_ring_key_redirection(key);
	ring* the_ring = NULL;
	rings_hash_map_t::iterator ring_iter = m_h_ring_map.find(red_key);

	if (m_h_ring_map.end() != ring_iter) {
		DEC_RING_REF_CNT;
		the_ring = GET_THE_RING(red_key);

		// pointers are printed with %p, matching the "Deleting RING" message below
		nd_logdbg("%p: if_index %d parent %p ref %d key %s",
				the_ring, the_ring->get_if_index(),
				the_ring->get_parent(), RING_REF_CNT, red_key->to_str());

		if ( TEST_REF_CNT_ZERO ) {
			int num_ring_rx_fds = the_ring->get_num_resources();
			int *ring_rx_fds_array = the_ring->get_rx_channel_fds();
			nd_logdbg("Deleting RING %p for key %s and removing notification fd from global_table_mgr_epfd (epfd=%d)",
					the_ring, red_key->to_str(), g_p_net_device_table_mgr->global_ring_epfd_get());
			for (int i = 0; i < num_ring_rx_fds; i++) {
				int cq_ch_fd = ring_rx_fds_array[i];
				BULLSEYE_EXCLUDE_BLOCK_START
				if (unlikely(orig_os_api.epoll_ctl(g_p_net_device_table_mgr->global_ring_epfd_get(),
						EPOLL_CTL_DEL, cq_ch_fd, NULL))) {
					nd_logerr("Failed to delete RING notification fd to global_table_mgr_epfd (errno=%d %m)", errno);
				}
				BULLSEYE_EXCLUDE_BLOCK_END
			}

			// may free red_key, so red_key must not be used past this point
			ring_key_redirection_release(key);

			delete the_ring;
			delete ring_iter->first;
			m_h_ring_map.erase(ring_iter);
		}
		return true;
	}
	return false;
}
Packit 6d2c1b
Packit 6d2c1b
/*
Packit 6d2c1b
 * this function maps key to new keys that it created
Packit 6d2c1b
 * the key that it creates is the size of the map
Packit 6d2c1b
 */
Packit 6d2c1b
resource_allocation_key* net_device_val::ring_key_redirection_reserve(resource_allocation_key *key)
Packit 6d2c1b
{
Packit 6d2c1b
	// if allocation logic is usr idx feature disabled
Packit 6d2c1b
	if (!safe_mce_sys().ring_limit_per_interface ||
Packit 6d2c1b
	    key->get_ring_alloc_logic() == RING_LOGIC_PER_USER_ID)
Packit 6d2c1b
		return key;
Packit 6d2c1b
Packit 6d2c1b
	if (m_h_ring_key_redirection_map.find(key) != m_h_ring_key_redirection_map.end()) {
Packit 6d2c1b
		m_h_ring_key_redirection_map[key].second++;
Packit 6d2c1b
		nd_logdbg("redirecting key=%s (ref-count:%d) to key=%s", key->to_str(),
Packit 6d2c1b
			m_h_ring_key_redirection_map[key].second,
Packit 6d2c1b
			m_h_ring_key_redirection_map[key].first->to_str());
Packit 6d2c1b
		return m_h_ring_key_redirection_map[key].first;
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	int ring_map_size = (int)m_h_ring_map.size();
Packit 6d2c1b
	if (safe_mce_sys().ring_limit_per_interface > ring_map_size) {
Packit 6d2c1b
		resource_allocation_key *key2 = new resource_allocation_key(*key);
Packit 6d2c1b
		// replace key to redirection key
Packit 6d2c1b
		key2->set_user_id_key(ring_map_size);
Packit 6d2c1b
		m_h_ring_key_redirection_map[key] = std::make_pair(key2, 1);
Packit 6d2c1b
		nd_logdbg("redirecting key=%s (ref-count:1) to key=%s",
Packit 6d2c1b
			  key->to_str(), key2->to_str());
Packit 6d2c1b
		return key2;
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	rings_hash_map_t::iterator ring_iter = m_h_ring_map.begin();
Packit 6d2c1b
	int min_ref_count = ring_iter->second.second;
Packit 6d2c1b
	resource_allocation_key *min_key = ring_iter->first;
Packit 6d2c1b
	while (ring_iter != m_h_ring_map.end()) {
Packit 6d2c1b
		// redirect only to ring with the same profile
Packit 6d2c1b
		if (ring_iter->first->get_ring_profile_key() ==
Packit 6d2c1b
		    key->get_ring_profile_key() &&
Packit 6d2c1b
		    ring_iter->second.second < min_ref_count) {
Packit 6d2c1b
			min_ref_count = ring_iter->second.second;
Packit 6d2c1b
			min_key = ring_iter->first;
Packit 6d2c1b
		}
Packit 6d2c1b
		ring_iter++;
Packit 6d2c1b
	}
Packit 6d2c1b
	m_h_ring_key_redirection_map[key] = std::make_pair(new resource_allocation_key(*min_key), 1);
Packit 6d2c1b
	nd_logdbg("redirecting key=%s (ref-count:1) to key=%s",
Packit 6d2c1b
		  key->to_str(), min_key->to_str());
Packit 6d2c1b
	return min_key;
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
/*
 * Look up the redirection target of a key without changing any ref-count.
 * Returns the key itself when ring_limit_per_interface is 0 or when the key
 * has no entry in the redirection map.
 */
resource_allocation_key* net_device_val::get_ring_key_redirection(resource_allocation_key *key)
{
	if (!safe_mce_sys().ring_limit_per_interface) {
		return key;
	}

	if (m_h_ring_key_redirection_map.end() != m_h_ring_key_redirection_map.find(key)) {
		return m_h_ring_key_redirection_map[key].first;
	}

	nd_logdbg("key = %s is not found in the redirection map",
		  key->to_str());
	return key;
}
Packit 6d2c1b
Packit 6d2c1b
/*
 * Drop one reference on the redirection entry of a key. When the ref-count
 * reaches zero, the redirection key object is deleted and the entry is erased.
 * Does nothing when ring_limit_per_interface is 0 or the key has no entry.
 */
void net_device_val::ring_key_redirection_release(resource_allocation_key *key)
{
	if (!safe_mce_sys().ring_limit_per_interface) {
		return;
	}
	if (m_h_ring_key_redirection_map.find(key) == m_h_ring_key_redirection_map.end()) {
		return;
	}
	// still referenced by other users
	if (--m_h_ring_key_redirection_map[key].second != 0) {
		return;
	}

	// this is allocated in ring_key_redirection_reserve
	nd_logdbg("release redirecting key=%s (ref-count:%d) to key=%s", key->to_str(),
		m_h_ring_key_redirection_map[key].second,
		m_h_ring_key_redirection_map[key].first->to_str());
	delete m_h_ring_key_redirection_map[key].first;
	m_h_ring_key_redirection_map.erase(key);
}
Packit 6d2c1b
Packit 6d2c1b
int net_device_val::global_ring_poll_and_process_element(uint64_t *p_poll_sn, void* pv_fd_ready_array /*=NULL*/)
Packit 6d2c1b
{
Packit 6d2c1b
	nd_logfuncall("");
Packit 6d2c1b
	int ret_total = 0;
Packit 6d2c1b
	auto_unlocker lock(m_lock);
Packit 6d2c1b
	rings_hash_map_t::iterator ring_iter;
Packit 6d2c1b
	for (ring_iter = m_h_ring_map.begin(); ring_iter != m_h_ring_map.end(); ring_iter++) {
Packit 6d2c1b
		int ret = THE_RING->poll_and_process_element_rx(p_poll_sn, pv_fd_ready_array);
Packit 6d2c1b
		BULLSEYE_EXCLUDE_BLOCK_START
Packit 6d2c1b
		if (ret < 0 && errno != EAGAIN) {
Packit 6d2c1b
			nd_logerr("Error in ring->poll_and_process_element() of %p (errno=%d %m)", THE_RING, errno);
Packit 6d2c1b
			return ret;
Packit 6d2c1b
		}
Packit 6d2c1b
		BULLSEYE_EXCLUDE_BLOCK_END
Packit 6d2c1b
		if (ret > 0)
Packit 6d2c1b
			nd_logfunc("ring[%p] Returned with: %d (sn=%d)", THE_RING, ret, *p_poll_sn);
Packit 6d2c1b
		ret_total += ret;
Packit 6d2c1b
	}
Packit 6d2c1b
	return ret_total;
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
int net_device_val::global_ring_request_notification(uint64_t poll_sn)
Packit 6d2c1b
{
Packit 6d2c1b
	int ret_total = 0;
Packit 6d2c1b
	auto_unlocker lock(m_lock);
Packit 6d2c1b
	rings_hash_map_t::iterator ring_iter;
Packit 6d2c1b
	for (ring_iter = m_h_ring_map.begin(); ring_iter != m_h_ring_map.end(); ring_iter++) {
Packit 6d2c1b
		int ret = THE_RING->request_notification(CQT_RX, poll_sn);
Packit 6d2c1b
		if (ret < 0) {
Packit 6d2c1b
			nd_logerr("Error ring[%p]->request_notification() (errno=%d %m)", THE_RING, errno);
Packit 6d2c1b
			return ret;
Packit 6d2c1b
		}
Packit 6d2c1b
		nd_logfunc("ring[%p] Returned with: %d (sn=%d)", THE_RING, ret, poll_sn);
Packit 6d2c1b
		ret_total += ret;
Packit 6d2c1b
	}
Packit 6d2c1b
	return ret_total;
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
int net_device_val::ring_drain_and_proccess()
Packit 6d2c1b
{
Packit 6d2c1b
	nd_logfuncall();
Packit 6d2c1b
	int ret_total = 0;
Packit 6d2c1b
Packit 6d2c1b
	auto_unlocker lock(m_lock);
Packit 6d2c1b
	rings_hash_map_t::iterator ring_iter;
Packit 6d2c1b
	for (ring_iter = m_h_ring_map.begin(); ring_iter != m_h_ring_map.end(); ring_iter++) {
Packit 6d2c1b
		int ret = THE_RING->drain_and_proccess();
Packit 6d2c1b
		if (ret < 0)
Packit 6d2c1b
			return ret;
Packit 6d2c1b
		if (ret > 0)
Packit 6d2c1b
			nd_logfunc("cq[%p] Returned with: %d", THE_RING, ret);
Packit 6d2c1b
		ret_total += ret;
Packit 6d2c1b
	}
Packit 6d2c1b
	return ret_total;
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
void net_device_val::ring_adapt_cq_moderation()
Packit 6d2c1b
{
Packit 6d2c1b
	nd_logfuncall();
Packit 6d2c1b
Packit 6d2c1b
	auto_unlocker lock(m_lock);
Packit 6d2c1b
	rings_hash_map_t::iterator ring_iter;
Packit 6d2c1b
	for (ring_iter = m_h_ring_map.begin(); ring_iter != m_h_ring_map.end(); ring_iter++) {
Packit 6d2c1b
		THE_RING->adapt_cq_moderation();
Packit 6d2c1b
	}
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
/*
 * Register the handler for ibverbs async events of every distinct ib context
 * used by the slaves. Two slaves might be on two ports of the same device, so
 * a context already seen on an earlier slave is registered only once.
 */
void net_device_val::register_to_ibverbs_events(event_handler_ibverbs *handler) {
	for (size_t cur = 0; cur < m_slaves.size(); ++cur) {
		// look for an earlier slave sharing this slave's ib context
		size_t prev = 0;
		while (prev < cur && m_slaves[cur]->p_ib_ctx != m_slaves[prev]->p_ib_ctx) {
			++prev;
		}
		if (prev < cur) {
			continue; // already registered through slave 'prev'
		}

		nd_logfunc("registering slave to ibverbs events slave=%p", m_slaves[cur]);
		g_p_event_handler_manager->register_ibverbs_event(m_slaves[cur]->p_ib_ctx->get_ibv_context()->async_fd, handler, m_slaves[cur]->p_ib_ctx->get_ibv_context(), 0);
	}
}
Packit 6d2c1b
Packit 6d2c1b
/*
 * Unregister the handler from ibverbs async events of every distinct ib
 * context used by the slaves. Two slaves might be on two ports of the same
 * device, so a context already seen on an earlier slave is unregistered only
 * once.
 */
void net_device_val::unregister_to_ibverbs_events(event_handler_ibverbs *handler) {
	for (size_t cur = 0; cur < m_slaves.size(); ++cur) {
		// look for an earlier slave sharing this slave's ib context
		size_t prev = 0;
		while (prev < cur && m_slaves[cur]->p_ib_ctx != m_slaves[prev]->p_ib_ctx) {
			++prev;
		}
		if (prev < cur) {
			continue; // already unregistered through slave 'prev'
		}

		nd_logfunc("unregistering slave to ibverbs events slave=%p", m_slaves[cur]);
		g_p_event_handler_manager->unregister_ibverbs_event(m_slaves[cur]->p_ib_ctx->get_ibv_context()->async_fd, handler);
	}
}
Packit 6d2c1b
Packit 6d2c1b
void net_device_val_eth::configure()
Packit 6d2c1b
{
Packit 6d2c1b
	m_p_L2_addr = create_L2_address(get_ifname());
Packit 6d2c1b
Packit 6d2c1b
	BULLSEYE_EXCLUDE_BLOCK_START
Packit 6d2c1b
	if (m_p_L2_addr == NULL) {
Packit 6d2c1b
		nd_logpanic("m_p_L2_addr allocation error");
Packit 6d2c1b
	}
Packit 6d2c1b
	BULLSEYE_EXCLUDE_BLOCK_END
Packit 6d2c1b
Packit 6d2c1b
	create_br_address(get_ifname());
Packit 6d2c1b
Packit 6d2c1b
	m_vlan = get_vlan_id_from_ifname(get_ifname());
Packit 6d2c1b
	if (m_vlan) {
Packit 6d2c1b
		parse_prio_egress_map();
Packit 6d2c1b
	}
Packit 6d2c1b
	if (m_vlan && m_bond != NO_BOND && m_bond_fail_over_mac == 1) {
Packit 6d2c1b
		vlog_printf(VLOG_WARNING, " ******************************************************************\n");
Packit 6d2c1b
		vlog_printf(VLOG_WARNING, "%s: vlan over bond while fail_over_mac=1 is not offloaded\n", get_ifname());
Packit 6d2c1b
		vlog_printf(VLOG_WARNING, " ******************************************************************\n");
Packit 6d2c1b
		m_state = INVALID;
Packit 6d2c1b
	}
Packit 6d2c1b
	if(!m_vlan && (get_flags() & IFF_MASTER)) {
Packit 6d2c1b
		char if_name[IFNAMSIZ] = {0};
Packit 6d2c1b
Packit 6d2c1b
		if (!if_indextoname(m_slaves[0]->if_index, if_name)) {
Packit 6d2c1b
			nd_logerr("Can not find interface name by index=%d", m_slaves[0]->if_index);
Packit 6d2c1b
		}
Packit 6d2c1b
Packit 6d2c1b
		//in case vlan is configured on slave
Packit 6d2c1b
		m_vlan = get_vlan_id_from_ifname(if_name);
Packit 6d2c1b
	}
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
int net_device_val::get_priority_by_tc_class(uint32_t tc_class)
Packit 6d2c1b
{
Packit 6d2c1b
	tc_class_priority_map::iterator it = m_class_prio_map.find(tc_class);
Packit 6d2c1b
	if (it == m_class_prio_map.end()) {
Packit 6d2c1b
		return VMA_DEFAULT_ENGRESS_MAP_PRIO;
Packit 6d2c1b
	}
Packit 6d2c1b
	return it->second;
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
void net_device_val_eth::parse_prio_egress_map()
Packit 6d2c1b
{
Packit 6d2c1b
#ifdef HAVE_LIBNL3
Packit 6d2c1b
	int len, ret;
Packit 6d2c1b
	nl_cache *cache = NULL;
Packit 6d2c1b
	rtnl_link *link;
Packit 6d2c1b
	vlan_map *map;
Packit 6d2c1b
Packit 6d2c1b
	nl_socket_handle *nl_socket = nl_socket_handle_alloc();
Packit 6d2c1b
	if (!nl_socket) {
Packit 6d2c1b
		nd_logdbg("unable to allocate socket socket %m", errno);
Packit 6d2c1b
		goto out;
Packit 6d2c1b
	}
Packit 6d2c1b
	nl_socket_set_local_port(nl_socket, 0);
Packit 6d2c1b
	ret = nl_connect(nl_socket, NETLINK_ROUTE);
Packit 6d2c1b
	if (ret < 0) {
Packit 6d2c1b
		nd_logdbg("unable to connect to libnl socket %d %m", ret, errno);
Packit 6d2c1b
		goto out;
Packit 6d2c1b
	}
Packit 6d2c1b
	ret = rtnl_link_alloc_cache(nl_socket, AF_UNSPEC, &cache);
Packit 6d2c1b
	if (!cache) {
Packit 6d2c1b
		nd_logdbg("unable to create libnl cache %d %m", ret, errno);
Packit 6d2c1b
		goto out;
Packit 6d2c1b
	}
Packit 6d2c1b
	link = rtnl_link_get_by_name(cache, get_ifname());
Packit 6d2c1b
	if (!link) {
Packit 6d2c1b
		nd_logdbg("unable to get libnl link %d %m", ret, errno);
Packit 6d2c1b
		goto out;
Packit 6d2c1b
	}
Packit 6d2c1b
	map = rtnl_link_vlan_get_egress_map(link, &len;;
Packit 6d2c1b
	if (!map || !len) {
Packit 6d2c1b
		nd_logdbg("no egress map found %d %p",len, map);
Packit 6d2c1b
		goto out;
Packit 6d2c1b
	}
Packit 6d2c1b
	for (int i = 0; i < len; i++) {
Packit 6d2c1b
		m_class_prio_map[map[i].vm_from] = map[i].vm_to;
Packit 6d2c1b
	}
Packit 6d2c1b
out:
Packit 6d2c1b
	if (cache) {
Packit 6d2c1b
		nl_cache_free(cache);
Packit 6d2c1b
	}
Packit 6d2c1b
	if (nl_socket) {
Packit 6d2c1b
		nl_socket_handle_free(nl_socket);
Packit 6d2c1b
	}
Packit 6d2c1b
#else
Packit 6d2c1b
	nd_logdbg("libnl3 not found, cannot read engress map, "
Packit 6d2c1b
		  "SO_PRIORITY will not work properly");
Packit 6d2c1b
#endif
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
/*
 * Create a ring for this Ethernet device.
 *
 * A key without a ring profile gets a ring chosen by the bond mode (m_bond).
 * A key with a ring profile gets the ring type named by that profile, looked
 * up in g_p_ring_profile.
 *
 * @param key  allocation key describing the requested ring
 * @return the new ring, or NULL when the profile is missing, the type is
 *         unknown, or ring construction threw vma_error.
 */
ring* net_device_val_eth::create_ring(resource_allocation_key *key)
{
	ring* new_ring = NULL;

	if (!key->get_ring_profile_key()) {
		// no profile: the ring flavor follows the bond mode
		try {
			switch (m_bond) {
			case NO_BOND:
				new_ring = new ring_eth(get_if_idx());
				break;
			case ACTIVE_BACKUP:
			case LAG_8023ad:
				new_ring = new ring_bond_eth(get_if_idx());
				break;
			case NETVSC:
				new_ring = new ring_bond_netvsc(get_if_idx());
				break;
			default:
				nd_logdbg("Unknown ring type");
				break;
			}
		} catch (vma_error &error) {
			nd_logdbg("failed creating ring %s", error.message);
		}
		return new_ring;
	}

	// this is a ring profile key: get the profile from the global map
	if (!g_p_ring_profile) {
		nd_logdbg("could not find ring profile");
		return NULL;
	}
	ring_profile *profile = g_p_ring_profile->get_profile(key->get_ring_profile_key());
	if (NULL == profile) {
		nd_logerr("could not find ring profile %d",
			  key->get_ring_profile_key());
		return NULL;
	}

	try {
		switch (profile->get_ring_type()) {
#ifdef HAVE_MP_RQ
		case VMA_RING_CYCLIC_BUFFER:
			new_ring = new ring_eth_cb(get_if_idx(),
						   &profile->get_desc()->ring_cyclicb,
						   key->get_memory_descriptor());
			break;
#endif
		case VMA_RING_EXTERNAL_MEM:
			new_ring = new ring_eth_direct(get_if_idx(),
						       &profile->get_desc()->ring_ext);
			break;
		default:
			nd_logdbg("Unknown ring type");
			break;
		}
	} catch (vma_error &error) {
		nd_logdbg("failed creating ring %s", error.message);
	}
	return new_ring;
}
Packit 6d2c1b
Packit 6d2c1b
/*
 * Build an Ethernet L2 address object from the link-layer address of ifname.
 *
 * Side effect: the device's current m_p_L2_addr is deleted and reset to NULL
 * before the new object is created; the caller decides where the returned
 * object is stored.
 *
 * @param ifname  interface whose link-layer address is read
 * @return a newly allocated ETH_addr.
 */
L2_address* net_device_val_eth::create_L2_address(const char* ifname)
{
	if (m_p_L2_addr) {
		delete m_p_L2_addr;
		m_p_L2_addr = NULL;
	}
	// zero-filled so a failed lookup (its result is not checked here) yields
	// an all-zero address instead of stack garbage
	unsigned char hw_addr[ETH_ALEN] = {0};
	get_local_ll_addr(ifname, hw_addr, ETH_ALEN, false);
	return new ETH_addr(hw_addr);
}
Packit 6d2c1b
Packit 6d2c1b
void net_device_val_eth::create_br_address(const char* ifname)
Packit 6d2c1b
{
Packit 6d2c1b
	if(m_p_br_addr) {
Packit 6d2c1b
		delete m_p_br_addr;
Packit 6d2c1b
		m_p_br_addr = NULL;
Packit 6d2c1b
	}
Packit 6d2c1b
	uint8_t hw_addr[ETH_ALEN];
Packit 6d2c1b
	get_local_ll_addr(ifname, hw_addr, ETH_ALEN, true);
Packit 6d2c1b
	m_p_br_addr = new ETH_addr(hw_addr);
Packit 6d2c1b
Packit 6d2c1b
	BULLSEYE_EXCLUDE_BLOCK_START
Packit 6d2c1b
	if(m_p_br_addr == NULL) {
Packit 6d2c1b
		nd_logpanic("m_p_br_addr allocation error");
Packit 6d2c1b
	}
Packit 6d2c1b
	BULLSEYE_EXCLUDE_BLOCK_END
Packit 6d2c1b
}
Packit 6d2c1b
std::string net_device_val_eth::to_str()
Packit 6d2c1b
{
Packit 6d2c1b
	return std::string("ETH: " + net_device_val::to_str());
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
/*
 * Unregister this device as observer of the broadcast neigh entry
 * (BROADCAST_IP); nothing is done if BROADCAST_IP does not parse.
 */
net_device_val_ib::~net_device_val_ib()
{
	struct in_addr bcast_ip;
	if (inet_pton(AF_INET, BROADCAST_IP, &bcast_ip) != 1) {
		return;
	}
	g_p_neigh_table_mgr->unregister_observer(neigh_key(ip_address(bcast_ip.s_addr), this), this);
}
Packit 6d2c1b
Packit 6d2c1b
void net_device_val_ib::configure()
Packit 6d2c1b
{
Packit 6d2c1b
	ib_ctx_handler* p_ib_ctx = NULL;
Packit 6d2c1b
	struct in_addr in;
Packit 6d2c1b
Packit 6d2c1b
	m_p_L2_addr = create_L2_address(get_ifname());
Packit 6d2c1b
Packit 6d2c1b
	BULLSEYE_EXCLUDE_BLOCK_START
Packit 6d2c1b
	if(m_p_L2_addr == NULL) {
Packit 6d2c1b
		nd_logpanic("m_p_L2_addr allocation error");
Packit 6d2c1b
	}
Packit 6d2c1b
	BULLSEYE_EXCLUDE_BLOCK_END
Packit 6d2c1b
Packit 6d2c1b
	create_br_address(get_ifname());
Packit 6d2c1b
Packit 6d2c1b
	if (1 == inet_pton(AF_INET, BROADCAST_IP, &in)) {
Packit 6d2c1b
		g_p_neigh_table_mgr->unregister_observer(neigh_key(ip_address(in.s_addr), this), this);
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	//Register to IB BR neigh
Packit 6d2c1b
	cache_entry_subject<neigh_key, neigh_val*>* p_ces = NULL;
Packit 6d2c1b
	if (1 == inet_pton(AF_INET, BROADCAST_IP, &in)) {
Packit 6d2c1b
		g_p_neigh_table_mgr->register_observer(neigh_key(ip_address(in.s_addr), this), this, &p_ces);
Packit 6d2c1b
	}
Packit 6d2c1b
	m_br_neigh = dynamic_cast<neigh_ib_broadcast*>(p_ces);
Packit 6d2c1b
Packit 6d2c1b
	p_ib_ctx = g_p_ib_ctx_handler_collection->get_ib_ctx(get_ifname_link());
Packit 6d2c1b
	if (!p_ib_ctx || ibv_query_pkey(p_ib_ctx->get_ibv_context(), get_port_from_ifname(get_ifname_link()), 0, &m_pkey)) {
Packit 6d2c1b
		nd_logerr("failed querying pkey");
Packit 6d2c1b
	}
Packit 6d2c1b
	nd_logdbg("pkey: %d", m_pkey);
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
/*
 * Create a ring for this IPoIB device; the ring flavor follows the bond mode
 * (m_bond). The key is not used.
 *
 * @return the new ring, or NULL when the bond mode is not handled or ring
 *         construction threw vma_error.
 */
ring* net_device_val_ib::create_ring(resource_allocation_key *key)
{
	NOT_IN_USE(key);

	ring* new_ring = NULL;
	try {
		switch (m_bond) {
		case NO_BOND:
			new_ring = new ring_ib(get_if_idx());
			break;
		case ACTIVE_BACKUP:
		case LAG_8023ad:
			new_ring = new ring_bond_ib(get_if_idx());
			break;
		default:
			nd_logdbg("Unknown ring type");
			break;
		}
	} catch (vma_error &error) {
		nd_logdbg("failed creating ring %s", error.message);
	}

	return new_ring;
}
Packit 6d2c1b
Packit 6d2c1b
/*
 * Build an IPoIB L2 address object from the link-layer address of ifname.
 *
 * Side effect: the device's current m_p_L2_addr is deleted and reset to NULL
 * before the new object is created; the caller decides where the returned
 * object is stored.
 *
 * @param ifname  interface whose link-layer address is read
 * @return a newly allocated IPoIB_addr.
 */
L2_address* net_device_val_ib::create_L2_address(const char* ifname)
{
	if (m_p_L2_addr) {
		delete m_p_L2_addr;
		m_p_L2_addr = NULL;
	}
	// zero-filled so a failed lookup (its result is not checked here) yields
	// an all-zero address instead of stack garbage
	unsigned char hw_addr[IPOIB_HW_ADDR_LEN] = {0};
	get_local_ll_addr(ifname, hw_addr, IPOIB_HW_ADDR_LEN, false);
	return new IPoIB_addr(hw_addr);
}
Packit 6d2c1b
Packit 6d2c1b
void net_device_val_ib::create_br_address(const char* ifname)
Packit 6d2c1b
{
Packit 6d2c1b
	if (m_p_br_addr) {
Packit 6d2c1b
		delete m_p_br_addr;
Packit 6d2c1b
		m_p_br_addr = NULL;
Packit 6d2c1b
	}
Packit 6d2c1b
	unsigned char hw_addr[IPOIB_HW_ADDR_LEN];
Packit 6d2c1b
	get_local_ll_addr(ifname, hw_addr, IPOIB_HW_ADDR_LEN, true);
Packit 6d2c1b
	m_p_br_addr = new IPoIB_addr(hw_addr);
Packit 6d2c1b
	BULLSEYE_EXCLUDE_BLOCK_START
Packit 6d2c1b
	if (m_p_br_addr == NULL) {
Packit 6d2c1b
		nd_logpanic("m_p_br_addr allocation error");
Packit 6d2c1b
	}
Packit 6d2c1b
	BULLSEYE_EXCLUDE_BLOCK_END
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
std::string net_device_val_ib::to_str()
Packit 6d2c1b
{
Packit 6d2c1b
	return std::string("IB: " + net_device_val::to_str());
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
Packit 6d2c1b
bool net_device_val::verify_bond_ipoib_or_eth_qp_creation()
Packit 6d2c1b
{
Packit 6d2c1b
	char slaves[IFNAMSIZ * MAX_SLAVES] = {0};
Packit 6d2c1b
	if (!get_bond_slaves_name_list(get_ifname_link(), slaves, sizeof slaves)) {
Packit 6d2c1b
		vlog_printf(VLOG_WARNING,"*******************************************************************************************************\n");
Packit 6d2c1b
		vlog_printf(VLOG_WARNING,"* Interface %s will not be offloaded, slave list or bond name could not be found\n", get_ifname());
Packit 6d2c1b
		vlog_printf(VLOG_WARNING,"*******************************************************************************************************\n");
Packit 6d2c1b
		return false;
Packit 6d2c1b
	}
Packit 6d2c1b
	//go over all slaves and check preconditions
Packit 6d2c1b
	bool bond_ok = true;
Packit 6d2c1b
	char* slave_name;
Packit 6d2c1b
	char* save_ptr;
Packit 6d2c1b
	slave_name = strtok_r(slaves, " ", &save_ptr);
Packit 6d2c1b
	while (slave_name != NULL)
Packit 6d2c1b
	{
Packit 6d2c1b
		char* p = strchr(slave_name, '\n');
Packit 6d2c1b
		if (p) *p = '\0'; // Remove the tailing 'new line" char
Packit 6d2c1b
		if (!verify_ipoib_or_eth_qp_creation(slave_name)) {
Packit 6d2c1b
			//check all slaves but print only once for bond
Packit 6d2c1b
			bond_ok =  false;
Packit 6d2c1b
		}
Packit 6d2c1b
		slave_name = strtok_r(NULL, " ", &save_ptr);
Packit 6d2c1b
	}
Packit 6d2c1b
	if (!bond_ok) {
Packit 6d2c1b
		vlog_printf(VLOG_WARNING,"*******************************************************************************************************\n");
Packit 6d2c1b
		vlog_printf(VLOG_WARNING,"* Bond %s will not be offloaded due to problem with its slaves.\n", get_ifname());
Packit 6d2c1b
		vlog_printf(VLOG_WARNING,"* Check warning messages for more information.\n");
Packit 6d2c1b
		vlog_printf(VLOG_WARNING,"*******************************************************************************************************\n");
Packit 6d2c1b
	} else {
Packit 6d2c1b
		/*
Packit 6d2c1b
		 * Print warning message while bond device contains two slaves of the same HCA
Packit 6d2c1b
		 * while RoCE LAG is enabled for both slaves.
Packit 6d2c1b
		 */
Packit 6d2c1b
		sys_image_guid_map_t::iterator guid_iter;
Packit 6d2c1b
		for (guid_iter = m_sys_image_guid_map.begin(); guid_iter != m_sys_image_guid_map.end(); guid_iter++) {
Packit 6d2c1b
			char bond_roce_lag_path[256] = {0};
Packit 6d2c1b
			if (guid_iter->second.size() > 1 &&
Packit 6d2c1b
					check_bond_roce_lag_exist(bond_roce_lag_path, sizeof(bond_roce_lag_path), guid_iter->second.front().c_str()) &&
Packit 6d2c1b
					check_bond_roce_lag_exist(bond_roce_lag_path, sizeof(bond_roce_lag_path), guid_iter->second.back().c_str())) {
Packit 6d2c1b
				print_roce_lag_warnings(get_ifname_link(), bond_roce_lag_path, guid_iter->second.front().c_str(), guid_iter->second.back().c_str());
Packit 6d2c1b
			}
Packit 6d2c1b
		}
Packit 6d2c1b
	}
Packit 6d2c1b
	return bond_ok;
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
//interface name can be slave while ifa struct can describe bond
Packit 6d2c1b
bool net_device_val::verify_ipoib_or_eth_qp_creation(const char* interface_name)
Packit 6d2c1b
{
Packit 6d2c1b
	if (m_type == ARPHRD_INFINIBAND) {
Packit 6d2c1b
		if (verify_enable_ipoib(interface_name) && verify_qp_creation(interface_name, IBV_QPT_UD)) {
Packit 6d2c1b
			return true;
Packit 6d2c1b
		}
Packit 6d2c1b
	} else {
Packit 6d2c1b
		if (verify_qp_creation(interface_name, IBV_QPT_RAW_PACKET)) {
Packit 6d2c1b
			return true;
Packit 6d2c1b
		}
Packit 6d2c1b
	}
Packit 6d2c1b
	return false;
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
bool net_device_val::verify_enable_ipoib(const char* interface_name)
Packit 6d2c1b
{
Packit 6d2c1b
	char filename[256] = "\0";
Packit 6d2c1b
	char ifname[IFNAMSIZ] = "\0";
Packit 6d2c1b
	NOT_IN_USE(interface_name); // Suppress --enable-opt-log=high warning
Packit 6d2c1b
Packit 6d2c1b
	if(!safe_mce_sys().enable_ipoib) {
Packit 6d2c1b
		nd_logdbg("Blocking offload: IPoIB interfaces ('%s')", interface_name);
Packit 6d2c1b
		return false;
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
#ifndef DEFINED_IBV_QP_INIT_SOURCE_QPN
Packit 6d2c1b
	// Note: mlx4 does not support this capability
Packit 6d2c1b
	ib_ctx_handler* ib_ctx = g_p_ib_ctx_handler_collection->get_ib_ctx(get_ifname_link());
Packit 6d2c1b
	if (!ib_ctx->is_mlx4()) {
Packit 6d2c1b
		nd_logwarn("Blocking offload: SOURCE_QPN is not supported for this driver ('%s')", interface_name);
Packit 6d2c1b
		return false;
Packit 6d2c1b
	}
Packit 6d2c1b
#endif
Packit 6d2c1b
Packit 6d2c1b
	// Verify IPoIB is in 'datagram mode' for proper VMA with flow steering operation
Packit 6d2c1b
	if (validate_ipoib_prop(get_ifname(), m_flags, IPOIB_MODE_PARAM_FILE, "datagram", 8, filename, ifname)) {
Packit 6d2c1b
		vlog_printf(VLOG_WARNING,"*******************************************************************************************************\n");
Packit 6d2c1b
		vlog_printf(VLOG_WARNING,"* IPoIB mode of interface '%s' is \"connected\" !\n", get_ifname());
Packit 6d2c1b
		vlog_printf(VLOG_WARNING,"* Please change it to datagram: \"echo datagram > %s\" before loading your application with VMA library\n", filename);
Packit 6d2c1b
		vlog_printf(VLOG_WARNING,"* VMA doesn't support IPoIB in connected mode.\n");
Packit 6d2c1b
		vlog_printf(VLOG_WARNING,"* Please refer to VMA Release Notes for more information\n");
Packit 6d2c1b
		vlog_printf(VLOG_WARNING,"*******************************************************************************************************\n");
Packit 6d2c1b
		return false;
Packit 6d2c1b
	}
Packit 6d2c1b
	else {
Packit 6d2c1b
		nd_logdbg("verified interface '%s' is running in datagram mode", get_ifname());
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	// Verify umcast is disabled for IB flow
Packit 6d2c1b
	if (validate_ipoib_prop(get_ifname(), m_flags, UMCAST_PARAM_FILE, "0", 1, filename, ifname)) { // Extract UMCAST flag (only for IB transport types)
Packit 6d2c1b
		vlog_printf(VLOG_WARNING,"*******************************************************************************************************\n");
Packit 6d2c1b
		vlog_printf(VLOG_WARNING,"* UMCAST flag is Enabled for interface %s !\n", get_ifname());
Packit 6d2c1b
		vlog_printf(VLOG_WARNING,"* Please disable it: \"echo 0 > %s\" before loading your application with VMA library\n", filename);
Packit 6d2c1b
		vlog_printf(VLOG_WARNING,"* This option in no longer needed in this version\n");
Packit 6d2c1b
		vlog_printf(VLOG_WARNING,"* Please refer to Release Notes for more information\n");
Packit 6d2c1b
		vlog_printf(VLOG_WARNING,"*******************************************************************************************************\n");
Packit 6d2c1b
		return false;
Packit 6d2c1b
	}
Packit 6d2c1b
	else {
Packit 6d2c1b
		nd_logdbg("verified interface '%s' is running with umcast disabled", get_ifname());
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	return true;
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
//ifname should point to a physical device
Packit 6d2c1b
bool net_device_val::verify_qp_creation(const char* ifname, enum ibv_qp_type qp_type)
Packit 6d2c1b
{
Packit 6d2c1b
	bool success = false;
Packit 6d2c1b
	char bond_roce_lag_path[256] = {0};
Packit 6d2c1b
	struct ibv_cq* cq = NULL;
Packit 6d2c1b
	struct ibv_comp_channel *channel = NULL;
Packit 6d2c1b
	struct ibv_qp* qp = NULL;
Packit 6d2c1b
Packit 6d2c1b
	vma_ibv_qp_init_attr qp_init_attr;
Packit 6d2c1b
	memset(&qp_init_attr, 0, sizeof(qp_init_attr));
Packit 6d2c1b
Packit 6d2c1b
	vma_ibv_cq_init_attr attr;
Packit 6d2c1b
	memset(&attr, 0, sizeof(attr));
Packit 6d2c1b
Packit 6d2c1b
	qp_init_attr.cap.max_send_wr = MCE_DEFAULT_TX_NUM_WRE;
Packit 6d2c1b
	qp_init_attr.cap.max_recv_wr = MCE_DEFAULT_RX_NUM_WRE;
Packit 6d2c1b
	qp_init_attr.cap.max_inline_data = MCE_DEFAULT_TX_MAX_INLINE;
Packit 6d2c1b
	qp_init_attr.cap.max_send_sge = MCE_DEFAULT_TX_NUM_SGE;
Packit 6d2c1b
	qp_init_attr.cap.max_recv_sge = MCE_DEFAULT_RX_NUM_SGE;
Packit 6d2c1b
	qp_init_attr.sq_sig_all = 0;
Packit 6d2c1b
	qp_init_attr.qp_type = qp_type;
Packit 6d2c1b
Packit 6d2c1b
	//find ib_cxt
Packit 6d2c1b
	char base_ifname[IFNAMSIZ];
Packit 6d2c1b
	get_base_interface_name((const char*)(ifname), base_ifname, sizeof(base_ifname));
Packit 6d2c1b
	int port_num = get_port_from_ifname(base_ifname);
Packit 6d2c1b
	ib_ctx_handler* p_ib_ctx = g_p_ib_ctx_handler_collection->get_ib_ctx(base_ifname);
Packit 6d2c1b
Packit 6d2c1b
	if (!p_ib_ctx) {
Packit 6d2c1b
		nd_logdbg("Cant find ib_ctx for interface %s", base_ifname);
Packit 6d2c1b
		if (qp_type == IBV_QPT_RAW_PACKET && m_bond != NO_BOND) {
Packit 6d2c1b
			if (check_bond_roce_lag_exist(bond_roce_lag_path, sizeof(bond_roce_lag_path), ifname)) {
Packit 6d2c1b
				print_roce_lag_warnings(get_ifname_link(), bond_roce_lag_path);
Packit 6d2c1b
			} else if ((p_ib_ctx = g_p_ib_ctx_handler_collection->get_ib_ctx(get_ifname_link()))
Packit 6d2c1b
					&& strstr(p_ib_ctx->get_ibname(), "bond")) {
Packit 6d2c1b
				print_roce_lag_warnings(get_ifname_link());
Packit 6d2c1b
			}
Packit 6d2c1b
		}
Packit 6d2c1b
		goto release_resources;
Packit 6d2c1b
	} else if (port_num > p_ib_ctx->get_ibv_device_attr()->phys_port_cnt) {
Packit 6d2c1b
		nd_logdbg("Invalid port for interface %s", base_ifname);
Packit 6d2c1b
		if (qp_type == IBV_QPT_RAW_PACKET && m_bond != NO_BOND && p_ib_ctx->is_mlx4()) {
Packit 6d2c1b
			print_roce_lag_warnings(get_ifname_link());
Packit 6d2c1b
		}
Packit 6d2c1b
		goto release_resources;
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	// Add to guid map in order to detect roce lag issue
Packit 6d2c1b
	if (qp_type == IBV_QPT_RAW_PACKET && m_bond != NO_BOND) {
Packit 6d2c1b
		m_sys_image_guid_map[p_ib_ctx->get_ibv_device_attr()->sys_image_guid].push_back(base_ifname);
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	//create qp resources
Packit 6d2c1b
	channel = ibv_create_comp_channel(p_ib_ctx->get_ibv_context());
Packit 6d2c1b
	if (!channel) {
Packit 6d2c1b
		nd_logdbg("channel creation failed for interface %s (errno=%d %m)", ifname, errno);
Packit 6d2c1b
		goto release_resources;
Packit 6d2c1b
	}
Packit 6d2c1b
	VALGRIND_MAKE_MEM_DEFINED(channel, sizeof(ibv_comp_channel));
Packit 6d2c1b
	cq = vma_ibv_create_cq(p_ib_ctx->get_ibv_context(), safe_mce_sys().tx_num_wr, (void*)this, channel, 0, &attr);
Packit 6d2c1b
	if (!cq) {
Packit 6d2c1b
		nd_logdbg("cq creation failed for interface %s (errno=%d %m)", ifname, errno);
Packit 6d2c1b
		goto release_resources;
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	vma_ibv_qp_init_attr_comp_mask(p_ib_ctx->get_ibv_pd(), qp_init_attr);
Packit 6d2c1b
	qp_init_attr.recv_cq = cq;
Packit 6d2c1b
	qp_init_attr.send_cq = cq;
Packit 6d2c1b
Packit 6d2c1b
	// Set source qpn for non mlx4 IPoIB devices
Packit 6d2c1b
	if (qp_type == IBV_QPT_UD && !p_ib_ctx->is_mlx4()) {
Packit 6d2c1b
		unsigned char hw_addr[IPOIB_HW_ADDR_LEN];
Packit 6d2c1b
		get_local_ll_addr(ifname, hw_addr, IPOIB_HW_ADDR_LEN, false);
Packit 6d2c1b
		IPoIB_addr ipoib_addr(hw_addr);
Packit 6d2c1b
		ibv_source_qpn_set(qp_init_attr, ipoib_addr.get_qpn());
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	qp = vma_ibv_create_qp(p_ib_ctx->get_ibv_pd(), &qp_init_attr);
Packit 6d2c1b
	if (qp) {
Packit 6d2c1b
		if (qp_type == IBV_QPT_UD && priv_ibv_create_flow_supported(qp, port_num) == -1) {
Packit 6d2c1b
			nd_logdbg("Create_ibv_flow failed on interface %s (errno=%d %m), Traffic will not be offloaded", ifname, errno);
Packit 6d2c1b
			goto qp_failure;
Packit 6d2c1b
		} else {
Packit 6d2c1b
			success = true;
Packit 6d2c1b
Packit 6d2c1b
			if (qp_type == IBV_QPT_RAW_PACKET && !priv_ibv_query_flow_tag_supported(qp, port_num)) {
Packit 6d2c1b
				p_ib_ctx->set_flow_tag_capability(true);
Packit 6d2c1b
			}
Packit 6d2c1b
			nd_logdbg("verified interface %s for flow tag capabilities : %s", ifname, p_ib_ctx->get_flow_tag_capability() ? "enabled" : "disabled");
Packit 6d2c1b
Packit 6d2c1b
			if (qp_type == IBV_QPT_RAW_PACKET && p_ib_ctx->is_packet_pacing_supported() && !priv_ibv_query_burst_supported(qp, port_num)) {
Packit 6d2c1b
				p_ib_ctx->set_burst_capability(true);
Packit 6d2c1b
			}
Packit 6d2c1b
			nd_logdbg("verified interface %s for burst capabilities : %s", ifname, p_ib_ctx->get_burst_capability() ? "enabled" : "disabled");
Packit 6d2c1b
		}
Packit 6d2c1b
	} else {
Packit 6d2c1b
		nd_logdbg("QP creation failed on interface %s (errno=%d %m), Traffic will not be offloaded", ifname, errno);
Packit 6d2c1b
qp_failure:
Packit 6d2c1b
		int err = errno; //verify_raw_qp_privliges can overwrite errno so keep it before the call
Packit 6d2c1b
		if (validate_raw_qp_privliges() == 0) {
Packit 6d2c1b
			// MLNX_OFED raw_qp_privliges file exist with bad value
Packit 6d2c1b
			vlog_printf(VLOG_WARNING,"*******************************************************************************************************\n");
Packit 6d2c1b
			vlog_printf(VLOG_WARNING,"* Interface %s will not be offloaded.\n", ifname);
Packit 6d2c1b
			vlog_printf(VLOG_WARNING,"* Working in this mode might causes VMA malfunction over Ethernet/InfiniBand interfaces\n");
Packit 6d2c1b
			vlog_printf(VLOG_WARNING,"* WARNING: the following steps will restart your network interface!\n");
Packit 6d2c1b
			vlog_printf(VLOG_WARNING,"* 1. \"echo options ib_uverbs disable_raw_qp_enforcement=1 > /etc/modprobe.d/ib_uverbs.conf\"\n");
Packit 6d2c1b
			vlog_printf(VLOG_WARNING,"* 2. Restart openibd or rdma service depending on your system configuration\n");
Packit 6d2c1b
			vlog_printf(VLOG_WARNING,"* Read the RAW_PACKET QP root access enforcement section in the VMA's User Manual for more information\n");
Packit 6d2c1b
			vlog_printf(VLOG_WARNING,"******************************************************************************************************\n");
Packit 6d2c1b
		}
Packit 6d2c1b
		else if (validate_user_has_cap_net_raw_privliges() == 0 || err == EPERM) {
Packit 6d2c1b
			vlog_printf(VLOG_WARNING,"*******************************************************************************************************\n");
Packit 6d2c1b
			vlog_printf(VLOG_WARNING,"* Interface %s will not be offloaded.\n", ifname);
Packit 6d2c1b
			vlog_printf(VLOG_WARNING,"* Offloaded resources are restricted to root or user with CAP_NET_RAW privileges\n");
Packit 6d2c1b
			vlog_printf(VLOG_WARNING,"* Read the CAP_NET_RAW and root access section in the VMA's User Manual for more information\n");
Packit 6d2c1b
			vlog_printf(VLOG_WARNING,"*******************************************************************************************************\n");
Packit 6d2c1b
		} else {
Packit 6d2c1b
			vlog_printf(VLOG_WARNING,"*******************************************************************************************************\n");
Packit 6d2c1b
			vlog_printf(VLOG_WARNING,"* Interface %s will not be offloaded.\n", ifname);
Packit 6d2c1b
			vlog_printf(VLOG_WARNING,"* VMA was not able to create QP for this device (errno = %d).\n", err);
Packit 6d2c1b
			vlog_printf(VLOG_WARNING,"*******************************************************************************************************\n");
Packit 6d2c1b
		}
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
release_resources:
Packit 6d2c1b
	if(qp) {
Packit 6d2c1b
		IF_VERBS_FAILURE(ibv_destroy_qp(qp)) {
Packit 6d2c1b
			nd_logdbg("qp destroy failed on interface %s (errno=%d %m)", ifname, errno);
Packit 6d2c1b
			success = false;
Packit 6d2c1b
		} ENDIF_VERBS_FAILURE;
Packit 6d2c1b
	}
Packit 6d2c1b
	if (cq) {
Packit 6d2c1b
		IF_VERBS_FAILURE(ibv_destroy_cq(cq)) {
Packit 6d2c1b
			nd_logdbg("cq destroy failed on interface %s (errno=%d %m)", ifname, errno);
Packit 6d2c1b
			success = false;
Packit 6d2c1b
		} ENDIF_VERBS_FAILURE;
Packit 6d2c1b
	}
Packit 6d2c1b
	if (channel) {
Packit 6d2c1b
		IF_VERBS_FAILURE(ibv_destroy_comp_channel(channel)) {
Packit 6d2c1b
			nd_logdbg("channel destroy failed on interface %s (errno=%d %m)", ifname, errno);
Packit 6d2c1b
			success = false;
Packit 6d2c1b
		} ENDIF_VERBS_FAILURE;
Packit 6d2c1b
		VALGRIND_MAKE_MEM_UNDEFINED(channel, sizeof(ibv_comp_channel));
Packit 6d2c1b
	}
Packit 6d2c1b
	return success;
Packit 6d2c1b
}