/*
* Copyright (c) 2001-2020 Mellanox Technologies, Ltd. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/rtnetlink.h>
#include <linux/netlink.h>
#include <netinet/in.h>
#include <netinet/ether.h>
#include <arpa/inet.h>
#include "utils/bullseye.h"
#include "utils/lock_wrapper.h"
#include "vlogger/vlogger.h"
#include "vma/util/vtypes.h"
#include "vma/util/utils.h"
#include "route_table_mgr.h"
#include "vma/sock/socket_fd_api.h"
#include "vma/sock/sock-redirect.h"
#include "vma/dev/net_device_table_mgr.h"
#include "ip_address.h"
// debugging macros
#define MODULE_NAME "rtm:"
#define rt_mgr_if_logpanic __log_panic
#define rt_mgr_logerr __log_err
#define rt_mgr_logwarn __log_warn
#define rt_mgr_loginfo __log_info
#define rt_mgr_logdbg __log_dbg
#define rt_mgr_logfunc __log_func
#define rt_mgr_logfuncall __log_funcall
route_table_mgr* g_p_route_table_mgr = NULL;
route_table_mgr::route_table_mgr() : netlink_socket_mgr<route_val>(ROUTE_DATA_TYPE), cache_table_mgr<route_rule_table_key, route_val*>("route_table_mgr")
{
rt_mgr_logdbg("");
//Read Route table from kernel and save it in local variable.
update_tbl();
// create route_entry for each net_dev- needed for receiving port up/down events for net_dev_entry
route_val *p_val;
for (int i = 0; i < m_tab.entries_num; i++)
{
p_val = &m_tab.value[i];
in_addr_t src_addr = p_val->get_src_addr();
in_addr_route_entry_map_t::iterator iter = m_rte_list_for_each_net_dev.find(src_addr);
// if src_addr of interface exists in the map, no need to create another route_entry
if (iter == m_rte_list_for_each_net_dev.end()) {
in_addr_t dst_ip = src_addr;
in_addr_t src_ip = 0;
uint8_t tos = 0;
m_rte_list_for_each_net_dev.insert(pair<in_addr_t, route_entry*> (src_addr, create_new_entry(route_rule_table_key(dst_ip, src_ip, tos), NULL)));
}
}
//Print table
print_val_tbl();
// register to netlink event
g_p_netlink_handler->register_event(nlgrpROUTE, this);
rt_mgr_logdbg("Registered to g_p_netlink_handler");
rt_mgr_logdbg("Done");
}
route_table_mgr::~route_table_mgr()
{
rt_mgr_logdbg("");
// clear all route_entrys created in the constructor
in_addr_route_entry_map_t::iterator iter;
while ((iter = m_rte_list_for_each_net_dev.begin()) != m_rte_list_for_each_net_dev.end()) {
delete(iter->second);
m_rte_list_for_each_net_dev.erase(iter);
}
rt_tbl_cach_entry_map_t::iterator cache_itr;
while ((cache_itr = m_cache_tbl.begin()) != m_cache_tbl.end()) {
delete(cache_itr->second);
m_cache_tbl.erase(cache_itr);
}
rt_mgr_logdbg("Done");
}
void route_table_mgr::update_tbl()
{
auto_unlocker lock(m_lock);
netlink_socket_mgr<route_val>::update_tbl();
rt_mgr_update_source_ip();
return;
}
void route_table_mgr::rt_mgr_update_source_ip()
{
route_val *p_val;
//for route entries which still have no src ip and no gw
for (int i = 0; i < m_tab.entries_num; i++) {
p_val = &m_tab.value[i];
if (p_val->get_src_addr() || p_val->get_gw_addr()) continue;
if (g_p_net_device_table_mgr) { //try to get src ip from net_dev list of the interface
in_addr_t longest_prefix = 0;
in_addr_t correct_src = 0;
local_ip_list_t::iterator lip_iter;
local_ip_list_t lip_offloaded_list = g_p_net_device_table_mgr->get_ip_list(p_val->get_if_index());
if (!lip_offloaded_list.empty()) {
for (lip_iter = lip_offloaded_list.begin(); lip_offloaded_list.end() != lip_iter; lip_iter++)
{
ip_data_t ip = *lip_iter;
if((p_val->get_dst_addr() & ip.netmask) == (ip.local_addr & ip.netmask)) { //found a match in routing table
if((ip.netmask | longest_prefix) != longest_prefix){
longest_prefix = ip.netmask; // this is the longest prefix match
correct_src = ip.local_addr;
}
}
}
if (correct_src) {
p_val->set_src_addr(correct_src);
continue;
}
}
}
// if still no src ip, get it from ioctl
struct sockaddr_in src_addr;
char *if_name = (char *)p_val->get_if_name();
if (!get_ipv4_from_ifname(if_name, &src_addr)) {
p_val->set_src_addr(src_addr.sin_addr.s_addr);
}
else {
// Failed mapping if_name to IPv4 address
rt_mgr_logwarn("could not figure out source ip for rtv = %s", p_val->to_str());
}
}
//for route entries with gateway, do recursive search for src ip
int num_unresolved_src = m_tab.entries_num;
int prev_num_unresolved_src = 0;
do {
prev_num_unresolved_src = num_unresolved_src;
num_unresolved_src = 0;
for (int i = 0; i < m_tab.entries_num; i++) {
p_val = &m_tab.value[i];
if (p_val->get_gw_addr() && !p_val->get_src_addr()) {
route_val* p_val_dst;
in_addr_t in_addr = p_val->get_gw_addr();
unsigned char table_id = p_val->get_table_id();
if (find_route_val(in_addr, table_id, p_val_dst)) {
if (p_val_dst->get_src_addr()) {
p_val->set_src_addr(p_val_dst->get_src_addr());
} else if (p_val == p_val_dst) { //gateway of the entry lead to same entry
local_ip_list_t::iterator lip_iter;
local_ip_list_t lip_offloaded_list = g_p_net_device_table_mgr->get_ip_list(p_val->get_if_index());
for (lip_iter = lip_offloaded_list.begin(); lip_offloaded_list.end() != lip_iter; lip_iter++)
{
ip_data_t ip = *lip_iter;
if(p_val->get_gw_addr() == ip.local_addr) {
p_val->set_gw(0);
p_val->set_src_addr(ip.local_addr);
break;
}
}
if (!p_val->get_src_addr())
num_unresolved_src++;
} else {
num_unresolved_src++;
}
// gateway and source are equal, no need of gw.
if (p_val->get_src_addr() == p_val->get_gw_addr()) {
p_val->set_gw(0);
}
} else {
num_unresolved_src++;
}
}
}
} while (num_unresolved_src && prev_num_unresolved_src > num_unresolved_src);
//for route entries which still have no src ip
for (int i = 0; i < m_tab.entries_num; i++) {
p_val = &m_tab.value[i];
if (p_val->get_src_addr()) continue;
if (p_val->get_gw_addr()) {
rt_mgr_logdbg("could not figure out source ip for gw address. rtv = %s", p_val->to_str());
}
// if still no src ip, get it from ioctl
struct sockaddr_in src_addr;
char *if_name = (char *)p_val->get_if_name();
if (!get_ipv4_from_ifname(if_name, &src_addr)) {
p_val->set_src_addr(src_addr.sin_addr.s_addr);
}
else {
// Failed mapping if_name to IPv4 address
rt_mgr_logdbg("could not figure out source ip for rtv = %s", p_val->to_str());
}
}
}
bool route_table_mgr::parse_enrty(nlmsghdr *nl_header, route_val *p_val)
{
int len;
struct rtmsg *rt_msg;
struct rtattr *rt_attribute;
// get route entry header
rt_msg = (struct rtmsg *) NLMSG_DATA(nl_header);
// we are not concerned about the local and default route table
if (rt_msg->rtm_family != AF_INET || rt_msg->rtm_table == RT_TABLE_LOCAL)
return false;
p_val->set_protocol(rt_msg->rtm_protocol);
p_val->set_scope(rt_msg->rtm_scope);
p_val->set_type(rt_msg->rtm_type);
p_val->set_table_id(rt_msg->rtm_table);
in_addr_t dst_mask = htonl(VMA_NETMASK(rt_msg->rtm_dst_len));
p_val->set_dst_mask(dst_mask);
p_val->set_dst_pref_len(rt_msg->rtm_dst_len);
len = RTM_PAYLOAD(nl_header);
rt_attribute = (struct rtattr *) RTM_RTA(rt_msg);
for (;RTA_OK(rt_attribute, len);rt_attribute=RTA_NEXT(rt_attribute,len)) {
parse_attr(rt_attribute, p_val);
}
p_val->set_state(true);
p_val->set_str();
return true;
}
void route_table_mgr::parse_attr(struct rtattr *rt_attribute, route_val *p_val)
{
switch (rt_attribute->rta_type) {
case RTA_DST:
p_val->set_dst_addr(*(in_addr_t *)RTA_DATA(rt_attribute));
break;
// next hop IPv4 address
case RTA_GATEWAY:
p_val->set_gw(*(in_addr_t *)RTA_DATA(rt_attribute));
break;
// unique ID associated with the network interface
case RTA_OIF:
p_val->set_if_index(*(int *)RTA_DATA(rt_attribute));
char if_name[IFNAMSIZ];
if_indextoname(p_val->get_if_index(),if_name);
p_val->set_if_name(if_name);
break;
case RTA_SRC:
case RTA_PREFSRC:
p_val->set_src_addr(*(in_addr_t *)RTA_DATA(rt_attribute));
break;
case RTA_TABLE:
p_val->set_table_id(*(uint32_t *)RTA_DATA(rt_attribute));
break;
case RTA_METRICS:
{
struct rtattr *rta = (struct rtattr *)RTA_DATA(rt_attribute);
int len = RTA_PAYLOAD(rt_attribute);
uint16_t type;
while (RTA_OK(rta, len)) {
type = rta->rta_type;
switch (type) {
case RTAX_MTU:
p_val->set_mtu(*(uint32_t *)RTA_DATA(rta));
break;
default:
rt_mgr_logdbg("got unexpected METRICS %d %x",
type, *(uint32_t *)RTA_DATA(rta));
break;
}
rta = RTA_NEXT(rta, len);
}
break;
}
default:
rt_mgr_logdbg("got unexpected type %d %x", rt_attribute->rta_type,
*(uint32_t *)RTA_DATA(rt_attribute));
break;
}
}
bool route_table_mgr::find_route_val(in_addr_t &dst, unsigned char table_id, route_val* &p_val)
{
ip_address dst_addr = dst;
rt_mgr_logfunc("dst addr '%s'", dst_addr.to_str().c_str());
route_val *correct_route_val = NULL;
int longest_prefix = -1;
for (int i = 0; i < m_tab.entries_num; i++) {
route_val* p_val_from_tbl = &m_tab.value[i];
if (!p_val_from_tbl->is_deleted() && p_val_from_tbl->is_if_up()) { // value was not deleted
if(p_val_from_tbl->get_table_id() == table_id) { //found a match in routing table ID
if(p_val_from_tbl->get_dst_addr() == (dst & p_val_from_tbl->get_dst_mask())) { //found a match in routing table
if(p_val_from_tbl->get_dst_pref_len() > longest_prefix) { // this is the longest prefix match
longest_prefix = p_val_from_tbl->get_dst_pref_len();
correct_route_val = p_val_from_tbl;
}
}
}
}
}
if (correct_route_val) {
ip_address dst_gw = correct_route_val->get_dst_addr();
p_val = correct_route_val;
rt_mgr_logdbg("found route val[%p]: %s", p_val, p_val->to_str());
return true;
}
rt_mgr_logdbg("destination gw wasn't found");
return false;
}
bool route_table_mgr::route_resolve(IN route_rule_table_key key, OUT route_result &res)
{
in_addr_t dst = key.get_dst_ip();
ip_address dst_addr = dst;
rt_mgr_logdbg("dst addr '%s'", dst_addr.to_str().c_str());
route_val *p_val = NULL;
std::deque<unsigned char> table_id_list;
g_p_rule_table_mgr->rule_resolve(key, table_id_list);
auto_unlocker lock(m_lock);
std::deque<unsigned char>::iterator table_id_iter = table_id_list.begin();
for (; table_id_iter != table_id_list.end(); table_id_iter++) {
if (find_route_val(dst, *table_id_iter, p_val)) {
res.p_src = p_val->get_src_addr();
rt_mgr_logdbg("dst ip '%s' resolved to src addr "
"'%d.%d.%d.%d'", dst_addr.to_str().c_str(),
NIPQUAD(res.p_src));
res.p_gw = p_val->get_gw_addr();
rt_mgr_logdbg("dst ip '%s' resolved to gw addr '%d.%d.%d.%d'",
dst_addr.to_str().c_str(), NIPQUAD(res.p_gw));
res.mtu = p_val->get_mtu();
rt_mgr_logdbg("found route mtu %d", res.mtu);
return true;
}
}
/* prevent usage on false return */
return false;
}
void route_table_mgr::update_entry(INOUT route_entry* p_ent, bool b_register_to_net_dev /*= false*/)
{
rt_mgr_logdbg("entry [%p]", p_ent);
auto_unlocker lock(m_lock);
if (p_ent && !p_ent->is_valid()) { //if entry is found in the collection and is not valid
rt_mgr_logdbg("route_entry is not valid-> update value");
rule_entry* p_rr_entry = p_ent->get_rule_entry();
std::deque<rule_val*>* p_rr_val;
if (p_rr_entry && p_rr_entry->get_val(p_rr_val)) {
route_val* p_val = NULL;
in_addr_t peer_ip = p_ent->get_key().get_dst_ip();
unsigned char table_id;
for (std::deque<rule_val*>::iterator p_rule_val = p_rr_val->begin(); p_rule_val != p_rr_val->end(); p_rule_val++) {
table_id = (*p_rule_val)->get_table_id();
if (find_route_val(peer_ip, table_id, p_val)) {
p_ent->set_val(p_val);
if (b_register_to_net_dev) {
//in_addr_t src_addr = p_val->get_src_addr();
//net_device_val* p_ndv = g_p_net_device_table_mgr->get_net_device_val(src_addr);
// Check if broadcast IP which is NOT supported
if (IS_BROADCAST_N(peer_ip)) {
rt_mgr_logdbg("Disabling Offload for route_entry '%s' - this is BC address", p_ent->to_str().c_str());
// Need to route traffic to/from OS
// Prevent registering of net_device to route entry
}
// Check if: Local loopback over Ethernet case which was not supported before OFED 2.1
/*else if (p_ndv && (p_ndv->get_transport_type() == VMA_TRANSPORT_ETH) && (peer_ip == src_addr)) {
rt_mgr_logdbg("Disabling Offload for route_entry '%s' - this is an Ethernet unicast loopback route", p_ent->to_str().c_str());
// Need to route traffic to/from OS
// Prevent registering of net_device to route entry
}*/
else {
// register to net device for bonding events
p_ent->register_to_net_device();
}
}
// All good, validate the new route entry
p_ent->set_entry_valid();
break;
} else {
rt_mgr_logdbg("could not find route val for route_entry '%s in table %u'", p_ent->to_str().c_str(), table_id);
}
}
}
else {
rt_mgr_logdbg("rule entry is not valid");
}
}
}
route_entry* route_table_mgr::create_new_entry(route_rule_table_key key, const observer *obs)
{
// no need for lock - lock is activated in cache_collection_mgr::register_observer
rt_mgr_logdbg("");
NOT_IN_USE(obs);
route_entry* p_ent = new route_entry(key);
update_entry(p_ent, true);
rt_mgr_logdbg("new entry %p created successfully", p_ent);
return p_ent;
}
void route_table_mgr::new_route_event(route_val* netlink_route_val)
{
if (!netlink_route_val) {
rt_mgr_logdbg("Invalid route entry");
return;
}
if (m_tab.entries_num >= MAX_TABLE_SIZE) {
rt_mgr_logwarn("No available space for new route entry");
return;
}
auto_unlocker lock(m_lock);
route_val* p_route_val = &m_tab.value[m_tab.entries_num];
p_route_val->set_dst_addr(netlink_route_val->get_dst_addr());
p_route_val->set_dst_mask(netlink_route_val->get_dst_mask());
p_route_val->set_dst_pref_len(netlink_route_val->get_dst_pref_len());
p_route_val->set_src_addr(netlink_route_val->get_src_addr());
p_route_val->set_gw(netlink_route_val->get_gw_addr());
p_route_val->set_protocol(netlink_route_val->get_protocol());
p_route_val->set_scope(netlink_route_val->get_scope());
p_route_val->set_type(netlink_route_val->get_type());
p_route_val->set_table_id(netlink_route_val->get_table_id());
p_route_val->set_if_index(netlink_route_val->get_if_index());
p_route_val->set_if_name(const_cast<char*> (netlink_route_val->get_if_name()));
p_route_val->set_mtu((netlink_route_val->get_mtu()));
p_route_val->set_state(true);
p_route_val->set_str();
p_route_val->print_val();
++m_tab.entries_num;
}
void route_table_mgr::notify_cb(event *ev)
{
rt_mgr_logdbg("received route event from netlink");
route_nl_event *route_netlink_ev = dynamic_cast <route_nl_event*>(ev);
if (!route_netlink_ev) {
rt_mgr_logwarn("Received non route event!!!");
return;
}
netlink_route_info* p_netlink_route_info = route_netlink_ev->get_route_info();
if (!p_netlink_route_info) {
rt_mgr_logdbg("Received invalid route event!!!");
return;
}
switch(route_netlink_ev->nl_type) {
case RTM_NEWROUTE:
new_route_event(p_netlink_route_info->get_route_val());
break;
#if 0
case RTM_DELROUTE:
del_route_event(p_netlink_route_info->get_route_val());
break;
#endif
default:
rt_mgr_logdbg("Route event (%u) is not handled", route_netlink_ev->nl_type);
break;
}
}