/*
* Copyright (c) 2001-2020 Mellanox Technologies, Ltd. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef UTILS_H
#define UTILS_H
#include <time.h>
#include <string>
#include <string.h>
#include <ifaddrs.h>
#include <linux/if_ether.h>
#include <exception>
#include "vtypes.h"
#include "utils/rdtsc.h"
#include "vlogger/vlogger.h"
#include "vma/proto/mem_buf_desc.h"
#include "vma/util/vma_stats.h"
struct iphdr; //forward declaration
#define VMA_ALIGN(x, y) ((((x) + (y) - 1) / (y)) * (y) )
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
/**
* Check if file type is regular
**/
int check_if_regular_file (char *path);
/**
* L3 and L4 Header Checksum Calculation
*/
void compute_tx_checksum(mem_buf_desc_t* p_mem_buf_desc, bool l3_csum, bool l4_csum);
/**
* IP Header Checksum Calculation
*/
unsigned short compute_ip_checksum(const unsigned short *buf, unsigned int nshort_words);
/**
* get tcp checksum: given IP header and tcp segment (assume checksum field in TCP header contains zero)
* matches RFC 793
*/
unsigned short compute_tcp_checksum(const struct iphdr *p_iphdr, const uint16_t *p_ip_payload);
/**
* get udp checksum: given IP header and UDP datagram (assume checksum field in UDP header contains zero)
* matches RFC 793
*/
unsigned short compute_udp_checksum_rx(const struct iphdr *p_iphdr, const struct udphdr *udphdrp, mem_buf_desc_t* p_rx_wc_buf_desc);
/**
* get user space max number of open fd's using getrlimit, default parameter equals to 1024
*/
int get_sys_max_fd_num(int def_max_fd=1024);
/**
* iovec extensions
* Returns total bytes copyed
*/
int memcpy_fromiovec(u_int8_t* p_dst, const struct iovec* p_iov, size_t sz_iov, size_t sz_src_start_offset, size_t sz_data);
/**
* get base interface from an aliased/vlan tagged one. i.e. eth2:1 --> eth2 / eth2.1 --> eth2
* Functions gets:interface name,output variable for base interface,output size; and returns the base interface
*/
int get_base_interface_name(const char *if_name, char *base_ifname, size_t sz_base_ifname);
/**
* Count bitmark set bits
*/
int netmask_bitcount(uint32_t netmask);
/**
* Set the fd blocking mode
* @param fd the file descriptor on which to operate
* @param block 'true' to set to block
* 'false' to set to non-blocking
*/
void set_fd_block_mode(int fd, bool block);
/**
* @param a number
* @param b number
* @return true if 'a' and 'b' are equal. else false.
*/
bool compare_double(double a, double b);
/**
* Run a system command while bypassing LD_PRELOADed with VMA
* @param cmd_line to be exceuted wiout VMA in process space
* @param return_str is the output of the system call
*/
int run_and_retreive_system_command(const char* cmd_line, char* return_str, int return_str_len);
const char* iphdr_protocol_type_to_str(const int type);
/**
* Read content of file detailed in 'path' (usually a sysfs file) and
* store the file content into the given 'buf' up to 'size' characters.
* print log in case of failure according to the given 'log_level' argument.
* @return length of content that was read, or -1 upon any error
*/
int priv_read_file(const char *path, char *buf, size_t size, vlog_levels_t log_level = VLOG_ERROR);
/**
* like above 'priv_read_file' however make sure that upon success the result in buf is a null terminated string
*/
inline int priv_safe_read_file(const char *path, char *buf, size_t size, vlog_levels_t log_level = VLOG_ERROR){
int ret = -1;
if (size > 0) {
ret = priv_read_file(path, buf, size - 1, log_level);
if (0 <= ret) buf[ret] = '\0';
}
return ret;
}
/**
* like above however make sure that upon success the result in buf is a null terminated string and VLOG_DEBUG
*/
inline int priv_safe_try_read_file(const char *path, char *buf, size_t size) {
int ret = -1;
if (size > 0) {
ret = priv_read_file(path, buf, size - 1, VLOG_DEBUG);
if (0 <= ret) buf[ret] = '\0';
}
return ret;
}
/**
* Read content of file detailed in 'path' (usually a sysfs file)
* upon failure print error
* @return int value (atoi) of the file content, or 'default_value' upon failure
*/
int read_file_to_int(const char *path, int default_value);
/**
* Get interface name and flags from local address
*
* @char ifname[IFNAMSIZ];
* @unsigned int ifflags; Flags as from SIOCGIFFLAGS ioctl.
*
* @return zero on success
*/
int get_ifinfo_from_ip(const struct sockaddr& local_addr, char* ifname, uint32_t &ifflags);
/**
* Get port number from interface name
* @param ifname input interface name of device (e.g. eth1, ib2)
* should be of size IFNAMSIZ
* @return zero on failure, else port number
*/
int get_port_from_ifname(const char* ifname);
/**
* Get interface type value from interface name
*
* @param ifname input interface name of device (e.g. eth1, ib2)
* should be of size IFNAMSIZ
* @return if type on success or -1 on failure
*/
int get_iftype_from_ifname(const char* ifname);
/**
* Get interface mtu from interface name
*
* @param ifname input interface name of device (e.g. eth1, ib2)
* should be of size IFNAMSIZ
* @return mtu length zero on failure
*/
int get_if_mtu_from_ifname(const char* ifname);
/**
* Get the OS TCP window scaling factor when tcp_window_scaling is enabled.
* The value is calculated from the maximum receive buffer value.
*
* @param tcp_rmem_max the maximum size of the receive buffer used by each TCP socket
* @parma core_rmem_max contains the maximum socket receive buffer size in bytes which a user may set by using the SO_RCVBUF socket option.
*
* @return TCP window scaling factor
*/
int get_window_scaling_factor(int tcp_rmem_max, int core_rmem_max);
/**
* Get Ethernet ipv4 address from interface name
*
* @param ifname input interface name of device (e.g. eth1, ib2)
* should be of size IFNAMSIZ
* @param sockaddr_in output interface inet address
*
* @return -1 on failure
*/
int get_ipv4_from_ifname(char *ifname, struct sockaddr_in *addr);
/**
* Get Ethernet ipv4 address from interface index
*
* @param ifindex input interface index of device
* @param sockaddr_in output interface inet address
*
* @return -1 on failure
*/
int get_ipv4_from_ifindex(int ifindex, struct sockaddr_in *addr);
/**
* Get vlan id from interface name
*
* @param ifname input interface name of device (e.g. eth2, eth2.5)
* @return the vlan id or 0 if not a vlan
*/
uint16_t get_vlan_id_from_ifname(const char* ifname);
/**
* Get vlan base name from interface name
*
* @param ifname input interface name of device (e.g. eth2, eth2.5)
* @param base_ifname output base interface name of device (e.g. eth2)
* @param sz_base_ifname input the size of base_ifname param
* @return the vlan base name length or 0 if not a vlan
*/
size_t get_vlan_base_name_from_ifname(const char* ifname, char* base_ifname, size_t sz_base_ifname);
/* Upon success - returns the actual address len in bytes; Upon error - returns zero*/
size_t get_local_ll_addr(const char* ifname, unsigned char* addr, int addr_len, bool is_broadcast);
/* Print warning while RoCE Lag is enabled */
void print_roce_lag_warnings(char* interface, char* disable_path = NULL, const char* port1 = NULL, const char* port2 = NULL);
bool get_bond_active_slave_name(IN const char* bond_name, OUT char* active_slave_name, IN int sz);
bool get_bond_slave_state(IN const char* slave_name, OUT char* curr_state, IN int sz);
bool get_bond_slaves_name_list(IN const char* bond_name, OUT char* slaves_list, IN int sz);
bool check_bond_roce_lag_exist(OUT char* bond_roce_lag_path, int sz, IN const char* slave_name);
bool check_device_exist(const char* ifname, const char *path);
bool check_device_name_ib_name(const char* ifname, const char* ibname);
bool check_netvsc_device_exist(const char* ifname);
bool get_netvsc_slave(IN const char* ifname, OUT char* slave_name, OUT unsigned int &slave_flags);
bool get_interface_oper_state(IN const char* interface_name, OUT char* slaves_list, IN int sz);
int validate_ipoib_prop(const char* ifname, unsigned int ifflags,
const char prop_file[], const char *expected_val,
int val_size, char *filename, char* base_ifname);
int validate_raw_qp_privliges();
bool validate_user_has_cap_net_raw_privliges();
/**
* Get TSO support using interface index
*
* @param if_index input interface index
* @return 0/1 or -1 on failure
*/
int validate_tso(int if_index);
static inline int get_procname(int pid, char *proc, size_t size)
{
char app_full_name[PATH_MAX] = {0};
char proccess_proc_dir[FILE_NAME_MAX_SIZE] = {0};
char* app_base_name = NULL;
int n = -1;
if (NULL == proc) {
return -1;
}
n = snprintf(proccess_proc_dir, sizeof(proccess_proc_dir), "/proc/%d/exe", pid);
if (likely((0 < n) && (n < (int)sizeof(proccess_proc_dir)))) {
n = readlink(proccess_proc_dir, app_full_name, sizeof(app_full_name) - 1);
if (n > 0) {
app_full_name[n] = '\0';
app_base_name = strrchr(app_full_name, '/');
if (app_base_name) {
strncpy(proc, app_base_name + 1, size - 1);
proc[size - 1] = '\0';
return 0;
}
}
}
return -1;
}
static inline in_addr_t prefix_to_netmask(int prefix_length)
{
in_addr_t mask = 0;
if (prefix_length <= 0 || prefix_length > 32) {
return 0;
}
mask = ~mask << (32 - prefix_length);
mask = htonl(mask);
return mask;
}
//Creates multicast MAC from multicast IP
//inline void create_multicast_mac_from_ip(uint8_t (& mc_mac) [6], in_addr_t ip)
inline void create_multicast_mac_from_ip(unsigned char* mc_mac, in_addr_t ip)
{
if(mc_mac == NULL)
return;
mc_mac[0] = 0x01;
mc_mac[1] = 0x00;
mc_mac[2] = 0x5e;
mc_mac[3] = (uint8_t)((ip>> 8)&0x7f);
mc_mac[4] = (uint8_t)((ip>>16)&0xff);
mc_mac[5] = (uint8_t)((ip>>24)&0xff);
}
static inline void create_mgid_from_ipv4_mc_ip(uint8_t *mgid, uint16_t pkey, uint32_t ip)
{
// +--------+----+----+-----------------+---------+-------------------+
// | 8 | 4 | 4 | 16 bits | 16 bits | 80 bits |
// +--------+----+----+-----------------+---------+-------------------+
// |11111111|0001|scop|<IPoIB signature>|< P_Key >| group ID |
// +--------+----+----+-----------------+---------+-------------------+
// |11111111|0001|0010|01000000000011011| | group ID |
// +--------+----+----+-----------------+---------+-------------------+
//Fixed for multicast
mgid[0] = 0xff;
mgid[1] = 0x12;
//IPoIB signature: 0x401b for ipv4, 0x601b for ipv6
mgid[2] = 0x40;
mgid[3] = 0x1b;
//P_Key
mgid[4] = (((unsigned char *)(&pkey))[0]);
mgid[5] = (((unsigned char *)(&pkey))[1]);
//group ID - relevant only for ipv4
mgid[6] = 0x00;
mgid[7] = 0x00;
mgid[8] = 0x00;
mgid[9] = 0x00;
mgid[10] = 0x00;
mgid[11] = 0x00;
mgid[12] = (uint8_t)((ip)&0x0f);
mgid[13] = (uint8_t)((ip>>8)&0xff);
mgid[14] = (uint8_t)((ip>>16)&0xff);
mgid[15] = (uint8_t)((ip>>24)&0xff);
vlog_printf(VLOG_DEBUG, "Translated to mgid: %02X%02X:%02X%02X:%02X%02X:%02X%02X:%02X%02X:%02X%02X:%02X%02X:%02X%02X\n",
((unsigned char *)(mgid))[0],((unsigned char *)(mgid))[1],
((unsigned char *)(mgid))[2],((unsigned char *)(mgid))[3],
((unsigned char *)(mgid))[4],((unsigned char *)(mgid))[5],
((unsigned char *)(mgid))[6],((unsigned char *)(mgid))[7],
((unsigned char *)(mgid))[8],((unsigned char *)(mgid))[9],
((unsigned char *)(mgid))[10],((unsigned char *)(mgid))[11],
((unsigned char *)(mgid))[12],((unsigned char *)(mgid))[13],
((unsigned char *)(mgid))[14],((unsigned char *)(mgid))[15]);
}
/**
* special design for the rx loop.
*/
class loops_timer {
public:
loops_timer();
void start();
int time_left_msec();
void set_timeout_msec(int timeout_msec) { m_timeout_msec = timeout_msec; }
int get_timeout_msec() { return m_timeout_msec; }
inline bool is_timeout() {
if (m_timeout_msec == -1)
return false;
if (m_timer_countdown > 0) {
m_timer_countdown--;
return false;
}
//init counter
m_timer_countdown = m_interval_it;
if (!ts_isset(&m_start)) {
gettime(&m_start);
}
//update timer
gettime(&m_current);
ts_sub(&m_current, &m_start, &m_elapsed);
vlog_printf(VLOG_FUNC_ALL, "update loops_timer (elapsed time=%d sec %d usec \n", ts_to_sec(&m_elapsed), ts_to_usec(&m_elapsed));
// test for timeout
if (m_timeout_msec <= ts_to_msec(&m_elapsed))
return true;
return false;
}
private:
timespec m_start;
timespec m_elapsed;
timespec m_current;
int m_interval_it;
int m_timer_countdown;
int m_timeout_msec;
};
// Returns the filesystem's inode number for the given 'fd' using 'fstat' system call that assumes 32 bit inodes
// This should be safe for 'proc' filesytem and for standard filesystems
uint32_t fd2inode(int fd);
/**
* @class vma_error
*
* base class for vma exceptions classes.
* Note: VMA code should NOT catch vma_error; VMA code should only catch exceptions of derived classes
*/
class vma_error : public std::exception {
char formatted_message[512];
public:
const char * const message;
const char * const function;
const char * const filename;
const int lineno;
const int errnum;
/**
* Create an object that contains const members for all the given arguments, plus a formatted message that will be
* available thru the 'what()' method of base class.
*
* The formatted_message will look like this:
* "vma_error <create internal epoll> (errno=24 Too many open files) in sock/sockinfo.cpp:61"
* catcher can print it to log like this:
* fdcoll_loginfo("recovering from %s", e.what());
*/
vma_error(const char* _message, const char* _function, const char* _filename, int _lineno, int _errnum) throw();
virtual ~vma_error() throw();
virtual const char* what() const throw();
};
/**
* @class vma_exception
* NOTE: ALL exceptions that can be caught by VMA should be derived of this class
*/
class vma_exception : public vma_error {
public:
vma_exception(const char* _message, const char* _function, const char* _filename, int _lineno, int _errnum) throw() :
vma_error(_message, _function, _filename, _lineno, _errnum)
{
}
};
#define create_vma_exception_class(clsname, basecls) \
class clsname : public basecls { \
public: \
clsname(const char* _message, const char* _function, const char* _filename, int _lineno, int _errnum) throw() : \
basecls(_message, _function, _filename, _lineno, _errnum) {} \
}
create_vma_exception_class(vma_unsupported_api, vma_error);
#define throw_vma_exception(msg) throw vma_exception(msg, __PRETTY_FUNCTION__, __FILE__, __LINE__, errno)
// uses for throwing something that is derived from vma_error and has similar CTOR; msg will automatically be class name
#define vma_throw_object(_class) throw _class(#_class, __PRETTY_FUNCTION__, __FILE__, __LINE__, errno)
#define vma_throw_object_with_msg(_class, _msg) throw _class(_msg, __PRETTY_FUNCTION__, __FILE__, __LINE__, errno)
/* Rounding up to nearest power of 2 */
static inline uint32_t align32pow2(uint32_t x)
{
x--;
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
return x + 1;
}
static inline int ilog_2(uint32_t n) {
if (n == 0)
return 0;
uint32_t t = 0;
while ((1 << t) < (int)n)
++t;
return (int)t;
}
#endif