/*
* Copyright (c) 2001-2020 Mellanox Technologies, Ltd. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "ip_frag.h"

#include <assert.h>

#include <list>
#include <new>

#include "utils/bullseye.h"
#include "vma/event/event_handler_manager.h"
#include "mem_buf_desc.h"
//#define IP_FRAG_DEBUG 1
#ifdef IP_FRAG_DEBUG
#define frag_dbg(fmt, args...) \
vlog_printf(VLOG_WARNING, "%s:%d : " fmt "\n", __FUNCTION__, __LINE__, ##args)
#else
#define frag_dbg(fmt, args...)
#endif
#define frag_err(fmt, args...) \
vlog_printf(VLOG_ERROR, "%s:%d : " fmt "\n", __FUNCTION__, __LINE__, ##args)
#define frag_panic(fmt, args...) \
{vlog_printf(VLOG_PANIC, "%s:%d : " fmt "\n", __FUNCTION__, __LINE__, ##args); throw;}
#ifdef IP_FRAG_DEBUG
static int debug_drop_every_n_pkt=0; // 0 - Disabled, 1/N is the number of packet dropped
static int debug_drop_index=0; // counter
static int g_ip_frag_count_check = 0;
#define MEMBUF_DEBUG_REF_INC(__p_desc__) {g_ip_frag_count_check++; if (__p_desc__->n_ref_count!=0) frag_panic("REF_INC: p=%p\n", __p_desc__); __p_desc__->n_ref_count++;}
#define MEMBUF_DEBUG_REF_DEC(__p_desc__) {mem_buf_desc_t* frag_list = __p_desc__; while (frag_list) { MEMBUF_DEBUG_REF_DEC_1(frag_list); frag_list = frag_list->p_next_desc; }}
#define MEMBUF_DEBUG_REF_DEC_1(__p_desc__) {g_ip_frag_count_check--; __p_desc__->n_ref_count--; if (__p_desc__->n_ref_count!=0) frag_panic("REF_DEC: p=%p\n", __p_desc__);}
#define PRINT_STATISTICS() {print_statistics();}
#else
#define MEMBUF_DEBUG_REF_INC(__p_desc__)
#define MEMBUF_DEBUG_REF_DEC(__p_desc__)
#define PRINT_STATISTICS()
#endif
ip_frag_manager * g_p_ip_frag_manager = NULL;
ip_frag_hole_desc *hole_base = NULL;
ip_frag_hole_desc *hole_free_list_head = NULL;
int hole_free_list_count = 0;
ip_frag_desc *desc_base = NULL;
ip_frag_desc *desc_free_list_head = NULL;
int desc_free_list_count = 0;
ip_frag_manager::ip_frag_manager() : lock_spin("ip_frag_manager")
{
frag_dbg("");
m_frag_counter = 0;
int i;
frag_dbg("NOTE: ip frag periodic timer is disabled until HW supports ip frag offload");
// g_p_event_handler_manager->register_timer_event(IP_FRAG_CLEANUP_INT, this, PERIODIC_TIMER, 0);
frag_dbg("Created new IPFRAG MANAGER instance");
/* allocate hole list */
desc_base = new ip_frag_desc_t [IP_FRAG_MAX_DESC];
BULLSEYE_EXCLUDE_BLOCK_START
if (!desc_base) {
frag_dbg("Failed to allocate descriptor");
free_frag_resources();
throw_vma_exception("Failed to allocate descriptor");
}
hole_base = new ip_frag_hole_desc [IP_FRAG_MAX_HOLES];
if (!hole_base) {
frag_dbg("Failed to allocate hole descriptor");
free_frag_resources();
throw_vma_exception("Failed to allocate hole descriptor");
}
BULLSEYE_EXCLUDE_BLOCK_END
for (i = 0; i < IP_FRAG_MAX_DESC; i++) {
free_frag_desc(&desc_base[i]);
}
for (i = 0; i < IP_FRAG_MAX_HOLES; i++) {
free_hole_desc(&hole_base[i]);
}
}
void ip_frag_manager::free_frag_resources(void)
{
ip_frags_list_t::iterator i;
ip_frag_desc_t *desc;
frag_dbg("NOTE: ip frag periodic timer is disabled until HW supports ip frag offload");
// g_p_event_handler_manager->unregister_timer_event(this, NULL);
lock();
while (m_frags.size() > 0) {
i = m_frags.begin();
desc = i->second;
destroy_frag_desc(desc);
free_frag_desc(desc);
m_frags.erase(i);
}
owner_desc_map_t temp_buff_map = m_return_descs;
m_return_descs.clear();
unlock();
// Must call cq_mgr outside the lock to avoid ABBA deadlock
return_buffers_to_owners(temp_buff_map);
delete [] desc_base;
delete [] hole_base;
frag_dbg("Deleted IPFRAG MANAGER instance");
}
ip_frag_manager::~ip_frag_manager()
{
free_frag_resources();
}
#if _BullseyeCoverage
#pragma BullseyeCoverage off
#endif
void ip_frag_manager::print_statistics()
{
frag_dbg("free desc=%d, free holes=%d, map size=%d, frags=%d", desc_free_list_count, hole_free_list_count, m_frags.size(), g_ip_frag_count_check);
}
void ip_frag_manager::free_frag(mem_buf_desc_t *frag)
{
mem_buf_desc_t *tail;
// There are cases that we might not have a frag list at all to release
// This is instead of checking the pointer before all calls to free_frag()
if (!frag)
return;
// Change packet size - it will force packet to be discarded
frag->sz_data = IP_FRAG_FREED;
// Return to owner does post_recv() which deals with linked buffers automatically
MEMBUF_DEBUG_REF_DEC(frag);
tail = frag;
while (tail->p_next_desc) {
tail = tail->p_next_desc;
}
tail->p_next_desc = m_return_descs[frag->p_desc_owner];
m_return_descs[frag->p_desc_owner] = frag;
}
//FIXME: use preallocated descriptors!!! instead of malloc
ip_frag_hole_desc* ip_frag_manager::alloc_hole_desc()
{
struct ip_frag_hole_desc *ret;
ret = hole_free_list_head;
if (!ret)
return NULL;
// unlink from hole's free list
hole_free_list_head = ret->next;
hole_free_list_count--;
// clear hole struct
ret->data_first = 0;
ret->data_last = 0;
ret->next = 0;
return ret;
}
void ip_frag_manager::free_hole_desc(struct ip_frag_hole_desc *p)
{
// link in head of free list
p->next = hole_free_list_head;
hole_free_list_head = p;
++hole_free_list_count;
}
ip_frag_desc_t *ip_frag_manager::alloc_frag_desc()
{
ip_frag_desc_t *ret;
ret = desc_free_list_head;
if (!ret)
return NULL;
// unlink from hole's free list
desc_free_list_head = ret->next;
--desc_free_list_count;
ret->next = 0;
return ret;
}
void ip_frag_manager::free_frag_desc(ip_frag_desc_t *p)
{
// link in head of free list
p->next = desc_free_list_head;
desc_free_list_head = p;
desc_free_list_count++;
}
void ip_frag_manager::destroy_frag_desc(ip_frag_desc_t *desc)
{
struct ip_frag_hole_desc *phole, *pphole;
// free holes
phole = desc->hole_list;
while (phole) {
pphole = phole;
phole = phole->next;
free_hole_desc(pphole);
}
// free frags
free_frag(desc->frag_list);
}
/**
* first fragment for given address is detected - setup
*/
ip_frag_desc_t *ip_frag_manager::new_frag_desc(ip_frag_key_t &key)
{
ip_frag_desc_t *desc = NULL;
struct ip_frag_hole_desc *hole = NULL;
hole = alloc_hole_desc();
if (!hole){
frag_dbg("NULL hole");
return NULL;
}
hole->first = IP_FRAG_NINF;
hole->last = IP_FRAG_INF;
desc = alloc_frag_desc();
if (!desc) {
frag_dbg("NULL desc");
free_hole_desc(hole);
return NULL;
}
desc->ttl = IP_FRAG_TTL;
desc->frag_list = 0;
desc->hole_list = hole;
desc->frag_counter = m_frag_counter;
m_frags[key] = desc;
return desc;
}
/**
* Complexity of the algorithm:
* O(1) if packets are coming in order or reverse order
* O(n^2) for random fragments, where n is number of fragments
* returns: 0 if finished OK (if the packet is complete - put it in ret)
* -1 if finished not OK and this packet needs to be droped
*/
int ip_frag_manager::add_frag(iphdr *hdr, mem_buf_desc_t *frag, mem_buf_desc_t **ret)
{
ip_frag_key_t key;
ip_frags_list_t::iterator i;
ip_frag_desc_t *desc;
struct ip_frag_hole_desc *phole, *phole_prev;
struct ip_frag_hole_desc *new_hole;
uint16_t frag_off, frag_first, frag_last;
bool more_frags;
assert(hdr);
assert(frag);
key.ip_id = hdr->id; //id is in network order!
key.src_ip = hdr->saddr;
key.dst_ip = hdr->daddr;
key.ipproto = hdr->protocol;
frag_dbg("Fragment: %d.%d.%d.%d->%d.%d.%d.%d id=%x size=%d",
NIPQUAD(key.src_ip),
NIPQUAD(key.dst_ip),
(int)key.ip_id, (int)ntohs(hdr->tot_len));
#ifdef IP_FRAG_DEBUG
if (debug_drop_every_n_pkt && ((++debug_drop_index) % debug_drop_every_n_pkt == 0)) {
frag_dbg("XXX debug force dropped XXX");
return -1;
}
#endif
lock();
MEMBUF_DEBUG_REF_INC(frag);
PRINT_STATISTICS();
frag_off = ntohs(hdr->frag_off);
more_frags = frag_off & MORE_FRAGMENTS_FLAG;
frag_first = (frag_off & FRAGMENT_OFFSET) * 8;
frag_last = frag_first + ntohs(hdr->tot_len) - (hdr->ihl<<2) - 1; // frag starts from 0!!!
frag_dbg("> fragment: %d-%d, %s more frags", frag_first, frag_last, more_frags?"pending":"no");
m_frag_counter++;
i = m_frags.find(key);
if (i == m_frags.end()) {
/* new fragment */
frag_dbg("> new fragmented packet");
desc = new_frag_desc(key);
}
else {
desc = i->second;
if ((m_frag_counter - desc->frag_counter) > IP_FRAG_SPACE) {
// discard this packet
frag_dbg("expiring packet fragments id=%x", i->first);
destroy_frag_desc(desc);
free_frag_desc(desc);
m_frags.erase(i);
i = m_frags.end();
// Add new fregment
frag_dbg("> new fragmented packet");
desc = new_frag_desc(key);
}
else {
frag_dbg("> old fragmented packet");
}
}
if (desc==NULL) {
MEMBUF_DEBUG_REF_DEC(frag);
PRINT_STATISTICS();
unlock();
return -1;
}
//desc->last_frag_counter = m_frag_counter;
/* 8 step reassembly algorithm as described in RFC 815 */
//step 1
phole_prev = 0; phole = desc->hole_list;
while (phole) {
//step 2 and step 3
if (frag_first >= phole->first && frag_last <= phole->last) {
break;
}
phole_prev = phole;
phole = phole->next;
}
if (!phole) { // the right hole wasn't found
MEMBUF_DEBUG_REF_DEC(frag);
PRINT_STATISTICS();
unlock();
return -1;
}
frag_dbg("> found hole: %d-%d", phole->first, phole->last);
// step 4 - remove hole from list
if (phole_prev)
phole_prev->next = phole->next;
else
desc->hole_list = phole->next;
// step 5
if (frag_first > phole->first) {
new_hole = alloc_hole_desc();
if (!new_hole) {
free_hole_desc(phole); // phole was removed from the list in step 4!
MEMBUF_DEBUG_REF_DEC(frag);
PRINT_STATISTICS();
unlock();
return -1;
}
new_hole->first = phole->first;
new_hole->last = frag_first-1;
new_hole->data_first = phole->data_first;
new_hole->data_last = frag;
new_hole->next = phole->next;
if (phole_prev)
phole_prev->next = new_hole;
else
desc->hole_list = new_hole;
phole_prev = new_hole;
}
//step 6
if (frag_last < phole->last && more_frags) {
new_hole = alloc_hole_desc();
if (!new_hole) {
free_hole_desc(phole); // phole was removed from the list in step 4!
MEMBUF_DEBUG_REF_DEC(frag);
PRINT_STATISTICS();
unlock();
return -1;
}
new_hole->first = frag_last + 1;
new_hole->last = phole->last;
new_hole->data_first = frag;
new_hole->data_last = phole->data_last;
new_hole->next = phole->next;
if (phole_prev)
phole_prev->next = new_hole;
else
desc->hole_list = new_hole;
}
// link frag
if (phole->data_first)
phole->data_first->p_next_desc = frag;
else
desc->frag_list = frag;
frag->p_next_desc = phole->data_last;
free_hole_desc(phole);
if (!desc->hole_list) {
//step 8 - datagram assembly completed
if (i == m_frags.end())
i = m_frags.find(key);
if (i == m_frags.end()){
MEMBUF_DEBUG_REF_DEC(frag);
frag_panic("frag desc lost from map???");
//coverity unreachable
/*unlock();
return -1;*/
}
MEMBUF_DEBUG_REF_DEC(desc->frag_list);
m_frags.erase(i);
*ret = desc->frag_list;
free_frag_desc(desc);
frag_dbg("> PACKET ASSEMBLED");
PRINT_STATISTICS();
unlock();
return 0;
}
frag_dbg("> need more packets");
*ret = NULL;
PRINT_STATISTICS();
unlock();
return 0;
}
void ip_frag_manager::return_buffers_to_owners(const owner_desc_map_t &buff_map)
{
// Assume locked !!!
owner_desc_map_t::const_iterator iter;
for (iter = buff_map.begin(); iter != buff_map.end(); ++iter) {
if(g_buffer_pool_rx)
g_buffer_pool_rx->put_buffers_thread_safe(iter->second);
}
}
void ip_frag_manager::handle_timer_expired(void* user_data)
{
NOT_IN_USE(user_data);
ip_frags_list_t::iterator iter, iter_temp;
ip_frag_desc_t *desc;
uint64_t delta =0;
lock();
if (m_frag_counter > IP_FRAG_SPACE) {
delta = m_frag_counter - IP_FRAG_SPACE;
m_frag_counter -= delta;
}
frag_dbg("calling handle_timer_expired, m_frag_counter=%ld, delta=%ld", m_frag_counter, delta);
PRINT_STATISTICS();
iter = m_frags.begin();
while (iter != m_frags.end()) {
desc = iter->second;
desc->frag_counter -= delta;
if (desc->frag_counter<0 || (desc->ttl <= 0)) { //discard this packet
frag_dbg("expiring packet fragments desc=%p (frag_counter=%d, ttl=%d)", desc, desc->frag_counter, desc->ttl);
destroy_frag_desc(desc);
free_frag_desc(desc);
iter_temp = iter++;
m_frags.erase(iter_temp);
}
else {
iter++;
}
--desc->ttl;
}
owner_desc_map_t temp_buff_map = m_return_descs;
m_return_descs.clear();
PRINT_STATISTICS();
unlock();
// Must call cq_mgr outside the lock to avoid ABBA deadlock
return_buffers_to_owners(temp_buff_map);
}
#if _BullseyeCoverage
#pragma BullseyeCoverage on
#endif