/*
 * Copyright (c) 2001-2020 Mellanox Technologies, Ltd. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ip_frag.h"

// NOTE(review): the targets of the next two system includes were missing from
// the reviewed text; <assert.h> is required by the assert() calls below, the
// second one is assumed - TODO confirm against the repository.
#include <assert.h>
#include <sys/mman.h>

#include "utils/bullseye.h"
#include "vma/event/event_handler_manager.h"
#include "mem_buf_desc.h"

//#define IP_FRAG_DEBUG 1

#ifdef IP_FRAG_DEBUG
#define frag_dbg(fmt, args...) \
	vlog_printf(VLOG_WARNING, "%s:%d : " fmt "\n", __FUNCTION__, __LINE__, ##args)
#else
#define frag_dbg(fmt, args...)
#endif

#define frag_err(fmt, args...) \
	vlog_printf(VLOG_ERROR, "%s:%d : " fmt "\n", __FUNCTION__, __LINE__, ##args)

#define frag_panic(fmt, args...) \
	{vlog_printf(VLOG_PANIC, "%s:%d : " fmt "\n", __FUNCTION__, __LINE__, ##args); throw;}

#ifdef IP_FRAG_DEBUG
static int debug_drop_every_n_pkt=0;	// 0 - Disabled, 1/N is the number of packet dropped
static int debug_drop_index=0;		// counter

static int g_ip_frag_count_check = 0;
#define MEMBUF_DEBUG_REF_INC(__p_desc__) {g_ip_frag_count_check++; if (__p_desc__->n_ref_count!=0) frag_panic("REF_INC: p=%p\n", __p_desc__); __p_desc__->n_ref_count++;}
#define MEMBUF_DEBUG_REF_DEC(__p_desc__) {mem_buf_desc_t* frag_list = __p_desc__; while (frag_list) { MEMBUF_DEBUG_REF_DEC_1(frag_list); frag_list = frag_list->p_next_desc; }}
#define MEMBUF_DEBUG_REF_DEC_1(__p_desc__) {g_ip_frag_count_check--; __p_desc__->n_ref_count--; if (__p_desc__->n_ref_count!=0) frag_panic("REF_DEC: p=%p\n", __p_desc__);}
#define PRINT_STATISTICS() {print_statistics();}
#else
#define MEMBUF_DEBUG_REF_INC(__p_desc__)
#define MEMBUF_DEBUG_REF_DEC(__p_desc__)
#define PRINT_STATISTICS()
#endif

ip_frag_manager * g_p_ip_frag_manager = NULL;

// Preallocated pool of hole descriptors and the free list threaded through it
ip_frag_hole_desc *hole_base = NULL;
ip_frag_hole_desc *hole_free_list_head = NULL;
int hole_free_list_count = 0;

// Preallocated pool of fragment descriptors and the free list threaded through it
ip_frag_desc *desc_base = NULL;
ip_frag_desc *desc_free_list_head = NULL;
int desc_free_list_count = 0;

/**
 * Allocates the descriptor and hole pools and pushes every element onto the
 * matching free list.
 */
ip_frag_manager::ip_frag_manager() : lock_spin("ip_frag_manager")
{
	frag_dbg("");
	m_frag_counter = 0;
	int i;

	frag_dbg("NOTE: ip frag periodic timer is disabled until HW supports ip frag offload");
	// g_p_event_handler_manager->register_timer_event(IP_FRAG_CLEANUP_INT, this, PERIODIC_TIMER, 0);

	frag_dbg("Created new IPFRAG MANAGER instance");

	/* allocate descriptor and hole pools */
	// NOTE(review): a plain new[] expression reports failure by throwing
	// std::bad_alloc, so the NULL checks below are reachable only when the
	// build uses a non-throwing allocator. Consider new (std::nothrow).
	desc_base = new ip_frag_desc_t [IP_FRAG_MAX_DESC];
	BULLSEYE_EXCLUDE_BLOCK_START
	if (!desc_base) {
		frag_dbg("Failed to allocate descriptor");
		free_frag_resources();
		throw_vma_exception("Failed to allocate descriptor");
	}
	hole_base = new ip_frag_hole_desc [IP_FRAG_MAX_HOLES];
	if (!hole_base) {
		frag_dbg("Failed to allocate hole descriptor");
		free_frag_resources();
		throw_vma_exception("Failed to allocate hole descriptor");
	}
	BULLSEYE_EXCLUDE_BLOCK_END
	for (i = 0; i < IP_FRAG_MAX_DESC; i++) {
		free_frag_desc(&desc_base[i]);
	}
	for (i = 0; i < IP_FRAG_MAX_HOLES; i++) {
		free_hole_desc(&hole_base[i]);
	}
}

/**
 * Drops every partially reassembled datagram, hands the collected buffers
 * to the rx buffer pool and releases both descriptor pools.
 */
void ip_frag_manager::free_frag_resources(void)
{
	ip_frags_list_t::iterator i;
	ip_frag_desc_t *desc;

	frag_dbg("NOTE: ip frag periodic timer is disabled until HW supports ip frag offload");
	// g_p_event_handler_manager->unregister_timer_event(this, NULL);

	lock();

	while (m_frags.size() > 0) {
		i = m_frags.begin();
		desc = i->second;
		destroy_frag_desc(desc);
		free_frag_desc(desc);
		m_frags.erase(i);
	}

	owner_desc_map_t temp_buff_map = m_return_descs;
	m_return_descs.clear();

	unlock();

	// Must call cq_mgr outside the lock to avoid ABBA deadlock
	return_buffers_to_owners(temp_buff_map);

	// The pools and their free lists are file-scope globals: reset them together
	// with the storage so nothing keeps pointing into the released arrays and a
	// later instance starts from empty free lists with correct counters.
	delete [] desc_base;
	desc_base = NULL;
	desc_free_list_head = NULL;
	desc_free_list_count = 0;

	delete [] hole_base;
	hole_base = NULL;
	hole_free_list_head = NULL;
	hole_free_list_count = 0;

	frag_dbg("Deleted IPFRAG MANAGER instance");
}

ip_frag_manager::~ip_frag_manager()
{
	free_frag_resources();
}

#if _BullseyeCoverage
    #pragma BullseyeCoverage off
#endif

/**
 * Debug helper: logs free-list sizes, map size and the debug buffer counter.
 * The arguments are consumed only when IP_FRAG_DEBUG is defined.
 */
void ip_frag_manager::print_statistics()
{
	frag_dbg("free desc=%d, free holes=%d, map size=%d, frags=%d", desc_free_list_count, hole_free_list_count, m_frags.size(), g_ip_frag_count_check);
}

/**
 * Queues a chain of buffers for return to their owner.
 * The chain is marked as freed and prepended to the per-owner list in
 * m_return_descs; the actual return happens in return_buffers_to_owners().
 *
 * @param frag head of the buffer chain, may be NULL
 */
void ip_frag_manager::free_frag(mem_buf_desc_t *frag)
{
	mem_buf_desc_t *tail;

	// There are cases that we might not have a frag list at all to release
	// This is instead of checking the pointer before all calls to free_frag()
	if (!frag)
		return;

	// Change packet size - it will force packet to be discarded
	frag->sz_data = IP_FRAG_FREED;

	// Return to owner does post_recv() which deals with linked buffers automatically
	MEMBUF_DEBUG_REF_DEC(frag);

	// Find the end of the chain so the owner's pending list can be appended to it
	tail = frag;
	while (tail->p_next_desc) {
		tail = tail->p_next_desc;
	}
	tail->p_next_desc = m_return_descs[frag->p_desc_owner];
	m_return_descs[frag->p_desc_owner] = frag;
}

/**
 * Takes a hole descriptor from the preallocated pool's free list.
 *
 * @return descriptor with data_first, data_last and next cleared,
 *         or NULL when the free list is empty
 */
ip_frag_hole_desc* ip_frag_manager::alloc_hole_desc()
{
	struct ip_frag_hole_desc *ret;

	ret = hole_free_list_head;
	if (!ret)
		return NULL;

	// unlink from hole's free list
	hole_free_list_head = ret->next;
	hole_free_list_count--;

	// clear hole struct
	ret->data_first = 0;
	ret->data_last = 0;
	ret->next = 0;
	return ret;
}

/**
 * Returns a hole descriptor to the free list.
 */
void ip_frag_manager::free_hole_desc(struct ip_frag_hole_desc *p)
{
	// link in head of free list
	p->next = hole_free_list_head;
	hole_free_list_head = p;
	++hole_free_list_count;
}

/**
 * Takes a fragment descriptor from the preallocated pool's free list.
 *
 * @return descriptor with next cleared, or NULL when the free list is empty
 */
ip_frag_desc_t *ip_frag_manager::alloc_frag_desc()
{
	ip_frag_desc_t *ret;

	ret = desc_free_list_head;
	if (!ret)
		return NULL;

	// unlink from descriptor's free list
	desc_free_list_head = ret->next;
	--desc_free_list_count;

	ret->next = 0;
	return ret;
}

/**
 * Returns a fragment descriptor to the free list.
 */
void ip_frag_manager::free_frag_desc(ip_frag_desc_t *p)
{
	// link in head of free list
	p->next = desc_free_list_head;
	desc_free_list_head = p;
	desc_free_list_count++;
}

/**
 * Releases everything a descriptor holds: its holes go back to the hole free
 * list and its buffers are queued for return. The descriptor itself is not
 * released here.
 */
void ip_frag_manager::destroy_frag_desc(ip_frag_desc_t *desc)
{
	struct ip_frag_hole_desc *phole, *pphole;

	// free holes
	phole = desc->hole_list;
	while (phole) {
		pphole = phole;
		phole = phole->next;
		free_hole_desc(pphole);
	}

	// free frags
	free_frag(desc->frag_list);
}

/**
 * first fragment for given address is detected - setup
 *
 * Creates a descriptor holding a single hole that spans
 * IP_FRAG_NINF..IP_FRAG_INF and registers it in m_frags under key.
 *
 * @return the new descriptor, or NULL when either pool is exhausted
 */
ip_frag_desc_t *ip_frag_manager::new_frag_desc(ip_frag_key_t &key)
{
	ip_frag_desc_t *desc = NULL;
	struct ip_frag_hole_desc *hole = NULL;

	hole = alloc_hole_desc();
	if (!hole){
		frag_dbg("NULL hole");
		return NULL;
	}
	hole->first = IP_FRAG_NINF;
	hole->last = IP_FRAG_INF;

	desc = alloc_frag_desc();
	if (!desc) {
		frag_dbg("NULL desc");
		free_hole_desc(hole);
		return NULL;
	}
	desc->ttl = IP_FRAG_TTL;
	desc->frag_list = 0;
	desc->hole_list = hole;
	desc->frag_counter = m_frag_counter;

	m_frags[key] = desc;
	return desc;
}

/**
 * Adds one received fragment to the reassembly state of its datagram.
 *
 * Complexity of the algorithm:
 * O(1) if packets are coming in order or reverse order
 * O(n^2) for random fragments, where n is number of fragments
 *
 * @param hdr  IP header of the fragment
 * @param frag buffer holding the fragment
 * @param ret  receives the head of the assembled buffer chain when the
 *             datagram is complete, NULL when more fragments are needed;
 *             left untouched when -1 is returned
 *
 * returns: 0 if finished OK (if the packet is complete - put it in ret)
 *         -1 if finished not OK and this packet needs to be dropped
 */
int ip_frag_manager::add_frag(iphdr *hdr, mem_buf_desc_t *frag, mem_buf_desc_t **ret)
{
	ip_frag_key_t key;
	ip_frags_list_t::iterator i;
	ip_frag_desc_t *desc;
	struct ip_frag_hole_desc *phole, *phole_prev;
	struct ip_frag_hole_desc *hole_before = NULL, *hole_after = NULL;
	uint16_t frag_off, frag_first, frag_last;
	uint32_t total_len, header_len, last_byte;
	bool more_frags;

	assert(hdr);
	assert(frag);

	key.ip_id = hdr->id; //id is in network order!
	key.src_ip = hdr->saddr;
	key.dst_ip = hdr->daddr;
	key.ipproto = hdr->protocol;

	frag_dbg("Fragment: %d.%d.%d.%d->%d.%d.%d.%d id=%x size=%d",
		 NIPQUAD(key.src_ip),
		 NIPQUAD(key.dst_ip),
		 (int)key.ip_id, (int)ntohs(hdr->tot_len));

#ifdef IP_FRAG_DEBUG
	if (debug_drop_every_n_pkt && ((++debug_drop_index) % debug_drop_every_n_pkt == 0)) {
		frag_dbg("XXX debug force dropped XXX");
		return -1;
	}
#endif

	frag_off = ntohs(hdr->frag_off);
	more_frags = frag_off & MORE_FRAGMENTS_FLAG;
	frag_first = (frag_off & FRAGMENT_OFFSET) * 8;

	// The header fields come from the wire, so the last byte offset is computed
	// in 32 bits. A fragment without payload, or one ending past what the
	// 16 bit hole offsets can describe, would wrap around and corrupt the
	// hole list - drop it.
	total_len = ntohs(hdr->tot_len);
	header_len = (uint32_t)hdr->ihl << 2;
	if (total_len <= header_len) {
		frag_dbg("> fragment has no payload");
		return -1;
	}
	last_byte = (uint32_t)frag_first + (total_len - header_len) - 1; // frag starts from 0!!!
	if (last_byte > 0xFFFF) {
		frag_dbg("> fragment exceeds maximum datagram size");
		return -1;
	}
	frag_last = (uint16_t)last_byte;

	lock();

	MEMBUF_DEBUG_REF_INC(frag);
	PRINT_STATISTICS();

	frag_dbg("> fragment: %d-%d, %s more frags", frag_first, frag_last, more_frags?"pending":"no");

	m_frag_counter++;

	i = m_frags.find(key);

	if (i == m_frags.end()) {
		/* new fragment */
		frag_dbg("> new fragmented packet");
		desc = new_frag_desc(key);
	}
	else {
		desc = i->second;
		if ((m_frag_counter - desc->frag_counter) > IP_FRAG_SPACE) {
			// discard this packet
			frag_dbg("expiring packet fragments id=%x", (int)key.ip_id);
			destroy_frag_desc(desc);
			free_frag_desc(desc);
			m_frags.erase(i);
			i = m_frags.end();
			// Add new fragment
			frag_dbg("> new fragmented packet");
			desc = new_frag_desc(key);
		}
		else {
			frag_dbg("> old fragmented packet");
		}
	}
	if (desc==NULL) {
		MEMBUF_DEBUG_REF_DEC(frag);
		PRINT_STATISTICS();
		unlock();
		return -1;
	}

	//desc->last_frag_counter = m_frag_counter;

	/* 8 step reassembly algorithm as described in RFC 815 */

	//step 1
	phole_prev = 0; phole = desc->hole_list;
	while (phole) {
		//step 2 and step 3
		if (frag_first >= phole->first && frag_last <= phole->last) {
			break;
		}
		phole_prev = phole;
		phole = phole->next;
	}
	if (!phole) { // the right hole wasn't found
		MEMBUF_DEBUG_REF_DEC(frag);
		PRINT_STATISTICS();
		unlock();
		return -1;
	}

	frag_dbg("> found hole: %d-%d", phole->first, phole->last);

	// Reserve every hole descriptor steps 5 and 6 will need before the hole
	// list is modified. If the pool is exhausted the fragment is dropped and
	// the reassembly state stays exactly as it was.
	if (frag_first > phole->first) {
		hole_before = alloc_hole_desc();
		if (!hole_before) {
			MEMBUF_DEBUG_REF_DEC(frag);
			PRINT_STATISTICS();
			unlock();
			return -1;
		}
	}
	if (frag_last < phole->last && more_frags) {
		hole_after = alloc_hole_desc();
		if (!hole_after) {
			if (hole_before)
				free_hole_desc(hole_before);
			MEMBUF_DEBUG_REF_DEC(frag);
			PRINT_STATISTICS();
			unlock();
			return -1;
		}
	}

	// step 4 - remove hole from list
	if (phole_prev)
		phole_prev->next = phole->next;
	else
		desc->hole_list = phole->next;

	// step 5 - remaining hole in front of the fragment
	if (hole_before) {
		hole_before->first = phole->first;
		hole_before->last = frag_first-1;
		hole_before->data_first = phole->data_first;
		hole_before->data_last = frag;

		hole_before->next = phole->next;
		if (phole_prev)
			phole_prev->next = hole_before;
		else
			desc->hole_list = hole_before;

		phole_prev = hole_before;
	}

	//step 6 - remaining hole behind the fragment
	if (hole_after) {
		hole_after->first = frag_last + 1;
		hole_after->last = phole->last;
		hole_after->data_first = frag;
		hole_after->data_last = phole->data_last;

		hole_after->next = phole->next;
		if (phole_prev)
			phole_prev->next = hole_after;
		else
			desc->hole_list = hole_after;
	}

	// link frag between the buffers that bordered the hole
	if (phole->data_first)
		phole->data_first->p_next_desc = frag;
	else
		desc->frag_list = frag;
	frag->p_next_desc = phole->data_last;

	free_hole_desc(phole);

	if (!desc->hole_list) {
		//step 8 - datagram assembly completed
		if (i == m_frags.end())
			i = m_frags.find(key);
		if (i == m_frags.end()){
			MEMBUF_DEBUG_REF_DEC(frag);
			frag_panic("frag desc lost from map???");
			//coverity unreachable
			/*unlock();
			return -1;*/
		}
		MEMBUF_DEBUG_REF_DEC(desc->frag_list);
		m_frags.erase(i);
		*ret = desc->frag_list;
		free_frag_desc(desc);
		frag_dbg("> PACKET ASSEMBLED");
		PRINT_STATISTICS();
		unlock();
		return 0;
	}
	frag_dbg("> need more packets");

	*ret = NULL;
	PRINT_STATISTICS();
	unlock();
	return 0;
}

/**
 * Hands every queued buffer chain in buff_map to the rx buffer pool.
 * Callers in this file invoke it after unlock(), on a private copy of
 * m_return_descs.
 */
void ip_frag_manager::return_buffers_to_owners(const owner_desc_map_t &buff_map)
{
	owner_desc_map_t::const_iterator iter;

	for (iter = buff_map.begin(); iter != buff_map.end(); ++iter) {
		if(g_buffer_pool_rx)
			g_buffer_pool_rx->put_buffers_thread_safe(iter->second);
	}
}

/**
 * Timer callback: rebases the fragment counters, expires datagrams whose
 * counter went negative or whose ttl ran out, and ages the remaining ones.
 *
 * @param user_data unused
 */
void ip_frag_manager::handle_timer_expired(void* user_data)
{
	NOT_IN_USE(user_data);
	ip_frags_list_t::iterator iter, iter_temp;
	ip_frag_desc_t *desc;
	uint64_t delta =0;

	lock();
	// Keep m_frag_counter bounded; every descriptor is shifted by the same delta
	if (m_frag_counter > IP_FRAG_SPACE) {
		delta = m_frag_counter - IP_FRAG_SPACE;
		m_frag_counter -= delta;
	}

	frag_dbg("calling handle_timer_expired, m_frag_counter=%ld, delta=%ld", m_frag_counter, delta);
	PRINT_STATISTICS();

	iter = m_frags.begin();
	while (iter != m_frags.end()) {
		desc = iter->second;
		desc->frag_counter -= delta;
		if (desc->frag_counter<0 || (desc->ttl <= 0)) {	//discard this packet
			frag_dbg("expiring packet fragments desc=%p (frag_counter=%d, ttl=%d)", desc, desc->frag_counter, desc->ttl);
			destroy_frag_desc(desc);
			free_frag_desc(desc);
			iter_temp = iter++;
			m_frags.erase(iter_temp);
		}
		else {
			// Age only descriptors that stay in the map; an expired one is
			// already back on the free list and must not be touched.
			--desc->ttl;
			iter++;
		}
	}

	owner_desc_map_t temp_buff_map = m_return_descs;
	m_return_descs.clear();

	PRINT_STATISTICS();
	unlock();

	// Must call cq_mgr outside the lock to avoid ABBA deadlock
	return_buffers_to_owners(temp_buff_map);
}

#if _BullseyeCoverage
    #pragma BullseyeCoverage on
#endif