/*
 * Copyright (c) 2001-2020 Mellanox Technologies, Ltd. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "utils/bullseye.h"
#include "vma/dev/rfs_uc_tcp_gro.h"
#include "vma/dev/gro_mgr.h"
#include "vma/dev/ring_simple.h"
#include "vma/proto/route_rule_table_key.h"

#define MODULE_NAME 		"rfs_uc_tcp_gro"

/* Header lengths expressed in 32-bit words, as carried in iphdr::ihl / tcphdr::doff. */
#define IP_H_LEN_NO_OPTIONS 5
#define TCP_H_LEN_NO_OPTIONS 5
/* 5 words of base TCP header + 3 words for NOP, NOP, TIMESTAMP option. */
#define TCP_H_LEN_TIMESTAMP 8

/**
 * Build a GRO-capable unicast TCP flow steering object.
 *
 * The ring must be a ring_simple because the GRO manager is taken from it.
 * The per-chain limits are read once here: the buffer limit comes straight
 * from the GRO manager, and the byte limit is the manager's byte limit minus
 * the ring MTU.
 *
 * @param flow_spec_5t  flow tuple forwarded to the rfs_uc base class
 * @param p_ring        owning ring; must be dynamically a ring_simple
 * @param rule_filter   rule filter forwarded to the rfs_uc base class
 * @param flow_tag_id   flow tag forwarded to the rfs_uc base class
 */
rfs_uc_tcp_gro::rfs_uc_tcp_gro(flow_tuple *flow_spec_5t, ring_slave *p_ring, rfs_rule_filter* rule_filter, uint32_t flow_tag_id) :
	rfs_uc(flow_spec_5t, p_ring, rule_filter, flow_tag_id),
	m_b_active(false), m_b_reserved(false)
{
	ring_simple* p_check_ring = dynamic_cast<ring_simple*>(p_ring);

	if (!p_check_ring) {
		rfs_logpanic("Incompatible ring type");
	}

	m_p_gro_mgr = &(p_check_ring->m_gro_mgr);
	m_n_buf_max = m_p_gro_mgr->get_buf_max();
	uint32_t mtu = p_check_ring->get_mtu();
	m_n_byte_max = m_p_gro_mgr->get_byte_max() - mtu;
	memset(&m_gro_desc, 0, sizeof(m_gro_desc));
}

/**
 * Receive entry point: either absorb the segment into the current GRO chain
 * or hand it to rfs_uc::rx_dispatch_packet() unchanged.
 *
 * @param p_rx_pkt_mem_buf_desc_info  received buffer descriptor
 * @param pv_fd_ready_array           opaque pointer forwarded to the base dispatcher
 * @return true when the segment was taken into a GRO chain; otherwise the
 *         result of rfs_uc::rx_dispatch_packet() for this segment
 */
bool rfs_uc_tcp_gro::rx_dispatch_packet(mem_buf_desc_t* p_rx_pkt_mem_buf_desc_info, void* pv_fd_ready_array /* = NULL */)
{
	struct iphdr* p_ip_h = p_rx_pkt_mem_buf_desc_info->rx.tcp.p_ip_h;
	struct tcphdr* p_tcp_h = p_rx_pkt_mem_buf_desc_info->rx.tcp.p_tcp_h;

	// No chain is open, no slot is held and the manager reports it is at its
	// stream limit: bypass aggregation entirely.
	if (!m_b_active) {
		if (!m_b_reserved && m_p_gro_mgr->is_stream_max()) {
			goto out;
		}
	}

	// Segment is not eligible for merging: push out whatever was collected
	// first, then deliver this segment on its own.
	if (!tcp_ip_check(p_rx_pkt_mem_buf_desc_info, p_ip_h, p_tcp_h)) {
		if (m_b_active) {
			flush_gro_desc(pv_fd_ready_array);
		}
		goto out;
	}

	if (!m_b_active) {
		// Open a new chain with this segment as its head.
		if (!m_b_reserved) {
			m_b_reserved = m_p_gro_mgr->reserve_stream(this);
		}
		init_gro_desc(p_rx_pkt_mem_buf_desc_info, p_ip_h, p_tcp_h);
		m_b_active = true;
	} else {
		// Only the exact next in-sequence segment may be appended.
		if (ntohl(p_tcp_h->seq) != m_gro_desc.next_seq) {
			flush_gro_desc(pv_fd_ready_array);
			goto out;
		}

		if (!timestamp_check(p_tcp_h)) {
			flush_gro_desc(pv_fd_ready_array);
			goto out;
		}

		add_packet(p_rx_pkt_mem_buf_desc_info, p_ip_h, p_tcp_h);
	}

	// Chain reached one of its limits: deliver it now.
	if (m_gro_desc.buf_count >= m_n_buf_max || m_gro_desc.ip_tot_len >= m_n_byte_max) {
		flush_gro_desc(pv_fd_ready_array);
	}

	return true;

out:
	return rfs_uc::rx_dispatch_packet(p_rx_pkt_mem_buf_desc_info, pv_fd_ready_array);
}

/**
 * Append an in-sequence segment to the tail of the open GRO chain.
 *
 * Updates the running totals (buffer count, IP total length, next expected
 * sequence), remembers the latest window / ack / TSecr, prepares the buffer's
 * pbuf so it exposes only the TCP payload, and links it after the current
 * tail.
 *
 * @param mem_buf_desc  buffer being appended
 * @param p_ip_h        IP header of that buffer
 * @param p_tcp_h       TCP header of that buffer
 */
void rfs_uc_tcp_gro::add_packet(mem_buf_desc_t* mem_buf_desc, struct iphdr* p_ip_h, tcphdr* p_tcp_h)
{
	m_gro_desc.buf_count++;
	m_gro_desc.ip_tot_len += mem_buf_desc->rx.sz_payload;
	m_gro_desc.next_seq += mem_buf_desc->rx.sz_payload;
	m_gro_desc.wnd = p_tcp_h->window;
	m_gro_desc.ack = p_tcp_h->ack_seq;

	uint32_t* topt;
	if (m_gro_desc.ts_present) {
		// Options start right after the fixed TCP header; word 2 is TSecr.
		topt = (uint32_t *) (p_tcp_h + 1);
		m_gro_desc.tsecr = *(topt + 2);
	}

	mem_buf_desc->reset_ref_count();

	mem_buf_desc->lwip_pbuf.pbuf.flags = PBUF_FLAG_IS_CUSTOM;
	mem_buf_desc->lwip_pbuf.pbuf.len = mem_buf_desc->lwip_pbuf.pbuf.tot_len = mem_buf_desc->rx.sz_payload;
	mem_buf_desc->lwip_pbuf.pbuf.ref = 1;
	mem_buf_desc->lwip_pbuf.pbuf.type = PBUF_REF;
	mem_buf_desc->lwip_pbuf.pbuf.next = NULL;
	// (IP total length - payload size) is the combined IP + TCP header size,
	// so this lands on the first payload byte.
	mem_buf_desc->lwip_pbuf.pbuf.payload = (u8_t *)mem_buf_desc->p_buffer + mem_buf_desc->rx.tcp.n_transport_header_len + ntohs(p_ip_h->tot_len) - mem_buf_desc->rx.sz_payload;

	m_gro_desc.p_last->lwip_pbuf.pbuf.next = &(mem_buf_desc->lwip_pbuf.pbuf);
	m_gro_desc.p_last->p_next_desc = NULL;
	mem_buf_desc->p_prev_desc = m_gro_desc.p_last;
	m_gro_desc.p_last = mem_buf_desc;
}

/**
 * Deliver any open chain and clear the reserved flag.
 *
 * @param pv_fd_ready_array  opaque pointer forwarded to the base dispatcher
 */
void rfs_uc_tcp_gro::flush(void* pv_fd_ready_array)
{
	flush_gro_desc(pv_fd_ready_array);
	m_b_reserved = false;
}

/* TCP header immediately followed by the 3-word NOP/NOP/TIMESTAMP option block. */
struct __attribute__((packed)) tcphdr_ts
{
	tcphdr p_tcp_h;
	uint32_t popts[3];
};

/**
 * Deliver the open GRO chain (if any) through rfs_uc::rx_dispatch_packet().
 *
 * When more than one buffer was collected, the head segment's headers are
 * rewritten to describe the merged segment (IP total length, ack, window and,
 * when timestamps are in use, TSecr) and the pbuf chain's tot_len fields are
 * accumulated from tail to head. If the base dispatcher does not take the
 * chain, the buffers are returned to the ring.
 *
 * @param pv_fd_ready_array  opaque pointer forwarded to the base dispatcher
 */
void rfs_uc_tcp_gro::flush_gro_desc(void* pv_fd_ready_array)
{
	ring_simple* p_ring = dynamic_cast<ring_simple*>(m_p_ring);
	if (!p_ring) {
		rfs_logpanic("Incompatible ring type");
	}

	if (!m_b_active) return;

	if (m_gro_desc.buf_count > 1) {
		m_gro_desc.p_ip_h->tot_len = htons(m_gro_desc.ip_tot_len);
		m_gro_desc.p_tcp_h->ack_seq = m_gro_desc.ack;
		m_gro_desc.p_tcp_h->window = m_gro_desc.wnd;

		if (m_gro_desc.ts_present) {
			tcphdr_ts* p_tcp_ts_h = (tcphdr_ts*) m_gro_desc.p_tcp_h;
			p_tcp_ts_h->popts[2] = m_gro_desc.tsecr;
		}

		m_gro_desc.p_first->rx.tcp.gro = 1;

		m_gro_desc.p_first->lwip_pbuf.pbuf.flags = PBUF_FLAG_IS_CUSTOM;
		m_gro_desc.p_first->lwip_pbuf.pbuf.tot_len = m_gro_desc.p_first->lwip_pbuf.pbuf.len = (m_gro_desc.p_first->sz_data - m_gro_desc.p_first->rx.tcp.n_transport_header_len);
		m_gro_desc.p_first->lwip_pbuf.pbuf.ref = 1;
		m_gro_desc.p_first->lwip_pbuf.pbuf.type = PBUF_REF;
		m_gro_desc.p_first->lwip_pbuf.pbuf.payload = (u8_t *)(m_gro_desc.p_first->p_buffer + m_gro_desc.p_first->rx.tcp.n_transport_header_len);
		m_gro_desc.p_first->rx.is_vma_thr = m_gro_desc.p_last->rx.is_vma_thr;

		// Walk tail -> head so every pbuf's tot_len ends up covering itself
		// plus everything chained after it.
		for (mem_buf_desc_t* p_desc = m_gro_desc.p_last; p_desc != m_gro_desc.p_first; p_desc = p_desc->p_prev_desc) {
			p_desc->p_prev_desc->lwip_pbuf.pbuf.tot_len += p_desc->lwip_pbuf.pbuf.tot_len;
		}
	}

	// NOTE(review): the "- 40" assumes 20-byte IP and 20-byte TCP headers, so
	// the logged payload size is off by 12 when timestamps are present.
	__log_func("Rx LRO TCP segment info: src_port=%d, dst_port=%d, flags='%s%s%s%s%s%s' seq=%u, ack=%u, win=%u, payload_sz=%u, num_bufs=%u",
		   ntohs(m_gro_desc.p_tcp_h->source), ntohs(m_gro_desc.p_tcp_h->dest),
		   m_gro_desc.p_tcp_h->urg?"U":"", m_gro_desc.p_tcp_h->ack?"A":"", m_gro_desc.p_tcp_h->psh?"P":"",
		   m_gro_desc.p_tcp_h->rst?"R":"", m_gro_desc.p_tcp_h->syn?"S":"", m_gro_desc.p_tcp_h->fin?"F":"",
		   ntohl(m_gro_desc.p_tcp_h->seq), ntohl(m_gro_desc.p_tcp_h->ack_seq), ntohs(m_gro_desc.p_tcp_h->window),
		   m_gro_desc.ip_tot_len - 40, m_gro_desc.buf_count);

	if (!rfs_uc::rx_dispatch_packet(m_gro_desc.p_first, pv_fd_ready_array)) {
		p_ring->reclaim_recv_buffers_no_lock(m_gro_desc.p_first);
	}

	m_b_active = false;
}

/**
 * Start a new GRO chain with the given segment as its single element.
 *
 * Records the head's header pointers and seeds the running state (IP total
 * length, ack, window, next expected sequence). Timestamp values are captured
 * only when the TCP header length is exactly TCP_H_LEN_TIMESTAMP words.
 *
 * @param mem_buf_desc  buffer that becomes the head of the chain
 * @param p_ip_h        IP header of that buffer
 * @param p_tcp_h       TCP header of that buffer
 */
void rfs_uc_tcp_gro::init_gro_desc(mem_buf_desc_t* mem_buf_desc, iphdr* p_ip_h, tcphdr* p_tcp_h)
{
	m_gro_desc.p_first = m_gro_desc.p_last = mem_buf_desc;
	m_gro_desc.buf_count = 1;
	m_gro_desc.p_ip_h = p_ip_h;
	m_gro_desc.p_tcp_h = p_tcp_h;
	m_gro_desc.ip_tot_len = ntohs(p_ip_h->tot_len);
	m_gro_desc.ack = p_tcp_h->ack_seq;
	m_gro_desc.next_seq = ntohl(p_tcp_h->seq) + mem_buf_desc->rx.sz_payload;
	m_gro_desc.wnd = p_tcp_h->window;
	m_gro_desc.ts_present = 0;
	if (p_tcp_h->doff == TCP_H_LEN_TIMESTAMP) {
		uint32_t* topt = (uint32_t*)(p_tcp_h + 1);
		m_gro_desc.ts_present = 1;
		m_gro_desc.tsval = *(topt+1);
		m_gro_desc.tsecr = *(topt+2);
	}
}

/**
 * Decide whether a segment is a candidate for aggregation at all.
 *
 * A segment qualifies only if it carries payload, has an option-less IP
 * header, has ACK set with none of URG/RST/SYN/FIN, and its TCP header is
 * either option-less or exactly TCP_H_LEN_TIMESTAMP words long.
 *
 * @param mem_buf_desc  buffer under test
 * @param p_ip_h        IP header of that buffer
 * @param p_tcp_h       TCP header of that buffer
 * @return true when the segment may be merged, false otherwise
 */
bool rfs_uc_tcp_gro::tcp_ip_check(mem_buf_desc_t* mem_buf_desc, iphdr* p_ip_h, tcphdr* p_tcp_h)
{
	if (mem_buf_desc->rx.sz_payload == 0) {
		return false;
	}

	if (p_ip_h->ihl != IP_H_LEN_NO_OPTIONS) {
		return false;
	}

	if (p_tcp_h->urg || !p_tcp_h->ack || p_tcp_h->rst || p_tcp_h->syn || p_tcp_h->fin) {
		return false;
	}

	if (p_tcp_h->doff != TCP_H_LEN_NO_OPTIONS && p_tcp_h->doff != TCP_H_LEN_TIMESTAMP) {
		return false;
	}

	return true;
}

/**
 * Validate a follow-up segment's TCP options against the open chain.
 *
 * The segment must use the same option layout as the chain head: either both
 * carry the timestamp option block or neither does. add_packet() and
 * flush_gro_desc() read and write TSecr purely on the strength of
 * m_gro_desc.ts_present, so a segment without the option in a timestamped
 * chain would have payload bytes interpreted as TSecr.
 *
 * For timestamped segments the option block must be exactly
 * NOP, NOP, TIMESTAMP and TSecr must be non-zero.
 *
 * @param p_tcp_h  TCP header of the segment about to be appended
 * @return true when the segment may be appended to the open chain
 */
bool rfs_uc_tcp_gro::timestamp_check(tcphdr* p_tcp_h)
{
	const bool pkt_has_ts = (p_tcp_h->doff == TCP_H_LEN_TIMESTAMP);
	const bool chain_has_ts = (m_gro_desc.ts_present != 0);

	// Option layout differs from the chain head: do not merge.
	if (pkt_has_ts != chain_has_ts) {
		return false;
	}

	if (pkt_has_ts) {
		uint32_t* topt = (uint32_t*)(p_tcp_h + 1);
		if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
			return false;
		}

		topt++;
		// NOTE(review): a TSval older than the chain head's is detected here
		// but deliberately not acted upon (empty body in the original) --
		// confirm whether such segments should be rejected.
		if (ntohl(*topt) < ntohl(m_gro_desc.tsval)) {
		}

		topt++;
		if (*topt == 0) {
			return false;
		}
	}
	return true;
}