/*
* Copyright (c) 2001-2020 Mellanox Technologies, Ltd. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "utils/bullseye.h"
#include "vma/dev/rfs_uc_tcp_gro.h"
#include "vma/dev/gro_mgr.h"
#include "vma/dev/ring_simple.h"
#include "vma/proto/route_rule_table_key.h"
// Module tag; presumably picked up by the logging macros used in this file (rfs_logpanic, __log_func) - confirm.
#define MODULE_NAME "rfs_uc_tcp_gro"
// Value compared against iphdr::ihl in tcp_ip_check(). ihl counts 32-bit words, so 5 means a 20-byte IP header without options.
#define IP_H_LEN_NO_OPTIONS 5
// Value compared against tcphdr::doff. doff counts 32-bit words, so 5 means a 20-byte TCP header without options.
#define TCP_H_LEN_NO_OPTIONS 5
// doff of a TCP header followed by exactly three 32-bit option words (the layout described by tcphdr_ts).
#define TCP_H_LEN_TIMESTAMP 8
/*
 * Construct the GRO flavour of rfs_uc.
 *
 * All four arguments are handed unchanged to the rfs_uc base constructor.
 * The ring must be a ring_simple: its m_gro_mgr member becomes this object's
 * GRO manager and supplies the aggregation limits.
 *
 * The object starts with no aggregation in progress (m_b_active == false),
 * no stream reservation (m_b_reserved == false) and a zeroed m_gro_desc.
 */
rfs_uc_tcp_gro::rfs_uc_tcp_gro(flow_tuple *flow_spec_5t, ring_slave *p_ring, rfs_rule_filter* rule_filter, uint32_t flow_tag_id)
	: rfs_uc(flow_spec_5t, p_ring, rule_filter, flow_tag_id)
	, m_b_active(false)
	, m_b_reserved(false)
{
	ring_simple* p_simple_ring = dynamic_cast<ring_simple*>(p_ring);
	if (!p_simple_ring) {
		// NOTE(review): the pointer is dereferenced right below, so this
		// presumably does not return - confirm against rfs_logpanic.
		rfs_logpanic("Incompatible ring type");
	}

	// Begin with a clean aggregation descriptor.
	memset(&m_gro_desc, 0, sizeof(m_gro_desc));

	m_p_gro_mgr = &(p_simple_ring->m_gro_mgr);

	// Upper bound on the number of buffers chained into one aggregate.
	m_n_buf_max = m_p_gro_mgr->get_buf_max();

	// Byte limit is the manager's limit reduced by one ring MTU; presumably
	// this leaves room for one more full-size packet before the limit is
	// crossed - confirm.
	const uint32_t ring_mtu = p_simple_ring->get_mtu();
	m_n_byte_max = m_p_gro_mgr->get_byte_max() - ring_mtu;
}
/*
 * Receive-path entry point: either absorb the packet into the running
 * aggregation or pass it straight to rfs_uc::rx_dispatch_packet().
 *
 * p_rx_pkt_mem_buf_desc_info  received packet; its rx.tcp.p_ip_h / p_tcp_h
 *                             header pointers are read here.
 * pv_fd_ready_array           opaque pointer forwarded to flush_gro_desc()
 *                             and to rfs_uc::rx_dispatch_packet().
 *
 * Returns true when the packet was taken into the aggregation (it may have
 * been flushed immediately if a limit was reached); otherwise returns
 * whatever rfs_uc::rx_dispatch_packet() returns for the packet itself.
 */
bool rfs_uc_tcp_gro::rx_dispatch_packet(mem_buf_desc_t* p_rx_pkt_mem_buf_desc_info, void* pv_fd_ready_array /* = NULL */)
{
	struct iphdr* p_ip_h = p_rx_pkt_mem_buf_desc_info->rx.tcp.p_ip_h;
	struct tcphdr* p_tcp_h = p_rx_pkt_mem_buf_desc_info->rx.tcp.p_tcp_h;

	// Idle, holding no reservation, and the manager reports it is at its
	// stream maximum: do not start aggregating, deliver as-is.
	if (!m_b_active && !m_b_reserved && m_p_gro_mgr->is_stream_max()) {
		return rfs_uc::rx_dispatch_packet(p_rx_pkt_mem_buf_desc_info, pv_fd_ready_array);
	}

	// Packet is not a candidate for aggregation: push out what has been
	// collected so far (if anything), then deliver this packet on its own.
	if (!tcp_ip_check(p_rx_pkt_mem_buf_desc_info, p_ip_h, p_tcp_h)) {
		if (m_b_active) {
			flush_gro_desc(pv_fd_ready_array);
		}
		return rfs_uc::rx_dispatch_packet(p_rx_pkt_mem_buf_desc_info, pv_fd_ready_array);
	}

	if (m_b_active) {
		// Only the segment that continues the sequence and passes the
		// timestamp check may be appended; anything else ends the aggregate.
		const bool is_next_in_sequence = (ntohl(p_tcp_h->seq) == m_gro_desc.next_seq);
		if (!is_next_in_sequence || !timestamp_check(p_tcp_h)) {
			flush_gro_desc(pv_fd_ready_array);
			return rfs_uc::rx_dispatch_packet(p_rx_pkt_mem_buf_desc_info, pv_fd_ready_array);
		}
		add_packet(p_rx_pkt_mem_buf_desc_info, p_ip_h, p_tcp_h);
	} else {
		// First packet of a new aggregate.
		if (!m_b_reserved) {
			m_b_reserved = m_p_gro_mgr->reserve_stream(this);
		}
		init_gro_desc(p_rx_pkt_mem_buf_desc_info, p_ip_h, p_tcp_h);
		m_b_active = true;
	}

	// Buffer-count or byte limit reached: deliver the aggregate now.
	if (m_gro_desc.buf_count >= m_n_buf_max || m_gro_desc.ip_tot_len >= m_n_byte_max) {
		flush_gro_desc(pv_fd_ready_array);
	}
	return true;
}
/*
 * Append one more segment to the aggregation that is already in progress.
 *
 * mem_buf_desc  buffer holding the segment; becomes the new tail (p_last).
 * p_ip_h        IP header of this segment (its tot_len is used to locate the payload).
 * p_tcp_h       TCP header of this segment (ack_seq, window and, when the
 *               aggregate tracks timestamps, the echo-reply word are taken from it).
 */
void rfs_uc_tcp_gro::add_packet(mem_buf_desc_t* mem_buf_desc, struct iphdr* p_ip_h, tcphdr* p_tcp_h)
{
	// Grow the running totals by this segment's payload.
	m_gro_desc.next_seq += mem_buf_desc->rx.sz_payload;
	m_gro_desc.ip_tot_len += mem_buf_desc->rx.sz_payload;
	m_gro_desc.buf_count++;

	// The most recent segment supplies ack / window (kept as found in the header).
	m_gro_desc.ack = p_tcp_h->ack_seq;
	m_gro_desc.wnd = p_tcp_h->window;

	if (m_gro_desc.ts_present) {
		// Third 32-bit word after the fixed TCP header is remembered as tsecr.
		uint32_t* ts_words = (uint32_t *) (p_tcp_h + 1);
		m_gro_desc.tsecr = ts_words[2];
	}

	mem_buf_desc->reset_ref_count();

	// Describe this segment's payload as a stand-alone pbuf.
	mem_buf_desc->lwip_pbuf.pbuf.type = PBUF_REF;
	mem_buf_desc->lwip_pbuf.pbuf.flags = PBUF_FLAG_IS_CUSTOM;
	mem_buf_desc->lwip_pbuf.pbuf.ref = 1;
	mem_buf_desc->lwip_pbuf.pbuf.next = NULL;
	mem_buf_desc->lwip_pbuf.pbuf.tot_len = mem_buf_desc->rx.sz_payload;
	mem_buf_desc->lwip_pbuf.pbuf.len = mem_buf_desc->lwip_pbuf.pbuf.tot_len;
	// Payload start = buffer start + transport header length + IP total length - payload size.
	mem_buf_desc->lwip_pbuf.pbuf.payload =
		(u8_t *)mem_buf_desc->p_buffer + mem_buf_desc->rx.tcp.n_transport_header_len + ntohs(p_ip_h->tot_len) - mem_buf_desc->rx.sz_payload;

	// Hook the new buffer behind the current tail and make it the tail.
	mem_buf_desc->p_prev_desc = m_gro_desc.p_last;
	m_gro_desc.p_last->p_next_desc = NULL;
	m_gro_desc.p_last->lwip_pbuf.pbuf.next = &(mem_buf_desc->lwip_pbuf.pbuf);
	m_gro_desc.p_last = mem_buf_desc;
}
/*
 * Deliver whatever is currently aggregated and drop the stream reservation.
 *
 * pv_fd_ready_array is forwarded unchanged to flush_gro_desc().
 */
void rfs_uc_tcp_gro::flush(void* pv_fd_ready_array)
{
flush_gro_desc(pv_fd_ready_array);
// With the flag cleared, the next rx_dispatch_packet() call goes through
// the is_stream_max() / reserve_stream() path again.
m_b_reserved = false;
}
/*
 * Overlay for a TCP header immediately followed by three 32-bit option words,
 * i.e. the header shape that goes with doff == TCP_H_LEN_TIMESTAMP.
 * As used in this file: popts[0] is the NOP/NOP/TIMESTAMP/length word checked
 * in timestamp_check(), popts[1] is read as tsval and popts[2] as tsecr.
 * Packed so that no padding is inserted between the header and the option words.
 */
struct __attribute__((packed)) tcphdr_ts
{
// The fixed TCP header itself (a value, not a pointer, despite the p_ prefix).
tcphdr p_tcp_h;
// The three option words that follow the fixed header.
uint32_t popts[3];
};
void rfs_uc_tcp_gro::flush_gro_desc(void* pv_fd_ready_array)
{
ring_simple* p_ring = dynamic_cast<ring_simple*>(m_p_ring);
if (!p_ring) {
rfs_logpanic("Incompatible ring type");
}
if (!m_b_active) return;
if (m_gro_desc.buf_count > 1) {
m_gro_desc.p_ip_h->tot_len = htons(m_gro_desc.ip_tot_len);
m_gro_desc.p_tcp_h->ack_seq = m_gro_desc.ack;
m_gro_desc.p_tcp_h->window = m_gro_desc.wnd;
if (m_gro_desc.ts_present) {
tcphdr_ts* p_tcp_ts_h = (tcphdr_ts*) m_gro_desc.p_tcp_h;
p_tcp_ts_h->popts[2] = m_gro_desc.tsecr;
}
m_gro_desc.p_first->rx.tcp.gro = 1;
m_gro_desc.p_first->lwip_pbuf.pbuf.flags = PBUF_FLAG_IS_CUSTOM;
m_gro_desc.p_first->lwip_pbuf.pbuf.tot_len = m_gro_desc.p_first->lwip_pbuf.pbuf.len = (m_gro_desc.p_first->sz_data - m_gro_desc.p_first->rx.tcp.n_transport_header_len);
m_gro_desc.p_first->lwip_pbuf.pbuf.ref = 1;
m_gro_desc.p_first->lwip_pbuf.pbuf.type = PBUF_REF;
m_gro_desc.p_first->lwip_pbuf.pbuf.payload = (u8_t *)(m_gro_desc.p_first->p_buffer + m_gro_desc.p_first->rx.tcp.n_transport_header_len);
m_gro_desc.p_first->rx.is_vma_thr = m_gro_desc.p_last->rx.is_vma_thr;
for (mem_buf_desc_t* p_desc = m_gro_desc.p_last; p_desc != m_gro_desc.p_first; p_desc = p_desc->p_prev_desc) {
p_desc->p_prev_desc->lwip_pbuf.pbuf.tot_len += p_desc->lwip_pbuf.pbuf.tot_len;
}
}
__log_func("Rx LRO TCP segment info: src_port=%d, dst_port=%d, flags='%s%s%s%s%s%s' seq=%u, ack=%u, win=%u, payload_sz=%u, num_bufs=%u",
ntohs(m_gro_desc.p_tcp_h->source), ntohs(m_gro_desc.p_tcp_h->dest),
m_gro_desc.p_tcp_h->urg?"U":"", m_gro_desc.p_tcp_h->ack?"A":"", m_gro_desc.p_tcp_h->psh?"P":"",
m_gro_desc.p_tcp_h->rst?"R":"", m_gro_desc.p_tcp_h->syn?"S":"", m_gro_desc.p_tcp_h->fin?"F":"",
ntohl(m_gro_desc.p_tcp_h->seq), ntohl(m_gro_desc.p_tcp_h->ack_seq), ntohs(m_gro_desc.p_tcp_h->window),
m_gro_desc.ip_tot_len - 40, m_gro_desc.buf_count);
if (!rfs_uc::rx_dispatch_packet(m_gro_desc.p_first, pv_fd_ready_array)) {
p_ring->reclaim_recv_buffers_no_lock(m_gro_desc.p_first);
}
m_b_active = false;
}
/*
 * Start a new aggregation with the given packet as its only member.
 *
 * mem_buf_desc  buffer of the first segment; becomes both p_first and p_last.
 * p_ip_h        its IP header (pointer is kept; tot_len seeds ip_tot_len).
 * p_tcp_h       its TCP header (pointer is kept; seq, ack_seq, window and,
 *               when doff == TCP_H_LEN_TIMESTAMP, the timestamp words are read).
 */
void rfs_uc_tcp_gro::init_gro_desc(mem_buf_desc_t* mem_buf_desc, iphdr* p_ip_h, tcphdr* p_tcp_h)
{
	// Chain consists of exactly this one buffer.
	m_gro_desc.p_first = mem_buf_desc;
	m_gro_desc.p_last = mem_buf_desc;
	m_gro_desc.buf_count = 1;

	// Headers that flush_gro_desc() will patch later on.
	m_gro_desc.p_ip_h = p_ip_h;
	m_gro_desc.p_tcp_h = p_tcp_h;

	// ip_tot_len and next_seq are kept in host order; ack and wnd are stored as found in the header.
	m_gro_desc.ip_tot_len = ntohs(p_ip_h->tot_len);
	m_gro_desc.next_seq = ntohl(p_tcp_h->seq) + mem_buf_desc->rx.sz_payload;
	m_gro_desc.ack = p_tcp_h->ack_seq;
	m_gro_desc.wnd = p_tcp_h->window;

	const bool carries_timestamp = (p_tcp_h->doff == TCP_H_LEN_TIMESTAMP);
	m_gro_desc.ts_present = carries_timestamp ? 1 : 0;
	if (carries_timestamp) {
		// Option words sit right behind the fixed header: word 1 -> tsval, word 2 -> tsecr.
		uint32_t* ts_words = (uint32_t*)(p_tcp_h + 1);
		m_gro_desc.tsval = ts_words[1];
		m_gro_desc.tsecr = ts_words[2];
	}
}
/*
 * Decide whether a packet is eligible for aggregation.
 *
 * A packet qualifies only if all of the following hold:
 *   - it carries payload (rx.sz_payload != 0),
 *   - its IP header has no options (ihl == IP_H_LEN_NO_OPTIONS),
 *   - ACK is set and none of URG / RST / SYN / FIN is,
 *   - its TCP data offset is either TCP_H_LEN_NO_OPTIONS or TCP_H_LEN_TIMESTAMP.
 *
 * Returns true when eligible, false otherwise.
 */
bool rfs_uc_tcp_gro::tcp_ip_check(mem_buf_desc_t* mem_buf_desc, iphdr* p_ip_h, tcphdr* p_tcp_h)
{
	return mem_buf_desc->rx.sz_payload != 0
		&& p_ip_h->ihl == IP_H_LEN_NO_OPTIONS
		&& !(p_tcp_h->urg || !p_tcp_h->ack || p_tcp_h->rst || p_tcp_h->syn || p_tcp_h->fin)
		&& (p_tcp_h->doff == TCP_H_LEN_NO_OPTIONS || p_tcp_h->doff == TCP_H_LEN_TIMESTAMP);
}
/*
 * Validate the option words of a segment before it is appended to the
 * running aggregation.
 *
 * A segment whose data offset is not TCP_H_LEN_TIMESTAMP has no option block
 * to inspect here and is accepted. Otherwise the three 32-bit words that
 * follow the fixed TCP header must satisfy:
 *   word 0: exactly NOP, NOP, TIMESTAMP kind, TIMESTAMP length;
 *   word 1: a value not smaller than the tsval recorded by init_gro_desc();
 *   word 2: non-zero.
 *
 * Returns true when the segment may be appended, false when it must not
 * (rx_dispatch_packet() then flushes the aggregate).
 *
 * NOTE(review): a segment without the option block is accepted even when
 * m_gro_desc.ts_present is set, and add_packet() then reads tsecr from the
 * bytes following its 20-byte header - confirm whether such a mix can occur.
 */
bool rfs_uc_tcp_gro::timestamp_check(tcphdr* p_tcp_h)
{
	if (p_tcp_h->doff == TCP_H_LEN_TIMESTAMP) {
		// Option words start right behind the fixed TCP header.
		uint32_t* topt = (uint32_t*)(p_tcp_h + 1);

		// Only the NOP/NOP/TIMESTAMP layout is accepted.
		if (*topt != htonl((TCPOPT_NOP << 24) |
				(TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) |
				TCPOLEN_TIMESTAMP)) {
			return false;
		}

		// Reject a segment whose timestamp value is older than the one the
		// aggregate started with. The branch body was empty before, so this
		// condition never rejected anything. Plain unsigned comparison; no
		// wrap-around handling.
		topt++;
		if (ntohl(*topt) < ntohl(m_gro_desc.tsval)) {
			return false;
		}

		// A zero echo-reply word is not accepted.
		topt++;
		if (*topt == 0) {
			return false;
		}
	}
	return true;
}