Blame src/vma/dev/rfs_uc_tcp_gro.cpp

Packit 6d2c1b
/*
Packit 6d2c1b
 * Copyright (c) 2001-2020 Mellanox Technologies, Ltd. All rights reserved.
Packit 6d2c1b
 *
Packit 6d2c1b
 * This software is available to you under a choice of one of two
Packit 6d2c1b
 * licenses.  You may choose to be licensed under the terms of the GNU
Packit 6d2c1b
 * General Public License (GPL) Version 2, available from the file
Packit 6d2c1b
 * COPYING in the main directory of this source tree, or the
Packit 6d2c1b
 * BSD license below:
Packit 6d2c1b
 *
Packit 6d2c1b
 *     Redistribution and use in source and binary forms, with or
Packit 6d2c1b
 *     without modification, are permitted provided that the following
Packit 6d2c1b
 *     conditions are met:
Packit 6d2c1b
 *
Packit 6d2c1b
 *      - Redistributions of source code must retain the above
Packit 6d2c1b
 *        copyright notice, this list of conditions and the following
Packit 6d2c1b
 *        disclaimer.
Packit 6d2c1b
 *
Packit 6d2c1b
 *      - Redistributions in binary form must reproduce the above
Packit 6d2c1b
 *        copyright notice, this list of conditions and the following
Packit 6d2c1b
 *        disclaimer in the documentation and/or other materials
Packit 6d2c1b
 *        provided with the distribution.
Packit 6d2c1b
 *
Packit 6d2c1b
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
Packit 6d2c1b
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
Packit 6d2c1b
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
Packit 6d2c1b
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
Packit 6d2c1b
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
Packit 6d2c1b
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
Packit 6d2c1b
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
Packit 6d2c1b
 * SOFTWARE.
Packit 6d2c1b
 */
Packit 6d2c1b
Packit 6d2c1b
#include "utils/bullseye.h"
Packit 6d2c1b
#include "vma/dev/rfs_uc_tcp_gro.h"
Packit 6d2c1b
#include "vma/dev/gro_mgr.h"
Packit 6d2c1b
#include "vma/dev/ring_simple.h"
Packit 6d2c1b
#include "vma/proto/route_rule_table_key.h"
Packit 6d2c1b
Packit 6d2c1b
// Name of this module as a string; presumably picked up by the logging macros
// used in this file (rfs_logpanic / __log_func) -- TODO confirm.
#define MODULE_NAME 		"rfs_uc_tcp_gro"
Packit 6d2c1b
Packit 6d2c1b
// Value iphdr::ihl must have for a packet to be aggregated: a 20-byte IPv4
// header (ihl counts 32-bit words), i.e. no IP options.
#define IP_H_LEN_NO_OPTIONS 5
Packit 6d2c1b
// tcphdr::doff value (32-bit words) of a TCP header that carries no options.
#define TCP_H_LEN_NO_OPTIONS 5
Packit 6d2c1b
// tcphdr::doff value (32-bit words) of a TCP header followed by exactly 12
// bytes of options, the size this file treats as a timestamp option block.
#define TCP_H_LEN_TIMESTAMP 8
Packit 6d2c1b
Packit 6d2c1b
Packit 6d2c1b
/*
 * Build a GRO-capable unicast TCP flow steering object.
 *
 * All four arguments are forwarded unchanged to the rfs_uc base class. The
 * ring must really be a ring_simple, because the GRO manager and the MTU are
 * taken from it; any other ring type is reported through rfs_logpanic().
 * The object starts with no aggregation in progress and no reserved stream.
 */
rfs_uc_tcp_gro::rfs_uc_tcp_gro(flow_tuple *flow_spec_5t, ring_slave *p_ring, rfs_rule_filter* rule_filter, uint32_t flow_tag_id) :
	rfs_uc(flow_spec_5t, p_ring, rule_filter, flow_tag_id),
	m_b_active(false), m_b_reserved(false)
{
	ring_simple* const simple_ring = dynamic_cast<ring_simple*>(p_ring);
	if (simple_ring == NULL) {
		// NOTE(review): the code below dereferences the ring, so this
		// presumably does not return -- confirm against rfs_logpanic.
		rfs_logpanic("Incompatible ring type");
	}

	// Start from an all-zero aggregation descriptor.
	memset(&m_gro_desc, 0, sizeof(m_gro_desc));

	// Cache the per-ring GRO manager and its buffer-count limit.
	m_p_gro_mgr = &simple_ring->m_gro_mgr;
	m_n_buf_max = m_p_gro_mgr->get_buf_max();

	// Byte threshold is the manager's limit minus one MTU, so the check in
	// rx_dispatch_packet() trips before another full-size segment is added.
	const uint32_t ring_mtu = simple_ring->get_mtu();
	m_n_byte_max = m_p_gro_mgr->get_byte_max() - ring_mtu;
}
Packit 6d2c1b
Packit 6d2c1b
/*
 * Receive entry point: either absorb the segment into the current GRO
 * aggregate or pass it on untouched via rfs_uc::rx_dispatch_packet().
 *
 * Returns true when the segment was taken into an aggregate (which may have
 * been flushed immediately because a limit was reached). Otherwise returns
 * whatever the base-class dispatch returns for this single segment.
 */
bool rfs_uc_tcp_gro::rx_dispatch_packet(mem_buf_desc_t* p_rx_pkt_mem_buf_desc_info, void* pv_fd_ready_array /* = NULL */)
{
	struct iphdr* const ip_hdr = p_rx_pkt_mem_buf_desc_info->rx.tcp.p_ip_h;
	struct tcphdr* const tcp_hdr = p_rx_pkt_mem_buf_desc_info->rx.tcp.p_tcp_h;

	// Idle, holding no reservation, and the manager reports it is at its
	// stream limit: do not start aggregating, deliver as-is.
	if (!m_b_active && !m_b_reserved && m_p_gro_mgr->is_stream_max()) {
		return rfs_uc::rx_dispatch_packet(p_rx_pkt_mem_buf_desc_info, pv_fd_ready_array);
	}

	// Segment is not eligible for merging. Anything already held must go up
	// first so that this segment is delivered after it.
	if (!tcp_ip_check(p_rx_pkt_mem_buf_desc_info, ip_hdr, tcp_hdr)) {
		if (m_b_active) {
			flush_gro_desc(pv_fd_ready_array);
		}
		return rfs_uc::rx_dispatch_packet(p_rx_pkt_mem_buf_desc_info, pv_fd_ready_array);
	}

	if (m_b_active) {
		// Only the exact next-in-sequence segment that also passes the
		// timestamp check may be appended; otherwise flush what is held
		// and deliver this segment on its own.
		const bool is_next_in_seq = (ntohl(tcp_hdr->seq) == m_gro_desc.next_seq);
		if (!is_next_in_seq || !timestamp_check(tcp_hdr)) {
			flush_gro_desc(pv_fd_ready_array);
			return rfs_uc::rx_dispatch_packet(p_rx_pkt_mem_buf_desc_info, pv_fd_ready_array);
		}

		add_packet(p_rx_pkt_mem_buf_desc_info, ip_hdr, tcp_hdr);
	} else {
		// First segment of a new aggregate: make sure a stream is
		// reserved with the manager, then open the descriptor.
		if (!m_b_reserved) {
			m_b_reserved = m_p_gro_mgr->reserve_stream(this);
		}
		init_gro_desc(p_rx_pkt_mem_buf_desc_info, ip_hdr, tcp_hdr);
		m_b_active = true;
	}

	// Deliver right away once either the buffer-count or byte limit is hit.
	const bool buf_limit_hit = (m_gro_desc.buf_count >= m_n_buf_max);
	if (buf_limit_hit || m_gro_desc.ip_tot_len >= m_n_byte_max) {
		flush_gro_desc(pv_fd_ready_array);
	}

	return true;
}
Packit 6d2c1b
Packit 6d2c1b
/*
 * Append one more in-sequence segment to the aggregate that is currently open.
 *
 * mem_buf_desc - buffer descriptor of the segment being appended
 * p_ip_h       - its IP header (tot_len is used to locate the payload)
 * p_tcp_h      - its TCP header (window / ack / timestamp echo are recorded)
 *
 * The descriptor counters are advanced by the segment's payload size, the
 * latest window and ack values are remembered for the merged header, and the
 * buffer is linked at the tail of the pbuf / descriptor chain.
 */
void rfs_uc_tcp_gro::add_packet(mem_buf_desc_t* mem_buf_desc, struct iphdr* p_ip_h, tcphdr* p_tcp_h)
{
	m_gro_desc.buf_count++;
	m_gro_desc.ip_tot_len += mem_buf_desc->rx.sz_payload;
	m_gro_desc.next_seq += mem_buf_desc->rx.sz_payload;
	// Kept exactly as found in the header (no byte swapping); they are
	// written back verbatim by flush_gro_desc().
	m_gro_desc.wnd = p_tcp_h->window;
	m_gro_desc.ack = p_tcp_h->ack_seq;

	// Remember the newest timestamp echo, but only when THIS segment actually
	// has the 12-byte option block. Without the doff test a segment with a
	// bare 20-byte header would have its payload bytes read as the echo value
	// (and a short payload could be over-read).
	if (m_gro_desc.ts_present && p_tcp_h->doff == TCP_H_LEN_TIMESTAMP) {
		const uint32_t* topt = (const uint32_t *) (p_tcp_h + 1);
		m_gro_desc.tsecr = *(topt + 2);
	}

	mem_buf_desc->reset_ref_count();

	// Describe only this segment's payload in its own pbuf.
	mem_buf_desc->lwip_pbuf.pbuf.flags = PBUF_FLAG_IS_CUSTOM;
	mem_buf_desc->lwip_pbuf.pbuf.len = mem_buf_desc->lwip_pbuf.pbuf.tot_len = mem_buf_desc->rx.sz_payload;
	mem_buf_desc->lwip_pbuf.pbuf.ref = 1;
	mem_buf_desc->lwip_pbuf.pbuf.type = PBUF_REF;
	mem_buf_desc->lwip_pbuf.pbuf.next = NULL;
	// Payload start = end of the IP datagram minus the payload size.
	// NOTE(review): n_transport_header_len looks like the number of bytes
	// that precede the IP header in p_buffer -- confirm.
	mem_buf_desc->lwip_pbuf.pbuf.payload = (u8_t *)mem_buf_desc->p_buffer + mem_buf_desc->rx.tcp.n_transport_header_len + ntohs(p_ip_h->tot_len) - mem_buf_desc->rx.sz_payload;

	// Link the new buffer after the current tail and make it the new tail.
	m_gro_desc.p_last->lwip_pbuf.pbuf.next = &(mem_buf_desc->lwip_pbuf.pbuf);
	m_gro_desc.p_last->p_next_desc = NULL;
	mem_buf_desc->p_prev_desc = m_gro_desc.p_last;
	m_gro_desc.p_last = mem_buf_desc;
}
Packit 6d2c1b
Packit 6d2c1b
void rfs_uc_tcp_gro::flush(void* pv_fd_ready_array)
Packit 6d2c1b
{
Packit 6d2c1b
	flush_gro_desc(pv_fd_ready_array);
Packit 6d2c1b
	m_b_reserved = false;
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
// Overlay for a TCP header that is immediately followed by 12 bytes of
// options, exposed as three 32-bit words. flush_gro_desc() casts the first
// segment's TCP header to this type in order to rewrite popts[2].
// NOTE(review): packed is presumably there so the compiler makes no alignment
// assumption about where the header sits inside the packet buffer -- confirm.
struct __attribute__((packed)) tcphdr_ts
Packit 6d2c1b
{
Packit 6d2c1b
	// The fixed TCP header itself (a value, not a pointer, despite the p_ prefix).
	tcphdr p_tcp_h;
Packit 6d2c1b
	// Option words as used in this file: [0] is compared against the
	// NOP/NOP/TIMESTAMP/length word, [1] is stored as tsval, [2] as tsecr.
	uint32_t popts[3];
Packit 6d2c1b
};
Packit 6d2c1b
Packit 6d2c1b
void rfs_uc_tcp_gro::flush_gro_desc(void* pv_fd_ready_array)
Packit 6d2c1b
{
Packit 6d2c1b
	ring_simple* p_ring = dynamic_cast<ring_simple*>(m_p_ring);
Packit 6d2c1b
Packit 6d2c1b
	if (!p_ring) {
Packit 6d2c1b
		rfs_logpanic("Incompatible ring type");
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	if (!m_b_active) return;
Packit 6d2c1b
Packit 6d2c1b
	if (m_gro_desc.buf_count > 1) {
Packit 6d2c1b
		m_gro_desc.p_ip_h->tot_len = htons(m_gro_desc.ip_tot_len);
Packit 6d2c1b
		m_gro_desc.p_tcp_h->ack_seq = m_gro_desc.ack;
Packit 6d2c1b
		m_gro_desc.p_tcp_h->window = m_gro_desc.wnd;
Packit 6d2c1b
Packit 6d2c1b
		if (m_gro_desc.ts_present) {
Packit 6d2c1b
			tcphdr_ts* p_tcp_ts_h = (tcphdr_ts*) m_gro_desc.p_tcp_h;
Packit 6d2c1b
			p_tcp_ts_h->popts[2] = m_gro_desc.tsecr;
Packit 6d2c1b
		}
Packit 6d2c1b
Packit 6d2c1b
		m_gro_desc.p_first->rx.tcp.gro = 1;
Packit 6d2c1b
Packit 6d2c1b
		m_gro_desc.p_first->lwip_pbuf.pbuf.flags = PBUF_FLAG_IS_CUSTOM;
Packit 6d2c1b
		m_gro_desc.p_first->lwip_pbuf.pbuf.tot_len = m_gro_desc.p_first->lwip_pbuf.pbuf.len = (m_gro_desc.p_first->sz_data - m_gro_desc.p_first->rx.tcp.n_transport_header_len);
Packit 6d2c1b
		m_gro_desc.p_first->lwip_pbuf.pbuf.ref = 1;
Packit 6d2c1b
		m_gro_desc.p_first->lwip_pbuf.pbuf.type = PBUF_REF;
Packit 6d2c1b
		m_gro_desc.p_first->lwip_pbuf.pbuf.payload = (u8_t *)(m_gro_desc.p_first->p_buffer + m_gro_desc.p_first->rx.tcp.n_transport_header_len);
Packit 6d2c1b
		m_gro_desc.p_first->rx.is_vma_thr = m_gro_desc.p_last->rx.is_vma_thr;
Packit 6d2c1b
Packit 6d2c1b
		for (mem_buf_desc_t* p_desc = m_gro_desc.p_last; p_desc != m_gro_desc.p_first; p_desc = p_desc->p_prev_desc) {
Packit 6d2c1b
			p_desc->p_prev_desc->lwip_pbuf.pbuf.tot_len += p_desc->lwip_pbuf.pbuf.tot_len;
Packit 6d2c1b
		}
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	__log_func("Rx LRO TCP segment info: src_port=%d, dst_port=%d, flags='%s%s%s%s%s%s' seq=%u, ack=%u, win=%u, payload_sz=%u, num_bufs=%u",
Packit 6d2c1b
					ntohs(m_gro_desc.p_tcp_h->source), ntohs(m_gro_desc.p_tcp_h->dest),
Packit 6d2c1b
					m_gro_desc.p_tcp_h->urg?"U":"", m_gro_desc.p_tcp_h->ack?"A":"", m_gro_desc.p_tcp_h->psh?"P":"",
Packit 6d2c1b
					m_gro_desc.p_tcp_h->rst?"R":"", m_gro_desc.p_tcp_h->syn?"S":"", m_gro_desc.p_tcp_h->fin?"F":"",
Packit 6d2c1b
					ntohl(m_gro_desc.p_tcp_h->seq), ntohl(m_gro_desc.p_tcp_h->ack_seq), ntohs(m_gro_desc.p_tcp_h->window),
Packit 6d2c1b
					m_gro_desc.ip_tot_len - 40, m_gro_desc.buf_count);
Packit 6d2c1b
Packit 6d2c1b
	if (!rfs_uc::rx_dispatch_packet(m_gro_desc.p_first, pv_fd_ready_array)) {
Packit 6d2c1b
		p_ring->reclaim_recv_buffers_no_lock(m_gro_desc.p_first);
Packit 6d2c1b
	}
Packit 6d2c1b
Packit 6d2c1b
	m_b_active = false;
Packit 6d2c1b
}
Packit 6d2c1b
Packit 6d2c1b
/*
 * Open a new aggregate whose head is the given segment.
 *
 * mem_buf_desc - buffer descriptor of the first segment
 * p_ip_h       - its IP header (remembered; patched later by flush_gro_desc)
 * p_tcp_h      - its TCP header (remembered; patched later by flush_gro_desc)
 *
 * Records the header pointers, the running total length, the sequence number
 * expected next, and the ack / window / timestamp values of this segment.
 */
void rfs_uc_tcp_gro::init_gro_desc(mem_buf_desc_t* mem_buf_desc, iphdr* p_ip_h, tcphdr* p_tcp_h)
{
	m_gro_desc.p_first = m_gro_desc.p_last = mem_buf_desc;
	m_gro_desc.buf_count = 1;
	m_gro_desc.p_ip_h = p_ip_h;
	m_gro_desc.p_tcp_h = p_tcp_h;
	m_gro_desc.ip_tot_len = ntohs(p_ip_h->tot_len);
	// ack and window stay in wire byte order; they are copied back verbatim.
	m_gro_desc.ack = p_tcp_h->ack_seq;
	m_gro_desc.next_seq = ntohl(p_tcp_h->seq) + mem_buf_desc->rx.sz_payload;
	m_gro_desc.wnd = p_tcp_h->window;
	m_gro_desc.ts_present = 0;

	if (p_tcp_h->doff == TCP_H_LEN_TIMESTAMP) {
		const uint32_t* topt = (const uint32_t*)(p_tcp_h + 1);
		// A 12-byte option block is only treated as a timestamp when it has
		// the NOP, NOP, TIMESTAMP, length layout that timestamp_check()
		// requires of later segments. Otherwise flush_gro_desc() would later
		// overwrite the third option word of some unrelated option.
		const uint32_t ts_layout = htonl((TCPOPT_NOP << 24) |
				(TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) |
				TCPOLEN_TIMESTAMP);
		if (*topt == ts_layout) {
			m_gro_desc.ts_present = 1;
			m_gro_desc.tsval = *(topt+1);
			m_gro_desc.tsecr = *(topt+2);
		}
	}
}
Packit 6d2c1b
Packit 6d2c1b
/*
 * Decide whether a segment is a candidate for aggregation at all.
 *
 * Returns true only when every one of these holds:
 *   - the segment carries payload,
 *   - the IP header has no options,
 *   - ACK is set and none of URG / RST / SYN / FIN is,
 *   - the TCP header is either bare or has exactly 12 bytes of options.
 */
bool rfs_uc_tcp_gro::tcp_ip_check(mem_buf_desc_t* mem_buf_desc, iphdr* p_ip_h, tcphdr* p_tcp_h)
{
	// Zero-length segments are never merged.
	if (0 == mem_buf_desc->rx.sz_payload)
		return false;

	// Any IP options disqualify the packet.
	if (IP_H_LEN_NO_OPTIONS != p_ip_h->ihl)
		return false;

	// Plain data segment: ACK must be on, every other control flag
	// tested here must be off.
	const bool plain_ack_segment = p_tcp_h->ack
			&& !p_tcp_h->urg
			&& !p_tcp_h->rst
			&& !p_tcp_h->syn
			&& !p_tcp_h->fin;
	if (!plain_ack_segment)
		return false;

	// Only the two supported TCP header sizes are accepted.
	switch (p_tcp_h->doff) {
	case TCP_H_LEN_NO_OPTIONS:
	case TCP_H_LEN_TIMESTAMP:
		return true;
	default:
		return false;
	}
}
Packit 6d2c1b
Packit 6d2c1b
bool rfs_uc_tcp_gro::timestamp_check(tcphdr* p_tcp_h)
Packit 6d2c1b
{
Packit 6d2c1b
	if (p_tcp_h->doff == TCP_H_LEN_TIMESTAMP) {
Packit 6d2c1b
		uint32_t* topt = (uint32_t*)(p_tcp_h + 1);
Packit 6d2c1b
		if (*topt != htonl((TCPOPT_NOP << 24) |
Packit 6d2c1b
				(TCPOPT_NOP << 16) |
Packit 6d2c1b
				(TCPOPT_TIMESTAMP << 8) |
Packit 6d2c1b
				TCPOLEN_TIMESTAMP)) {
Packit 6d2c1b
			return false;
Packit 6d2c1b
		}
Packit 6d2c1b
Packit 6d2c1b
		topt++;
Packit 6d2c1b
Packit 6d2c1b
		if (ntohl(*topt) < ntohl(m_gro_desc.tsval)) {
Packit 6d2c1b
Packit 6d2c1b
		}
Packit 6d2c1b
Packit 6d2c1b
		topt++;
Packit 6d2c1b
Packit 6d2c1b
		if (*topt == 0) {
Packit 6d2c1b
			return false;
Packit 6d2c1b
		}
Packit 6d2c1b
Packit 6d2c1b
	}
Packit 6d2c1b
	return true;
Packit 6d2c1b
}