Blame src/vma/iomux/select_call.cpp

Packit Service aa3af4
/*
Packit Service aa3af4
 * Copyright (c) 2001-2020 Mellanox Technologies, Ltd. All rights reserved.
Packit Service aa3af4
 *
Packit Service aa3af4
 * This software is available to you under a choice of one of two
Packit Service aa3af4
 * licenses.  You may choose to be licensed under the terms of the GNU
Packit Service aa3af4
 * General Public License (GPL) Version 2, available from the file
Packit Service aa3af4
 * COPYING in the main directory of this source tree, or the
Packit Service aa3af4
 * BSD license below:
Packit Service aa3af4
 *
Packit Service aa3af4
 *     Redistribution and use in source and binary forms, with or
Packit Service aa3af4
 *     without modification, are permitted provided that the following
Packit Service aa3af4
 *     conditions are met:
Packit Service aa3af4
 *
Packit Service aa3af4
 *      - Redistributions of source code must retain the above
Packit Service aa3af4
 *        copyright notice, this list of conditions and the following
Packit Service aa3af4
 *        disclaimer.
Packit Service aa3af4
 *
Packit Service aa3af4
 *      - Redistributions in binary form must reproduce the above
Packit Service aa3af4
 *        copyright notice, this list of conditions and the following
Packit Service aa3af4
 *        disclaimer in the documentation and/or other materials
Packit Service aa3af4
 *        provided with the distribution.
Packit Service aa3af4
 *
Packit Service aa3af4
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
Packit Service aa3af4
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
Packit Service aa3af4
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
Packit Service aa3af4
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
Packit Service aa3af4
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
Packit Service aa3af4
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
Packit Service aa3af4
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
Packit Service aa3af4
 * SOFTWARE.
Packit Service aa3af4
 */
Packit Service aa3af4
Packit Service aa3af4
Packit Service aa3af4
#include "select_call.h"
Packit Service aa3af4
Packit Service aa3af4
#include "utils/bullseye.h"
Packit Service aa3af4
#include "vlogger/vlogger.h"
Packit Service aa3af4
#include <vma/util/vtypes.h>
Packit Service aa3af4
#include <vma/sock/sock-redirect.h>
Packit Service aa3af4
#include <vma/sock/fd_collection.h>
Packit Service aa3af4
#include <vma/dev/net_device_table_mgr.h>
Packit Service aa3af4
Packit Service aa3af4
#define MODULE_NAME "select_call:"
Packit Service aa3af4
Packit Service aa3af4
Packit Service aa3af4
// Copy only the leading bytes of an fd_set that hold descriptors [0, fd_count),
// i.e. (fd_count + 7) / 8 bytes, instead of the whole FD_SETSIZE-bit structure.
#define FD_COPY(dst_set, src_set, fd_count) \
		memcpy(__FDS_BITS(dst_set), __FDS_BITS(src_set), ((fd_count) + 7) >> 3)

// Drop the original FD_ZERO from select.h and define a size-limited variant
// that clears only the bytes holding descriptors [0, fd_count).
#undef  FD_ZERO
#define FD_ZERO(dst_set, fd_count) \
		memset(__FDS_BITS(dst_set), 0, ((fd_count) + 7) >> 3)
Packit Service aa3af4
iomux_func_stats_t g_select_stats;
Packit Service aa3af4
Packit Service aa3af4
/**
 * Build a select()/pselect() multiplexer call over the caller's fd sets.
 *
 * Walks descriptors [0, nfds) of the read and write sets. A requested fd that
 * resolves to an FD_TYPE_SOCKET object is recorded in the offloaded fd/mode
 * arrays; unless that socket asks to skip the OS select it is also added to
 * the OS sets (m_os_rfds / m_os_wfds). Every other requested fd goes to the
 * OS sets only.
 *
 * @param off_fds_buffer    storage handed to the io_mux_call base
 * @param off_modes_buffer  storage handed to the io_mux_call base
 * @param nfds              highest-numbered fd to examine, plus one
 * @param readfds           caller's read set, may be NULL
 * @param writefds          caller's write set, may be NULL
 * @param exceptfds         caller's exception set, may be NULL
 * @param timeout           caller's timeout, NULL means no timeout
 * @param __sigmask         signal mask for pselect(), NULL selects plain select()
 *
 * @throws io_mux_call::io_error with errno=EINVAL when nfds is negative, or
 *         with errno=ENOMEM when nfds is larger than FD_SETSIZE.
 */
select_call::select_call(int *off_fds_buffer, offloaded_mode_t *off_modes_buffer,
                         int nfds, fd_set *readfds, fd_set *writefds,
                         fd_set *exceptfds, timeval *timeout, const sigset_t *__sigmask /* = NULL */) :
	io_mux_call(off_fds_buffer, off_modes_buffer, nfds, __sigmask),
	m_nfds(nfds), m_readfds(readfds), m_writefds(writefds),
	m_exceptfds(exceptfds), m_timeout(timeout), m_nfds_with_cq(0), m_b_run_prepare_to_poll(false)
{
	// A negative nfds would turn the byte count used by FD_ZERO/FD_COPY
	// into a huge size_t; reject it the same way the OS select() does.
	if (m_nfds < 0) {
		errno = EINVAL;
		vma_throw_object(io_mux_call::io_error);
	}

	if (m_nfds > FD_SETSIZE) {
		errno = ENOMEM;
		vma_throw_object(io_mux_call::io_error);
	}

	// create stats
	m_p_stats = &g_select_stats;
	vma_stats_instance_get_select_block(m_p_stats);

	// Remember which sets the caller actually supplied, because m_readfds
	// may be redirected to the internal m_cq_rfds below.
	const bool offloaded_read  = !!m_readfds;
	const bool offloaded_write = !!m_writefds;

	if (offloaded_read || offloaded_write) {
		FD_ZERO(&m_os_rfds, m_nfds);
		FD_ZERO(&m_os_wfds, m_nfds);

		// covers the case of select(readfds = NULL)
		if (!m_readfds) {
			FD_ZERO(&m_cq_rfds, m_nfds);
			m_readfds = &m_cq_rfds;
		}

		// collect the offloaded fds requested in the read and write sets
		for (int fd = 0; fd < m_nfds; ++fd) {

			const bool check_read  = offloaded_read  && FD_ISSET(fd, m_readfds);
			const bool check_write = offloaded_write && FD_ISSET(fd, m_writefds);

			socket_fd_api* psock = fd_collection_get_sockfd(fd);

			if (!psock || psock->get_type() != FD_TYPE_SOCKET) {
				// Not an offloaded socket: the OS alone watches this fd.
				if (check_read) {
					FD_SET(fd, &m_os_rfds);
				}
				if (check_write) {
					FD_SET(fd, &m_os_wfds);
				}
				continue;
			}

			offloaded_mode_t off_mode = OFF_NONE;
			if (check_read) {
				off_mode = (offloaded_mode_t)(off_mode | OFF_READ);
			}
			if (check_write) {
				off_mode = (offloaded_mode_t)(off_mode | OFF_WRITE);
			}

			if (!off_mode) {
				// Offloaded socket that the caller did not ask about.
				continue;
			}

			__log_func("---> fd=%d IS SET for read or write!", fd);

			m_p_all_offloaded_fds[m_num_all_offloaded_fds] = fd;
			m_p_offloaded_modes[m_num_all_offloaded_fds] = off_mode;
			m_num_all_offloaded_fds++;

			if (psock->skip_os_select()) {
				__log_func("fd=%d must be skipped from os r select()", fd);
				continue;
			}

			if (check_read) {
				FD_SET(fd, &m_os_rfds);
				if (psock->is_readable(NULL)) {
					io_mux_call::update_fd_array(&m_fd_ready_array, fd);
					m_n_ready_rfds++;
					m_n_all_ready_fds++;
				} else {
					// Instructing the socket to sample the OS immediately to prevent hitting EAGAIN on recvfrom(),
					// after iomux returned a shadow fd as ready (only for non-blocking sockets)
					psock->set_immediate_os_sample();
				}
			}
			if (check_write) {
				FD_SET(fd, &m_os_wfds);
			}
		}
	}
	__log_func("num all offloaded_fds=%d", m_num_all_offloaded_fds);
}
Packit Service aa3af4
Packit Service aa3af4
Packit Service aa3af4
void select_call::prepare_to_poll()
Packit Service aa3af4
{
Packit Service aa3af4
	/* 
Packit Service aa3af4
	 * Create copies of all sets and zero out the originals.
Packit Service aa3af4
	 * This is needed because polling might be successful.
Packit Service aa3af4
	 * 
Packit Service aa3af4
	 * If the read set is zero, use the local copy every time.
Packit Service aa3af4
	 * This is OK because it will hold only the CQ, and wait()
Packit Service aa3af4
	 * clears the CQ from the set after orig_select() call.
Packit Service aa3af4
	 * 
Packit Service aa3af4
	 * m_readfds is non-NULL here because there are offloaded sockets.
Packit Service aa3af4
	 */
Packit Service aa3af4
Packit Service aa3af4
	// copy sets, and zero out the originals
Packit Service aa3af4
	if (m_readfds) {
Packit Service aa3af4
		FD_COPY(&m_orig_readfds, m_readfds, m_nfds);
Packit Service aa3af4
		FD_ZERO(m_readfds, m_nfds);
Packit Service aa3af4
	}
Packit Service aa3af4
Packit Service aa3af4
	if (m_writefds) {
Packit Service aa3af4
		FD_COPY(&m_orig_writefds, m_writefds, m_nfds);
Packit Service aa3af4
		FD_ZERO(m_writefds, m_nfds);
Packit Service aa3af4
	}
Packit Service aa3af4
	if (m_exceptfds) {
Packit Service aa3af4
		FD_COPY(&m_orig_exceptfds, m_exceptfds, m_nfds);
Packit Service aa3af4
		FD_ZERO(m_exceptfds, m_nfds);
Packit Service aa3af4
	}
Packit Service aa3af4
	m_b_run_prepare_to_poll = true;
Packit Service aa3af4
}
Packit Service aa3af4
Packit Service aa3af4
void select_call::prepare_to_block()
Packit Service aa3af4
{
Packit Service aa3af4
	m_cqepfd = g_p_net_device_table_mgr->global_ring_epfd_get();
Packit Service aa3af4
	m_nfds_with_cq = max(m_cqepfd + 1, m_nfds);
Packit Service aa3af4
}
Packit Service aa3af4
Packit Service aa3af4
bool select_call::wait_os(bool zero_timeout)
Packit Service aa3af4
{
Packit Service aa3af4
	timeval to, *pto = NULL;
Packit Service aa3af4
	timespec to_pselect, *pto_pselect = NULL;
Packit Service aa3af4
	
Packit Service aa3af4
/* Avner: I put it in comment, because this logic is wrong
Packit Service aa3af4
Packit Service aa3af4
	// optimization: do not call os select if ALL fds are excluded
Packit Service aa3af4
	// extend check to write/except fds
Packit Service aa3af4
	if (m_rfd_count == m_n_exclude_fds)
Packit Service aa3af4
		return;
Packit Service aa3af4
*/
Packit Service aa3af4
	
Packit Service aa3af4
	if (zero_timeout) {
Packit Service aa3af4
		to.tv_sec = to.tv_usec = 0;
Packit Service aa3af4
		pto = &to;
Packit Service aa3af4
	}
Packit Service aa3af4
	else {
Packit Service aa3af4
		pto = m_timeout;
Packit Service aa3af4
	}
Packit Service aa3af4
Packit Service aa3af4
	// Restore original sets
Packit Service aa3af4
	if (m_b_run_prepare_to_poll) {
Packit Service aa3af4
		if (m_readfds)	FD_COPY(m_readfds, &m_os_rfds, m_nfds);
Packit Service aa3af4
		if (m_writefds)	FD_COPY(m_writefds, &m_os_wfds, m_nfds);
Packit Service aa3af4
		if (m_exceptfds)FD_COPY(m_exceptfds, &m_orig_exceptfds, m_nfds);
Packit Service aa3af4
	}
Packit Service aa3af4
	__log_func("calling os select: %d", m_nfds);
Packit Service aa3af4
	if (m_sigmask) {
Packit Service aa3af4
		if (pto) {
Packit Service aa3af4
			to_pselect.tv_sec = pto->tv_sec;
Packit Service aa3af4
			to_pselect.tv_nsec = pto->tv_usec * 1000;
Packit Service aa3af4
			pto_pselect = &to_pselect;
Packit Service aa3af4
		}
Packit Service aa3af4
		m_n_all_ready_fds = orig_os_api.pselect(m_nfds, m_readfds, m_writefds, m_exceptfds, pto_pselect, m_sigmask);
Packit Service aa3af4
	} else {
Packit Service aa3af4
		m_n_all_ready_fds = orig_os_api.select(m_nfds, m_readfds, m_writefds, m_exceptfds, pto);
Packit Service aa3af4
	}
Packit Service aa3af4
	if (m_n_all_ready_fds < 0) {
Packit Service aa3af4
		vma_throw_object(io_mux_call::io_error);
Packit Service aa3af4
	}
Packit Service aa3af4
	if (m_n_all_ready_fds > 0) {
Packit Service aa3af4
		__log_func("wait_os() returned with %d", m_n_all_ready_fds);
Packit Service aa3af4
	}
Packit Service aa3af4
	return false; // No cq_fd in select() event
Packit Service aa3af4
}
Packit Service aa3af4
Packit Service aa3af4
bool select_call::wait(const timeval &elapsed)
Packit Service aa3af4
{
Packit Service aa3af4
	timeval timeout, *pto = NULL;
Packit Service aa3af4
	timespec to_pselect, *pto_pselect = NULL;
Packit Service aa3af4
	
Packit Service aa3af4
	BULLSEYE_EXCLUDE_BLOCK_START
Packit Service aa3af4
	if (m_n_all_ready_fds > 0) {
Packit Service aa3af4
		__log_panic("wait() called when there are ready fd's!!!");
Packit Service aa3af4
		// YossiE TODO make this and some more checks as debug assertions
Packit Service aa3af4
		// In all functions
Packit Service aa3af4
	}
Packit Service aa3af4
	BULLSEYE_EXCLUDE_BLOCK_END
Packit Service aa3af4
Packit Service aa3af4
	// Restore original sets
Packit Service aa3af4
	if (m_b_run_prepare_to_poll) {
Packit Service aa3af4
		if (m_readfds)	FD_COPY(m_readfds, &m_os_rfds, m_nfds);
Packit Service aa3af4
		if (m_writefds)	FD_COPY(m_writefds, &m_os_wfds, m_nfds);
Packit Service aa3af4
		if (m_exceptfds)FD_COPY(m_exceptfds, &m_orig_exceptfds, m_nfds);
Packit Service aa3af4
	}
Packit Service aa3af4
Packit Service aa3af4
	// Call OS select() on original sets + CQ epfd in read set
Packit Service aa3af4
	if (m_readfds)
Packit Service aa3af4
		FD_SET(m_cqepfd, m_readfds);
Packit Service aa3af4
	if (m_timeout) {
Packit Service aa3af4
		tv_sub(m_timeout, &elapsed, &timeout);
Packit Service aa3af4
		if (timeout.tv_sec < 0 || timeout.tv_usec < 0) {
Packit Service aa3af4
			// Already reached timeout
Packit Service aa3af4
			return false;
Packit Service aa3af4
		}
Packit Service aa3af4
		pto = &timeout;
Packit Service aa3af4
	}
Packit Service aa3af4
Packit Service aa3af4
	__log_func("going to wait on select CQ+OS nfds=%d cqfd=%d pto=%p!!!", m_nfds_with_cq, m_cqepfd, pto);
Packit Service aa3af4
Packit Service aa3af4
	// ACTUAL CALL TO SELECT
Packit Service aa3af4
	if (m_sigmask) {
Packit Service aa3af4
		if (pto) {
Packit Service aa3af4
			to_pselect.tv_sec = pto->tv_sec;
Packit Service aa3af4
			to_pselect.tv_nsec = pto->tv_usec * 1000;
Packit Service aa3af4
			pto_pselect = &to_pselect;
Packit Service aa3af4
		}
Packit Service aa3af4
		m_n_all_ready_fds = orig_os_api.pselect(m_nfds, m_readfds, m_writefds, m_exceptfds, pto_pselect, m_sigmask);
Packit Service aa3af4
	} else {
Packit Service aa3af4
		m_n_all_ready_fds = orig_os_api.select(m_nfds_with_cq, m_readfds, m_writefds, m_exceptfds, pto);
Packit Service aa3af4
	}
Packit Service aa3af4
	__log_func("done select CQ+OS nfds=%d cqfd=%d pto=%p ready=%d!!!", m_nfds_with_cq, m_cqepfd, pto, m_n_all_ready_fds);
Packit Service aa3af4
	if (m_n_all_ready_fds < 0) {
Packit Service aa3af4
		vma_throw_object(io_mux_call::io_error);
Packit Service aa3af4
	}
Packit Service aa3af4
Packit Service aa3af4
	// Clear CQ from the set and don't count it
Packit Service aa3af4
	if (m_readfds)
Packit Service aa3af4
	{
Packit Service aa3af4
		if (FD_ISSET(m_cqepfd, m_readfds)) {
Packit Service aa3af4
			FD_CLR(m_cqepfd, m_readfds); // Not needed if m_readfds is NULL
Packit Service aa3af4
			--m_n_all_ready_fds;
Packit Service aa3af4
			return true;
Packit Service aa3af4
		}
Packit Service aa3af4
	}
Packit Service aa3af4
	return false;
Packit Service aa3af4
}
Packit Service aa3af4
Packit Service aa3af4
bool select_call::is_timeout(const timeval &elapsed)
Packit Service aa3af4
{
Packit Service aa3af4
	return m_timeout && tv_cmp(m_timeout, &elapsed, <=);
Packit Service aa3af4
}
Packit Service aa3af4
Packit Service aa3af4
void select_call::set_offloaded_rfd_ready(int fd_index)
Packit Service aa3af4
{
Packit Service aa3af4
	if (m_p_offloaded_modes[fd_index] & OFF_READ) { //TODO: consider removing
Packit Service aa3af4
		int fd = m_p_all_offloaded_fds[fd_index];
Packit Service aa3af4
		if (!FD_ISSET(fd, m_readfds)) {
Packit Service aa3af4
			FD_SET(fd, m_readfds);
Packit Service aa3af4
			++m_n_ready_rfds;
Packit Service aa3af4
			++m_n_all_ready_fds;
Packit Service aa3af4
			__log_func("ready offloaded fd: %d", fd);
Packit Service aa3af4
		}
Packit Service aa3af4
	}
Packit Service aa3af4
}
Packit Service aa3af4
Packit Service aa3af4
void select_call::set_rfd_ready(int fd)
Packit Service aa3af4
{
Packit Service aa3af4
	// This function also checks that fd was in the original read set
Packit Service aa3af4
	if (!FD_ISSET(fd, m_readfds) && FD_ISSET(fd, &m_orig_readfds)) {
Packit Service aa3af4
		FD_SET(fd, m_readfds);
Packit Service aa3af4
		++m_n_ready_rfds;
Packit Service aa3af4
//		if (!FD_ISSET(fd, m_writefds))
Packit Service aa3af4
		++m_n_all_ready_fds;
Packit Service aa3af4
	}
Packit Service aa3af4
}
Packit Service aa3af4
Packit Service aa3af4
void select_call::set_offloaded_wfd_ready(int fd_index)
Packit Service aa3af4
{
Packit Service aa3af4
	if (m_p_offloaded_modes[fd_index] & OFF_WRITE) { //TODO: consider removing
Packit Service aa3af4
		int fd = m_p_all_offloaded_fds[fd_index];
Packit Service aa3af4
		if (!FD_ISSET(fd, m_writefds)) {
Packit Service aa3af4
			FD_SET(fd, m_writefds);
Packit Service aa3af4
			++m_n_ready_wfds;
Packit Service aa3af4
			++m_n_all_ready_fds;
Packit Service aa3af4
			__log_func("ready offloaded w fd: %d", fd);
Packit Service aa3af4
		}
Packit Service aa3af4
	}
Packit Service aa3af4
}
Packit Service aa3af4
Packit Service aa3af4
void select_call::set_wfd_ready(int fd)
Packit Service aa3af4
{
Packit Service aa3af4
	// This function also checks that fd was in the original read set
Packit Service aa3af4
	if (!FD_ISSET(fd, m_writefds) && FD_ISSET(fd, &m_orig_writefds)) { //TODO: why do we need the last 'if'??
Packit Service aa3af4
		FD_SET(fd, m_writefds);
Packit Service aa3af4
		++m_n_ready_wfds;
Packit Service aa3af4
//		if (!FD_ISSET(fd, m_readfds))
Packit Service aa3af4
		++m_n_all_ready_fds;
Packit Service aa3af4
		__log_func("ready w fd: %d", fd);
Packit Service aa3af4
	}
Packit Service aa3af4
}
Packit Service aa3af4
Packit Service aa3af4
void select_call::set_efd_ready(int fd, int errors)
Packit Service aa3af4
{
Packit Service aa3af4
	/* TODO currently consider errors as ready to write OR read */
Packit Service aa3af4
	NOT_IN_USE(errors);
Packit Service aa3af4
	NOT_IN_USE(fd);
Packit Service aa3af4
}