Blob Blame History Raw
/*
 * Soft:        Keepalived is a failover program for the LVS project
 *              <www.linuxvirtualserver.org>. It monitor & manipulate
 *              a loadbalanced server pool using multi-layer checks.
 *
 * Part:        PING checker.
 *
 * Author:      Jie Liu, <liujie165@huawei.com>
 *
 *              This program is distributed in the hope that it will be useful,
 *              but WITHOUT ANY WARRANTY; without even the implied warranty of
 *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *              See the GNU General Public License for more details.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Copyright (C) 2019-2019 Alexandre Cassen, <acassen@gmail.com>
 */
#include "config.h"

/* system includes */
#include <netinet/in.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp6.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <fcntl.h>

#include "check_ping.h"
#include "check_api.h"
#include "logger.h"
#include "layer4.h"
#include "parser.h"
#include "smtp.h"
#include "ipwrapper.h"

#define ICMP_BUFSIZE 128
#define SOCK_RECV_BUFF 128*1024

static const char * const ping_group_range = "/proc/sys/net/ipv4/ping_group_range";

static gid_t save_gid_min;
static bool checked_ping_group_range;

static uint16_t seq_no;

static void icmp_connect_thread(thread_ref_t);

bool
set_ping_group_range(bool set)
{
	char buf[10 + 1 + 10 + 1 + 1];	/* 2000000000<TAB>4294967295<NL> */
	int fd;
	ssize_t len, ret;
	unsigned long val[2];
	char *endptr;

	if (set == checked_ping_group_range)
		return true;

	fd = open(ping_group_range, O_RDWR);
	if (fd == -1)
		return false;
	len = read(fd, buf, sizeof(buf));

	if (len == -1 || len == sizeof(buf)) {
		close(fd);
		return false;
	}

	buf[len] = '\0';

	val[0] = strtoul(buf, &endptr, 10);
#if ULONG_MAX >= 1UL << 32
	if (val[0] >= 1UL << 32 || (*endptr != '\t' && *endptr != ' ')) {
		close(fd);
		return false;
	}
#endif

	val[1] = strtol(endptr + 1, &endptr, 10);
#if ULONG_MAX >= 1UL << 32
	if (val[1] >= 1UL << 32 || *endptr != '\n') {
		close(fd);
		return false;
	}
#endif

	checked_ping_group_range = set;

	if ((set && val[0] == 0) ||
	    (!set && val[0] == save_gid_min)) {
		close(fd);
		return true;
	}

	if (set && val[0] > 1 && val[1] > 1)
		log_message(LOG_INFO, "Warning: %s being expanded from %lu %lu to 0 %lu",
				ping_group_range, val[0], val[1], val[1]);

	len = sprintf(buf, "%u\t%lu\t\n", set ? 0 : save_gid_min, val[1]);

	lseek(fd, 0, SEEK_SET);
	ret = write(fd, buf, len);
	if (ret == -1)
		log_message(LOG_INFO, "Write %s failed - errno %d", ping_group_range, errno);
	else if (ret != len)
		log_message(LOG_INFO, "Write %s wrote %zd bytes instead of %zd", ping_group_range, ret, len);
	close(fd);

	if (set)
		save_gid_min = val[0];

	return true;
}

/* Configuration stream handling */
static void
free_ping_check(checker_t *checker)
{
	FREE(checker->co);
	FREE_PTR(checker->data);
	FREE(checker);
}

static void
dump_ping_check(FILE *fp, const checker_t *checker)
{
	conf_write(fp, "   Keepalive method = PING_CHECK");
	dump_checker_opts(fp, checker);
}

static bool
ping_check_compare(const checker_t *a, checker_t *b)
{
	return compare_conn_opts(a->co, b->co);
}

static void
ping_check_handler(__attribute__((unused)) const vector_t *strvec)
{
	ping_check_t *ping_check = sizeof(ping_check_t) ? MALLOC(sizeof (ping_check_t)) : NULL;

	/* queue new checker */
	queue_checker(free_ping_check, dump_ping_check, icmp_connect_thread,
		      ping_check_compare, ping_check, CHECKER_NEW_CO(), true);

	if (!checked_ping_group_range)
		set_ping_group_range(true);
}

static void
ping_check_end_handler(void)
{
	if (!check_conn_opts(CHECKER_GET_CO()))
		dequeue_new_checker();
}

void
install_ping_check_keyword(void)
{
	/* We don't want some common keywords */
	install_keyword("PING_CHECK", &ping_check_handler);
	install_sublevel();
	install_checker_common_keywords(true);
	install_sublevel_end_handler(ping_check_end_handler);
	install_sublevel_end();
}

static enum connect_result
ping_it(int fd, conn_opts_t* co)
{
	struct icmphdr *icmp_hdr;
	char send_buf[sizeof(*icmp_hdr) + ICMP_BUFSIZE];

	set_buf(send_buf + sizeof(*icmp_hdr), ICMP_BUFSIZE);

	icmp_hdr = (struct icmphdr *)send_buf;

	memset(icmp_hdr, 0, sizeof(*icmp_hdr));
	icmp_hdr->type = ICMP_ECHO;
	icmp_hdr->un.echo.sequence = seq_no++;

	if (sendto(fd, send_buf, sizeof(send_buf), 0, (struct sockaddr*)&co->dst, sizeof(struct sockaddr)) < 0) {
		log_message(LOG_INFO, "send ICMP packet fail");
		return connect_error;
	}
	return connect_success;
}

static enum connect_result
recv_it(int fd)
{
	ssize_t len;
	const struct icmphdr *icmp_hdr;
	char recv_buf[sizeof(*icmp_hdr) + ICMP_BUFSIZE];

	len = recv(fd, recv_buf, sizeof(recv_buf), 0);

	if (len < 0) {
		log_message(LOG_INFO, "recv ICMP packet error");
		return connect_error;
	}

	if ((size_t)len < sizeof(*icmp_hdr)) {
		log_message(LOG_INFO, "Error, got short ICMP packet, %zd bytes", len);
		return connect_error;
	}

	icmp_hdr = (const struct icmphdr *)recv_buf;
	if (icmp_hdr->type != ICMP_ECHOREPLY) {
		log_message(LOG_INFO, "Got ICMP packet with type 0x%x", icmp_hdr->type);
		return connect_error;
	}

	return connect_success;
}

static enum connect_result
ping6_it(int fd, conn_opts_t* co)
{
	struct icmp6_hdr* icmp6_hdr;
	char send_buf[sizeof(*icmp6_hdr) + ICMP_BUFSIZE];

	set_buf(send_buf + sizeof(*icmp6_hdr), ICMP_BUFSIZE);

	icmp6_hdr = (struct icmp6_hdr *)&send_buf;

	memset(icmp6_hdr, 0, sizeof(*icmp6_hdr));
	icmp6_hdr->icmp6_type = ICMP6_ECHO_REQUEST;
	icmp6_hdr->icmp6_seq = seq_no++;

	if (sendto(fd, send_buf, sizeof(send_buf), 0, (struct sockaddr_in6 *)&co->dst, sizeof(struct sockaddr_in6)) < 0) {
		log_message(LOG_INFO, "send ICMPv6 packet fail - errno %d", errno);
		return connect_error;
	}

	return connect_success;
}

static enum connect_result
recv6_it(int fd)
{
	ssize_t len;
	const struct icmp6_hdr* icmp6_hdr;
	char recv_buf[sizeof (*icmp6_hdr) + ICMP_BUFSIZE];

	len = recv(fd, recv_buf, sizeof(recv_buf), 0);

	if (len < 0) {
		log_message(LOG_INFO, "recv ICMPv6 packet error");
		return connect_error;
	}

	if ((size_t)len < sizeof(*icmp6_hdr)) {
		log_message(LOG_INFO, "Error, got short ICMPv6 packet, %zd bytes", len);
		return connect_error;
	}

	icmp6_hdr = (const struct icmp6_hdr*)recv_buf;
	if (icmp6_hdr->icmp6_type != ICMP6_ECHO_REPLY) {
		log_message(LOG_INFO, "Got ICMPv6 packet with type 0x%x", icmp6_hdr->icmp6_type);
		return connect_error;
	}

	return connect_success;
}

static void
icmp_epilog(thread_ref_t thread, bool is_success)
{
	checker_t *checker;
	unsigned long delay;
	bool checker_was_up;
	bool rs_was_alive;

	checker = THREAD_ARG(thread);

	delay = checker->delay_loop;
	if (is_success || ((checker->is_up || !checker->has_run) && checker->retry_it >= checker->retry)) {
		checker->retry_it = 0;

		if (is_success && (!checker->is_up || !checker->has_run)) {
			log_message(LOG_INFO, "ICMP connection to %s success."
					, FMT_CHK(checker));
			checker_was_up = checker->is_up;
			rs_was_alive = checker->rs->alive;
			update_svr_checker_state(UP, checker);
			if (checker->rs->smtp_alert && !checker_was_up &&
			    (rs_was_alive != checker->rs->alive || !global_data->no_checker_emails))
				smtp_alert(SMTP_MSG_RS, checker, NULL,
					   "=> ICMP CHECK succeed on service <=");
		} else if (!is_success &&
			   (checker->is_up || !checker->has_run)) {
			if (checker->retry && checker->has_run)
				log_message(LOG_INFO
				    , "ICMP CHECK on service %s of %s failed after %u retries."
				    , FMT_CHK(checker), FMT_VS(checker->vs)
				    , checker->retry);
			else
				log_message(LOG_INFO
				    , "ICMP CHECK on service %s failed."
				    , FMT_CHK(checker));
			checker_was_up = checker->is_up;
			rs_was_alive = checker->rs->alive;
			update_svr_checker_state(DOWN, checker);
			if (checker->rs->smtp_alert && checker_was_up &&
			    (rs_was_alive != checker->rs->alive || !global_data->no_checker_emails))
				smtp_alert(SMTP_MSG_RS, checker, NULL,
					   "=> ICMP CHECK failed on service <=");
		}
	} else if (checker->is_up) {
		delay = checker->delay_before_retry;
		++checker->retry_it;
	}

	checker->has_run = true;

	thread_add_timer(thread->master, icmp_connect_thread, checker, delay);
}

static void
icmp_check_thread(thread_ref_t thread)
{
	checker_t *checker = THREAD_ARG(thread);
	int status;

	if (thread->type == THREAD_READ_TIMEOUT) {
		if (checker->is_up &&
		    (global_data->checker_log_all_failures || checker->log_all_failures))
			log_message(LOG_INFO, "ICMP connection to address %s timeout.", FMT_CHK(checker));
		status = connect_error;
	} else
		status = checker->co->dst.ss_family == AF_INET ?
				recv_it(thread->u.f.fd) : recv6_it(thread->u.f.fd);

	/*
	 * If status = connect_success, then we start udp check with the record of icmp failed times.
	 * Otherwise we will do the icmp connect again until it reaches the unhealthy threshold.
	 * we handle fd uniform.
	 */
	thread_close_fd(thread);

	if (status == connect_success)
		icmp_epilog(thread, 1);
	else if (status == connect_error) {
		if (checker->is_up &&
		    thread->type != THREAD_READ_TIMEOUT &&
		    (global_data->checker_log_all_failures || checker->log_all_failures))
			log_message(LOG_INFO, "ICMP connection to %s of %s failed."
				,FMT_CHK(checker), FMT_VS(checker->vs));
		icmp_epilog(thread, 0);
	}

	return;
}

static void
icmp_connect_thread(thread_ref_t thread)
{
	checker_t *checker = THREAD_ARG(thread);
	conn_opts_t *co = checker->co;
	int fd;
	int status;
	int size = SOCK_RECV_BUFF;

	if (!checker->enabled) {
		thread_add_timer(thread->master, icmp_connect_thread, checker,
				checker->delay_loop);
		return;
	}

	 /*
	  * If we config a real server in several virtual server, the icmp_ratelimit should be cancelled.
	  * echo 0 > /proc/sys/net/ipv4/icmp_ratelimit
	  */
	if ((fd = socket(co->dst.ss_family, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK,
			 co->dst.ss_family == AF_INET ? IPPROTO_ICMP : IPPROTO_ICMPV6)) == -1) {
		log_message(LOG_INFO, "ICMP%s connect fail to create socket. Rescheduling.",
				co->dst.ss_family == AF_INET ? "" : "v6");
		thread_add_timer(thread->master, icmp_connect_thread, checker,
				checker->delay_loop);
		return;
	}

#if !HAVE_DECL_SOCK_NONBLOCK
	if (set_sock_flags(fd, F_SETFL, O_NONBLOCK))
		log_message(LOG_INFO, "Unable to set NONBLOCK on icmp_connect socket - %s (%d)", strerror(errno), errno);
#endif

#if !HAVE_DECL_SOCK_CLOEXEC
	if (set_sock_flags(fd, F_SETFD, FD_CLOEXEC))
		log_message(LOG_INFO, "Unable to set CLOEXEC on icmp_connect socket - %s (%d)", strerror(errno), errno);
#endif

	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &size, sizeof(size)))
		log_message(LOG_INFO, "setsockopt SO_RCVBUF for socket %d failed (%d) - %m", fd, errno);

	/*
	 * OK if setsockopt fails
	 * Prevent users from pinging broadcast or multicast addresses
	 */
	if (co->dst.ss_family == AF_INET)
		status = ping_it(fd, co);
	else
		status = ping6_it(fd, co);

	/* handle icmp send status & register check worker thread */
	if (udp_icmp_check_state(fd, status, thread, icmp_check_thread,
		co->connection_to)) {
		close(fd);
		icmp_epilog(thread, false);
	}
	return;
}

#ifdef THREAD_DUMP
void
register_check_ping_addresses(void)
{
	register_thread_address("icmp_check_thread", icmp_check_thread);
	register_thread_address("icmp_connect_thread", icmp_connect_thread);
}
#endif