/*
 * Soft:        Keepalived is a failover program for the LVS project
 *              <www.linuxvirtualserver.org>. It monitor & manipulate
 *              a loadbalanced server pool using multi-layer checks.
 *
 * Part:        Process management
 *
 * Author:      Alexandre Cassen, <acassen@linux-vs.org>
 *
 *              This program is distributed in the hope that it will be useful,
 *              but WITHOUT ANY WARRANTY; without even the implied warranty of
 *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *              See the GNU General Public License for more details.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Copyright (C) 2001-2016 Alexandre Cassen, <acassen@gmail.com>
 */

#include "config.h"

#include <sys/mman.h>
#include <sys/resource.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <stdbool.h>

#include "process.h"
#include "utils.h"
#include "logger.h"
#if HAVE_DECL_RLIMIT_RTTIME == 1
#include "signals.h"
#endif
#include "warnings.h"
#include "bitops.h"

/* Realtime (SCHED_RR) priority recorded as being in effect; 0 means none */
static unsigned cur_rt_priority;
/* Ceiling used by increment_process_priority(); 0 disables automatic increments */
static unsigned max_rt_priority;
/* Last non-zero min_delay passed to set_process_priorities() */
long min_auto_priority_delay;

#if HAVE_DECL_RLIMIT_RTTIME == 1
/* RLIMIT_RTTIME hard limit applied by set_process_priorities(); 0 if none applied */
static unsigned cur_rlimit_rttime;
/* rlimit_rt remembered when set_process_priorities() is called without a realtime
 * priority; increment_process_priority() uses it when it first enables realtime */
static unsigned default_rlimit_rttime;
/* RLIMIT_RTTIME values that reset_process_priorities() restores */
static struct rlimit orig_rlimit_rt;
#endif

/* Nice value tracked for the process; updated by set_process_priority() and
 * reset_process_priority() */
static int cur_priority;
/* Nice value read with getpriority() the first time set_process_priority() runs */
static int orig_priority;
/* true once orig_priority has been recorded */
static bool orig_priority_set;
/* true after set_process_dont_swap() succeeds, until reset_process_priorities()
 * calls munlockall() */
static bool process_locked_in_memory;

/* RLIMIT_NOFILE as read on the first call to set_max_file_limit();
 * rlim_cur == 0 is treated as "not yet read" */
static struct rlimit orig_fd_limit;

/* rlimit values to set for child processes */
/* true when set_max_file_limit() left rlim_cur different from orig_fd_limit */
bool rlimit_nofile_set;
/* RLIMIT_CORE value stored by set_child_rlimit() */
static struct rlimit core;
/* true once set_child_rlimit() has stored a value in core; cleared by
 * reset_process_priorities() after applying it */
bool rlimit_core_set;

/*
 * Lock the process in memory so that it cannot be swapped out.
 *
 * stack_reserve is the number of bytes of stack to pre-fault before locking,
 * so that this much stack is already mapped (and hence locked) afterwards.
 * It must be non-zero: it sizes a variable length array.
 *
 * On success process_locked_in_memory is set so that
 * reset_process_priorities() knows to call munlockall().
 */
static void
set_process_dont_swap(size_t stack_reserve)
{
	long sc_pagesize = sysconf(_SC_PAGESIZE);
	/* Fall back to a conservative page size if sysconf() fails */
	size_t pagesize = sc_pagesize > 0 ? (size_t)sc_pagesize : 4096;
	/* volatile so that the compiler cannot discard the otherwise dead stores */
	volatile char stack[stack_reserve];
	size_t i;

	/* Ensure stack pages allocated */
	stack[0] = 23;		/* A random number */
	for (i = 0; i < stack_reserve; i += pagesize)
		stack[i] = stack[0];

	/* The array need not be page aligned, so also touch the final page */
	stack[stack_reserve - 1] = stack[0];

	/* MCL_CURRENT locks what is mapped now (including the stack touched
	 * above); MCL_FUTURE alone would only cover mappings created later. */
	if (mlockall(MCL_CURRENT | MCL_FUTURE) == -1)
		log_message(LOG_INFO, "Unable to lock process in memory - %s", strerror(errno));
	else
		process_locked_in_memory = true;
}

/*
 * Change the nice value of the current process to priority.
 *
 * The nice value in force before the first change is remembered in
 * orig_priority so that reset_process_priority() can restore it.
 * cur_priority is only updated if the change succeeds.
 */
static void
set_process_priority(int priority)
{
	/* Only the very first call sees the untouched, original nice value */
	if (!orig_priority_set) {
		orig_priority_set = true;
		orig_priority = getpriority(PRIO_PROCESS, 0);
	}

	errno = 0;
	if (setpriority(PRIO_PROCESS, 0, priority) != -1 || !errno) {
		cur_priority = priority;
		return;
	}

	log_message(LOG_INFO, "Unable to set process priority to %d - %s", priority, strerror(errno));
}

/*
 * Restore the nice value of the current process to orig_priority, the value
 * recorded by set_process_priority() before it first changed it.
 *
 * On failure a message is logged and cur_priority is left untouched.
 */
static void
reset_process_priority(void)
{
	errno = 0;
	if (setpriority(PRIO_PROCESS, 0, orig_priority) == -1 && errno) {
		log_message(LOG_INFO, "Unable to reset process priority - %m");
		return;
	}

	/* Record the value actually in force. Callers compare cur_priority with
	 * orig_priority, so recording 0 would make a process whose original nice
	 * value is non-zero look as though it still needs resetting. */
	cur_priority = orig_priority;
}

/*
 * Apply scheduling, priority and memory locking settings to the current process.
 *
 *   realtime_priority      if non-zero, switch to SCHED_RR (with
 *                          SCHED_RESET_ON_FORK) at this priority
 *   max_realtime_priority  ceiling for increment_process_priority();
 *                          -1 leaves the current ceiling unchanged
 *   min_delay              if non-zero, stored in min_auto_priority_delay
 *   rlimit_rt              if realtime scheduling was enabled and this is
 *                          non-zero, used as the RLIMIT_RTTIME hard limit with
 *                          the soft limit at half of it; when no realtime
 *                          priority is requested it is remembered as the default
 *                          for increment_process_priority()
 *   process_priority       nice value, applied only when realtime_priority is 0
 *   no_swap_stack_size     if greater than 0, lock the process in memory after
 *                          pre-faulting this many bytes of stack
 *
 * Failures are logged; nothing is returned to the caller.
 *
 * NOTE: This function generates a "stack protector not protecting local variables:
 * variable length buffer" warning (the variable length array itself is in
 * set_process_dont_swap(); presumably it is inlined here).
 */
RELAX_STACK_PROTECTOR_START
void
set_process_priorities(int realtime_priority, int max_realtime_priority, long min_delay,
#if HAVE_DECL_RLIMIT_RTTIME == 1
		       int rlimit_rt,
#endif
		       int process_priority, int no_swap_stack_size)
{
	if (max_realtime_priority != -1)
		max_rt_priority = max_realtime_priority;

	if (realtime_priority) {
		/* Set realtime priority */
		struct sched_param sp = {
			.sched_priority = realtime_priority
		};

		if (sched_setscheduler(getpid(), SCHED_RR | SCHED_RESET_ON_FORK, &sp))
			log_message(LOG_WARNING, "child process: cannot raise priority");
		else {
			cur_rt_priority = realtime_priority;
#if HAVE_DECL_RLIMIT_RTTIME == 1
			if (rlimit_rt)
			{
				struct rlimit rlim;

				set_sigxcpu_handler();

				/* Remember the limit in force before we first change it, so
				 * that reset_process_priorities() has something valid to
				 * restore. cur_rlimit_rttime == 0 means we have not (or no
				 * longer) applied a limit of our own. */
				if (!cur_rlimit_rttime &&
				    getrlimit(RLIMIT_RTTIME, &orig_rlimit_rt))
					log_message(LOG_WARNING, "child process cannot get realtime rlimit");

				rlim.rlim_cur = rlimit_rt / 2;	/* Get warnings if approaching limit */
				rlim.rlim_max = rlimit_rt;
				if (setrlimit(RLIMIT_RTTIME, &rlim))
					log_message(LOG_WARNING, "child process cannot set realtime rlimit");
				else
					cur_rlimit_rttime = rlimit_rt;
			}
#endif
		}
	}
	else {
		if (process_priority)
			set_process_priority(process_priority);

#if HAVE_DECL_RLIMIT_RTTIME == 1
		/* Not applied now; kept for a later increment_process_priority() */
		default_rlimit_rttime = rlimit_rt;
#endif
	}

	if (min_delay)
		min_auto_priority_delay = min_delay;

// TODO - measure max stack usage
	/* A negative size would convert to an enormous size_t stack reservation */
	if (no_swap_stack_size > 0)
		set_process_dont_swap((size_t)no_swap_stack_size);
}
RELAX_STACK_PROTECTOR_END

/*
 * Return to SCHED_OTHER if a realtime priority is recorded as in effect and,
 * where supported, put RLIMIT_RTTIME back to orig_rlimit_rt.
 * Each step that fails is logged and leaves its tracking variable unchanged.
 */
static void
reset_realtime_scheduling(void)
{
	struct sched_param sp = {
		.sched_priority = 0
	};

	if (!cur_rt_priority)
		return;

	if (sched_setscheduler(getpid(), SCHED_OTHER, &sp)) {
		log_message(LOG_WARNING, "child process: cannot reset realtime scheduling");
		return;
	}

	cur_rt_priority = 0;

#if HAVE_DECL_RLIMIT_RTTIME == 1
	if (!cur_rlimit_rttime)
		return;

	if (setrlimit(RLIMIT_RTTIME, &orig_rlimit_rt))
		log_message(LOG_WARNING, "child process cannot reset realtime rlimit");
	else
		cur_rlimit_rttime = 0;
#endif
}

/*
 * Undo the elevated settings made by this file: realtime scheduling, nice
 * value, memory locking and the open file limit. It also applies the core
 * file limit stored by set_child_rlimit(). local_fork() calls this in the
 * child process.
 */
void
reset_process_priorities(void)
{
	reset_realtime_scheduling();

	if (cur_priority != orig_priority)
		reset_process_priority();

	if (process_locked_in_memory) {
		process_locked_in_memory = false;
		munlockall();
	}

	/* Results of the setrlimit() calls below are deliberately not checked */
	if (rlimit_nofile_set) {
		rlimit_nofile_set = false;
		setrlimit(RLIMIT_NOFILE, &orig_fd_limit);
	}

	if (rlimit_core_set) {
		rlimit_core_set = false;
		setrlimit(RLIMIT_CORE, &core);
	}
}

/*
 * Step the realtime priority of the process up by one level.
 *
 * Does nothing unless a ceiling (max_rt_priority) has been configured, or if
 * the ceiling has already been reached. If no realtime priority is recorded
 * yet, the minimum SCHED_RR priority is used as the starting point.
 *
 * NOTE(review): cur_rt_priority is updated before the scheduler is asked to
 * change, and the message below is logged unconditionally, so both reflect
 * the requested value even if set_process_priorities() could not apply it.
 */
void
increment_process_priority(void)
{
	if (max_rt_priority == 0)
		return;

	if (cur_rt_priority == 0)
		cur_rt_priority = sched_get_priority_min(SCHED_RR);
	else if (cur_rt_priority < max_rt_priority)
		cur_rt_priority++;
	else
		return;

	/* Leave the ceiling, delay, nice value and memory locking alone; only
	 * request the default RLIMIT_RTTIME if no limit has been applied yet. */
	set_process_priorities(cur_rt_priority, -1, 0,
#if HAVE_DECL_RLIMIT_RTTIME
			       cur_rlimit_rttime ? 0 : default_rlimit_rttime,
#endif
			       0, 0);

	log_message(LOG_INFO, "Set realtime priority %u", cur_rt_priority);
}

unsigned __attribute__((pure))
get_cur_priority(void)
{
	return cur_rt_priority;
}

#if HAVE_DECL_RLIMIT_RTTIME == 1
/* Return the RLIMIT_RTTIME value currently recorded as applied (0 if none) */
unsigned __attribute__((pure))
get_cur_rlimit_rttime(void)
{
	const unsigned rttime = cur_rlimit_rttime;

	return rttime;
}
#endif

/*
 * Bind the calling process to the CPUs in set.
 *
 * process is only used to name the process in the warning that is logged on
 * failure. Returns 0 on success or when set is empty (no affinity requested),
 * -1 if sched_setaffinity() fails.
 */
int
set_process_cpu_affinity(cpu_set_t *set, const char *process)
{
	/* An empty set means that affinity is not being used */
	if (CPU_COUNT(set) == 0)
		return 0;

	if (sched_setaffinity(0, sizeof(cpu_set_t), set) == 0)
		return 0;

	log_message(LOG_WARNING, "unable to set cpu affinity to %s process (%m)", process);

	return -1;
}

int
get_process_cpu_affinity_string(cpu_set_t *set, char *buffer, size_t size)
{
	int i, num_cpus, len, s = size;
	char *cp = buffer;

	/* If not used then empty set */
	if (!CPU_COUNT(set))
		return 0;

	num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
	for (i = 0; i < num_cpus; i++) {
		if (!CPU_ISSET(i, set))
			continue;

		len = integer_to_string(i, cp, s);
		if (len < 0 || s <= len + 1) {
			*cp = '\0';
			return -1;
		}
		*(cp + len) = ' ';
		cp += len + 1;
		s -= len + 1;
	}

	*cp = '\0';
	return 0;
}

/*
 * Record a resource limit that is to be applied in child processes.
 *
 * Only RLIMIT_CORE is handled: *rlim is copied and rlimit_core_set is raised
 * so that reset_process_priorities() applies it. Any other resource is just
 * logged.
 */
void
set_child_rlimit(int resource, const struct rlimit *rlim)
{
	if (resource != RLIMIT_CORE) {
		log_message(LOG_INFO, "Unknown rlimit resource %d", resource);
		return;
	}

	rlimit_core_set = true;
	core = *rlim;
}

/*
 * fork() wrapper that resets the process settings in the child.
 *
 * Returns whatever fork() returned: 0 in the child, the child's pid in the
 * parent, or -1 on failure.
 */
pid_t
local_fork(void)
{
	const pid_t child = fork();

	/* Only the child (fork() returned 0) resets the elevated priorities */
	if (!child)
		reset_process_priorities();

	return child;
}

/*
 * Make sure the process may have at least fd_required file descriptors open.
 *
 * The first call records the existing RLIMIT_NOFILE in orig_fd_limit. The
 * limits requested are never below the original ones, so a smaller
 * fd_required on a later call brings the limit back to the original.
 * rlimit_nofile_set is left true when the soft limit requested differs from
 * the original, so that reset_process_priorities() restores it.
 */
void
set_max_file_limit(unsigned fd_required)
{
	struct rlimit limit = { .rlim_cur = 0 };
	bool at_original;

	if (orig_fd_limit.rlim_cur != 0) {
		/* Original already recorded; find out what is in force now */
		if (getrlimit(RLIMIT_NOFILE, &limit))
			log_message(LOG_INFO, "Failed to get current RLIMIT_NOFILE, errno %d", errno);
	} else if (getrlimit(RLIMIT_NOFILE, &orig_fd_limit))
		log_message(LOG_INFO, "Failed to get original RLIMIT_NOFILE, errno %d", errno);
	else
		limit = orig_fd_limit;

	/* Nothing to do if the original limit is in force and is sufficient */
	at_original = (limit.rlim_cur == orig_fd_limit.rlim_cur);
	if (at_original && fd_required <= orig_fd_limit.rlim_cur)
		return;

	/* Request the larger of the original limit and what is required */
	limit.rlim_cur = orig_fd_limit.rlim_cur;
	if (limit.rlim_cur < fd_required)
		limit.rlim_cur = fd_required;

	limit.rlim_max = orig_fd_limit.rlim_max;
	if (limit.rlim_max < fd_required)
		limit.rlim_max = fd_required;

	if (setrlimit(RLIMIT_NOFILE, &limit) != -1) {
		if (__test_bit(LOG_DETAIL_BIT, &debug))
			log_message(LOG_INFO, "Set open file limit to %" PRI_rlim_t ":%" PRI_rlim_t ".", limit.rlim_cur, limit.rlim_max);
	} else
		log_message(LOG_INFO, "Failed to set open file limit to %" PRI_rlim_t ":%" PRI_rlim_t " failed - errno %d", limit.rlim_cur, limit.rlim_max, errno);

	/* We don't want child processes to get excessive limits */
	rlimit_nofile_set = (limit.rlim_cur != orig_fd_limit.rlim_cur);
}