/*
* Soft: Keepalived is a failover program for the LVS project
* <www.linuxvirtualserver.org>. It monitor & manipulate
* a loadbalanced server pool using multi-layer checks.
*
* Part: vrrp_if_config interface
*
* Author: Alexandre Cassen, <acassen@linux-vs.org>
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Copyright (C) 2001-2017 Alexandre Cassen, <acassen@gmail.com>
*/
/* The following parameters need to be set on the vmac interface and its parent:
*
* vmac interface:
* accept_local=1 // We need to be able to hear another instance multicasting its presence
* arp_ignore=1 // We mustn't reply to ARP requests on this interface for IP address on parent interface
* // and we mustn't only reply to addresses on the same subnet.
* rp_filter=0 // Allows us to receive on VMAC interface when it has no IP address.
*
* parent interface:
* arp_ignore=1 // We mustn't reply to ARP requests on this interface for vrrp IP address
* arp_filter=1 // We mustn't reply to ARP requests for our own IP address
*/
#include "config.h"
#include <fcntl.h>
#include "vrrp_if_config.h"
#include "keepalived_netlink.h"
#include "memory.h"
#ifdef _HAVE_IPV4_DEVCONF_
#include <linux/ip.h>
#include <stdint.h>
#include "vrrp_if.h"
#endif
#include <limits.h>
#include <unistd.h>
#include "logger.h"
#ifdef _HAVE_VRRP_VMAC_
/* Value of net.ipv4.conf.all.rp_filter saved by clear_rp_filter() before it
 * writes 0 there. UINT_MAX means no value is currently saved. */
static unsigned all_rp_filter = UINT_MAX;
/* Previous value of net.ipv4.conf.default.rp_filter, saved only when
 * clear_rp_filter() raised it. UINT_MAX means it was not changed. */
static unsigned default_rp_filter = UINT_MAX;
#endif
#ifdef _HAVE_IPV4_DEVCONF_
/* One per-interface IPv4 setting to apply via netlink. Arrays of these are
 * terminated by an entry whose param is 0. */
typedef struct sysctl_opts {
/* Attribute id passed to addattr32() (the tables in this file use IPV4_DEVCONF_* values); 0 ends a table */
uint32_t param;
/* Value to set for param */
uint32_t value;
} sysctl_opts_t;
#ifdef _HAVE_VRRP_VMAC_
static sysctl_opts_t parent_sysctl[] = {
{ IPV4_DEVCONF_ARP_IGNORE, 1 },
{ IPV4_DEVCONF_ARPFILTER, 1 },
{ 0, 0 }
};
static sysctl_opts_t vmac_sysctl[] = {
{ IPV4_DEVCONF_ARP_IGNORE, 1 },
{ IPV4_DEVCONF_ACCEPT_LOCAL, 1 },
{ IPV4_DEVCONF_RP_FILTER, 0 },
{ IPV4_DEVCONF_PROMOTE_SECONDARIES, 1 },
{ 0, 0}
};
#endif
#endif
/* Sysctl get and set functions */
/* Write the path "/proc/sys/<prefix>/<iface>/<parameter>" into dest as a
 * NUL-terminated string.
 *
 * No bounds checking is performed: the caller must supply a buffer large
 * enough for the whole path. */
static void
make_sysctl_filename(char *dest, const char* prefix, const char* iface, const char* parameter)
{
	static const char proc_sys[] = "/proc/sys/";
	const char *components[3];
	char *out = dest;
	size_t len;
	size_t i;

	components[0] = prefix;
	components[1] = iface;
	components[2] = parameter;

	/* Leading "/proc/sys/", without its terminating NUL */
	memcpy(out, proc_sys, sizeof proc_sys - 1);
	out += sizeof proc_sys - 1;

	/* Append the three components, separated by '/' */
	for (i = 0; i < 3; i++) {
		if (i != 0)
			*out++ = '/';
		len = strlen(components[i]);
		memcpy(out, components[i], len);
		out += len;
	}

	*out = '\0';
}
#if !defined _HAVE_IPV4_DEVCONF_ || defined _HAVE_VRRP_VMAC_
/* Write a single-digit value to /proc/sys/<prefix>/<iface>/<parameter>.
 *
 * Only the values 0-9 can be represented, since exactly one character is
 * written. Any larger value is rejected rather than being written as a
 * non-digit byte.
 *
 * Returns 0 on success, -1 if value is out of range, the file cannot be
 * opened for writing, or the write does not complete. */
static int
set_sysctl(const char* prefix, const char* iface, const char* parameter, unsigned value)
{
	char* filename;
	char buf[1];
	int fd;
	ssize_t len;

	/* We only write integers 0-9 */
	if (value > 9)
		return -1;

	/* Make the filename */
	filename = MALLOC(PATH_MAX);
	make_sysctl_filename(filename, prefix, iface, parameter);

	fd = open(filename, O_WRONLY);
	FREE(filename);
	if (fd < 0)
		return -1;

	buf[0] = (char)('0' + value);
	len = write(fd, buf, sizeof buf);
	close(fd);

	if (len != 1)
		return -1;

	/* Success */
	return 0;
}
#endif
/* Read a single-digit value from /proc/sys/<prefix>/<iface>/<parameter>.
 *
 * Only the first character of the file is examined.
 *
 * Returns the digit's value (0-9), or UINT_MAX if the file cannot be opened
 * or read, or if its first character is not a decimal digit. */
static unsigned
get_sysctl(const char* prefix, const char* iface, const char* parameter)
{
	char *filename;
	char buf[1];
	int fd;
	ssize_t len;

	/* Make the filename */
	filename = MALLOC(PATH_MAX);
	make_sysctl_filename(filename, prefix, iface, parameter);

	fd = open(filename, O_RDONLY);
	FREE(filename);
	if (fd < 0)
		return UINT_MAX;

	len = read(fd, buf, sizeof buf);
	close(fd);

	if (len <= 0)
		return UINT_MAX;

	/* We only read integers 0-9. Anything else would otherwise wrap to a
	 * meaningless large value, so report it as a read failure instead. */
	if (buf[0] < '0' || buf[0] > '9')
		return UINT_MAX;

	/* Return the value of the digit read */
	return (unsigned)(buf[0] - '0');
}
#ifdef _HAVE_IPV4_DEVCONF_
/* Begin a nested attribute of the given type at NLMSG_TAIL(nlh).
 *
 * The attribute header's type is set and nlh->nlmsg_len is advanced past the
 * header. nla_len is not set here; nest_end() fills it in.
 *
 * Returns a pointer to the new attribute header, to be passed to nest_end(). */
struct nlattr *
nest_start(struct nlmsghdr *nlh, unsigned short type)
{
	struct nlattr *hdr;

	hdr = NLMSG_TAIL(nlh);
	hdr->nla_type = type;

	/* Account for the header just added */
	nlh->nlmsg_len += sizeof(*hdr);

	return hdr;
}
/* Close a nested attribute opened by nest_start().
 *
 * nla is the position at which the nest's payload ends (callers in this file
 * pass NLMSG_TAIL of the message); nest is the header returned by
 * nest_start(). The nest's nla_len is set to the byte distance between them.
 *
 * Returns the length stored in nest->nla_len. */
size_t
nest_end(struct nlattr *nla, struct nlattr *nest)
{
	/* Byte distance via char *, since arithmetic on void * is not ISO C */
	nest->nla_len = (unsigned short)((char *)nla - (char *)nest);

	return nest->nla_len;
}
/* Send an RTM_NEWLINK request for interface ifindex that carries the settings
 * in sys_opts, nested as IFLA_AF_SPEC -> AF_INET -> IFLA_INET_CONF.
 *
 * sys_opts is an array terminated by an entry whose param is 0.
 *
 * Returns 0 on success, 1 if netlink_talk() reports failure. */
static inline int
netlink_set_interface_flags(int ifindex, const sysctl_opts_t *sys_opts)
{
	struct {
		struct nlmsghdr n;
		struct ifinfomsg ifi;
		char buf[64];
	} req;
	struct nlattr *af_spec;
	struct nlattr *af_inet;
	struct nlattr *inet_conf;
	const sysctl_opts_t *opt = sys_opts;

	memset(&req, 0, sizeof (req));

	req.n.nlmsg_len = NLMSG_LENGTH(sizeof (struct ifinfomsg));
	req.n.nlmsg_flags = NLM_F_REQUEST;
	req.n.nlmsg_type = RTM_NEWLINK;
	req.ifi.ifi_family = AF_UNSPEC;
	req.ifi.ifi_index = ifindex;

	/* Open the three nesting levels, outermost first */
	af_spec = nest_start(&req.n, IFLA_AF_SPEC);
	af_inet = nest_start(&req.n, AF_INET);
	inet_conf = nest_start(&req.n, IFLA_INET_CONF);

	/* One 32 bit attribute per requested setting */
	while (opt->param) {
		addattr32(&req.n, sizeof req, opt->param, opt->value);
		opt++;
	}

	/* Close the nesting levels, innermost first */
	nest_end(NLMSG_TAIL(&req.n), inet_conf);
	nest_end(NLMSG_TAIL(&req.n), af_inet);
	nest_end(NLMSG_TAIL(&req.n), af_spec);

	return netlink_talk(&nl_cmd, &req.n) < 0 ? 1 : 0;
}
#ifdef _HAVE_VRRP_VMAC_
/* Apply vmac_sysctl to the VMAC interface ifp and, if required, parent_sysctl
 * to its base interface base_ifp.
 *
 * base_ifp->reset_arp_config counts the users of the modified base interface
 * settings: if it is already non-zero it is just incremented; otherwise it is
 * set to 1 only when the base interface settings actually had to be changed.
 *
 * Returns 0 on success, -1 if either netlink request fails. */
static inline int
netlink_set_interface_parameters(const interface_t *ifp, interface_t *base_ifp)
{
	if (netlink_set_interface_flags(ifp->ifindex, vmac_sysctl))
		return -1;

	/* The base interface has already been configured; just take a reference */
	if (base_ifp->reset_arp_config) {
		base_ifp->reset_arp_config++;
		return 0;
	}

	/* arp_ignore and arp_filter already have the values we need */
	if (base_ifp->arp_ignore == 1 && base_ifp->arp_filter == 1)
		return 0;

	/* We can't use libnl3 since if the base interface type is a bridge, libnl3 sets ifi_family
	 * to AF_BRIDGE, whereas it should be set to AF_UNSPEC. The kernel function that handles
	 * RTM_SETLINK messages for AF_BRIDGE doesn't know how to process the IFLA_AF_SPEC attribute. */
	if (netlink_set_interface_flags(base_ifp->ifindex, parent_sysctl)) {
		log_message(LOG_INFO, "Set base flags on %s failed for VMAC %s", base_ifp->ifname, ifp->ifname);
		return -1;
	}

	base_ifp->reset_arp_config = 1;

	return 0;
}
/* Write the arp_ignore and arp_filter values held in ifp back to the
 * interface via netlink.
 *
 * Returns 0 if the interface has no ifindex or the request succeeds,
 * otherwise the non-zero result of netlink_set_interface_flags(). */
static inline int
netlink_reset_interface_parameters(const interface_t* ifp)
{
	/* See netlink_set_interface_parameters for why libnl3 can't be used */
	sysctl_opts_t restore_opts[] = {
		{ IPV4_DEVCONF_ARP_IGNORE, ifp->arp_ignore },
		{ IPV4_DEVCONF_ARPFILTER, ifp->arp_filter },
		{ 0, 0 }
	};
	int rc;

	/* If the interface doesn't exist, there is nothing we can change */
	if (!ifp->ifindex)
		return 0;

	rc = netlink_set_interface_flags(ifp->ifindex, restore_opts);
	if (rc)
		log_message(LOG_INFO, "reset interface flags on %s failed", ifp->ifname);

	return rc;
}
/* Netlink (devconf) implementation of set_interface_parameters(): configure
 * the VMAC interface ifp and its base interface, logging on failure. */
static inline void
set_interface_parameters_devconf(const interface_t *ifp, interface_t *base_ifp)
{
	const int rc = netlink_set_interface_parameters(ifp, base_ifp);

	if (rc != 0)
		log_message(LOG_INFO, "Unable to set parameters for %s", ifp->ifname);
}
/* Netlink (devconf) implementation of reset_interface_parameters(): drop one
 * reference on the base interface's modified ARP settings and restore them
 * when the last reference goes, logging on failure. */
static inline void
reset_interface_parameters_devconf(interface_t *base_ifp)
{
	/* Nothing was changed on this interface */
	if (!base_ifp->reset_arp_config)
		return;

	/* Other users still need the modified settings */
	if (--base_ifp->reset_arp_config != 0)
		return;

	if (netlink_reset_interface_parameters(base_ifp))
		log_message(LOG_INFO, "Unable to reset parameters for %s", base_ifp->ifname);
}
#endif
/* Netlink (devconf) implementation: request promote_secondaries = 1 on ifp,
 * unless ifp->promote_secondaries is already set. The result of the netlink
 * request is not checked. */
static inline void
set_promote_secondaries_devconf(interface_t *ifp)
{
	if (!ifp->promote_secondaries) {
		sysctl_opts_t enable_opts[] = {
			{ IPV4_DEVCONF_PROMOTE_SECONDARIES, 1 },
			{ 0, 0 }
		};

		netlink_set_interface_flags(ifp->ifindex, enable_opts);
	}
}
/* Netlink (devconf) implementation: request promote_secondaries = 0 on ifp.
 * The result of the netlink request is not checked. */
static inline void
reset_promote_secondaries_devconf(interface_t *ifp)
{
	sysctl_opts_t disable_opts[] = {
		{ IPV4_DEVCONF_PROMOTE_SECONDARIES, 0 },
		{ 0, 0 }
	};

	netlink_set_interface_flags(ifp->ifindex, disable_opts);
}
#else
#ifdef _HAVE_VRRP_VMAC_
/* /proc/sys implementation of set_interface_parameters().
 *
 * Sets arp_ignore=1, accept_local=1, rp_filter=0 and promote_secondaries=1 on
 * the VMAC interface ifp. For the base interface, if reset_arp_config is
 * already non-zero it is incremented; otherwise the current arp_ignore and
 * arp_filter values are read and saved in base_ifp, each is set to 1 if it
 * was readable and not already 1, and reset_arp_config is set to 1.
 *
 * Results of the individual writes are not checked. */
static inline void
set_interface_parameters_sysctl(const interface_t *ifp, interface_t *base_ifp)
{
	static const char conf_dir[] = "net/ipv4/conf";
	unsigned current;

	set_sysctl(conf_dir, ifp->ifname, "arp_ignore", 1);
	set_sysctl(conf_dir, ifp->ifname, "accept_local", 1);
	set_sysctl(conf_dir, ifp->ifname, "rp_filter", 0);
	set_sysctl(conf_dir, ifp->ifname, "promote_secondaries", 1);

	/* The base interface has already been configured; just take a reference */
	if (base_ifp->reset_arp_config) {
		base_ifp->reset_arp_config++;
		return;
	}

	/* Remember the current arp_ignore, and change it only if necessary */
	current = get_sysctl(conf_dir, base_ifp->ifname, "arp_ignore");
	if (current != UINT_MAX) {
		base_ifp->arp_ignore = (uint32_t)current;
		if (base_ifp->arp_ignore != 1)
			set_sysctl(conf_dir, base_ifp->ifname, "arp_ignore", 1);
	}

	/* Likewise for arp_filter */
	current = get_sysctl(conf_dir, base_ifp->ifname, "arp_filter");
	if (current != UINT_MAX) {
		base_ifp->arp_filter = (uint32_t)current;
		if (base_ifp->arp_filter != 1)
			set_sysctl(conf_dir, base_ifp->ifname, "arp_filter", 1);
	}

	base_ifp->reset_arp_config = 1;
}
/* /proc/sys implementation of reset_interface_parameters(): drop one
 * reference on the base interface's modified ARP settings and, when the last
 * reference goes, write back the arp_ignore and arp_filter values saved in
 * base_ifp. Results of the writes are not checked. */
static inline void
reset_interface_parameters_sysctl(interface_t *base_ifp)
{
	static const char conf_dir[] = "net/ipv4/conf";

	/* Nothing was changed on this interface */
	if (!base_ifp->reset_arp_config)
		return;

	/* Other users still need the modified settings */
	if (--base_ifp->reset_arp_config != 0)
		return;

	set_sysctl(conf_dir, base_ifp->ifname, "arp_ignore", (int)base_ifp->arp_ignore);
	set_sysctl(conf_dir, base_ifp->ifname, "arp_filter", (int)base_ifp->arp_filter);
}
#endif
/* /proc/sys implementation: make sure promote_secondaries is 1 on ifp.
 *
 * If the kernel value is already 1, this is recorded in
 * ifp->promote_secondaries and nothing is written; otherwise 1 is written.
 * The result of the write is not checked. */
static inline void
set_promote_secondaries_sysctl(interface_t *ifp)
{
	const unsigned current = get_sysctl("net/ipv4/conf", ifp->ifname, "promote_secondaries");

	if (current != 1)
		set_sysctl("net/ipv4/conf", ifp->ifname, "promote_secondaries", 1);
	else
		ifp->promote_secondaries = true;
}
/* /proc/sys implementation: write promote_secondaries = 0 for ifp.
 * The result of the write is not checked. */
static inline void
reset_promote_secondaries_sysctl(interface_t *ifp)
{
	const char *ifname = ifp->ifname;

	set_sysctl("net/ipv4/conf", ifname, "promote_secondaries", 0);
}
#endif
/* Ensure promote_secondaries is enabled on ifp, with reference counting.
 *
 * Does nothing if ifp->promote_secondaries is already set. Otherwise
 * ifp->reset_promote_secondaries counts the callers; only the first caller
 * actually changes the interface setting. Each call should be balanced by a
 * call to reset_promote_secondaries(). */
void
set_promote_secondaries(interface_t *ifp)
{
	if (ifp->promote_secondaries)
		return;

	/* Only the first user needs to change the setting */
	if (ifp->reset_promote_secondaries++)
		return;

#ifdef _HAVE_IPV4_DEVCONF_
	set_promote_secondaries_devconf(ifp);
#else
	set_promote_secondaries_sysctl(ifp);

	/* set_promote_secondaries_sysctl() sets ifp->promote_secondaries when it
	 * finds the kernel value is already 1 and so writes nothing. In that case
	 * we changed nothing, so drop the reference taken above; otherwise the
	 * matching reset_promote_secondaries() would write 0 and clobber a
	 * setting we did not make. */
	if (ifp->promote_secondaries)
		ifp->reset_promote_secondaries = 0;
#endif
}
/* Drop one reference taken by set_promote_secondaries(). When the last
 * reference is dropped, promote_secondaries is set back to 0 on ifp.
 * Does nothing if no reference is held. */
void
reset_promote_secondaries(interface_t *ifp)
{
	/* We never changed the setting */
	if (!ifp->reset_promote_secondaries)
		return;

	/* There are still other users */
	if (--ifp->reset_promote_secondaries)
		return;

#ifdef _HAVE_IPV4_DEVCONF_
	reset_promote_secondaries_devconf(ifp);
#else
	reset_promote_secondaries_sysctl(ifp);
#endif
}
#ifdef _HAVE_VRRP_VMAC_
/* IPv4 VMAC interfaces require rp_filter to be 0; this in turn requires
* net.ipv4.conf.all.rp_filter to be 0, but if it is non-zero, then all
* interfaces will be operating with a non-zero value of rp_filter.
* In this function, if all.rp_filter > 0 and default.rp_filter < all.rp_filter,
* we first set default.rp_filter to the current value of all.rp_filter,
* so that any new interfaces are created with the current value of all.rp_filter.
* We then iterate through all interfaces, and if {interface}.rp_filter < all.rp_filter
* we set {interface}.rp_filter = all.rp_filter.
* Finally we set all.rp_filter = 0.
*
* This should not alter the operation of any interface, or any interface
* subsequently created, but it does allow us to set rp_filter = 0
* on vmac interfaces.
*/
/* Set net.ipv4.conf.all.rp_filter to 0 without lowering the rp_filter value
 * seen by any interface.
 *
 * If all/rp_filter cannot be read, or is already 0, nothing is changed.
 * Otherwise its value is saved in all_rp_filter, default/rp_filter and each
 * interface's rp_filter are raised to at least that value, and finally 0 is
 * written to all/rp_filter. The values that are changed are recorded
 * (default_rp_filter, ifp->rp_filter) for restore_rp_filter() to use.
 * Results of the individual writes are not checked. */
static void
clear_rp_filter(void)
{
list ifs;
element e;
interface_t *ifp;
unsigned rp_filter;
#ifdef _HAVE_IPV4_DEVCONF_
/* The value (1) is a placeholder; it is overwritten with all_rp_filter below */
sysctl_opts_t rpfilter_sysctl[] = { { IPV4_DEVCONF_RP_FILTER, 1 }, { 0, 0} };
#endif
rp_filter = get_sysctl("net/ipv4/conf", "all", "rp_filter");
if (rp_filter == UINT_MAX) {
log_message(LOG_INFO, "Unable to read sysctl net.ipv4.conf.all.rp_filter");
return;
}
/* Nothing to do if all/rp_filter is already 0 */
if (rp_filter == 0)
return;
/* Save current value of all/rp_filter */
all_rp_filter = rp_filter;
/* We want to ensure that default/rp_filter is at least the value of all/rp_filter */
/* If this read fails, UINT_MAX is not less than all_rp_filter, so default/rp_filter is left alone */
rp_filter = get_sysctl("net/ipv4/conf", "default", "rp_filter");
if (rp_filter < all_rp_filter) {
log_message(LOG_INFO, "NOTICE: setting sysctl net.ipv4.conf.default.rp_filter from %d to %d", rp_filter, all_rp_filter);
set_sysctl("net/ipv4/conf", "default", "rp_filter", all_rp_filter);
/* Remember the original value so that restore_rp_filter() can put it back */
default_rp_filter = rp_filter;
}
/* Now ensure rp_filter for all interfaces is at least all/rp_filter. */
#ifdef _HAVE_IPV4_DEVCONF_
rpfilter_sysctl[0].value = all_rp_filter;
#endif
kernel_netlink_poll(); /* Update our view of interfaces first */
ifs = get_if_list();
LIST_FOREACH(ifs, ifp, e) {
/* Skip interfaces that have no ifindex */
if (!ifp->ifindex)
continue;
#ifndef _HAVE_IPV4_DEVCONF_
/* Without devconf support the current value is read from /proc/sys here.
 * With devconf support ifp->rp_filter is used as it stands - presumably
 * kept up to date by the netlink code; confirm against that code. */
if ((ifp->rp_filter = get_sysctl("net/ipv4/conf", ifp->ifname, "rp_filter")) == UINT_MAX)
log_message(LOG_INFO, "Unable to read rp_filter for %s", ifp->ifname);
else
#endif
/* ifp->rp_filter keeps the interface's original value when we raise it */
if (ifp->rp_filter < all_rp_filter) {
#ifdef _HAVE_IPV4_DEVCONF_
netlink_set_interface_flags(ifp->ifindex, rpfilter_sysctl);
#else
set_sysctl("net/ipv4/conf", ifp->ifname, "rp_filter", all_rp_filter);
#endif
}
else {
/* Indicate we are not setting it */
ifp->rp_filter = UINT_MAX;
}
}
/* We have now made sure that all the interfaces have rp_filter >= all_rp_filter */
log_message(LOG_INFO, "NOTICE: setting sysctl net.ipv4.conf.all.rp_filter from %d to 0", all_rp_filter);
set_sysctl("net/ipv4/conf", "all", "rp_filter", 0);
}
void
restore_rp_filter(void)
{
list ifs;
element e;
interface_t *ifp;
unsigned rp_filter;
#ifdef _HAVE_IPV4_DEVCONF_
sysctl_opts_t rpfilter_sysctl[] = { { IPV4_DEVCONF_RP_FILTER, 1 }, { 0, 0} };
#endif
/* Restore the original settings of rp_filter, but only if they
* are the same as what we set them to */
if (all_rp_filter == UINT_MAX)
return;
rp_filter = get_sysctl("net/ipv4/conf", "all", "rp_filter");
log_message(LOG_INFO, "restore_rp_filter - all/rp_filter = %d", rp_filter);
if (rp_filter == 0) {
log_message(LOG_INFO, "NOTICE: resetting sysctl net.ipv4.conf.all.rp_filter to %d", all_rp_filter);
set_sysctl("net/ipv4/conf", "all", "rp_filter", all_rp_filter);
}
if (default_rp_filter != UINT_MAX) {
rp_filter = get_sysctl("net/ipv4/conf", "default", "rp_filter");
if (rp_filter == all_rp_filter) {
log_message(LOG_INFO, "NOTICE: resetting sysctl net.ipv4.conf.default.rp_filter to %d", default_rp_filter);
set_sysctl("net/ipv4/conf", "default", "rp_filter", default_rp_filter);
}
default_rp_filter = UINT_MAX;
}
ifs = get_if_list();
LIST_FOREACH(ifs, ifp, e) {
if (ifp->rp_filter != UINT_MAX) {
rp_filter = get_sysctl("net/ipv4/conf", ifp->ifname, "rp_filter");
if (rp_filter == all_rp_filter) {
#ifdef _HAVE_IPV4_DEVCONF_
rpfilter_sysctl[0].value = ifp->rp_filter;
netlink_set_interface_flags(ifp->ifindex, rpfilter_sysctl);
#else
set_sysctl("net/ipv4/conf", ifp->ifname, "rp_filter", ifp->rp_filter);
#endif
}
}
}
all_rp_filter = UINT_MAX;
}
/* Configure the VMAC interface ifp and its base interface base_ifp.
 *
 * If no all/rp_filter value is currently saved, clear_rp_filter() is run
 * first. The interface settings are then applied via netlink or /proc/sys,
 * depending on the build configuration. */
void
set_interface_parameters(const interface_t *ifp, interface_t *base_ifp)
{
	if (all_rp_filter == UINT_MAX) {
		clear_rp_filter();
	}

#ifdef _HAVE_IPV4_DEVCONF_
	set_interface_parameters_devconf(ifp, base_ifp);
#else
	set_interface_parameters_sysctl(ifp, base_ifp);
#endif
}
/* Release the base interface settings taken by set_interface_parameters(),
 * via netlink or /proc/sys depending on the build configuration. */
void
reset_interface_parameters(interface_t *base_ifp)
{
#ifdef _HAVE_IPV4_DEVCONF_
	reset_interface_parameters_devconf(base_ifp);
#else
	reset_interface_parameters_sysctl(base_ifp);
#endif
}
/* Enable or disable IPv6 on ifp by writing net/ipv6/conf/<ifname>/disable_ipv6
 * (0 to enable, 1 to disable). The result of the write is not checked. */
void
link_set_ipv6(const interface_t* ifp, bool enable)
{
	unsigned disable_ipv6;

	if (enable)
		disable_ipv6 = 0;
	else
		disable_ipv6 = 1;

	/* There is no direct way to set IPv6 options */
	set_sysctl("net/ipv6/conf", ifp->ifname, "disable_ipv6", disable_ipv6);
}
#endif
/* Report whether net/ipv6/conf/<ifname>/forwarding is non-zero for ifp.
 *
 * NOTE: get_sysctl() returns UINT_MAX when the value cannot be read, and that
 * is non-zero, so a failed read is also reported as true. */
bool
get_ipv6_forwarding(const interface_t* ifp)
{
	const unsigned forwarding = get_sysctl("net/ipv6/conf", ifp->ifname, "forwarding");

	return forwarding != 0;
}