Blob Blame History Raw
/*
 * Soft:        Keepalived is a failover program for the LVS project
 *              <www.linuxvirtualserver.org>. It monitor & manipulate
 *              a loadbalanced server pool using multi-layer checks.
 *
 * Part:        NETLINK kernel command channel.
 *
 * Author:      Alexandre Cassen, <acassen@linux-vs.org>
 *
 *              This program is distributed in the hope that it will be useful,
 *              but WITHOUT ANY WARRANTY; without even the implied warranty of
 *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *              See the GNU General Public License for more details.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Copyright (C) 2001-2017 Alexandre Cassen, <acassen@gmail.com>
 */

#include "config.h"

/* global include */
#include <stdlib.h>
#include <fcntl.h>
#include <sys/socket.h>
#include <string.h>
#include <errno.h>
#include <sys/uio.h>
#include <net/if_arp.h>
#include <arpa/inet.h>
#include <time.h>
#ifdef _WITH_VRRP_
#include <linux/version.h>
#ifdef _HAVE_FIB_ROUTING_
#include <linux/fib_rules.h>
#endif
#endif
#include <linux/ip.h>
#include <unistd.h>

#ifdef THREAD_DUMP
#include "scheduler.h"
#endif

/* local include */
#include "keepalived_netlink.h"
#ifdef _WITH_LVS_
#include "check_api.h"
#endif
#ifdef _WITH_VRRP_
#include "vrrp_scheduler.h"
#include "vrrp_track.h"
#include "vrrp_data.h"
#ifdef _HAVE_VRRP_VMAC_
#include "vrrp_vmac.h"
#endif
#ifdef _HAVE_FIB_ROUTING_
#include "vrrp_iproute.h"
#include "vrrp_iprule.h"
#endif
#endif
#include "logger.h"
#include "scheduler.h"
#include "utils.h"
#include "bitops.h"
#if !HAVE_DECL_SOCK_NONBLOCK
#include "old_socket.h"
#endif
#include "vrrp_ipaddress.h"
#include "global_data.h"

/* This seems a nasty hack, but it's what iproute2 does */
#ifndef SOL_NETLINK
#define SOL_NETLINK 270
#endif

/* Default values */
#define IF_DEFAULT_BUFSIZE	(64*1024)

/* Global vars */
/* Netlink handle used for sending commands; fd stays -1 while no socket is open */
nl_handle_t nl_cmd = { .fd = -1 };	/* Command channel */
#ifdef _WITH_VRRP_
int netlink_error_ignore;	/* If we get this error, ignore it */
#endif

/* Static vars */
/* Netlink handle whose socket is subscribed to kernel multicast groups;
 * fd stays -1 while no socket is open */
static nl_handle_t nl_kernel = { .fd = -1 };	/* Kernel reflection channel */

#ifdef _NETLINK_TIMERS_
/* The maximum netlink command we use is RTM_DELRULE.
 * If that changes, the following definition will need changing. */
#define MAX_NETLINK_TIMER	RTM_DELRULE

/* Per-command statistics, indexed by netlink command number (0..MAX_NETLINK_TIMER).
 * They are reported and reset by report_and_clear_netlink_timers(). */
static struct timeval netlink_times[MAX_NETLINK_TIMER+1];	/* time reported per command */
static unsigned netlink_count[MAX_NETLINK_TIMER+1];	/* number of calls reported per command */
#ifdef _WITH_VRRP_
/* NOTE(review): presumably bracket a timed netlink operation - used outside this section */
static struct timeval start_time, end_time;
#endif

/* NOTE(review): presumably enables collection of the statistics above - set outside this section */
bool do_netlink_timers;
#endif

#ifdef _NETLINK_TIMERS_
/*
 * Log the accumulated statistics for every netlink command that has been
 * used at least once, then reset those statistics to zero.
 *
 * str is a caller supplied label that is included in the heading line.
 */
void
report_and_clear_netlink_timers(const char * str)
{
	int i;

	log_message(LOG_INFO, "Netlink timers - %s", str);
	for (i = 0; i <= MAX_NETLINK_TIMER; i++) {
		if (!netlink_count[i])
			continue;

		/* netlink_count[] is unsigned, and the widths of time_t and
		 * suseconds_t vary between platforms, so use %u and cast the
		 * timeval members to match the %ld conversions. */
		log_message(LOG_INFO, "  netlink cmd %d (%u calls), time %ld.%6.6ld", i, netlink_count[i], (long)netlink_times[i].tv_sec, (long)netlink_times[i].tv_usec);

		netlink_times[i].tv_sec = 0;
		netlink_times[i].tv_usec = 0;
		netlink_count[i] = 0;
	}
}
#endif

/*
 * Return the symbolic name of a netlink message type for use in log
 * messages, or an empty string if the type is not one we name.
 */
static char *
get_nl_msg_type(unsigned type)
{
	static const struct {
		unsigned	msg_type;
		char		*msg_name;
	} known_types[] = {
		{ RTM_NEWLINK,	"RTM_NEWLINK" },
		{ RTM_DELLINK,	"RTM_DELLINK" },
		{ RTM_NEWADDR,	"RTM_NEWADDR" },
		{ RTM_DELADDR,	"RTM_DELADDR" },
		{ RTM_NEWROUTE,	"RTM_NEWROUTE" },
		{ RTM_DELROUTE,	"RTM_DELROUTE" },
		{ RTM_NEWRULE,	"RTM_NEWRULE" },
		{ RTM_DELRULE,	"RTM_DELRULE" },
		{ RTM_GETLINK,	"RTM_GETLINK" },
		{ RTM_GETADDR,	"RTM_GETADDR" },
	};
	size_t idx;

	for (idx = 0; idx < sizeof(known_types) / sizeof(known_types[0]); idx++) {
		if (known_types[idx].msg_type == type)
			return known_types[idx].msg_name;
	}

	return "";
}
/*
 * Check whether the address reported in a netlink address message is the
 * same as a configured address.
 *
 * ifa      - header of the netlink address message (supplies the family)
 * addr     - the raw address from the message (struct in_addr or struct in6_addr)
 * vip_addr - the configured address to compare against
 * ifp      - the interface the message refers to
 *
 * Returns true only if family, interface and address all match.
 */
static inline bool
addr_is_equal(struct ifaddrmsg* ifa, void* addr, ip_address_t* vip_addr, interface_t *ifp)
{
	const struct in_addr *v4;

	if (vip_addr->ifa.ifa_family != ifa->ifa_family || vip_addr->ifp != ifp)
		return false;

	if (vip_addr->ifa.ifa_family != AF_INET) {
		/* All 128 bits of the IPv6 address must be identical */
		return !memcmp(&vip_addr->u.sin6_addr, addr, sizeof(vip_addr->u.sin6_addr));
	}

	v4 = addr;
	return v4->s_addr == vip_addr->u.sin.sin_addr.s_addr;
}

#ifdef _WITH_VRRP_
/*
 * Find the vrrp instance, amongst those tracking interface ifp, that is
 * currently master and has the given address configured as a VIP or eVIP.
 *
 * Returns the owning vrrp instance, or NULL if no master instance owns the
 * address or if the matching address is marked dont_track.
 */
static vrrp_t *
address_is_ours(struct ifaddrmsg* ifa, struct in_addr* addr, interface_t* ifp)
{
	element track_e, addr_e;
	tracking_vrrp_t *tracker;
	vrrp_t *instance;
	ip_address_t *candidate;

	LIST_FOREACH(ifp->tracking_vrrp, tracker, track_e) {
		instance = tracker->vrrp;

		/* Only a master instance will have its addresses configured */
		if (instance->state == VRRP_STATE_MAST) {
			/* VIPs are only checked if they are of the reported family */
			if (instance->family == ifa->ifa_family) {
				LIST_FOREACH(instance->vip, candidate, addr_e) {
					if (!addr_is_equal(ifa, addr, candidate, ifp))
						continue;

					if (candidate->dont_track)
						return NULL;
					return instance;
				}
			}

			/* eVIPs are checked regardless of the instance's family */
			LIST_FOREACH(instance->evip, candidate, addr_e) {
				if (!addr_is_equal(ifa, addr, candidate, ifp))
					continue;

				if (candidate->dont_track)
					return NULL;
				return instance;
			}
		}
	}

	return NULL;
}

#ifdef _HAVE_FIB_ROUTING_
/*
 * Compare a raw address with a configured address.
 *
 * family - AF_INET selects an IPv4 comparison, anything else an IPv6 one
 * addr1  - pointer to a struct in_addr or struct in6_addr
 * addr2  - the configured address
 *
 * NOTE: the sense is that of a "differs" test - the function returns
 * true if the addresses are DIFFERENT and false if they are the same.
 */
static bool
compare_addr(int family, void *addr1, ip_address_t *addr2)
{
	if (family == AF_INET) {
		const struct in_addr *v4 = addr1;

		return v4->s_addr != addr2->u.sin.sin_addr.s_addr;
	}

	/* IPv6: any difference in the 128 bits is a mismatch */
	return memcmp(addr1, &addr2->u.sin6_addr, sizeof(addr2->u.sin6_addr)) != 0;
}

/*
 * Determine whether a route reported by the kernel is one that we have
 * configured, either as a virtual route of a vrrp instance or as a
 * static route.
 *
 * rt       - the route message header
 * tb       - the parsed route attributes, indexed by RTA_* type
 * ret_vrrp - set to the owning vrrp instance if the route is a virtual
 *            route, otherwise set to NULL
 *
 * Returns the matching configured route, or NULL if the route is not ours.
 */
static ip_route_t *
route_is_ours(struct rtmsg* rt, struct rtattr *tb[RTA_MAX + 1], vrrp_t** ret_vrrp)
{
	uint32_t table;
	int family;
	int mask_len = rt->rtm_dst_len;
	uint32_t priority = 0;
	uint8_t tos = rt->rtm_tos;
	element e, e1;
	vrrp_t *vrrp;
	ip_route_t *route;
	union {
		struct in_addr in;
		struct in6_addr in6;
	} default_addr;
	void *dst_addr;

	*ret_vrrp = NULL;

	table = tb[RTA_TABLE] ? *(uint32_t *)RTA_DATA(tb[RTA_TABLE]) : rt->rtm_table;
	family = rt->rtm_family;
	if (tb[RTA_PRIORITY])
		priority = *(uint32_t *)RTA_DATA(tb[RTA_PRIORITY]);

	/* A message without RTA_DST (e.g. a default route) has no destination
	 * attribute to dereference, so compare against the all zeroes address.
	 * This must be done for both the virtual and the static routes. */
	if (tb[RTA_DST])
		dst_addr = RTA_DATA(tb[RTA_DST]);
	else {
		memset(&default_addr, 0, sizeof(default_addr));
		dst_addr = &default_addr;
	}

	LIST_FOREACH(vrrp_data->vrrp, vrrp, e) {
		LIST_FOREACH(vrrp->vroutes, route, e1) {
			if (table != route->table ||
			    family != route->family ||
			    mask_len != route->dst->ifa.ifa_prefixlen ||
			    priority != route->metric ||
			    tos != route->tos)
				continue;

			if (route->oif) {
				if (!tb[RTA_OIF] || route->oif->ifindex != *(uint32_t *)RTA_DATA(tb[RTA_OIF]))
					continue;
			} else {
				/* No interface was configured; if the route has been set,
				 * check against the interface recorded for it */
				if (route->set && route->configured_ifindex &&
				    (!tb[RTA_OIF] || route->configured_ifindex != *(uint32_t *)RTA_DATA(tb[RTA_OIF])))
					continue;
			}

			/* compare_addr() returns true if the addresses differ */
			if (compare_addr(family, dst_addr, route->dst))
				continue;

			*ret_vrrp = vrrp;
			return route;
		}
	}

	/* Now check the static routes */
	LIST_FOREACH(vrrp_data->static_routes, route, e) {
		if (table != route->table ||
		    family != route->family ||
		    mask_len != route->dst->ifa.ifa_prefixlen ||
		    tos != route->tos)
			continue;

		if (compare_addr(family, dst_addr, route->dst))
			continue;

		return route;
	}

	return NULL;
}

/*
 * Check whether a rule reported by the kernel matches a configured rule.
 *
 * frh  - the fib rule message header
 * tb   - the parsed rule attributes, indexed by FRA_* type
 * rule - the configured rule to compare against
 *
 * Returns true if every checked field of the kernel's rule agrees with
 * the configured rule, false otherwise. A rule marked dont_track never
 * matches. For optional attributes, the presence of the attribute in the
 * message must also agree with whether the option is configured.
 */
static bool
compare_rule(struct fib_rule_hdr *frh, struct rtattr *tb[FRA_MAX + 1], ip_rule_t *rule)
{
	if (rule->dont_track)
		return false;

	if (rule->family != frh->family)
		return false;

	/* This is a very good discriminator, since our rules will always have a priority */
	if (!tb[FRA_PRIORITY] ||
	    rule->priority != *(uint32_t*)RTA_DATA(tb[FRA_PRIORITY]))
		return false;

	if (frh->action != rule->action)
		return false;

	/* A goto rule must carry the same target */
	if (frh->action == FR_ACT_GOTO &&
	    (!tb[FRA_GOTO] ||
	     *(uint32_t *)RTA_DATA(tb[FRA_GOTO]) != rule->goto_target))
		return false;

	/* The table is taken from the FRA_TABLE attribute if present,
	 * otherwise from the message header */
	if (tb[FRA_TABLE] && rule->table != *(uint32_t *)RTA_DATA(tb[FRA_TABLE]))
		return false;
	if (!tb[FRA_TABLE] && rule->table != frh->table)
		return false;

	/* The double negation normalises both sides to 0/1 before comparing */
	if (!rule->invert != !((frh->flags & FIB_RULE_INVERT)))
		return false;

	/* Source address: presence, prefix length and address must all agree */
	if (!rule->from_addr != !tb[FRA_SRC])
		return false;
	if (rule->from_addr) {
		if (frh->src_len != rule->from_addr->ifa.ifa_prefixlen)
			return false;
		/* compare_addr() returns true if the addresses differ */
		if (compare_addr(rule->family, RTA_DATA(tb[FRA_SRC]), rule->from_addr))
			return false;
	}

	/* Destination address: presence, prefix length and address must all agree */
	if (!rule->to_addr != !tb[FRA_DST])
		return false;
	if (rule->to_addr) {
		if (frh->dst_len != rule->to_addr->ifa.ifa_prefixlen)
			return false;
		if (compare_addr(rule->family, RTA_DATA(tb[FRA_DST]), rule->to_addr))
			return false;
	}

	if (rule->tos != frh->tos)
		return false;

	if (!tb[FRA_FWMARK] != !(rule->mask & IPRULE_BIT_FWMARK))
		return false;
	if (rule->mask & IPRULE_BIT_FWMARK &&
	    *(uint32_t*)RTA_DATA(tb[FRA_FWMARK]) != rule->fwmark)
		return false;

	/* If no fwmask is configured, a fwmask reported by the kernel is
	 * only accepted if it is all ones */
	if (!tb[FRA_FWMASK] && (rule->mask & IPRULE_BIT_FWMASK))
		return false;
	if (rule->mask & IPRULE_BIT_FWMASK) {
		if (*(uint32_t*)RTA_DATA(tb[FRA_FWMASK]) != rule->fwmask)
			return false;
	}
	else if (tb[FRA_FWMASK]) {
		if (*(uint32_t *)RTA_DATA(tb[FRA_FWMASK]) != 0xffffffff)
			return false;
	}

	if (!tb[FRA_FLOW] != !rule->realms)
		return false;
	if (rule->realms &&
	    *(uint32_t*)RTA_DATA(tb[FRA_FLOW]) != rule->realms)
		return false;

#if HAVE_DECL_FRA_SUPPRESS_PREFIXLEN
	/* A configured value of -1 corresponds to the attribute being absent */
	if (!tb[FRA_SUPPRESS_PREFIXLEN]) {
		if (rule->suppress_prefix_len != -1)
			return false;
	} else if (*(int32_t*)RTA_DATA(tb[FRA_SUPPRESS_PREFIXLEN]) != rule->suppress_prefix_len)
		return false;
#endif

#if HAVE_DECL_FRA_SUPPRESS_IFGROUP
	if (!tb[FRA_SUPPRESS_IFGROUP] != !(rule->mask & IPRULE_BIT_SUP_GROUP))
		return false;
	if (rule->mask & IPRULE_BIT_SUP_GROUP &&
	    *(uint32_t*)RTA_DATA(tb[FRA_SUPPRESS_IFGROUP]) != rule->suppress_group)
		return false;
#endif

	/* Input interface is compared by name */
	if (!tb[FRA_IFNAME] != !(rule->iif))
		return false;
	if (rule->iif &&
	    strcmp(RTA_DATA(tb[FRA_IFNAME]), rule->iif->ifname))
		return false;

#if HAVE_DECL_FRA_OIFNAME
	/* Output interface is compared by name */
	if (!tb[FRA_OIFNAME] != !(rule->oif))
		return false;
	if (rule->oif &&
	    strcmp(RTA_DATA(tb[FRA_OIFNAME]), rule->oif->ifname))
		return false;
#endif

#if HAVE_DECL_FRA_TUN_ID
	uint64_t tunnel_id;
	if (!tb[FRA_TUN_ID] != !(rule->tunnel_id))
		return false;
	if (rule->tunnel_id) {
		/* The attribute is converted from big endian before comparing */
		tunnel_id = be64toh(*(uint64_t *)RTA_DATA(tb[FRA_TUN_ID]));
		if (tunnel_id != rule->tunnel_id)
			return false;
	}
#endif

#if HAVE_DECL_FRA_UID_RANGE
	if (!tb[FRA_UID_RANGE] != !(rule->mask & IPRULE_BIT_UID_RANGE))
		return false;
	if ((rule->mask & IPRULE_BIT_UID_RANGE) &&
	    memcmp(RTA_DATA(tb[FRA_UID_RANGE]), &rule->uid_range, sizeof rule->uid_range))
		return false;
#endif

#if HAVE_DECL_FRA_L3MDEV
	/* An absent attribute is only acceptable if l3mdev is not configured */
	if (!tb[FRA_L3MDEV] && rule->l3mdev)
		return false;
	if (tb[FRA_L3MDEV] &&
	    *(uint8_t *)RTA_DATA(tb[FRA_L3MDEV]) != rule->l3mdev)
		return false;
#endif

#if HAVE_DECL_FRA_IP_PROTO
	if (!tb[FRA_IP_PROTO] != !(rule->mask & IPRULE_BIT_IP_PROTO))
		return false;
	if (rule->mask & IPRULE_BIT_IP_PROTO &&
	    *(uint8_t *)RTA_DATA(tb[FRA_IP_PROTO]) != rule->ip_proto)
		return false;
#endif

#if HAVE_DECL_FRA_SPORT_RANGE
	if (!tb[FRA_SPORT_RANGE] != !(rule->mask & IPRULE_BIT_SPORT_RANGE))
		return false;
	if (rule->mask & IPRULE_BIT_SPORT_RANGE &&
	    memcmp(RTA_DATA(tb[FRA_SPORT_RANGE]), &rule->src_port, sizeof rule->src_port))
		return false;
#endif

#if HAVE_DECL_FRA_DPORT_RANGE
	if (!tb[FRA_DPORT_RANGE] != !(rule->mask & IPRULE_BIT_DPORT_RANGE))
		return false;
	if (rule->mask & IPRULE_BIT_DPORT_RANGE &&
	    memcmp(RTA_DATA(tb[FRA_DPORT_RANGE]), &rule->dst_port, sizeof rule->dst_port))
		return false;
#endif

	return true;
}

/*
 * Determine whether a rule reported by the kernel is one that we have
 * configured, either as a virtual rule of a vrrp instance or as a
 * static rule.
 *
 * ret_vrrp is set to the owning vrrp instance if the rule is a virtual
 * rule, otherwise it is set to NULL.
 *
 * Returns the matching configured rule, or NULL if the rule is not ours.
 */
static ip_rule_t *
rule_is_ours(struct fib_rule_hdr* frh, struct rtattr *tb[FRA_MAX + 1], vrrp_t **ret_vrrp)
{
	element outer_e, inner_e;
	vrrp_t *instance;
	ip_rule_t *candidate;

	*ret_vrrp = NULL;

	/* The virtual rules of each vrrp instance take precedence */
	LIST_FOREACH(vrrp_data->vrrp, instance, outer_e) {
		LIST_FOREACH(instance->vrules, candidate, inner_e) {
			if (!compare_rule(frh, tb, candidate))
				continue;

			*ret_vrrp = instance;
			return candidate;
		}
	}

	/* Otherwise it may be one of the static rules */
	LIST_FOREACH(vrrp_data->static_rules, candidate, outer_e) {
		if (!compare_rule(frh, tb, candidate))
			continue;

		return candidate;
	}

	return NULL;
}
#endif
#endif

/*
 * Set the receive buffer size of a netlink socket.
 *
 * nl          - the netlink handle whose socket is to be updated
 * rcvbuf_size - the requested size; 0 selects IF_DEFAULT_BUFSIZE
 * force       - use SO_RCVBUFFORCE rather than SO_RCVBUF
 *
 * Returns the result of setsockopt(); a failure is logged.
 */
static int
netlink_set_rx_buf_size(nl_handle_t *nl, unsigned rcvbuf_size, bool force)
{
	int rc;

	if (rcvbuf_size == 0)
		rcvbuf_size = IF_DEFAULT_BUFSIZE;

	if (!force) {
		rc = setsockopt(nl->fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(rcvbuf_size));
		if (rc < 0)
			log_message(LOG_INFO, "Cannot set SO_RCVBUF IP option. errno=%d (%m)", errno);

		return rc;
	}

	rc = setsockopt(nl->fd, SOL_SOCKET, SO_RCVBUFFORCE, &rcvbuf_size, sizeof(rcvbuf_size));
	if (rc < 0)
		log_message(LOG_INFO, "cant set SO_RCVBUFFORCE IP option. errno=%d (%m)", errno);

	return rc;
}

#ifdef _HAVE_FIB_ROUTING_
/*
 * Add the kernel reflection socket to, or drop it from, a netlink
 * multicast group.
 *
 * group - the RTNLGRP_* group
 * add   - true to join the group, false to leave it
 *
 * A failure is logged but otherwise ignored.
 */
static void
kernel_netlink_set_membership(int group, bool add)
{
	int option = add ? NETLINK_ADD_MEMBERSHIP : NETLINK_DROP_MEMBERSHIP;

	/* Report which operation failed, rather than always reporting "add" */
	if (setsockopt(nl_kernel.fd, SOL_NETLINK, option, &group, sizeof(group)) < 0)
		log_message(LOG_INFO, "Netlink: Cannot %s membership on netlink socket : (%s)",
				      add ? "add" : "drop", strerror(errno));
}

/*
 * Select which route and rule multicast groups the kernel reflection
 * socket listens to. For each of the four groups, membership is added if
 * the corresponding parameter is true and dropped if it is false.
 */
void
set_extra_netlink_monitoring(bool ipv4_routes, bool ipv6_routes, bool ipv4_rules, bool ipv6_rules)
{
	const struct {
		int	group;
		bool	wanted;
	} memberships[] = {
		{ RTNLGRP_IPV4_ROUTE,	ipv4_routes },
		{ RTNLGRP_IPV6_ROUTE,	ipv6_routes },
		{ RTNLGRP_IPV4_RULE,	ipv4_rules },
		{ RTNLGRP_IPV6_RULE,	ipv6_rules },
	};
	size_t idx;

	for (idx = 0; idx < sizeof(memberships) / sizeof(memberships[0]); idx++)
		kernel_netlink_set_membership(memberships[idx].group, memberships[idx].wanted);
}
#endif

/*
 * Create and bind a NETLINK_ROUTE socket and initialise a netlink handle.
 *
 * nl          - the handle to initialise; it is zeroed first
 * rcvbuf_size - the receive buffer size to request (0 selects the default)
 * force       - whether setting the receive buffer size should be forced
 * flags       - additional socket type flags (e.g. SOCK_NONBLOCK)
 * group, ...  - the multicast groups to join, terminated by 0
 *
 * Returns -1, with the socket closed and nl->fd set to -1, if the socket
 * could not be created, bound or joined to the requested groups.
 * Otherwise the result of setting the receive buffer size is returned;
 * if that fails the socket is left open and nl->fd remains valid.
 */
static int
netlink_socket(nl_handle_t *nl, unsigned rcvbuf_size, bool force, int flags, int group, ...)
{
	int ret;
	va_list gp;
	socklen_t addr_len;
	struct sockaddr_nl snl;
	int sock_flags = flags;

	memset(nl, 0, sizeof (*nl));

#if !HAVE_DECL_SOCK_NONBLOCK
	sock_flags &= ~SOCK_NONBLOCK;
#endif

	nl->fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC | sock_flags, NETLINK_ROUTE);
	if (nl->fd < 0) {
		log_message(LOG_INFO, "Netlink: Cannot open netlink socket : (%s)",
		       strerror(errno));
		return -1;
	}

#if !HAVE_DECL_SOCK_NONBLOCK
	if ((flags & SOCK_NONBLOCK) &&
	    set_sock_flags(nl->fd, F_SETFL, O_NONBLOCK))
		goto err;
#endif

	memset(&snl, 0, sizeof (snl));
	snl.nl_family = AF_NETLINK;

	ret = bind(nl->fd, (struct sockaddr *) &snl, sizeof (snl));
	if (ret < 0) {
		log_message(LOG_INFO, "Netlink: Cannot bind netlink socket : (%s)",
		       strerror(errno));
		goto err;
	}

	/* Join the requested groups */
	va_start(gp, group);
	while (group != 0) {
		if (group < 0) {
			va_end(gp);
			goto err;
		}

		ret = setsockopt(nl->fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &group, sizeof(group));
		if (ret < 0) {
			log_message(LOG_INFO, "Netlink: Cannot add membership on netlink socket : (%s)",
			       strerror(errno));
			va_end(gp);
			goto err;
		}

		group = va_arg(gp,int);
	}
	va_end(gp);

	addr_len = sizeof (snl);
	ret = getsockname(nl->fd, (struct sockaddr *) &snl, &addr_len);
	if (ret < 0 || addr_len != sizeof (snl)) {
		log_message(LOG_INFO, "Netlink: Cannot getsockname : (%s)",
		       strerror(errno));
		goto err;
	}

	if (snl.nl_family != AF_NETLINK) {
		log_message(LOG_INFO, "Netlink: Wrong address family %d",
		       snl.nl_family);
		goto err;
	}

	/* Save the port id for checking message source later */
	nl->nl_pid = snl.nl_pid;

#ifdef _INCLUDE_UNUSED_CODE_
	/* There appears to be a kernel bug that manifests itself when we have a large number
	 * of VMAC interfaces to add (i.e. 200 or more). After approx 200 interfaces have been
	 * added the kernel will return ENOBUFS on the nl_kernel socket, and then repeat the
	 * first 30 or so RTM_NEWLINK messages, omitting the first one. Then, at the end of
	 * creating all the interfaces, i.e. after a slight delay with no new messages,
	 * we get another ENOBUFS and all the RTM_NEWLINK messages from the time of the
	 * first ENOBUFS message repeated.
	 *
	 * This problem also happens if the system already has a large (e.g. 200 or more)
	 * number of interfaces configured before keepalived starts.
	 *
	 * This problem feels as though a circular buffer is wrapping around, and causes
	 * all the old messages in the buffer to be resent, but the first one is omitted.
	 * Note that it is only the interfaces that keepalived creates that are resent,
	 * not interfaces that already existed on the system before keepalived starts.
	 *
	 * We can also get ENOBUFS on the nl_cmd socket if the NLM_F_ECHO flag is set as well as
	 * the NLM_F_ACK flag when a command is sent on the nl_cmd socket.
	 *
	 * It appears that this must be a kernel bug, since when it happens on interface creation,
	 * if we are also running `ip -ts monitor link addr route`, i.e. the same as the nl_kernel
	 * socket, then precisely the same messages are repeated (provided we have set the
	 * vrrp_netlink_cmd_rcv_bufs global configuration option to 1048576 (1024k) to match what
	 * ip monitor does).
	 *
	 * NETLINK_NO_ENOBUFS was introduced in Linux 2.6.30
	 */
	{
		int one = 1;

		if ((ret = setsockopt(nl->fd, SOL_NETLINK, NETLINK_NO_ENOBUFS, &one, sizeof(one))) < 0)
			log_message(LOG_INFO, "Cannot set NETLINK_NO_ENOBUFS option. errno=%d (%m)", errno);
	}
#endif

	nl->seq = (uint32_t)time(NULL);

	return netlink_set_rx_buf_size(nl, rcvbuf_size, force);

err:
	/* Don't leak the socket, and make sure the handle does not retain a
	 * stale descriptor that could later be used or closed a second time */
	close(nl->fd);
	nl->fd = -1;

	return -1;
}

/*
 * Close a netlink socket.
 *
 * Any thread registered on the handle is cancelled, the socket is closed
 * if it is open, and the handle's fd is left as -1. A NULL handle is
 * silently ignored.
 */
static void
netlink_close(nl_handle_t *nl)
{
	if (nl == NULL)
		return;

	/* Release any pending thread before the socket goes away. There is
	 * no thread for nl_cmd since it is used synchronously. */
	if (nl->thread != NULL) {
		thread_cancel(nl->thread);
		nl->thread = NULL;
	}

	if (nl->fd != -1) {
		close(nl->fd);
		nl->fd = -1;
	}
}

/*
 * iproute2 utility function
 *
 * Append an attribute to a netlink message.
 *
 * n      - the message; nlmsg_len is updated to include the new attribute
 * maxlen - the size of the buffer holding the message
 * type   - the attribute type
 * data   - the attribute payload; may be NULL if alen is 0
 * alen   - the length of the payload
 *
 * Returns 0 on success, or -1 (leaving the message unmodified) if the
 * attribute does not fit in the buffer or its length cannot be
 * represented in the 16 bit rta_len field.
 */
int
addattr_l(struct nlmsghdr *n, size_t maxlen, unsigned short type, void *data, size_t alen)
{
	size_t len = RTA_LENGTH(alen);
	size_t align_len = NLMSG_ALIGN(len);
	struct rtattr *rta;

	/* rta_len is an unsigned short; refuse rather than silently truncate */
	if (len != (unsigned short)len)
		return -1;

	if (n->nlmsg_len + align_len > maxlen)
		return -1;

	rta = (struct rtattr *) (((char *) n) + n->nlmsg_len);
	rta->rta_type = type;
	rta->rta_len = (unsigned short)len;

	/* memcpy() must not be passed a NULL pointer, even for a zero length */
	if (alen)
		memcpy(RTA_DATA(rta), data, alen);
	n->nlmsg_len += (uint32_t)align_len;

	return 0;
}

#ifdef _WITH_VRRP_
/* Append an attribute with a single byte payload to a netlink message.
 * Returns whatever addattr_l() returns. */
int
addattr8(struct nlmsghdr *n, size_t maxlen, unsigned short type, uint8_t data)
{
	uint8_t payload = data;

	return addattr_l(n, maxlen, type, &payload, sizeof(payload));
}
#endif

/* Append an attribute with a 32 bit payload to a netlink message.
 * Returns whatever addattr_l() returns. */
int
addattr32(struct nlmsghdr *n, size_t maxlen, unsigned short type, uint32_t data)
{
	uint32_t payload = data;

	return addattr_l(n, maxlen, type, &payload, sizeof(payload));
}

#ifdef _WITH_VRRP_
/* Append an attribute with a 64 bit payload to a netlink message.
 * Returns whatever addattr_l() returns. */
int
addattr64(struct nlmsghdr *n, size_t maxlen, unsigned short type, uint64_t data)
{
	uint64_t payload = data;

	return addattr_l(n, maxlen, type, &payload, sizeof(payload));
}

/*
 * Append an attribute to a netlink message, where the payload is the
 * concatenation of two separate pieces of data.
 *
 * n           - the message; nlmsg_len is updated to include the new attribute
 * maxlen      - the size of the buffer holding the message
 * type        - the attribute type
 * data/alen   - the first part of the payload (data may be NULL if alen is 0)
 * data2/alen2 - the second part of the payload (data2 may be NULL if alen2 is 0)
 *
 * Returns 0 on success, or -1 (leaving the message unmodified) if the
 * attribute does not fit in the buffer or its length cannot be
 * represented in the 16 bit rta_len field.
 */
int
addattr_l2(struct nlmsghdr *n, size_t maxlen, unsigned short type, void *data, size_t alen, void *data2, size_t alen2)
{
	size_t len = RTA_LENGTH(alen + alen2);
	size_t align_len = NLMSG_ALIGN(len);
	struct rtattr *rta;
	char *payload;

	/* rta_len is an unsigned short; refuse rather than silently truncate */
	if (len != (unsigned short)len)
		return -1;

	if (n->nlmsg_len + align_len > maxlen)
		return -1;

	rta = (struct rtattr *) (((char *) n) + n->nlmsg_len);
	rta->rta_type = type;
	rta->rta_len = (unsigned short)len;

	/* Use a char pointer since arithmetic on void pointers is not standard C,
	 * and don't pass memcpy() a NULL pointer even for a zero length */
	payload = RTA_DATA(rta);
	if (alen)
		memcpy(payload, data, alen);
	if (alen2)
		memcpy(payload + alen, data2, alen2);
	n->nlmsg_len += (uint32_t)align_len;

	return 0;
}

/*
 * Append raw data to the end of a netlink message, zero filling any
 * padding needed to keep the message length aligned.
 *
 * Returns 0 on success, or -1 if the padded data does not fit within
 * maxlen.
 */
int
addraw_l(struct nlmsghdr *n, size_t maxlen, const void *data, size_t len)
{
	size_t padded_len = NLMSG_ALIGN(len);
	char *tail;

	if (n->nlmsg_len + padded_len > maxlen)
		return -1;

	tail = (char *)NLMSG_TAIL(n);
	memcpy(tail, data, len);
	memset(tail + len, 0, padded_len - len);
	n->nlmsg_len += (uint32_t)padded_len;

	return 0;
}

/*
 * Append a sub-attribute to the end of an attribute.
 *
 * rta    - the containing attribute; its rta_len is updated
 * maxlen - the size of the buffer holding the containing attribute
 * type   - the sub-attribute type
 * data   - the sub-attribute payload; may be NULL if alen is 0
 * alen   - the length of the payload
 *
 * Returns the aligned length added to rta, or 0 (leaving rta unmodified)
 * if the sub-attribute does not fit in the buffer or the resulting length
 * cannot be represented in the 16 bit rta_len field.
 */
size_t
rta_addattr_l(struct rtattr *rta, size_t maxlen, unsigned short type,
		  const void *data, size_t alen)
{
	struct rtattr *subrta;
	size_t len = RTA_LENGTH(alen);
	size_t align_len = RTA_ALIGN(len);
	size_t new_len = rta->rta_len + align_len;

	if (new_len > maxlen)
		return 0;

	/* rta_len is an unsigned short; refuse rather than silently truncate.
	 * Since len <= new_len this also covers the sub-attribute's length. */
	if (new_len != (unsigned short)new_len)
		return 0;

	subrta = (struct rtattr*)(((char*)rta) + rta->rta_len);
	subrta->rta_type = type;
	subrta->rta_len = (unsigned short)len;

	/* rta_nest() passes NULL with a zero length, and memcpy() must not
	 * be passed a NULL pointer */
	if (alen)
		memcpy(RTA_DATA(subrta), data, alen);
	rta->rta_len = (unsigned short)new_len;

	return align_len;
}

/*
 * Append a sub-attribute to the end of an attribute, where the payload
 * is the concatenation of two separate pieces of data.
 *
 * rta         - the containing attribute; its rta_len is updated
 * maxlen      - the size of the buffer holding the containing attribute
 * type        - the sub-attribute type
 * data/alen   - the first part of the payload (data may be NULL if alen is 0)
 * data2/alen2 - the second part of the payload (data2 may be NULL if alen2 is 0)
 *
 * Returns the aligned length added to rta, or 0 (leaving rta unmodified)
 * if the sub-attribute does not fit in the buffer or the resulting length
 * cannot be represented in the 16 bit rta_len field.
 */
size_t
rta_addattr_l2(struct rtattr *rta, size_t maxlen, unsigned short type,
		  const void *data, size_t alen,
		  const void *data2, size_t alen2)
{
	struct rtattr *subrta;
	size_t len = RTA_LENGTH(alen + alen2);
	size_t align_len = RTA_ALIGN(len);
	size_t new_len = rta->rta_len + align_len;
	char *payload;

	if (new_len > maxlen)
		return 0;

	/* rta_len is an unsigned short; refuse rather than silently truncate.
	 * Since len <= new_len this also covers the sub-attribute's length. */
	if (new_len != (unsigned short)new_len)
		return 0;

	subrta = (struct rtattr*)(((char*)rta) + rta->rta_len);
	subrta->rta_type = type;
	subrta->rta_len = (unsigned short)len;

	/* Use a char pointer since arithmetic on void pointers is not standard C,
	 * and don't pass memcpy() a NULL pointer even for a zero length */
	payload = RTA_DATA(subrta);
	if (alen)
		memcpy(payload, data, alen);
	if (alen2)
		memcpy(payload + alen, data2, alen2);
	rta->rta_len = (unsigned short)new_len;

	return align_len;
}

/* Append a sub-attribute with a 64 bit payload to an attribute.
 * Returns whatever rta_addattr_l() returns. */
size_t
rta_addattr64(struct rtattr *rta, size_t maxlen, unsigned short type, uint64_t data)
{
	uint64_t payload = data;

	return rta_addattr_l(rta, maxlen, type, &payload, sizeof(payload));
}

/*
 * Append a sub-attribute with a 32 bit payload to an attribute.
 *
 * Returns the aligned length added to rta, or 0 (leaving rta unmodified)
 * if the sub-attribute would take rta beyond maxlen.
 */
size_t
rta_addattr32(struct rtattr *rta, size_t maxlen, unsigned short type, uint32_t data)
{
	const size_t attr_len = RTA_LENGTH(sizeof(uint32_t));
	const size_t padded_len = RTA_ALIGN(attr_len);
	const size_t new_total = rta->rta_len + padded_len;
	struct rtattr *sub;

	if (new_total > maxlen)
		return 0;

	/* The sub-attribute is placed at the current end of the container */
	sub = (struct rtattr *)((char *)rta + rta->rta_len);
	sub->rta_len = (unsigned short)attr_len;
	sub->rta_type = type;
	memcpy(RTA_DATA(sub), &data, sizeof(uint32_t));

	rta->rta_len = (unsigned short)new_total;

	return padded_len;
}

/* Append a sub-attribute with a 16 bit payload to an attribute.
 * Returns whatever rta_addattr_l() returns. */
size_t
rta_addattr16(struct rtattr *rta, size_t maxlen, unsigned short type, uint16_t data)
{
	uint16_t payload = data;

	return rta_addattr_l(rta, maxlen, type, &payload, sizeof(payload));
}

/* Append a sub-attribute with a single byte payload to an attribute.
 * Returns whatever rta_addattr_l() returns. */
size_t
rta_addattr8(struct rtattr *rta, size_t maxlen, unsigned short type, uint8_t data)
{
	uint8_t payload = data;

	return rta_addattr_l(rta, maxlen, type, &payload, sizeof(payload));
}

/*
 * Start a nested attribute at the end of rta by appending an empty
 * sub-attribute of the given type.
 *
 * Returns a pointer to where the nested attribute starts, for passing to
 * rta_nest_end(). Note that the result of appending the empty
 * sub-attribute is not checked.
 */
struct rtattr *
rta_nest(struct rtattr *rta, size_t maxlen, unsigned short type)
{
	/* Remember the current end before the header is appended */
	struct rtattr *nest_start = RTA_TAIL(rta);

	rta_addattr_l(rta, maxlen, type, NULL, 0);

	return nest_start;
}

/*
 * Complete a nested attribute started by rta_nest(), by setting its
 * length to cover everything from its start to the current end of rta.
 *
 * Returns the length of rta.
 */
size_t
rta_nest_end(struct rtattr *rta, struct rtattr *nest)
{
	const char *nest_begin = (const char *)nest;
	const char *nest_finish = (const char *)RTA_TAIL(rta);

	nest->rta_len = (unsigned short)(nest_finish - nest_begin);

	return rta->rta_len;
}
#endif

/* Return the first byte of an attribute's payload */
static inline __u8 rta_getattr_u8(const struct rtattr *rta)
{
	const __u8 *payload = RTA_DATA(rta);

	return payload[0];
}

/*
 * Build a table of the attributes in an attribute list.
 *
 * tb  - table of max + 1 entries; on return tb[type] points to the last
 *       attribute of that type in the list, or is NULL if there is none
 * max - the highest attribute type of interest; others are ignored
 * rta - the first attribute in the list
 * len - the length, in bytes, of the attribute list
 */
static void
parse_rtattr(struct rtattr **tb, int max, struct rtattr *rta, size_t len)
{
	size_t step;

	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));

	while (RTA_OK(rta, len)) {
		if (rta->rta_type <= max)
			tb[rta->rta_type] = rta;

		/* RTA_NEXT() subtracts the aligned attribute length from len.
		 * Since len is unsigned, that would wrap to a huge value if the
		 * padding of the final attribute extends beyond len, and parsing
		 * would continue past the end of the data. Step manually instead,
		 * stopping once the remaining length is consumed. */
		step = RTA_ALIGN(rta->rta_len);
		if (step >= len)
			break;

		len -= step;
		rta = (struct rtattr *)((char *)rta + step);
	}
}

#ifdef _WITH_VRRP_
/* Build a table of the attributes nested within the payload of rta.
 * See parse_rtattr() for the meaning of tb and max. */
static void
parse_rtattr_nested(struct rtattr **tb, int max, struct rtattr *rta)
{
	struct rtattr *first_nested = RTA_DATA(rta);
	size_t nested_len = RTA_PAYLOAD(rta);

	parse_rtattr(tb, max, first_nested, nested_len);
}

/* Arrange for an instance that has just left master state to make a
 * quick transition back to master */
static void
vrrp_schedule_quick_master(vrrp_t *vrrp)
{
	vrrp->ms_down_timer = VRRP_TIMER_SKEW(vrrp);
	vrrp_init_instance_sands(vrrp);
	vrrp_thread_requeue_read(vrrp);
}

/*
 * Force a vrrp instance out of master state.
 *
 * If the instance is a member of a sync group, all members of the group
 * that are master are also forced out of master state, and the group's
 * state is set to backup. Every instance that is forced out is set up to
 * transition back to master quickly.
 */
static void
set_vrrp_backup(vrrp_t *vrrp)
{
	vrrp_t *member;
	element e;

	vrrp->wantstate = VRRP_STATE_BACK;
	vrrp_state_leave_master(vrrp, true);

	if (vrrp->sync) {
		LIST_FOREACH(vrrp->sync->vrrp_instances, member, e) {
			if (member->state != VRRP_STATE_MAST)
				continue;

			member->wantstate = VRRP_STATE_BACK;
			vrrp_state_leave_master(member, true);
			vrrp_schedule_quick_master(member);
		}

		vrrp->sync->state = VRRP_STATE_BACK;
	}

	vrrp_schedule_quick_master(vrrp);
}
#endif

/*
 * Netlink interface address lookup filter
 * We need to handle multiple primary address and
 * multiple secondary address to the same interface.
 * We also need to handle the same address on
 * multiple interfaces, for IPv6 link local addresses.
 *
 * snl is unused; h is the netlink message to process.
 *
 * Messages other than RTM_NEWADDR/RTM_DELADDR, addresses that are neither
 * IPv4 nor IPv6, and (when VRRP processing applies) addresses on unknown
 * interfaces are ignored.
 *
 * For VRRP:
 *  - a new address may become the interface's recorded address, in which
 *    case instances tracking the interface that were waiting for a source
 *    address may be brought up
 *  - deletion of the interface's recorded address may cause instances
 *    using it to be brought down, or a VMAC to have its link local
 *    address replaced
 *  - deletion of one of our VIPs/eVIPs while master causes a transition
 *    to backup
 *  - deletion of a tracked static address causes it to be reinstated
 *
 * For LVS the checkers are informed of the address change.
 *
 * Returns 0 if the message was processed or ignored, -1 if the message
 * is too short or contains no address.
 */
static int
netlink_if_address_filter(__attribute__((unused)) struct sockaddr_nl *snl, struct nlmsghdr *h)
{
	struct ifaddrmsg *ifa;
	struct rtattr *tb[IFA_MAX + 1];
#ifdef _WITH_VRRP_
	interface_t *ifp;
	ip_address_t *ipaddr;
#endif
	size_t len;
	/* The address from the message, viewable as either family */
	union {
		void *addr;
		struct in_addr *in;
		struct in6_addr *in6;
	} addr;
#ifdef _WITH_VRRP_
	char addr_str[INET6_ADDRSTRLEN];
	bool addr_chg = false;
	element e;
	vrrp_t *vrrp;
	vrrp_t *address_vrrp;
	tracking_vrrp_t *tvp;
	bool is_tracking_saddr;
#endif

	if (h->nlmsg_type != RTM_NEWADDR && h->nlmsg_type != RTM_DELADDR)
		return 0;

	if (h->nlmsg_len < NLMSG_LENGTH(sizeof (struct ifaddrmsg)))
		return -1;

	ifa = NLMSG_DATA(h);

	/* Only IPv4 and IPv6 are valid for us */
	if (ifa->ifa_family != AF_INET && ifa->ifa_family != AF_INET6)
		return 0;

	len = h->nlmsg_len - NLMSG_LENGTH(sizeof (struct ifaddrmsg));

	parse_rtattr(tb, IFA_MAX, IFA_RTA(ifa), len);

	/* If only one of IFA_LOCAL and IFA_ADDRESS is present, use it for both */
	if (tb[IFA_LOCAL] == NULL)
		tb[IFA_LOCAL] = tb[IFA_ADDRESS];
	if (tb[IFA_ADDRESS] == NULL)
		tb[IFA_ADDRESS] = tb[IFA_LOCAL];

	/* local interface address */
	addr.addr = (tb[IFA_LOCAL] ? RTA_DATA(tb[IFA_LOCAL]) : NULL);

	if (addr.addr == NULL)
		return -1;

#ifdef _WITH_VRRP_
#ifndef _DEBUG_
	if (prog_type == PROG_TYPE_VRRP || __test_bit(CONFIG_TEST_BIT, &debug))
#endif
	{
		/* Fetch interface_t */
		ifp = if_get_by_ifindex(ifa->ifa_index);
		if (!ifp)
			return 0;

// ?? Only interested in link-local for IPv6 unless unicast
// we take address from vrrp->ifp->base_ifp, unless we have made an IPv6 address
// do we want to set a flag to say it is a generated link local address (or set saddr and track_saddr, but not saddr_from_config)
// or can we just compare address to vrrp->ifp->base_ifp address.
// We still need to consider non-vmac IPv6 if interface doesn't have a
// link local address.
		if (h->nlmsg_type == RTM_NEWADDR) {
			/* If no address is set on interface then set the first time */
// TODO if saddr from config && track saddr, addresses must match
			if (ifa->ifa_family == AF_INET) {
				if (!ifp->sin_addr.s_addr) {
					ifp->sin_addr = *addr.in;
					if (!LIST_ISEMPTY(ifp->tracking_vrrp))
						addr_chg = true;
				}
			} else {
// TODO  might not be link local if configured address
				if (ifa->ifa_scope == RT_SCOPE_LINK) {
					/* Only the first word is tested to see if an address is recorded */
					if (!ifp->sin6_addr.s6_addr32[0]) {
						ifp->sin6_addr = *addr.in6;
						if (!LIST_ISEMPTY(ifp->tracking_vrrp))
							addr_chg = true;
					}
#if defined _HAVE_VRRP_VMAC_ && !HAVE_DECL_IFLA_INET6_ADDR_GEN_MODE
					else if (ifp->vmac_type) {
						/* We already have an address; is this an auto generated link local address?
						 * For some reason if we recreate the VMAC when the underlying interface is
						 * recreated, deleting the autogenerated address doesn't get rid of the address */
						remove_vmac_auto_gen_addr(ifp, addr.in6);
					}
#endif
				}
			}

			if (addr_chg) {
				if (__test_bit(LOG_DETAIL_BIT, &debug)) {
					inet_ntop(ifa->ifa_family, addr.addr, addr_str, sizeof(addr_str));
					log_message(LOG_INFO, "Assigned address %s for interface %s"
							    , addr_str, ifp->ifname);
				}

				/* Now see if any vrrp instances were missing an interface address
				 * and see if they can be brought up */
				LIST_FOREACH(ifp->tracking_vrrp, tvp, e) {
					vrrp = tvp->vrrp;

					/* Is this the source address the instance is tracking? */
					is_tracking_saddr = false;
					if (vrrp->track_saddr) {
						if (vrrp->family == ifa->ifa_family)
							is_tracking_saddr = inaddr_equal(ifa->ifa_family, &vrrp->saddr, addr.addr);
					}

					/* The instance has no source address yet, so it can use this one */
					if (ifp == (vrrp->family == AF_INET ? VRRP_CONFIGURED_IFP(vrrp) : vrrp->ifp) &&
					    vrrp->num_script_if_fault &&
					    vrrp->family == ifa->ifa_family &&
					    vrrp->saddr.ss_family == AF_UNSPEC &&
					    (!vrrp->saddr_from_config || is_tracking_saddr)) {
						/* Copy the address */
						if (ifa->ifa_family == AF_INET)
							inet_ip4tosockaddr(addr.in, &vrrp->saddr);
						else
							inet_ip6tosockaddr(addr.in6, &vrrp->saddr);
						try_up_instance(vrrp, false);
					}
#ifdef _HAVE_VRRP_VMAC_
					// If IPv6 link local and vmac doesn't have an address, add it to the vmac
					else if (vrrp->family == AF_INET6 &&
						 ifp == vrrp->ifp->base_ifp &&
						 vrrp->ifp->vmac_type &&
						 !__test_bit(VRRP_VMAC_XMITBASE_BIT, &vrrp->vmac_flags) &&
						 vrrp->num_script_if_fault &&
						 vrrp->family == ifa->ifa_family &&
						 vrrp->saddr.ss_family == AF_UNSPEC &&
						 (!vrrp->saddr_from_config || is_tracking_saddr)) {
						if (add_link_local_address(vrrp->ifp, addr.in6)) {
							inet_ip6tosockaddr(addr.in6, &vrrp->saddr);
							try_up_instance(vrrp, false);
						}
					}
#endif
				}
			}
		} else {
			/* Mark the address as needing to go. We can't delete the address
			 * until after down_instance is called, since it sends a prio 0 message */
			if (ifa->ifa_family == AF_INET) {
				if (ifp->sin_addr.s_addr == addr.in->s_addr)
					addr_chg = true;
			}
			else {
				if (ifp->sin6_addr.s6_addr32[0] == addr.in6->s6_addr32[0] &&
				    ifp->sin6_addr.s6_addr32[1] == addr.in6->s6_addr32[1] &&
				    ifp->sin6_addr.s6_addr32[2] == addr.in6->s6_addr32[2] &&
				    ifp->sin6_addr.s6_addr32[3] == addr.in6->s6_addr32[3])
					addr_chg = true;
			}

			if (addr_chg && !LIST_ISEMPTY(ifp->tracking_vrrp)) {
				if (__test_bit(LOG_DETAIL_BIT, &debug)) {
					inet_ntop(ifa->ifa_family, addr.addr, addr_str, sizeof(addr_str));
					log_message(LOG_INFO, "Deassigned address %s from interface %s"
							    , addr_str, ifp->ifname);
				}

				/* See if any vrrp instances need to be downed */
				LIST_FOREACH(ifp->tracking_vrrp, tvp, e) {
					vrrp = tvp->vrrp;

					/* Is this the source address the instance is tracking? */
					is_tracking_saddr = false;
					if (vrrp->track_saddr) {
						if (vrrp->family == ifa->ifa_family)
							is_tracking_saddr = inaddr_equal(ifa->ifa_family, &vrrp->saddr, addr.addr);
					}
#ifdef _HAVE_VRRP_VMAC_
					/* If we are a VMAC and took this address from the parent interface, we need to
					 * release the address and create one for ourself */
					if (ifa->ifa_family == AF_INET6 &&
					    __test_bit(VRRP_VMAC_BIT, &vrrp->vmac_flags) &&
					    ifp == vrrp->ifp->base_ifp &&
					    ifa->ifa_scope == RT_SCOPE_LINK &&
					    !__test_bit(VRRP_VMAC_XMITBASE_BIT, &vrrp->vmac_flags) &&
					    !vrrp->saddr_from_config &&
					    vrrp->ifp->base_ifp->sin6_addr.s6_addr32[0] == addr.in6->s6_addr32[0] &&
					    vrrp->ifp->base_ifp->sin6_addr.s6_addr32[1] == addr.in6->s6_addr32[1] &&
					    vrrp->ifp->base_ifp->sin6_addr.s6_addr32[2] == addr.in6->s6_addr32[2] &&
					    vrrp->ifp->base_ifp->sin6_addr.s6_addr32[3] == addr.in6->s6_addr32[3]) {
						if (IF_ISUP(ifp) && replace_link_local_address(vrrp->ifp))
							inet_ip6tosockaddr(&vrrp->ifp->sin6_addr, &vrrp->saddr);
						else if (IF_ISUP(ifp)) {
							/* We failed to add an address, so down the instance */
							down_instance(vrrp);
							vrrp->saddr.ss_family = AF_UNSPEC;
						}
					}
					else
#endif
					     if (ifp == (vrrp->family == AF_INET ? VRRP_CONFIGURED_IFP(vrrp) : vrrp->ifp) &&
						 vrrp->family == ifa->ifa_family &&
						 vrrp->saddr.ss_family != AF_UNSPEC &&
						 (!vrrp->saddr_from_config || is_tracking_saddr)) {
/* There might be another address available. Either send a netlink request for current addresses, or we keep a list */
						down_instance(vrrp);
						vrrp->saddr.ss_family = AF_UNSPEC;
					}
				}
			}

			if (addr_chg) {
				/* Now we can remove the address */
				if (ifa->ifa_family == AF_INET)
					ifp->sin_addr.s_addr = 0;
				else
					ifp->sin6_addr.s6_addr32[0] = 0;
			}
		}

		/* The address was not handled above as the interface's own address,
		 * so see if it is one of our VIPs/eVIPs */
		if (!addr_chg || LIST_ISEMPTY(ifp->tracking_vrrp)) {
			if (h->nlmsg_type == RTM_DELADDR)
				address_vrrp = address_is_ours(ifa, addr.addr, ifp);
			else
				address_vrrp = NULL;

			/* Display netlink operation */
			if (
#ifdef _WITH_LVS_
			    __test_bit(LOG_ADDRESS_CHANGES, &debug) ||
#endif
			    (__test_bit(LOG_DETAIL_BIT, &debug) && address_vrrp)) {
				inet_ntop(ifa->ifa_family, addr.addr, addr_str, sizeof(addr_str));
				log_message(LOG_INFO, "Netlink reflector reports IP %s %s %s"
						    , addr_str, h->nlmsg_type == RTM_NEWADDR ? "added to" : "removed from", ifp->ifname);
			}

			/* If one of our VIPs/eVIPs has been deleted, transition to backup */
			if (address_vrrp && address_vrrp->state == VRRP_STATE_MAST) {
				set_vrrp_backup(address_vrrp);
			}
		}

		if (h->nlmsg_type == RTM_DELADDR) {
			/* Check if a static address has been deleted */
			LIST_FOREACH(vrrp_data->static_addresses, ipaddr, e) {
				if (!ipaddr->dont_track && addr_is_equal(ifa, addr.addr, ipaddr, ifp)) {
					reinstate_static_address(ipaddr);
					break;
				}
			}
		}
	}
#endif

#ifdef _WITH_LVS_
#ifndef _DEBUG_
	if (prog_type == PROG_TYPE_CHECKER)
#endif
	{
		/* Refresh checkers state */
		update_checker_activity(ifa->ifa_family, addr.addr,
					(h->nlmsg_type == RTM_NEWADDR));
	}
#endif

	return 0;
}

/*
 * Our netlink parser.
 *
 * Reads datagrams from nl->fd and walks every netlink message they contain,
 * handing each ordinary message to `filter`.
 *
 *   filter   - callback invoked as filter(&snl, h) for each message that is
 *              neither NLMSG_DONE nor NLMSG_ERROR and has not been skipped as
 *              an echo of our own command socket.
 *   nl       - netlink handle whose socket is read.
 *   n        - the request whose reply is awaited, or NULL. It is consulted
 *              only to decide whether an -EEXIST or -EADDRNOTAVAIL error
 *              reply is to be treated as success.
 *   read_all - if false, return after the first message (or ACK) that is not
 *              part of a multipart reply; if true, keep reading datagrams.
 *
 * Returns 0 on success, -1 on a socket/protocol error or an error reply from
 * the kernel, or the negative value returned by the last failing filter call.
 * Note that any failure of the initial MSG_PEEK recvmsg() ends the loop with
 * a return value of -1.
 */
static int
netlink_parse_info(int (*filter) (struct sockaddr_nl *, struct nlmsghdr *),
		   nl_handle_t *nl, struct nlmsghdr *n, bool read_all)
{
	ssize_t len;
	int ret = 0;
	int error;
	char *nlmsg_buf = NULL;
	int nlmsg_buf_size = 0;

	while (true) {
		struct iovec iov = {
			.iov_len = 0
		};
		struct sockaddr_nl snl;
		struct msghdr msg = {
			.msg_name = &snl,
			.msg_namelen = sizeof(snl),
			.msg_iov = &iov,
			.msg_iovlen = 1,
			.msg_control = NULL,
			.msg_controllen = 0,
			.msg_flags = 0
		};
		struct nlmsghdr *h;

		/* Find out how big our receive buffer needs to be.
		 * The iovec has zero length here; MSG_PEEK leaves the datagram
		 * queued and MSG_TRUNC makes recvmsg() report its real length. */
		do {
			len = recvmsg(nl->fd, &msg, MSG_PEEK | MSG_TRUNC);
		} while (len < 0 && errno == EINTR);

		if (len < 0) {
			ret = -1;
			break;
		}

		/* Grow (never shrink) the buffer; it is reused across iterations */
		if (len > nlmsg_buf_size) {
			FREE_PTR(nlmsg_buf);
			nlmsg_buf = MALLOC(len);
			nlmsg_buf_size = len;
		}

		iov.iov_base = nlmsg_buf;
		iov.iov_len = nlmsg_buf_size;

		/* Now actually consume the datagram */
		do {
			len = recvmsg(nl->fd, &msg, 0);
		} while (len < 0 && errno == EINTR);

		if (len < 0) {
			if (errno == EWOULDBLOCK || errno == EAGAIN)
				break;
			if (errno == ENOBUFS) {
				log_message(LOG_INFO, "Netlink: Receive buffer overrun on %s socket - (%m)", nl == &nl_kernel ? "monitor" : "cmd");
				log_message(LOG_INFO, "  - increase the relevant netlink_rcv_bufs global parameter and/or set force");
			}
			else
				log_message(LOG_INFO, "Netlink: recvmsg error on %s socket  - %d (%m)", nl == &nl_kernel ? "monitor" : "cmd", errno);
			continue;
		}

		if (len == 0) {
			log_message(LOG_INFO, "Netlink: EOF");
			ret = -1;
			break;
		}

		if (msg.msg_namelen != sizeof snl) {
			log_message(LOG_INFO,
			       "Netlink: Sender address length error: length %d",
			       msg.msg_namelen);
			ret = -1;
			break;
		}

		/* Walk each netlink message in the datagram; NLMSG_NEXT reduces len
		 * by the size of every message stepped over. */
		for (h = (struct nlmsghdr *) nlmsg_buf; NLMSG_OK(h, (size_t)len); h = NLMSG_NEXT(h, len)) {
			/* Finish off reading. */
			if (h->nlmsg_type == NLMSG_DONE) {
				FREE(nlmsg_buf);
				return ret;
			}

			/* Error handling. */
			if (h->nlmsg_type == NLMSG_ERROR) {
				struct nlmsgerr *err = (struct nlmsgerr *) NLMSG_DATA(h);

				/*
				 * If error == 0 then this is a netlink ACK.
				 * return if not related to multipart message.
				 */
				/* NOTE(review): err->error is read before the length of the
				 * message is validated a few lines below. */
				if (err->error == 0) {
					if (!(h->nlmsg_flags & NLM_F_MULTI) && !read_all) {
						FREE(nlmsg_buf);
						return 0;
					}
					continue;
				}

				if (h->nlmsg_len < NLMSG_LENGTH(sizeof (struct nlmsgerr))) {
					log_message(LOG_INFO,
					       "Netlink: error: message truncated");
					FREE(nlmsg_buf);
					return -1;
				}

				/* Adding a route or address that already exists is not
				 * treated as a failure */
				if (n && (err->error == -EEXIST) &&
				    ((n->nlmsg_type == RTM_NEWROUTE) ||
				     (n->nlmsg_type == RTM_NEWADDR))) {
					FREE(nlmsg_buf);
					return 0;
				}

				/* If have more than one IPv4 address in the same CIDR
				 * and the "primary" address is removed, unless promote_secondaries
				 * is configured on the interface, all the "secondary" addresses
				 * in the same CIDR are deleted */
				if (n && err->error == -EADDRNOTAVAIL &&
				    n->nlmsg_type == RTM_DELADDR) {
					if (!(h->nlmsg_flags & NLM_F_MULTI)) {
						FREE(nlmsg_buf);
						return 0;
					}
					continue;
				}
#ifdef _WITH_VRRP_
				/* The log line is suppressed for the one errno value held in
				 * netlink_error_ignore; the error is still returned. */
				if (netlink_error_ignore != -err->error)
#endif
					log_message(LOG_INFO,
					       "Netlink: error: %s, type=%s(%u), seq=%u, pid=%d",
					       strerror(-err->error),
					       get_nl_msg_type(err->msg.nlmsg_type), err->msg.nlmsg_type,
					       err->msg.nlmsg_seq, err->msg.nlmsg_pid);

				FREE(nlmsg_buf);
				return -1;
			}

#ifdef _WITH_VRRP_
			/* Skip unsolicited messages from cmd channel */
			if (
#ifndef _DEBUG_
			    prog_type == PROG_TYPE_VRRP &&
#endif
			    h->nlmsg_type != RTM_NEWLINK &&
			    h->nlmsg_type != RTM_DELLINK &&
			    h->nlmsg_type != RTM_NEWROUTE &&
			    nl != &nl_cmd && h->nlmsg_pid == nl_cmd.nl_pid)
				continue;
#endif

			/* A filter failure is remembered in ret but does not stop the walk */
			error = (*filter) (&snl, h);
			if (error < 0) {
				log_message(LOG_INFO, "Netlink: filter function error");
				ret = error;
			}

			if (!(h->nlmsg_flags & NLM_F_MULTI) && !read_all) {
				FREE(nlmsg_buf);
				return ret;
			}
		}

		/* After error care. */
		if (msg.msg_flags & MSG_TRUNC) {
			log_message(LOG_INFO, "Netlink: error: message truncated");
			continue;
		}
		/* Any bytes left over after the last complete message are an error */
		if (len) {
			log_message(LOG_INFO, "Netlink: error: data remnant size %zd",
			       len);

			ret = -1;
			break;
		}
	}

	if (nlmsg_buf)
		FREE(nlmsg_buf);

	return ret;
}

#ifdef _WITH_VRRP_
/* Filter used by netlink_talk(): any message that gets as far as the filter
 * is not one we asked for, so it is logged and otherwise ignored. */
static int
netlink_talk_filter(__attribute__((unused)) struct sockaddr_nl *snl, struct nlmsghdr *h)
{
	const unsigned msg_type = h->nlmsg_type;

	log_message(LOG_INFO, "Netlink: ignoring message type 0x%04x", msg_type);

	return 0;
}

/*
 * Send message to netlink kernel socket, then receive response.
 *
 *   nl - netlink handle to send on; its sequence counter is incremented.
 *   n  - fully built request. Its nlmsg_seq is overwritten and NLM_F_ACK is
 *        added to its flags so that the kernel always answers.
 *
 * Returns -1 if sendmsg() fails, otherwise the result of netlink_parse_info()
 * for the reply.
 */
ssize_t
netlink_talk(nl_handle_t *nl, struct nlmsghdr *n)
{
	ssize_t status;
	struct sockaddr_nl snl;
	struct iovec iov = {
		.iov_base = n,
		.iov_len = n->nlmsg_len
	};
	struct msghdr msg = {
		.msg_name = &snl,
		.msg_namelen = sizeof(snl),
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = NULL,
		.msg_controllen = 0,
		.msg_flags = 0
	};

	/* Destination address: only the family is set, everything else is zero */
	memset(&snl, 0, sizeof snl);
	snl.nl_family = AF_NETLINK;

	n->nlmsg_seq = ++nl->seq;

	/* Request Netlink acknowledgement */
	n->nlmsg_flags |= NLM_F_ACK;

#ifdef _NETLINK_TIMERS_
	gettimeofday(&start_time, NULL);
#endif

	/* Send message to netlink interface. */
	status = sendmsg(nl->fd, &msg, 0);
	if (status < 0) {
		log_message(LOG_INFO, "Netlink: sendmsg(%d) cmd %d error: %s", nl->fd, n->nlmsg_type,
		       strerror(errno));
		return -1;
	}

	status = netlink_parse_info(netlink_talk_filter, nl, n, false);

#ifdef _NETLINK_TIMERS_
	/* Special case for NEWLINK - treat create separately; it is also used to up an interface etc. */
	int index = n->nlmsg_type == RTM_NEWLINK && (n->nlmsg_flags & NLM_F_CREATE) ? 0 : n->nlmsg_type;
	gettimeofday(&end_time, NULL);
	if (index <= MAX_NETLINK_TIMER) {
		netlink_times[index].tv_sec += end_time.tv_sec - start_time.tv_sec;
		netlink_times[index].tv_usec += end_time.tv_usec - start_time.tv_usec;
		netlink_count[index]++;

		/* Keep tv_usec within [0, 999999]. The upper test must be >=,
		 * otherwise a value of exactly 1000000 is left un-normalised. */
		if (netlink_times[index].tv_usec < 0) {
			netlink_times[index].tv_usec += 1000000;
			netlink_times[index].tv_sec--;
		} else if (netlink_times[index].tv_usec >= 1000000) {
			netlink_times[index].tv_usec -= 1000000;
			netlink_times[index].tv_sec++;
		}
	}
#endif

	return status;
}
#endif

/*
 * Send one request of the given type/family to the kernel on `nl`.
 *
 * When a name is supplied (VRRP builds only) it is attached as IFLA_IFNAME;
 * otherwise the request is flagged NLM_F_DUMP. The reply is not read here.
 *
 * Returns 0 if the request was sent, -1 if sendto() failed.
 */
static int
netlink_request(nl_handle_t *nl,
		unsigned char family,
		uint16_t type,
#ifndef _WITH_VRRP_
		__attribute__((unused))
#endif
					char *name)
{
	struct sockaddr_nl kernel_addr;
	struct {
		struct nlmsghdr nlh;
		struct ifinfomsg i;
		char buf[64];
	} req;

	/* Destination: only the address family is non-zero */
	memset(&kernel_addr, 0, sizeof(kernel_addr));
	kernel_addr.nl_family = AF_NETLINK;

	/* Build the fixed part of the request on a zeroed buffer */
	memset(&req, 0, sizeof(req));
	req.i.ifi_family = family;
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(req.i));
	req.nlh.nlmsg_type = type;
	req.nlh.nlmsg_flags = NLM_F_REQUEST;
	req.nlh.nlmsg_pid = 0;
	req.nlh.nlmsg_seq = ++nl->seq;

#ifdef _WITH_VRRP_
	if (name != NULL)
		addattr_l(&req.nlh, sizeof(req), IFLA_IFNAME, name, strlen(name) + 1);
	else
		req.nlh.nlmsg_flags |= NLM_F_DUMP;
#else
	req.nlh.nlmsg_flags |= NLM_F_DUMP;
#endif

#if HAVE_DECL_RTEXT_FILTER_SKIP_STATS
	addattr32(&req.nlh, sizeof(req), IFLA_EXT_MASK, RTEXT_FILTER_SKIP_STATS);
#endif

	/* The whole request structure is handed to sendto() */
	if (sendto(nl->fd, (void *) &req, sizeof(req), 0,
		   (struct sockaddr *) &kernel_addr, sizeof(kernel_addr)) < 0) {
		log_message(LOG_INFO, "Netlink: sendto() failed: %s",
		       strerror(errno));
		return -1;
	}

	return 0;
}

#ifdef _WITH_VRRP_
/* The up/down state of ifp has changed: walk every vrrp instance tracking
 * the interface and either adjust its priority (weighted tracking) or bring
 * the instance up/down (unweighted tracking). */
void
process_if_status_change(interface_t *ifp)
{
	element e;
	tracking_vrrp_t *tracker;
	const bool is_up = FLAGS_UP(ifp->ifi_flags);

	LIST_FOREACH(ifp->tracking_vrrp, tracker, e) {
		vrrp_t *instance = tracker->vrrp;

		if (tracker->weight == VRRP_NOT_TRACK_IF) {
			/* Not tracked. We might want to restore things to the
			 * interface if it is coming up */
		} else if (tracker->weight != 0) {
			/* Weighted tracking only moves the priority */
			if (is_up)
				instance->total_priority += abs(tracker->weight);
			else
				instance->total_priority -= abs(tracker->weight);
			vrrp_set_effective_priority(instance);
		} else if (is_up) {
			/* This vrrp's interface or underlying interface has come up */
			try_up_instance(instance, false);
		} else {
			/* ... or has gone down */
			down_instance(instance);
		}
	}
}

/* Record new interface flags for ifp and, if they amount to an up/down
 * transition, notify the tracking vrrp instances and the interface layer. */
static void
update_interface_flags(interface_t *ifp, unsigned ifi_flags)
{
	bool prev_up, cur_up;

	/* Nothing to do if the flags are unchanged or there is no vrrp data */
	if (ifi_flags == ifp->ifi_flags || !vrrp_data)
		return;

	/* We get called after a VMAC is created, but before tracking_vrrp is set */

	/* For an interface to be really up, any underlying interface must also
	 * be up. The old state has to be sampled before the flags are stored. */
	prev_up = IF_FLAGS_UP(ifp);
	cur_up = FLAGS_UP(ifi_flags);
	ifp->ifi_flags = ifi_flags;

	if (prev_up == cur_up)
		return;

	if (ifp->tracking_vrrp) {
		log_message(LOG_INFO, "Netlink reports %s %s", ifp->ifname, now_up_str(cur_up));

		process_if_status_change(ifp);
	}

	if (cur_up)
		interface_up(ifp);
	else
		interface_down(ifp);
}

/* Return a printable label for the link-layer attribute type (IFLA_ADDRESS
 * or IFLA_BROADCAST); any other value yields "Unknown Type". */
static char *get_mac_string(int type)
{
	if (type == IFLA_BROADCAST)
		return "Broadcast";

	if (type == IFLA_ADDRESS)
		return "Address";

	return "Unknown Type";
}

/*
 * Copy a link-layer address attribute (IFLA_ADDRESS or IFLA_BROADCAST) from
 * tb[] into ifp.
 *
 * Returns true if the attribute is absent or was stored; false if it is
 * larger than ifp->hw_addr or `type` is neither of the two supported values.
 * `name` is only used in the error message.
 */
static bool
netlink_if_get_ll_addr(interface_t *ifp, struct rtattr *tb[],
				  int type, char *name)
{
	size_t addr_len;
	size_t idx;

	/* A missing attribute is not an error */
	if (!tb[type])
		return true;

	addr_len = RTA_PAYLOAD(tb[type]);
	if (addr_len > sizeof(ifp->hw_addr)) {
		log_message(LOG_ERR,
			    " %s MAC address for %s is too large: %zu",
			    get_mac_string(type), name, addr_len);
		return false;
	}

	if (type == IFLA_ADDRESS) {
		memcpy(ifp->hw_addr, RTA_DATA(tb[type]), addr_len);

		/*
		 * Don't allow a hardware address of all zeroes
		 * Mark hw_addr_len as 0 to warn
		 */
		idx = 0;
		while (idx < addr_len && ifp->hw_addr[idx] == 0)
			idx++;
		ifp->hw_addr_len = (idx == addr_len) ? 0 : addr_len;

		return true;
	}

	if (type == IFLA_BROADCAST) {
		memcpy(ifp->hw_addr_bcast, RTA_DATA(tb[type]), addr_len);
		return true;
	}

	return false;
}

#ifdef _HAVE_IPV4_DEVCONF_
/* Extract the IPv4 devconf settings we care about from an IFLA_AF_SPEC
 * attribute and store them in ifp. Does nothing if the attribute, its
 * AF_INET part, or the IFLA_INET_CONF array is missing. */
static void
parse_af_spec(struct rtattr* attr, interface_t *ifp)
{
	struct rtattr *per_family[AF_INET6 + 1];
	struct rtattr *inet_attrs[IFLA_INET_MAX + 1];
	uint32_t *devconf;

	if (!attr)
		return;

	parse_rtattr_nested(per_family, AF_INET6, attr);
	if (!per_family[AF_INET])
		return;

	parse_rtattr_nested(inet_attrs, IFLA_INET_MAX, per_family[AF_INET]);
	if (!inet_attrs[IFLA_INET_CONF])
		return;

	/* The array is indexed by IPV4_DEVCONF_* minus one */
	devconf = RTA_DATA(inet_attrs[IFLA_INET_CONF]);

	ifp->promote_secondaries = devconf[IPV4_DEVCONF_PROMOTE_SECONDARIES - 1];
#ifdef _HAVE_VRRP_VMAC_
	ifp->arp_ignore = devconf[IPV4_DEVCONF_ARP_IGNORE - 1];
	ifp->arp_filter = devconf[IPV4_DEVCONF_ARPFILTER - 1];

	/* rp_filter is only taken while it still holds the "not read" marker */
	if (ifp->rp_filter == UINT_MAX)
		ifp->rp_filter = devconf[IPV4_DEVCONF_RP_FILTER - 1];
#endif
}
#endif

/*
 * Fill in `ifp` from the attributes of a link message.
 *
 *   ifp - interface structure to update
 *   tb  - attribute table parsed from the message, indexed by IFLA_*.
 *         tb[IFLA_IFNAME] and tb[IFLA_MTU] are dereferenced unconditionally.
 *   ifi - the ifinfomsg header of the message
 *
 * Returns false if a link-layer address does not fit, or if (with
 * global_data->allow_if_changes unset and the interface already seen) the
 * interface has changed between macvlan and non-macvlan or a macvlan's
 * underlying interface has changed. Returns true otherwise.
 */
static bool
netlink_if_link_populate(interface_t *ifp, struct rtattr *tb[], struct ifinfomsg *ifi)
{
	char *name;
#ifdef _HAVE_VRRP_VMAC_
	struct rtattr* linkinfo[IFLA_INFO_MAX+1];
	struct rtattr* linkattr[IFLA_MACVLAN_MAX+1];
	bool is_macvlan = false;
#ifdef _HAVE_VRF_
	struct rtattr *vrf_attr[IFLA_VRF_MAX + 1];
	bool is_vrf = false;
	uint32_t new_vrf_master_index;
	bool is_vrf_master = false;
#endif
#endif

	name = (char *)RTA_DATA(tb[IFLA_IFNAME]);

	/* Fill the interface structure */
	/* NOTE(review): copies strlen(name) bytes with no bound check and no
	 * terminating NUL; this appears to rely on ifp->ifname being large enough
	 * and already zeroed - confirm against the callers. */
	memcpy(ifp->ifname, name, strlen(name));
	ifp->ifindex = (ifindex_t)ifi->ifi_index;

#ifdef _HAVE_VRRP_VMAC_
	/* Work out the link kind (macvlan/macvtap or vrf) and parse its
	 * kind-specific attributes */
	if (tb[IFLA_LINKINFO]) {
		parse_rtattr_nested(linkinfo, IFLA_INFO_MAX, tb[IFLA_LINKINFO]);

		if (linkinfo[IFLA_INFO_KIND]) {
			/* NOTE(review): linkinfo[IFLA_INFO_DATA] is passed on without
			 * a NULL check - verify parse_rtattr_nested() tolerates that. */
			if (!strcmp((char *)RTA_DATA(linkinfo[IFLA_INFO_KIND]), "macvlan") ||
			    !strcmp((char *)RTA_DATA(linkinfo[IFLA_INFO_KIND]), "macvtap")) {
				is_macvlan = true;
				parse_rtattr_nested(linkattr, IFLA_MACVLAN_MAX, linkinfo[IFLA_INFO_DATA]);
			}
#ifdef _HAVE_VRF_
			else if (!strcmp((char *)RTA_DATA(linkinfo[IFLA_INFO_KIND]), "vrf") ) {
				is_vrf = true;
				parse_rtattr_nested(vrf_attr, IFLA_VRF_MAX, linkinfo[IFLA_INFO_DATA]);
			}
#endif
		}
	}

#ifdef _HAVE_IPV4_DEVCONF_
	if (tb[IFLA_AF_SPEC])
		parse_af_spec(tb[IFLA_AF_SPEC], ifp);
#endif

	/* Check there hasn't been an unsupported interface type change */
	if (!global_data->allow_if_changes && ifp->seen_interface) {
		/* If it was a macvlan and now isn't, or vice versa,
		 * then the interface type has changed */
		if (is_macvlan == !ifp->vmac_type)
			return false;

		/* If a macvlan, check the underlying interface hasn't changed */
		if (is_macvlan &&
		    (!tb[IFLA_LINK] || ifp->base_ifp->ifindex != *(uint32_t *)RTA_DATA(tb[IFLA_LINK])))
			return false;
	}
#endif

	ifp->mtu = *(uint32_t *)RTA_DATA(tb[IFLA_MTU]);
	ifp->hw_type = ifi->ifi_type;

	if (!netlink_if_get_ll_addr(ifp, tb, IFLA_ADDRESS, name))
		return false;
	if (!netlink_if_get_ll_addr(ifp, tb, IFLA_BROADCAST, name))
		return false;

#ifdef _HAVE_VRRP_VMAC_
	/* By default an interface is its own base interface */
	ifp->base_ifp = ifp;
	ifp->base_ifindex = 0;

	if (tb[IFLA_LINKINFO]) {
		if (linkinfo[IFLA_INFO_KIND]) {
			/* See if this interface is a MACVLAN */
			if (is_macvlan) {
				if (linkattr[IFLA_MACVLAN_MODE] &&
				    tb[IFLA_LINK]) {
					ifp->vmac_type = *(uint32_t*)RTA_DATA(linkattr[IFLA_MACVLAN_MODE]);
					ifp->base_ifindex = *(uint32_t *)RTA_DATA(tb[IFLA_LINK]);
					/* base_ifp is NULL here if the underlying interface is
					 * not known yet; base_ifindex is then left set */
					ifp->base_ifp = if_get_by_ifindex(ifp->base_ifindex);
					if (ifp->base_ifp)
						ifp->base_ifindex = 0;	/* Make sure this isn't used at runtime */
				}
			}
#ifdef _HAVE_VRF_
			else if (is_vrf) {
				/* A VRF master is recorded as being its own master */
				if (vrf_attr[IFLA_VRF_TABLE])
				{
					ifp->vrf_master_ifp = ifp;
					is_vrf_master = true;
				}
			}
#endif

#ifdef _FIXED_IF_TYPE_
			if (strcmp(_FIXED_IF_TYPE_, (char *)RTA_DATA(linkinfo[IFLA_INFO_KIND])))
#endif
				ifp->changeable_type = true;
		}
	}

#ifdef _HAVE_VRF_
	/* If we don't have the master interface details yet, we won't know
	 * if the master is a VRF master, but we sort that out later */
	if (!is_vrf_master) {
		if (tb[IFLA_MASTER]) {
			new_vrf_master_index = *(uint32_t*)RTA_DATA(tb[IFLA_MASTER]);
			if (!ifp->vrf_master_ifp ||
			    new_vrf_master_index != ifp->vrf_master_ifp->ifindex) {
				ifp->vrf_master_ifindex = new_vrf_master_index;
				ifp->vrf_master_ifp = if_get_by_ifindex(ifp->vrf_master_ifindex);
				if (ifp->vrf_master_ifp) {
					/* Only keep the master if it is marked as a VRF master,
					 * i.e. it points at itself */
					if (ifp->vrf_master_ifp->vrf_master_ifp != ifp->vrf_master_ifp)
						ifp->vrf_master_ifp = NULL;
					ifp->vrf_master_ifindex = 0;	/* Make sure this isn't used at runtime */

					update_vmac_vrfs(ifp);
				}
			}
		} else {
			/* No master attribute: drop any previously recorded master */
			ifp->vrf_master_ifindex = 0;
			if (ifp->vrf_master_ifp) {
				ifp->vrf_master_ifp = NULL;

				update_vmac_vrfs(ifp);
			}
		}
	}
#endif

	ifp->rp_filter = UINT_MAX;	/* We have not read it yet */
#endif

	ifp->ifi_flags = ifi->ifi_flags;

	return true;
}

/*
 * Netlink interface link lookup filter.
 *
 * Handles RTM_NEWLINK replies to a link query: finds (or creates) the
 * interface_t named in the message and populates it. Other message types are
 * ignored. Returns 0 on success or when ignored, -1 if the message is too
 * short, carries no interface name, or the interface cannot be populated.
 */
static int
netlink_if_link_filter(__attribute__((unused)) struct sockaddr_nl *snl, struct nlmsghdr *h)
{
	struct rtattr *attrs[IFLA_MAX + 1];
	struct ifinfomsg *link_msg = NLMSG_DATA(h);
	interface_t *ifp;
	size_t attr_len;

	if (h->nlmsg_type != RTM_NEWLINK)
		return 0;

	if (h->nlmsg_len < NLMSG_LENGTH(sizeof (struct ifinfomsg)))
		return -1;
	attr_len = h->nlmsg_len - NLMSG_LENGTH(sizeof (struct ifinfomsg));

	/* The interface is identified by name, so the name attribute is mandatory */
	parse_rtattr(attrs, IFLA_MAX, IFLA_RTA(link_msg), attr_len);
	if (!attrs[IFLA_IFNAME])
		return -1;

	/* Look the interface up by name, asking for the netlink create mode */
	ifp = if_get_by_ifname((char *) RTA_DATA(attrs[IFLA_IFNAME]), IF_CREATE_NETLINK);

	/* Fill the interface structure */
	if (!netlink_if_link_populate(ifp, attrs, link_msg))
		return -1;

	if (ifp->ifindex)
		update_interface_flags(ifp, link_msg->ifi_flags);

	return 0;
}

/* Interfaces lookup bootstrap function.
 * Sends an RTM_GETLINK request on the command socket (for the named
 * interface, or for all interfaces if name is NULL) and parses the reply.
 * Returns -1 if the request could not be sent, otherwise the parse result. */
int
netlink_interface_lookup(char *name)
{
	int sent;

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33)
	/* RTM_GETLINK didn't support selecting by
	 * interface name until Linux v2.6.33 */
	name = NULL;
#endif

	sent = netlink_request(&nl_cmd, AF_PACKET, RTM_GETLINK, name);
	if (sent < 0)
		return -1;

	return netlink_parse_info(netlink_if_link_filter, &nl_cmd, NULL, false);
}
#endif

/* Addresses lookup bootstrap function.
 * Queries the kernel for all IPv4 addresses and then all IPv6 addresses on
 * the command socket, feeding each reply to netlink_if_address_filter.
 * Returns -1 if a request cannot be sent, the first non-zero parse status,
 * or 0 if both lookups succeed. */
static int
netlink_address_lookup(void)
{
	static const unsigned char families[] = { AF_INET, AF_INET6 };
	size_t i;
	int status = 0;

	/* IPv4 first, then IPv6; stop at the first failure */
	for (i = 0; i < sizeof(families) / sizeof(families[0]); i++) {
		if (netlink_request(&nl_cmd, families[i], RTM_GETADDR, NULL) < 0)
			return -1;

		status = netlink_parse_info(netlink_if_address_filter, &nl_cmd, NULL, false);
		if (status)
			return status;
	}

	return status;
}

#ifdef _WITH_VRRP_
/*
 * Netlink flag Link update.
 *
 * Handles RTM_NEWLINK / RTM_DELLINK notifications: interface deletion,
 * interface rename, VRF master changes, devconf changes and the appearance of
 * new interfaces, then passes the resulting flags to update_interface_flags().
 *
 * Returns 0 when the message was handled or ignored, -1 if it is too short,
 * carries no interface name, or a new interface could not be populated.
 */
static int
netlink_link_filter(__attribute__((unused)) struct sockaddr_nl *snl, struct nlmsghdr *h)
{
	struct ifinfomsg *ifi;
	struct rtattr *tb[IFLA_MAX + 1];
	interface_t *ifp;
	size_t len;
	char *name;
#ifdef _HAVE_VRF_
	uint32_t new_master_index;
	interface_t *new_master_ifp;
#endif

	if (!(h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK))
		return 0;

	if (h->nlmsg_len < NLMSG_LENGTH(sizeof (struct ifinfomsg)))
		return -1;
	len = h->nlmsg_len - NLMSG_LENGTH(sizeof (struct ifinfomsg));

	/* Interface name lookup */
	ifi = NLMSG_DATA(h);
	parse_rtattr(tb, IFLA_MAX, IFLA_RTA(ifi), len);
	if (tb[IFLA_IFNAME] == NULL)
		return -1;
	name = (char *)RTA_DATA(tb[IFLA_IFNAME]);

	/* Ignore NEWLINK messages with ifi_change == 0 and IFLA_WIRELESS set
	   See for example https://bugs.chromium.org/p/chromium/issues/detail?id=501982 */
	if (!ifi->ifi_change && tb[IFLA_WIRELESS] && h->nlmsg_type == RTM_NEWLINK)
		return 0;

	/* find the interface_t. If the interface doesn't exist in the interface
	 * list and this is a new interface add it to the interface list.
	 * If an interface with the same name exists overwrite the older
	 * structure and fill it with the new interface information.
	 */
	ifp = if_get_by_ifindex((ifindex_t)ifi->ifi_index);

	if (ifp) {
		if (h->nlmsg_type == RTM_DELLINK) {
			if (!LIST_ISEMPTY(ifp->tracking_vrrp) || __test_bit(LOG_DETAIL_BIT, &debug))
				log_message(LOG_INFO, "Interface %s deleted", ifp->ifname);
#ifndef _DEBUG_
			if (prog_type != PROG_TYPE_VRRP) {
				ifp->ifi_flags = 0;
				ifp->ifindex = 0;
			} else
#endif
				cleanup_lost_interface(ifp);

#ifdef _HAVE_VRRP_VMAC_
			/* If this was a vmac we created, create it again, so long as the underlying i/f exists */
			if (ifp->is_ours
#ifndef _DEBUG_
			    && prog_type == PROG_TYPE_VRRP
#endif
			   )
				thread_add_event(master, recreate_vmac_thread, ifp, 0);
#endif
		} else {
			if (strcmp(ifp->ifname, name)) {
				/* The name can change, so handle that here */
				log_message(LOG_INFO, "Interface name has changed from %s to %s", ifp->ifname, name);

#ifndef _DEBUG_
				if (prog_type != PROG_TYPE_VRRP) {
					ifp->ifi_flags = 0;
					ifp->ifindex = 0;
				} else
#endif
					cleanup_lost_interface(ifp);

#ifdef _HAVE_VRRP_VMAC_
				/* If this was one of our vmacs, create it again */
				if (ifp->is_ours
#ifndef _DEBUG_
				    && prog_type == PROG_TYPE_VRRP
#endif
				   ) {
					/* Change the mac address on the interface, so we can create a new vmac */

					/* Now create our VMAC again */
					if (ifp->base_ifp->ifindex)
						thread_add_event(master, recreate_vmac_thread, ifp, 0);
				}
				else
#endif
					ifp = NULL;	/* Set ifp to null, to force creating a new interface_t */
			} else if (ifp->ifindex) {
#ifdef _HAVE_VRF_
				/* Now check if the VRF info is changed */
				if (tb[IFLA_MASTER]) {
					new_master_index = *(uint32_t *)RTA_DATA(tb[IFLA_MASTER]);
					new_master_ifp = if_get_by_ifindex(new_master_index);
				} else
					new_master_ifp = NULL;
				if (new_master_ifp != ifp->vrf_master_ifp) {
					ifp->vrf_master_ifp = new_master_ifp;
					update_vmac_vrfs(ifp);
				}
#endif

#ifdef _HAVE_IPV4_DEVCONF_
				if (tb[IFLA_AF_SPEC])
					parse_af_spec(tb[IFLA_AF_SPEC], ifp);
#endif

				/* Ignore interface if we are using linkbeat on it */
				if (ifp->linkbeat_use_polling)
					return 0;
			} else
				ifp = NULL;
		}
	}

	if (!ifp) {
		if (h->nlmsg_type == RTM_NEWLINK) {
			ifp = if_get_by_ifname(name, IF_CREATE_NETLINK);

			/* Since the garp_delay and tracking_vrrp are set up by name,
			 * it is reasonable to preserve them.
			 * If what is created is a vmac, we could end up in a complete mess. */
			garp_delay_t *sav_garp_delay = ifp->garp_delay;
			list sav_tracking_vrrp = ifp->tracking_vrrp;

			/* Everything else in the structure is reset before repopulating */
			memset(ifp, 0, sizeof(interface_t));

			ifp->garp_delay = sav_garp_delay;
			ifp->tracking_vrrp = sav_tracking_vrrp;

			if (!netlink_if_link_populate(ifp, tb, ifi))
				return -1;

			if (__test_bit(LOG_DETAIL_BIT, &debug))
				log_message(LOG_INFO, "Interface %s added", ifp->ifname);

			update_added_interface(ifp);

			/* We need to see a transition to up, so mark it down for now */
			ifp->ifi_flags &= ~(IFF_UP | IFF_RUNNING);
		} else {
			/* Log the attribute payload (name); tb[IFLA_IFNAME] itself
			 * points at the rtattr header, not at the string. */
			if (__test_bit(LOG_DETAIL_BIT, &debug))
				log_message(LOG_INFO, "Unknown interface %s deleted", name);
			return 0;
		}
	}

	/* Update flags. Flags == 0 means interface deleted. */
	update_interface_flags(ifp, (h->nlmsg_type == RTM_DELLINK) ? 0 : ifi->ifi_flags);

	return 0;
}

#ifdef _HAVE_FIB_ROUTING_
static int
netlink_route_filter(__attribute__((unused)) struct sockaddr_nl *snl, struct nlmsghdr *h)
{
	struct rtmsg *rt;
	struct rtattr *tb[RTA_MAX + 1];
	size_t len;
	vrrp_t *vrrp;
	ip_route_t *route;

	if (h->nlmsg_type != RTM_NEWROUTE && h->nlmsg_type != RTM_DELROUTE)
		return 0;

	if (h->nlmsg_len < NLMSG_LENGTH(sizeof(*rt)))
		return -1;

	rt = NLMSG_DATA(h);

	if (rt->rtm_protocol != RTPROT_KEEPALIVED) {
		/* It is not a route we are monitoring - ignore it */
		return 0;
	}

	/* Only IPv4 and IPv6 are valid for us */
	if (rt->rtm_family != AF_INET && rt->rtm_family != AF_INET6)
		return 0;

	len = h->nlmsg_len - NLMSG_LENGTH(sizeof (struct rtmsg));

	parse_rtattr(tb, RTA_MAX, RTM_RTA(rt), len);

	if (!(route = route_is_ours(rt, tb, &vrrp)))
		return 0;

	route->set = (h->nlmsg_type == RTM_NEWROUTE);

	/* Matching route */
	if (h->nlmsg_type == RTM_NEWROUTE) {
		/* If we haven't specified a dev for the route, save the link the route
		 * has been added to. */
		if (tb[RTA_OIF]) {
			route->configured_ifindex = *(uint32_t*)RTA_DATA(tb[RTA_OIF]);
			if (route->oif && route->oif->ifindex != route->configured_ifindex)
				log_message(LOG_INFO, "route added index %d != config index %d", route->configured_ifindex, route->oif->ifindex);
		}
		else
			log_message(LOG_INFO, "New route doesn't have i/f index");

		return 0;
	}

	/* We are only interested in route deletions now */

	if (route->dont_track)
		return 0;

	if (vrrp)
		set_vrrp_backup(vrrp);
	else
		reinstate_static_route(route);

	return 0;
}

/*
 * Handle RTM_NEWRULE / RTM_DELRULE notifications.
 *
 * Only deletions of rules recognised by rule_is_ours() are acted upon: the
 * rule is marked as no longer set and, unless it is flagged dont_track, the
 * owning vrrp instance is sent to backup or (with no owning instance) the
 * static rule is reinstated.
 *
 * Returns -1 if the message is too short, 0 otherwise.
 */
static int
netlink_rule_filter(__attribute__((unused)) struct sockaddr_nl *snl, struct nlmsghdr *h)
{
	struct fib_rule_hdr *frh;
	struct rtattr *tb[FRA_MAX + 1];
	size_t len;
	vrrp_t *vrrp;
	ip_rule_t *ip_rule;

	if (h->nlmsg_type != RTM_NEWRULE && h->nlmsg_type != RTM_DELRULE)
		return 0;

	if (h->nlmsg_len < NLMSG_LENGTH(sizeof(*frh)))
		return -1;

	frh = NLMSG_DATA(h);

	/* Only IPv4 and IPv6 are valid for us */
	if (frh->family != AF_INET && frh->family != AF_INET6)
		return 0;

	/* The payload header is a struct fib_rule_hdr, so size the attribute
	 * area with that type, matching the length check above. */
	len = h->nlmsg_len - NLMSG_LENGTH(sizeof(*frh));

	/* NOTE(review): RTM_RTA() is the rtmsg accessor; using it here presumably
	 * relies on rtmsg and fib_rule_hdr having the same aligned size - confirm. */
	parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len);

#if HAVE_DECL_FRA_PROTOCOL
	if (tb[FRA_PROTOCOL] &&
	    *(uint8_t *)RTA_DATA(tb[FRA_PROTOCOL]) != RTPROT_KEEPALIVED) {
		/* It is not a rule we are monitoring - ignore it */
		return 0;
	}
#endif

	/* We are only interested in rule deletions now */
	if (h->nlmsg_type != RTM_DELRULE)
		return 0;

	if (!(ip_rule = rule_is_ours(frh, tb, &vrrp)))
		return 0;

	ip_rule->set = false;

	if (ip_rule->dont_track)
		return 0;

	if (vrrp)
		set_vrrp_backup(vrrp);
	else
		reinstate_static_rule(ip_rule);

	return 0;
}
#endif
#endif

/* Netlink kernel message reflection.
 * Dispatches a message received on the monitor socket to the filter for its
 * type and returns that filter's result; message types with no handler
 * return 0. */
static int
netlink_broadcast_filter(struct sockaddr_nl *snl, struct nlmsghdr *h)
{
	switch (h->nlmsg_type) {
	case RTM_NEWADDR:
	case RTM_DELADDR:
		return netlink_if_address_filter(snl, h);

#ifdef _HAVE_FIB_ROUTING_
	case RTM_NEWROUTE:
	case RTM_DELROUTE:
		return netlink_route_filter(snl, h);

	case RTM_NEWRULE:
	case RTM_DELRULE:
		return netlink_rule_filter(snl, h);
#endif

	case RTM_NEWLINK:
	case RTM_DELLINK:
		/* It appears that older kernels (certainly 2.6.32) can
		 * send RTM_NEWLINK (but not RTM_DELLINK) messages even
		 * when RTNLGRP_LINK has not been subscribed to. This
		 * occurs when the link is set to up state.
		 * Only the VRRP process is interested in link messages. */
#ifdef _WITH_VRRP_
#ifndef _DEBUG_
		if (prog_type == PROG_TYPE_VRRP)
#endif
			return netlink_link_filter(snl, h);
#endif
		break;

	default:
		log_message(LOG_INFO,
		       "Kernel is reflecting an unknown netlink nlmsg_type: %d",
		       h->nlmsg_type);
		break;
	}

	return 0;
}

/* Scheduler callback for a netlink socket: unless the thread fired because of
 * a read timeout, process everything pending on the socket, then register
 * the read thread again. Always returns 0. */
static int
kernel_netlink(thread_t * thread)
{
	nl_handle_t *handle = THREAD_ARG(thread);
	const bool timed_out = (thread->type == THREAD_READ_TIMEOUT);

	if (!timed_out)
		netlink_parse_info(netlink_broadcast_filter, handle, NULL, true);

	/* Re-arm: each registration is for a single read */
	handle->thread = thread_add_read(master, kernel_netlink, handle,
					 handle->fd, TIMER_NEVER);

	return 0;
}

#ifdef _WITH_VRRP_
/* Process any messages currently queued on the kernel monitor socket.
 * Does nothing if the monitor socket is not open; as elsewhere in this file,
 * a descriptor is only considered open when it is greater than zero. */
void
kernel_netlink_poll(void)
{
	if (nl_kernel.fd <= 0)
		return;

	netlink_parse_info(netlink_broadcast_filter, &nl_kernel, NULL, true);
}
#endif

/* Apply the configured receive buffer sizes (and "force" settings) from
 * global_data to the netlink sockets.
 * In _DEBUG_ builds both the monitor and cmd sockets are always set, using
 * the vrrp settings if built with VRRP and the lvs settings otherwise.
 * In normal builds the settings used depend on prog_type; for the checker
 * process only the monitor socket is set. */
void
kernel_netlink_set_recv_bufs(void)
{
#ifdef _DEBUG_
#ifdef _WITH_VRRP_
	netlink_set_rx_buf_size(&nl_kernel, global_data->vrrp_netlink_monitor_rcv_bufs, global_data->vrrp_netlink_monitor_rcv_bufs_force);
	netlink_set_rx_buf_size(&nl_cmd, global_data->vrrp_netlink_cmd_rcv_bufs, global_data->vrrp_netlink_cmd_rcv_bufs_force);
#else
	netlink_set_rx_buf_size(&nl_kernel, global_data->lvs_netlink_monitor_rcv_bufs, global_data->lvs_netlink_monitor_rcv_bufs_force);
	netlink_set_rx_buf_size(&nl_cmd, global_data->lvs_netlink_cmd_rcv_bufs, global_data->lvs_netlink_cmd_rcv_bufs_force);
#endif
#else
#ifdef _WITH_VRRP_
	if (prog_type == PROG_TYPE_VRRP) {
		netlink_set_rx_buf_size(&nl_kernel, global_data->vrrp_netlink_monitor_rcv_bufs, global_data->vrrp_netlink_monitor_rcv_bufs_force);
		netlink_set_rx_buf_size(&nl_cmd, global_data->vrrp_netlink_cmd_rcv_bufs, global_data->vrrp_netlink_cmd_rcv_bufs_force);
	}
#endif
#ifdef _WITH_LVS_
	/* kernel_netlink_init() closes the checker's cmd socket once the initial
	 * lookup is done, so only the monitor socket is set here */
	if (prog_type == PROG_TYPE_CHECKER)
		netlink_set_rx_buf_size(&nl_kernel, global_data->lvs_netlink_monitor_rcv_bufs, global_data->lvs_netlink_monitor_rcv_bufs_force);
#endif
#endif
}

/* Close the kernel monitor (broadcast reflector) netlink socket */
void
kernel_netlink_close_monitor(void)
{
	netlink_close(&nl_kernel);
}

/* Close the netlink command socket */
void
kernel_netlink_close_cmd(void)
{
	netlink_close(&nl_cmd);
}

/* Close both netlink sockets: the monitor socket first, then the command socket */
void
kernel_netlink_close(void)
{
	kernel_netlink_close_monitor();
	kernel_netlink_close_cmd();
}

/* Open the netlink monitor and command sockets, register the read thread for
 * the monitor socket, and perform the initial address lookup.
 * If the monitor socket is already open (reload), only the read thread is
 * registered again. */
void
kernel_netlink_init(void)
{
	/*
	 * Prepare netlink kernel broadcast channel
	 * subscription. The VRRP process subscribes to the
	 * LINK, IPv4 IFADDR and IPv6 IFADDR groups; the
	 * checker process does not need link messages and
	 * subscribes to the two IFADDR groups only.
	 */

	/* If the netlink kernel fd is already open, just register a read thread.
	 * This will happen at reload. */
	if (nl_kernel.fd > 0) {
		nl_kernel.thread = thread_add_read(master, kernel_netlink, &nl_kernel, nl_kernel.fd, TIMER_NEVER);
		return;
	}

#ifdef _DEBUG_
#ifdef _WITH_VRRP_
	netlink_socket(&nl_kernel, global_data->vrrp_netlink_monitor_rcv_bufs, global_data->vrrp_netlink_monitor_rcv_bufs_force,
			SOCK_NONBLOCK, RTNLGRP_LINK, RTNLGRP_IPV4_IFADDR, RTNLGRP_IPV6_IFADDR, 0);
#else
	netlink_socket(&nl_kernel, global_data->lvs_netlink_monitor_rcv_bufs, global_data->lvs_netlink_monitor_rcv_bufs_force,
			SOCK_NONBLOCK, RTNLGRP_IPV4_IFADDR, RTNLGRP_IPV6_IFADDR, 0);
#endif
#else
#ifdef _WITH_VRRP_
	if (prog_type == PROG_TYPE_VRRP)
		netlink_socket(&nl_kernel, global_data->vrrp_netlink_monitor_rcv_bufs, global_data->vrrp_netlink_monitor_rcv_bufs_force,
				SOCK_NONBLOCK, RTNLGRP_LINK, RTNLGRP_IPV4_IFADDR, RTNLGRP_IPV6_IFADDR, 0);
#endif
#ifdef _WITH_LVS_
	if (prog_type == PROG_TYPE_CHECKER)
		netlink_socket(&nl_kernel, global_data->lvs_netlink_monitor_rcv_bufs, global_data->lvs_netlink_monitor_rcv_bufs_force,
				SOCK_NONBLOCK, RTNLGRP_IPV4_IFADDR, RTNLGRP_IPV6_IFADDR, 0);
#endif
#endif

	if (nl_kernel.fd > 0) {
		log_message(LOG_INFO, "Registering Kernel netlink reflector");
		nl_kernel.thread = thread_add_read(master, kernel_netlink, &nl_kernel, nl_kernel.fd,
						   TIMER_NEVER);
	} else
		log_message(LOG_INFO, "Error while registering Kernel netlink reflector channel");

	/* Prepare netlink command channel. The cmd socket is used synchronously.*/
#ifdef _DEBUG_
#ifdef _WITH_VRRP_
	netlink_socket(&nl_cmd, global_data->vrrp_netlink_cmd_rcv_bufs, global_data->vrrp_netlink_cmd_rcv_bufs_force, 0, 0);
#else
	netlink_socket(&nl_cmd, global_data->lvs_netlink_cmd_rcv_bufs, global_data->lvs_netlink_cmd_rcv_bufs_force, 0, 0);
#endif
#else
#ifdef _WITH_VRRP_
	if (prog_type == PROG_TYPE_VRRP)
		netlink_socket(&nl_cmd, global_data->vrrp_netlink_cmd_rcv_bufs, global_data->vrrp_netlink_cmd_rcv_bufs_force, 0, 0);
#endif
#ifdef _WITH_LVS_
	if (prog_type == PROG_TYPE_CHECKER)
		netlink_socket(&nl_cmd, global_data->lvs_netlink_cmd_rcv_bufs, global_data->lvs_netlink_cmd_rcv_bufs_force, 0, 0);
#endif
#endif
	if (nl_cmd.fd > 0)
		log_message(LOG_INFO, "Registering Kernel netlink command channel");
	else
		log_message(LOG_INFO, "Error while registering Kernel netlink cmd channel");

	/* Start with netlink interface and address lookup */
#ifdef _WITH_VRRP_
#ifndef _DEBUG_
	if (prog_type == PROG_TYPE_VRRP)
#endif
		init_interface_queue();
#endif

	/* NOTE(review): the lookup is attempted even if opening the cmd socket
	 * failed above, and its return value is not checked. */
	netlink_address_lookup();

	/* The checker process has no further use for the cmd socket */
#if !defined _DEBUG_ && defined _WITH_LVS_
	if (prog_type == PROG_TYPE_CHECKER)
		kernel_netlink_close_cmd();
#endif
}

#ifdef _WITH_VRRP_
/* Open the command socket just long enough to initialise the interface
 * queue and read the current addresses, reporting any failure on stderr,
 * then close the socket again. */
void
kernel_netlink_read_interfaces(void)
{
	int status;

	/* This function is only built with _WITH_VRRP_, so the vrrp buffer
	 * settings always apply */
	netlink_socket(&nl_cmd, global_data->vrrp_netlink_cmd_rcv_bufs, global_data->vrrp_netlink_cmd_rcv_bufs_force, 0, 0);

	if (nl_cmd.fd <= 0)
		fprintf(stderr, "Error while registering Kernel netlink cmd channel\n");

	init_interface_queue();

	status = netlink_address_lookup();
	if (status != 0)
		fprintf(stderr, "netlink_address_lookup() returned %d\n", status);

	kernel_netlink_close_cmd();
}
#endif

#ifdef THREAD_DUMP
/* Register the address of kernel_netlink under its name for THREAD_DUMP builds */
void
register_keepalived_netlink_addresses(void)
{
	register_thread_address("kernel_netlink", kernel_netlink);
}
#endif