/* * Soft: Keepalived is a failover program for the LVS project * . It monitor & manipulate * a loadbalanced server pool using multi-layer checks. * * Part: Interfaces manipulation. * * Author: Alexandre Cassen, * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for more details. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * * Copyright (C) 2001-2017 Alexandre Cassen, */ #include "config.h" /* global include */ #include #include #include #include #include #include #include #include #include #include #if defined _HAVE_NETINET_LINUX_IF_ETHER_H_COLLISION_ && \ defined _LINUX_IF_ETHER_H && \ !defined _NETINET_IF_ETHER_H /* musl libc throws an error if is included before , * so we stop being included if has been included. */ #define _NETINET_IF_ETHER_H #endif #if !HAVE_DECL_SOCK_CLOEXEC #include "old_socket.h" #endif #include /* needed to get correct values for SIOC* */ #include #include #include #include /* local include */ #include "global_data.h" #include "vrrp.h" #include "vrrp_if.h" #include "vrrp_daemon.h" #include "keepalived_netlink.h" #include "utils.h" #include "logger.h" #ifdef _HAVE_VRRP_VMAC_ #include "vrrp_vmac.h" #include "bitops.h" #endif #include "vrrp_track.h" #include "vrrp_scheduler.h" #include "vrrp_iproute.h" #ifdef THREAD_DUMP #include "scheduler.h" #endif /* Local vars */ static list if_queue; static struct ifreq ifr; static list old_garp_delay; /* Global vars */ list garp_delay; /* Helper functions */ /* Return interface from interface index */ interface_t * if_get_by_ifindex(ifindex_t ifindex) { interface_t *ifp; element e; if (LIST_ISEMPTY(if_queue)) return NULL; for (e = LIST_HEAD(if_queue); e; ELEMENT_NEXT(e)) { ifp = ELEMENT_DATA(e); if (ifp->ifindex == ifindex) return ifp; } return NULL; } interface_t * if_get_by_ifname(const char *ifname, if_lookup_t create) { interface_t *ifp; element e; LIST_FOREACH(if_queue, ifp, e) { if (!strcmp(ifp->ifname, ifname)) return ifp; } if (create == IF_NO_CREATE || (create == IF_CREATE_IF_DYNAMIC && (!global_data || !global_data->dynamic_interfaces))) { if (create == IF_CREATE_IF_DYNAMIC) non_existent_interface_specified = true; return NULL; } if (!(ifp = MALLOC(sizeof(interface_t)))) return NULL; strcpy(ifp->ifname, ifname); #ifdef _HAVE_VRRP_VMAC_ ifp->base_ifp = ifp; #endif if_add_queue(ifp); if (create == IF_CREATE_IF_DYNAMIC) log_message(LOG_INFO, "Configuration specifies interface %s which doesn't currently exist - will use if created", ifname); return ifp; } #ifdef _HAVE_VRRP_VMAC_ /* Set the base_ifp for VMACs and vrf_master_ifp for VRFs - only used at startup */ static void set_base_ifp(void) { interface_t *ifp; #ifdef _HAVE_VRF_ interface_t *master_ifp; #endif element e; if (LIST_ISEMPTY(if_queue)) return; LIST_FOREACH(if_queue, ifp, e) { if (!ifp->base_ifp && ifp->base_ifindex) { ifp->base_ifp = if_get_by_ifindex(ifp->base_ifindex); ifp->base_ifindex = 0; /* This is only used at startup, so ensure not used later */ } #ifdef _HAVE_VRF_ /* Now see if the interface is enslaved to a VRF */ if (ifp->vrf_master_ifindex) { master_ifp = if_get_by_ifindex(ifp->vrf_master_ifindex); if (master_ifp && master_ifp->vrf_master_ifp == master_ifp) ifp->vrf_master_ifp = master_ifp; ifp->vrf_master_ifindex = 0; } #endif } } #endif /* Return the interface list itself */ list get_if_list(void) { return if_queue; } void reset_interface_queue(void) { old_garp_delay = garp_delay; interface_t *ifp; element e; garp_delay = NULL; LIST_FOREACH(if_queue, ifp, e) { ifp->linkbeat_use_polling = false; ifp->garp_delay = NULL; free_list(&ifp->tracking_vrrp); } } /* MII Transceiver Registers poller functions */ static uint16_t if_mii_read(int fd, uint16_t phy_id, uint16_t reg_num) { struct mii_ioctl_data *data = (struct mii_ioctl_data *)&ifr.ifr_data; data->phy_id = phy_id; data->reg_num = reg_num; if (ioctl(fd, SIOCGMIIREG, &ifr) < 0) { log_message(LOG_ERR, "SIOCGMIIREG on %s failed: %s", ifr.ifr_name, strerror(errno)); return 0xffff; } return data->val_out; } #ifdef _INCLUDE_UNUSED_CODE_ static void if_mii_dump(const uint16_t *mii_regs, size_t num_regs unsigned phy_id) { int mii_reg; printf(" MII PHY #%d transceiver registers:", phy_id); for (mii_reg = 0; mii_reg < num_regs; mii_reg++) printf("%s %4.4x", (mii_reg % 8) == 0 ? "\n ":"", mii_regs[mii_reg]); printf("\n"); } #endif static int if_mii_status(const int fd) { struct mii_ioctl_data *data = (struct mii_ioctl_data *)&ifr.ifr_data; uint16_t phy_id = data->phy_id; uint16_t bmsr, new_bmsr; if (if_mii_read(fd, phy_id, MII_BMCR) == 0xffff || (bmsr = if_mii_read(fd, phy_id, MII_BMSR)) == 0) { log_message(LOG_ERR, "No MII transceiver present for %s !!!", ifr.ifr_name); return -1; } // if_mii_dump(mii_regs, sizeof(mii_regs)/ sizeof(mii_regs[0], phy_id); /* * For Basic Mode Status Register (BMSR). * Sticky field (Link established & Jabber detected), we need to read * a second time the BMSR to get current status. */ new_bmsr = if_mii_read(fd, phy_id, MII_BMSR); // printf(" \nBasic Mode Status Register 0x%4.4x ... 0x%4.4x\n", bmsr, new_bmsr); if (bmsr & BMSR_LSTATUS) return LINK_UP; else if (new_bmsr & BMSR_LSTATUS) return LINK_UP; else return LINK_DOWN; } static int if_mii_probe(const char *ifname) { struct mii_ioctl_data *data = (struct mii_ioctl_data *)&ifr.ifr_data; uint16_t phy_id; int fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0); int status; if (fd < 0) return -1; #if !HAVE_DECL_SOCK_CLOEXEC if (set_sock_flags(fd, F_SETFD, FD_CLOEXEC)) log_message(LOG_INFO, "Unable to set CLOEXEC on mii_probe socket - %s (%d)", strerror(errno), errno); #endif memset(&ifr, 0, sizeof (struct ifreq)); strcpy(ifr.ifr_name, ifname); if (ioctl(fd, SIOCGMIIPHY, &ifr) < 0) { close(fd); return -1; } /* check if the driver reports BMSR using the MII interface, as we * will need this and we already know that some don't support it. */ phy_id = data->phy_id; /* save it in case it is overwritten */ data->reg_num = MII_BMSR; if (ioctl(fd, SIOCGMIIREG, &ifr) < 0) { close(fd); return -1; } data->phy_id = phy_id; /* Dump the MII transceiver */ status = if_mii_status(fd); close(fd); return status; } static int if_ethtool_status(const int fd) { struct ethtool_value edata; edata.cmd = ETHTOOL_GLINK; ifr.ifr_data = (caddr_t) & edata; if (!ioctl(fd, SIOCETHTOOL, &ifr)) return (edata.data) ? 1 : 0; return -1; } static int if_ethtool_probe(const char *ifname) { int fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0); int status; if (fd < 0) return -1; #if !HAVE_DECL_SOCK_CLOEXEC if (set_sock_flags(fd, F_SETFD, FD_CLOEXEC)) log_message(LOG_INFO, "Unable to set CLOEXEC on ethtool_probe socket - %s (%d)", strerror(errno), errno); #endif memset(&ifr, 0, sizeof (struct ifreq)); strcpy(ifr.ifr_name, ifname); status = if_ethtool_status(fd); close(fd); return status; } /* Returns false if interface is down */ static bool if_ioctl_flags(interface_t *ifp) { int fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0); if (fd < 0) return true; #if !HAVE_DECL_SOCK_CLOEXEC if (set_sock_flags(fd, F_SETFD, FD_CLOEXEC)) log_message(LOG_INFO, "Unable to set CLOEXEC on ioctl_flags socket - %s (%d)", strerror(errno), errno); #endif memset(&ifr, 0, sizeof (struct ifreq)); strcpy(ifr.ifr_name, ifp->ifname); if (ioctl(fd, SIOCGIFFLAGS, &ifr) < 0) { close(fd); return true; } close(fd); return FLAGS_UP(ifr.ifr_flags); } /* Interfaces lookup */ static void free_if(void *data) { interface_t *ifp = data; free_list(&ifp->tracking_vrrp); FREE(data); } /* garp_delay facility function */ static void free_garp_delay(void *data) { FREE(data); } static void dump_garp_delay(FILE *fp, void *data) { garp_delay_t *gd = data; char time_str[26]; interface_t *ifp; element e; conf_write(fp, "------< GARP delay group %d >------", gd->aggregation_group); if (gd->have_garp_interval) { conf_write(fp, " GARP interval = %g", gd->garp_interval.tv_sec + ((double)gd->garp_interval.tv_usec) / 1000000); if (!ctime_r(&gd->garp_next_time.tv_sec, time_str)) strcpy(time_str, "invalid time "); conf_write(fp, " GARP next time %ld.%6.6ld (%.19s.%6.6ld)", gd->garp_next_time.tv_sec, gd->garp_next_time.tv_usec, time_str, gd->garp_next_time.tv_usec); } if (gd->have_gna_interval) { conf_write(fp, " GNA interval = %g", gd->gna_interval.tv_sec + ((double)gd->gna_interval.tv_usec) / 1000000); if (!ctime_r(&gd->gna_next_time.tv_sec, time_str)) strcpy(time_str, "invalid time "); conf_write(fp, " GNA next time %ld.%6.6ld (%.19s.%6.6ld)", gd->gna_next_time.tv_sec, gd->gna_next_time.tv_usec, time_str, gd->gna_next_time.tv_usec); } else if (!gd->have_garp_interval) conf_write(fp, " No configuration"); conf_write(fp, " Interfaces"); LIST_FOREACH(if_queue, ifp, e) { if (ifp->garp_delay == gd) conf_write(fp, " %s", ifp->ifname); } } void alloc_garp_delay(void) { if (!LIST_EXISTS(garp_delay)) garp_delay = alloc_list(free_garp_delay, dump_garp_delay); list_add(garp_delay, MALLOC(sizeof(garp_delay_t))); } void set_default_garp_delay(void) { garp_delay_t default_delay; element e; interface_t *ifp; garp_delay_t *delay; vrrp_t *vrrp; if (global_data->vrrp_garp_interval) { default_delay.garp_interval.tv_sec = global_data->vrrp_garp_interval / 1000000; default_delay.garp_interval.tv_usec = global_data->vrrp_garp_interval % 1000000; default_delay.have_garp_interval = true; } if (global_data->vrrp_gna_interval) { default_delay.gna_interval.tv_sec = global_data->vrrp_gna_interval / 1000000; default_delay.gna_interval.tv_usec = global_data->vrrp_gna_interval % 1000000; default_delay.have_gna_interval = true; } /* Allocate a delay structure to each physical interface that doesn't have one and * is being used by a VRRP instance */ LIST_FOREACH(vrrp_data->vrrp, vrrp, e) { ifp = IF_BASE_IFP(vrrp->ifp); if (!ifp->garp_delay) { alloc_garp_delay(); delay = LIST_TAIL_DATA(garp_delay); *delay = default_delay; ifp->garp_delay = delay; } } } static void dump_if(FILE *fp, void *data) { interface_t *ifp = data; char addr_str[INET6_ADDRSTRLEN]; char *mac_buf; size_t mac_buf_len; char *p; size_t i; conf_write(fp, " Name = %s", ifp->ifname); conf_write(fp, " index = %u", ifp->ifindex); conf_write(fp, " IPv4 address = %s", ifp->sin_addr.s_addr ? inet_ntop2(ifp->sin_addr.s_addr) : "(none)"); inet_ntop(AF_INET6, &ifp->sin6_addr, addr_str, sizeof(addr_str)); conf_write(fp, " IPv6 address = %s", ifp->sin6_addr.s6_addr32[0] ? addr_str : "(none)"); if (ifp->hw_addr_len) { mac_buf_len = 3 * ifp->hw_addr_len; mac_buf = MALLOC(mac_buf_len); for (i = 0, p = mac_buf; i < ifp->hw_addr_len; i++) p += snprintf(p, mac_buf_len - (p - mac_buf), "%.2x%s", ifp->hw_addr[i], i < ifp->hw_addr_len -1 ? ":" : ""); conf_write(fp, " MAC = %s", mac_buf); for (i = 0, p = mac_buf; i < ifp->hw_addr_len; i++) p += snprintf(p, mac_buf_len - (p - mac_buf), "%.2x%s", ifp->hw_addr_bcast[i], i < ifp->hw_addr_len - 1 ? ":" : ""); conf_write(fp, " MAC broadcast = %s", mac_buf); FREE(mac_buf); } conf_write(fp, " State = %sUP, %sRUNNING%s%s%s%s%s%s", ifp->ifi_flags & IFF_UP ? "" : "not ", ifp->ifi_flags & IFF_RUNNING ? "" : "not ", !(ifp->ifi_flags & IFF_BROADCAST) ? ", no broadcast" : "", ifp->ifi_flags & IFF_LOOPBACK ? ", loopback" : "", ifp->ifi_flags & IFF_POINTOPOINT ? ", point to point" : "", ifp->ifi_flags & IFF_NOARP ? ", no arp" : "", !(ifp->ifi_flags & IFF_MULTICAST) ? ", no multicast" : "", #ifdef _HAVE_VRRP_VMAC_ ifp != ifp->base_ifp && !(ifp->base_ifp->ifi_flags & IFF_UP) ? ", master down" : "" #else "" #endif ); #ifdef _HAVE_VRRP_VMAC_ if (ifp->vmac_type && ifp->base_ifp) conf_write(fp, " VMAC type %s, underlying interface = %s, state = %sUP, %sRUNNING", ifp->vmac_type == MACVLAN_MODE_PRIVATE ? "private" : ifp->vmac_type == MACVLAN_MODE_VEPA ? "vepa" : ifp->vmac_type == MACVLAN_MODE_BRIDGE ? "bridge" : #ifdef MACVLAN_MODE_PASSTHRU ifp->vmac_type == MACVLAN_MODE_PASSTHRU ? "passthru" : #endif #ifdef MACVLAN_MODE_SOURCE ifp->vmac_type == MACVLAN_MODE_SOURCE ? "source" : #endif "unknown", ifp->base_ifp->ifname, ifp->base_ifp->ifi_flags & IFF_UP ? "" : "not ", ifp->base_ifp->ifi_flags & IFF_RUNNING ? "" : "not "); if (ifp->is_ours) conf_write(fp, " I/f created by keepalived"); else if (global_data->allow_if_changes && ifp->changeable_type) conf_write(fp, " Interface type/base can be changed"); if (ifp->seen_interface) conf_write(fp, " Done VRID check"); #endif conf_write(fp, " MTU = %d", ifp->mtu); switch (ifp->hw_type) { case ARPHRD_LOOPBACK: conf_write(fp, " HW Type = LOOPBACK"); break; case ARPHRD_ETHER: conf_write(fp, " HW Type = ETHERNET"); break; case ARPHRD_INFINIBAND: log_message(LOG_INFO, " HW Type = INFINIBAND"); break; default: conf_write(fp, " HW Type = UNKNOWN (%d)", ifp->hw_type); break; } if (!ifp->linkbeat_use_polling) conf_write(fp, " NIC netlink status update"); else if (IF_MII_SUPPORTED(ifp)) conf_write(fp, " NIC support MII regs"); else if (IF_ETHTOOL_SUPPORTED(ifp)) conf_write(fp, " NIC support ETHTOOL GLINK interface"); else conf_write(fp, " NIC ioctl refresh polling"); #ifdef _HAVE_VRF_ if (ifp->vrf_master_ifp == ifp) conf_write(fp, " VRF master"); else if (ifp->vrf_master_ifp) conf_write(fp, " VRF slave of %s", ifp->vrf_master_ifp->ifname); #endif if (ifp->garp_delay) { if (ifp->garp_delay->have_garp_interval) conf_write(fp, " Gratuitous ARP interval %ldms", ifp->garp_delay->garp_interval.tv_sec * 100 + ifp->garp_delay->garp_interval.tv_usec / (TIMER_HZ / 100)); if (ifp->garp_delay->have_gna_interval) conf_write(fp, " Gratuitous NA interval %ldms", ifp->garp_delay->gna_interval.tv_sec * 100 + ifp->garp_delay->gna_interval.tv_usec / (TIMER_HZ / 100)); if (ifp->garp_delay->aggregation_group) conf_write(fp, " Gratuitous ARP aggregation group %d", ifp->garp_delay->aggregation_group); } #ifdef _HAVE_VRRP_VMAC_ conf_write(fp, " Reset ARP config counter %d", ifp->reset_arp_config); conf_write(fp, " Original arp_ignore %d", ifp->arp_ignore); conf_write(fp, " Original arp_filter %d", ifp->arp_filter); if (ifp->rp_filter < UINT_MAX) conf_write(fp, " rp_filter %d", ifp->rp_filter); #endif conf_write(fp, " Original promote_secondaries %d", ifp->promote_secondaries); conf_write(fp, " Reset promote_secondaries counter %d", ifp->reset_promote_secondaries); conf_write(fp, " Tracking VRRP instances = %d", !LIST_ISEMPTY(ifp->tracking_vrrp) ? LIST_SIZE(ifp->tracking_vrrp) : 0); if (!LIST_ISEMPTY(ifp->tracking_vrrp)) dump_list(fp, ifp->tracking_vrrp); } static void init_if_queue(void) { if_queue = alloc_list(free_if, dump_if); } void if_add_queue(interface_t * ifp) { list_add(if_queue, ifp); } static int if_linkbeat_refresh_thread(thread_t * thread) { interface_t *ifp = THREAD_ARG(thread); bool if_up = true, was_up; was_up = IF_FLAGS_UP(ifp); if (IF_MII_SUPPORTED(ifp)) if_up = if_mii_probe(ifp->ifname); else if (IF_ETHTOOL_SUPPORTED(ifp)) if_up = if_ethtool_probe(ifp->ifname); /* * update ifp->flags to get the new IFF_RUNNING status. * Some buggy drivers need this... */ if (if_up) if_up = if_ioctl_flags(ifp); ifp->ifi_flags = if_up ? IFF_UP | IFF_RUNNING : 0; if (if_up != was_up) { log_message(LOG_INFO, "Linkbeat reports %s %s", ifp->ifname, if_up ? "up" : "down"); process_if_status_change(ifp); } /* Register next polling thread */ thread_add_timer(master, if_linkbeat_refresh_thread, ifp, POLLING_DELAY); return 0; } void init_interface_linkbeat(void) { interface_t *ifp; element e; int status; bool linkbeat_in_use = false; bool if_up; for (e = LIST_HEAD(if_queue); e; ELEMENT_NEXT(e)) { ifp = ELEMENT_DATA(e); if (!ifp->linkbeat_use_polling) continue; /* Don't poll an interface that we aren't using */ if (!ifp->tracking_vrrp) continue; #ifdef _HAVE_VRRP_VMAC_ /* netlink messages work for vmacs */ if (ifp->vmac_type) continue; #endif linkbeat_in_use = true; ifp->ifi_flags = IFF_UP | IFF_RUNNING; ifp->lb_type = LB_IOCTL; status = if_mii_probe(ifp->ifname); if (status >= 0) { ifp->lb_type = LB_MII; if_up = !!status; } else if ((status = if_ethtool_probe(ifp->ifname)) >= 0) { ifp->lb_type = LB_ETHTOOL; if_up = !!status; } else if_up = true; if (if_up) if_up = if_ioctl_flags(ifp); ifp->ifi_flags = if_up ? IFF_UP | IFF_RUNNING : 0; /* Register new monitor thread */ thread_add_timer(master, if_linkbeat_refresh_thread, ifp, POLLING_DELAY); } if (linkbeat_in_use) log_message(LOG_INFO, "Using MII-BMSR/ETHTOOL NIC polling thread..."); } /* Interface queue helpers*/ void free_interface_queue(void) { free_list(&if_queue); free_list(&garp_delay); } void free_old_interface_queue(void) { free_list(&old_garp_delay); } void init_interface_queue(void) { init_if_queue(); netlink_interface_lookup(NULL); #ifdef _HAVE_VRRP_VMAC_ /* Since we are reading all the interfaces, we might have received details of * a vmac/vrf before the underlying interface, so now we need to ensure the * interface pointers are all set */ set_base_ifp(); #endif // dump_list(NULL, if_queue); } int if_join_vrrp_group(sa_family_t family, int *sd, interface_t *ifp) { struct ip_mreqn imr; struct ipv6_mreq imr6; int ret = 0; if (*sd < 0) return -1; /* -> outbound processing option * join the multicast group. * binding the socket to the interface for outbound multicast * traffic. */ if (family == AF_INET) { memset(&imr, 0, sizeof(imr)); imr.imr_multiaddr = global_data->vrrp_mcast_group4.sin_addr; imr.imr_ifindex = (int)IF_INDEX(ifp); /* -> Need to handle multicast convergance after takeover. * We retry until multicast is available on the interface. */ ret = setsockopt(*sd, IPPROTO_IP, IP_ADD_MEMBERSHIP, (char *) &imr, (socklen_t)sizeof(struct ip_mreqn)); } else { memset(&imr6, 0, sizeof(imr6)); imr6.ipv6mr_multiaddr = global_data->vrrp_mcast_group6.sin6_addr; imr6.ipv6mr_interface = IF_INDEX(ifp); ret = setsockopt(*sd, IPPROTO_IPV6, IPV6_ADD_MEMBERSHIP, (char *) &imr6, (socklen_t)sizeof(struct ipv6_mreq)); } if (ret < 0) { log_message(LOG_INFO, "(%s) cant do IP%s_ADD_MEMBERSHIP errno=%s (%d)", ifp->ifname, (family == AF_INET) ? "" : "v6", strerror(errno), errno); close(*sd); *sd = -1; } return *sd; } int if_leave_vrrp_group(sa_family_t family, int sd, interface_t *ifp) { struct ip_mreqn imr; struct ipv6_mreq imr6; int ret = 0; /* If fd is -1 then we add a membership trouble */ if (sd < 0 || !ifp) return -1; /* Leaving the VRRP multicast group */ if (family == AF_INET) { memset(&imr, 0, sizeof(imr)); imr.imr_multiaddr = global_data->vrrp_mcast_group4.sin_addr; imr.imr_ifindex = (int)IF_INDEX(ifp); ret = setsockopt(sd, IPPROTO_IP, IP_DROP_MEMBERSHIP, (char *) &imr, sizeof(imr)); } else { memset(&imr6, 0, sizeof(imr6)); imr6.ipv6mr_multiaddr = global_data->vrrp_mcast_group6.sin6_addr; imr6.ipv6mr_interface = IF_INDEX(ifp); ret = setsockopt(sd, IPPROTO_IPV6, IPV6_DROP_MEMBERSHIP, (char *) &imr6, sizeof(struct ipv6_mreq)); } if (ret < 0) { log_message(LOG_INFO, "(%s) cant do IP%s_DROP_MEMBERSHIP errno=%s (%d)", ifp->ifname, (family == AF_INET) ? "" : "V6", strerror(errno), errno); return -1; } return 0; } int if_setsockopt_bindtodevice(int *sd, interface_t *ifp) { int ret; if (*sd < 0) return -1; /* -> inbound processing option * Specify the bound_dev_if. * why IP_ADD_MEMBERSHIP & IP_MULTICAST_IF doesnt set * sk->bound_dev_if themself ??? !!! * Needed for filter multicasted advert per interface. * * -- If you read this !!! and know the answer to the question * please feel free to answer me ! :) */ ret = setsockopt(*sd, SOL_SOCKET, SO_BINDTODEVICE, IF_NAME(ifp), (socklen_t)strlen(IF_NAME(ifp)) + 1); if (ret < 0) { log_message(LOG_INFO, "can't bind to device %s. errno=%d. (try to run it as root)", IF_NAME(ifp), errno); close(*sd); *sd = -1; } return *sd; } int if_setsockopt_hdrincl(int *sd) { int ret; int on = 1; if (*sd < 0) return -1; /* Include IP header into RAW protocol packet */ ret = setsockopt(*sd, IPPROTO_IP, IP_HDRINCL, &on, sizeof(on)); if (ret < 0) { log_message(LOG_INFO, "cant set HDRINCL IP option. errno=%d (%m)", errno); close(*sd); *sd = -1; } return *sd; } int if_setsockopt_ipv6_checksum(int *sd) { int ret; int offset = 6; if (!sd && *sd < 0) return -1; ret = setsockopt(*sd, IPPROTO_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)); if (ret < 0) { log_message(LOG_INFO, "cant set IPV6_CHECKSUM IP option. errno=%d (%m)", errno); close(*sd); *sd = -1; } return *sd; } #if HAVE_DECL_IP_MULTICAST_ALL /* Since Linux 2.6.31 */ int if_setsockopt_mcast_all(sa_family_t family, int *sd) { int ret; unsigned char no = 0; if (*sd < 0) return -1; if (family == AF_INET6) return *sd; /* Don't accept multicast packets we haven't requested */ ret = setsockopt(*sd, IPPROTO_IP, IP_MULTICAST_ALL, &no, sizeof(no)); if (ret < 0) { log_message(LOG_INFO, "cant set IP_MULTICAST_ALL IP option. errno=%d (%m)", errno); close(*sd); *sd = -1; } return *sd; } #endif int if_setsockopt_mcast_loop(sa_family_t family, int *sd) { int ret; unsigned char loop = 0; int loopv6 = 0; if (*sd < 0) return -1; /* Set Multicast loop */ if (family == AF_INET) ret = setsockopt(*sd, IPPROTO_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); else ret = setsockopt(*sd, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, &loopv6, sizeof(loopv6)); if (ret < 0) { log_message(LOG_INFO, "cant set IP%s_MULTICAST_LOOP IP option. errno=%d (%m)", (family == AF_INET) ? "" : "V6", errno); close(*sd); *sd = -1; } return *sd; } int if_setsockopt_mcast_hops(sa_family_t family, int *sd) { int ret; int hops = 255; /* Not applicable for IPv4 */ if (*sd < 0 || family == AF_INET) return -1; /* Set HOP limit */ ret = setsockopt(*sd, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, &hops, sizeof(hops)); if (ret < 0) { log_message(LOG_INFO, "cant set IPV6_MULTICAST_HOPS IP option. errno=%d (%m)", errno); close(*sd); *sd = -1; } return *sd; } int if_setsockopt_mcast_if(sa_family_t family, int *sd, interface_t *ifp) { int ret; ifindex_t ifindex; int int_ifindex; if (*sd < 0) return -1; /* Set interface for sending outbound datagrams */ ifindex = IF_INDEX(ifp); if ( family == AF_INET) { struct ip_mreqn imr; memset(&imr, 0, sizeof(imr)); imr.imr_ifindex = (int)IF_INDEX(ifp); ret = setsockopt(*sd, IPPROTO_IP, IP_MULTICAST_IF, &imr, sizeof(imr)); } else { int_ifindex = (int)ifindex; ret = setsockopt(*sd, IPPROTO_IPV6, IPV6_MULTICAST_IF, &int_ifindex, sizeof(int_ifindex)); } if (ret < 0) { log_message(LOG_INFO, "cant set IP%s_MULTICAST_IF IP option. errno=%d (%m)", (family == AF_INET) ? "" : "V6", errno); close(*sd); *sd = -1; } return *sd; } int if_setsockopt_priority(int *sd, int family) { int ret; int val; if (*sd < 0) return -1; /* Set PRIORITY for VRRP traffic */ if (family == AF_INET) { val = IPTOS_PREC_INTERNETCONTROL; ret = setsockopt(*sd, IPPROTO_IP, IP_TOS, &val, sizeof(val)); } else { /* set tos to internet network control */ val = 0xc0; /* 192, which translates to DCSP value 48, or cs6 */ ret = setsockopt(*sd, IPPROTO_IPV6, IPV6_TCLASS, &val, sizeof(val)); } if (ret < 0) { log_message(LOG_INFO, "can't set %s option. errno=%d (%m)", (family == AF_INET) ? "IP_TOS" : "IPV6_TCLASS", errno); close(*sd); *sd = -1; } return *sd; } int if_setsockopt_rcvbuf(int *sd, int val) { int ret; if (*sd < 0) return -1; /* rcvbuf option */ ret = setsockopt(*sd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)); if (ret < 0) { log_message(LOG_INFO, "cant set SO_RCVBUF IP option. errno=%d (%m)", errno); close(*sd); *sd = -1; } return *sd; } int if_setsockopt_no_receive(int *sd) { int ret; struct sock_filter bpfcode[1] = { {0x06, 0, 0, 0}, /* ret #0 - means that all packets will be filtered out */ }; struct sock_fprog bpf = {1, bpfcode}; if (*sd < 0) return -1; ret = setsockopt(*sd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)); if (ret < 0) { log_message(LOG_INFO, "Can't set SO_ATTACH_FILTER option. errno=%d (%m)", errno); close(*sd); *sd = -1; } return *sd; } void interface_up(interface_t *ifp) { /* We need to re-add static addresses and static routes */ static_track_reinstate_config(ifp); } void interface_down(interface_t *ifp) { element e, e1; vrrp_t *vrrp; ip_route_t *route; bool route_found; /* Unfortunately the kernel doesn't send RTM_DELROUTE for userspace added * routes that are deleted when the link goes down (?kernel bug). */ LIST_FOREACH(vrrp_data->vrrp, vrrp, e) { if (vrrp->state != VRRP_STATE_MAST) continue; route_found = false; LIST_FOREACH(vrrp->vroutes, route, e1) { if (!route->set) continue; /* Any route that has an oif will be tracking the interface, * so we only need to check for routes that dont specify an * oif */ /* Don't track route if it's not configured with this down * interface. */ if (!route->oif || route->configured_ifindex != ifp->ifindex) continue; route->set = false; if (route->dont_track) continue; route_found = true; } if (route_found) { /* Bring down vrrp instance/sync group */ down_instance(vrrp); } } #ifdef _HAVE_FIB_ROUTING_ /* Now check the static routes */ LIST_FOREACH(vrrp_data->static_routes, route, e) { if (route->set && route->oif == ifp) { /* This route will have been deleted */ route->set = false; } } #endif } void cleanup_lost_interface(interface_t *ifp) { vrrp_t *vrrp; tracking_vrrp_t *tvp; element e; LIST_FOREACH(ifp->tracking_vrrp, tvp, e) { vrrp = tvp->vrrp; /* If this is just a tracking interface, we don't need to do anything */ if (vrrp->ifp != ifp && IF_BASE_IFP(vrrp->ifp) != ifp && VRRP_CONFIGURED_IFP(vrrp) != ifp) continue; /* If the vrrp instance's interface doesn't exist, skip it */ if (!vrrp->ifp->ifindex) continue; #ifdef _HAVE_VRRP_VMAC_ /* If vmac going, clear VMAC_UP_BIT on vrrp instance */ if (vrrp->ifp->is_ours) __clear_bit(VRRP_VMAC_UP_BIT, &vrrp->vmac_flags); if (vrrp->configured_ifp == ifp && vrrp->configured_ifp->base_ifp == vrrp->ifp->base_ifp && vrrp->ifp->is_ours) { /* This is a changeable interface that the vrrp instance * was configured on. Delete the macvlan we created */ netlink_link_del_vmac(vrrp); } if (vrrp->configured_ifp == ifp && vrrp->configured_ifp->base_ifp != vrrp->configured_ifp) del_vrrp_from_interface(vrrp, vrrp->configured_ifp->base_ifp); /* If the interface type can be changed, and the vrrp had a * duplicate VRID, clear the error since when the underlying * interface is created again, it may be on another underlying * interface, and there may not be a duplicate VRID. */ if (global_data->allow_if_changes && ifp->changeable_type && vrrp->configured_ifp == ifp && vrrp->duplicate_vrid_fault) { vrrp->duplicate_vrid_fault = false; vrrp->num_script_if_fault--; } #endif /* Find the sockpool entry. If none, then we have closed the socket */ if (vrrp->sockets->fd_in != -1) { thread_cancel_read(master, vrrp->sockets->fd_in); close(vrrp->sockets->fd_in); vrrp->sockets->fd_in = -1; } if (vrrp->sockets->fd_out != -1) { close(vrrp->sockets->fd_out); vrrp->sockets->fd_out = -1; } vrrp->sockets->ifp->ifindex = 0; if (IF_ISUP(ifp)) down_instance(vrrp); } interface_down(ifp); ifp->ifindex = 0; ifp->ifi_flags = 0; #ifdef _HAVE_VRRP_VMAC_ if (!ifp->is_ours) ifp->base_ifp = ifp; #endif #ifdef _HAVE_VRF_ ifp->vrf_master_ifp = NULL; ifp->vrf_master_ifindex = 0; #endif } static void setup_interface(vrrp_t *vrrp) { interface_t *ifp; #ifdef _HAVE_VRRP_VMAC_ /* If the vrrp instance uses a vmac, and that vmac i/f doesn't * exist, then create it */ if (__test_bit(VRRP_VMAC_BIT, &vrrp->vmac_flags) && !vrrp->ifp->ifindex) { if (!netlink_link_add_vmac(vrrp)) return; } #endif #ifdef _HAVE_VRRP_VMAC_ if (__test_bit(VRRP_VMAC_XMITBASE_BIT, &vrrp->vmac_flags)) ifp = vrrp->ifp->base_ifp; else #endif ifp = vrrp->ifp; /* Find the sockpool entry. If none, then we open the socket */ if (vrrp->sockets->fd_in == -1) { vrrp->sockets->fd_in = open_vrrp_read_socket(vrrp->sockets->family, vrrp->sockets->proto, ifp, vrrp->sockets->unicast, vrrp->sockets->rx_buf_size); if (vrrp->sockets->fd_in == -1) vrrp->sockets->fd_out = -1; else vrrp->sockets->fd_out = open_vrrp_send_socket(vrrp->sockets->family, vrrp->sockets->proto, ifp, vrrp->sockets->unicast); vrrp->sockets->ifp = vrrp->ifp; if (vrrp_initialised) { vrrp->state = vrrp->num_script_if_fault ? VRRP_STATE_FAULT : VRRP_STATE_BACK; vrrp_init_instance_sands(vrrp); vrrp_thread_add_read(vrrp); } } return; } #ifdef _HAVE_VRRP_VMAC_ int recreate_vmac_thread(thread_t *thread) { vrrp_t *vrrp; tracking_vrrp_t *tvp; element e; interface_t *ifp = THREAD_ARG(thread); if (LIST_ISEMPTY(ifp->tracking_vrrp)) return 0; LIST_FOREACH(ifp->tracking_vrrp, tvp, e) { vrrp = tvp->vrrp; /* If this isn't the vrrp's interface, skip */ if (vrrp->ifp != ifp) continue; if (!__test_bit(VRRP_VMAC_BIT, &vrrp->vmac_flags)) continue; /* Don't attempt to create the VMAC if the configured * interface doesn't exist */ if (!VRRP_CONFIGURED_IFP(vrrp)->ifindex) continue; netlink_error_ignore = ENODEV; setup_interface(vrrp); netlink_error_ignore = 0; break; } return 0; } #endif void update_added_interface(interface_t *ifp) { vrrp_t *vrrp; tracking_vrrp_t *tvp; element e; #ifdef _HAVE_VRRP_VMAC_ vrrp_t *vrrp1; tracking_vrrp_t *tvp1; element e1; #endif if (LIST_ISEMPTY(ifp->tracking_vrrp)) return; LIST_FOREACH(ifp->tracking_vrrp, tvp, e) { vrrp = tvp->vrrp; #ifdef _HAVE_VRRP_VMAC_ /* If this interface is a macvlan that we haven't created, * and the interface type can be changed or we haven't checked * this interface before, make sure that there is not VRID * conflict. */ if (!ifp->is_ours && (global_data->allow_if_changes || !ifp->seen_interface)) { LIST_FOREACH(ifp->base_ifp->tracking_vrrp, tvp1, e1) { vrrp1 = tvp1->vrrp; if (vrrp == vrrp1) continue; if (!VRRP_CONFIGURED_IFP(vrrp1)->ifindex) continue; if (IF_BASE_IFP(VRRP_CONFIGURED_IFP(vrrp)) == IF_BASE_IFP(VRRP_CONFIGURED_IFP(vrrp1)) && vrrp->family == vrrp1->family && vrrp->vrid == vrrp1->vrid) { vrrp->num_script_if_fault++; vrrp->duplicate_vrid_fault = true; log_message(LOG_INFO, "VRID conflict between %s and %s IPv%d vrid %d", vrrp->iname, vrrp1->iname, vrrp->family == AF_INET ? 4 : 6, vrrp->vrid); break; } } } if (ifp->vmac_type && tvp->type & TRACK_VRRP) { add_vrrp_to_interface(vrrp, ifp->base_ifp, tvp->weight, false, TRACK_VRRP_DYNAMIC); if (!IF_ISUP(vrrp->configured_ifp->base_ifp) && !vrrp->dont_track_primary) vrrp->num_script_if_fault++; } /* We might be the configured interface for a vrrp instance that itself uses * a macvlan. If so, we can create the macvlans */ if (__test_bit(VRRP_VMAC_BIT, &vrrp->vmac_flags) && vrrp->configured_ifp == ifp && !vrrp->ifp->ifindex) thread_add_event(master, recreate_vmac_thread, vrrp->ifp, 0); #endif /* If this is just a tracking interface, we don't need to do anything */ if (vrrp->ifp != ifp && IF_BASE_IFP(vrrp->ifp) != ifp) continue; /* Reopen any socket on this interface if necessary */ if ( #ifdef _HAVE_VRRP_VMAC_ !__test_bit(VRRP_VMAC_BIT, &vrrp->vmac_flags) && #endif vrrp->sockets->fd_in == -1) setup_interface(vrrp); } #ifdef _HAVE_VRRP_VMAC_ ifp->seen_interface = true; #endif } #ifdef THREAD_DUMP void register_vrrp_if_addresses(void) { register_thread_address("if_linkbeat_refresh_thread", if_linkbeat_refresh_thread); #ifdef _HAVE_VRRP_VMAC_ register_thread_address("recreate_vmac_thread", recreate_vmac_thread); #endif } #endif