/*
 * eBPF filter for IPv4 Address Conflict Detection
 *
 * An eBPF map and an eBPF program are provided. The map contains all the
 * addresses on which address conflict detection is performed, and the
 * program filters out all packets except exactly the packets relevant to
 * the ACD protocol on the addresses currently in the map.
 *
 * Note that userspace still has to filter the incoming packets, as filters
 * are applied when packets are queued on the socket, not when userspace
 * receives them. It is therefore possible to receive packets about addresses
 * that have already been removed.
 */

#include <c-stdaux.h>
#include <errno.h>
#include <inttypes.h>
#include <linux/bpf.h>
#include <netinet/if_ether.h>
#include <netinet/in.h>
#include <stdlib.h>
#include <string.h>
#include <sys/resource.h>
#include <sys/syscall.h>
#include <unistd.h>
#include "n-acd-private.h"

/*
 * BPF_LD_ABS() - build one instruction with opcode BPF_LD | SIZE | BPF_ABS
 * @SIZE: access width selector, filtered through BPF_SIZE()
 * @IMM:  value stored in the immediate field
 *
 * The register and offset fields are not named and are therefore zero.
 */
#define BPF_LD_ABS(SIZE, IMM) \
        ((struct bpf_insn) { \
                .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \
                .imm  = (IMM), \
        })

/*
 * BPF_LDX_MEM() - build one instruction with opcode BPF_LDX | SIZE | BPF_MEM
 * @SIZE: access width selector, filtered through BPF_SIZE()
 * @DST:  destination register number
 * @SRC:  source register number
 * @OFF:  value stored in the offset field
 *
 * The immediate field is not named and is therefore zero.
 */
#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
        ((struct bpf_insn) { \
                .code    = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
                .dst_reg = (DST), \
                .src_reg = (SRC), \
                .off     = (OFF), \
        })

/*
 * BPF_LD_MAP_FD() - build the two-slot BPF_LD | BPF_DW | BPF_IMM instruction
 *                   that references a map by file descriptor
 * @DST:    destination register number
 * @MAP_FD: map file descriptor
 *
 * This expands to TWO comma-separated instructions, so it is only usable
 * inside an array initializer. The first slot carries the low 32 bits of
 * MAP_FD and tags the source register with BPF_PSEUDO_MAP_FD; the second
 * slot carries the upper 32 bits and is otherwise all-zero.
 */
#define BPF_LD_MAP_FD(DST, MAP_FD) \
        ((struct bpf_insn) { \
                .code    = BPF_LD | BPF_DW | BPF_IMM, \
                .dst_reg = (DST), \
                .src_reg = BPF_PSEUDO_MAP_FD, \
                .imm     = (__u32) (MAP_FD), \
        }), \
        ((struct bpf_insn) { \
                .code = 0, /* zero is reserved opcode */ \
                .imm  = ((__u64) (MAP_FD)) >> 32, \
        })

/*
 * BPF_ALU_REG() - build one instruction with opcode BPF_ALU64 | OP | BPF_X
 * @OP:  operation selector, filtered through BPF_OP()
 * @DST: destination register number
 * @SRC: source register number
 *
 * The offset and immediate fields are not named and are therefore zero.
 */
#define BPF_ALU_REG(OP, DST, SRC) \
        ((struct bpf_insn) { \
                .code    = BPF_ALU64 | BPF_OP(OP) | BPF_X, \
                .dst_reg = (DST), \
                .src_reg = (SRC), \
        })

/*
 * BPF_ALU_IMM() - build one instruction with opcode BPF_ALU64 | OP | BPF_K
 * @OP:  operation selector, filtered through BPF_OP()
 * @DST: destination register number
 * @IMM: value stored in the immediate field
 *
 * The source register and offset fields are not named and are therefore zero.
 */
#define BPF_ALU_IMM(OP, DST, IMM) \
        ((struct bpf_insn) { \
                .code    = BPF_ALU64 | BPF_OP(OP) | BPF_K, \
                .dst_reg = (DST), \
                .imm     = (IMM), \
        })

/*
 * BPF_MOV_REG() - build one instruction with opcode BPF_ALU64 | BPF_MOV | BPF_X
 * @DST: destination register number
 * @SRC: source register number
 *
 * The offset and immediate fields are not named and are therefore zero.
 */
#define BPF_MOV_REG(DST, SRC) \
        ((struct bpf_insn) { \
                .code    = BPF_ALU64 | BPF_MOV | BPF_X, \
                .dst_reg = (DST), \
                .src_reg = (SRC), \
        })

/*
 * BPF_MOV_IMM() - build one instruction with opcode BPF_ALU64 | BPF_MOV | BPF_K
 * @DST: destination register number
 * @IMM: value stored in the immediate field
 *
 * The source register and offset fields are not named and are therefore zero.
 */
#define BPF_MOV_IMM(DST, IMM) \
        ((struct bpf_insn) { \
                .code    = BPF_ALU64 | BPF_MOV | BPF_K, \
                .dst_reg = (DST), \
                .imm     = (IMM), \
        })

/*
 * BPF_STX_MEM() - build one instruction with opcode BPF_STX | SIZE | BPF_MEM
 * @SIZE: access width selector, filtered through BPF_SIZE()
 * @DST:  destination register number
 * @SRC:  source register number
 * @OFF:  value stored in the offset field
 *
 * The immediate field is not named and is therefore zero.
 */
#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
        ((struct bpf_insn) { \
                .code    = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \
                .dst_reg = (DST), \
                .src_reg = (SRC), \
                .off     = (OFF), \
        })

/*
 * BPF_JMP_REG() - build one instruction with opcode BPF_JMP | OP | BPF_X
 * @OP:  jump condition selector, filtered through BPF_OP()
 * @DST: destination register number
 * @SRC: source register number
 * @OFF: value stored in the offset field
 *
 * The immediate field is not named and is therefore zero.
 */
#define BPF_JMP_REG(OP, DST, SRC, OFF) \
        ((struct bpf_insn) { \
                .code    = BPF_JMP | BPF_OP(OP) | BPF_X, \
                .dst_reg = (DST), \
                .src_reg = (SRC), \
                .off     = (OFF), \
        })

/*
 * BPF_JMP_IMM() - build one instruction with opcode BPF_JMP | OP | BPF_K
 * @OP:  jump condition selector, filtered through BPF_OP()
 * @DST: destination register number
 * @IMM: value stored in the immediate field
 * @OFF: value stored in the offset field
 *
 * The source register field is not named and is therefore zero.
 */
#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
        ((struct bpf_insn) { \
                .code    = BPF_JMP | BPF_OP(OP) | BPF_K, \
                .dst_reg = (DST), \
                .off     = (OFF), \
                .imm     = (IMM), \
        })

/*
 * BPF_EMIT_CALL() - build one instruction with opcode BPF_JMP | BPF_CALL
 * @FUNC: value stored in the immediate field (the file passes a BPF_FUNC_*
 *        identifier here)
 *
 * The register and offset fields are not named and are therefore zero.
 */
#define BPF_EMIT_CALL(FUNC) \
        ((struct bpf_insn) { \
                .code = BPF_JMP | BPF_CALL, \
                .imm  = (FUNC), \
        })

/*
 * BPF_EXIT_INSN() - build one instruction with opcode BPF_JMP | BPF_EXIT
 *
 * Every other field of the instruction is left at zero.
 */
#define BPF_EXIT_INSN() \
        ((struct bpf_insn) { \
                .code = BPF_JMP | BPF_EXIT, \
        })

/*
 * n_acd_syscall_bpf() - invoke the raw bpf system call
 * @cmd:  bpf command number
 * @attr: command attributes handed to the kernel
 * @size: size of @attr in bytes
 *
 * The arguments are forwarded unchanged to syscall(__NR_bpf, ...).
 *
 * Return: the result of syscall(), narrowed to int. Callers in this file
 *         treat a negative value as failure and then consult errno.
 */
static int n_acd_syscall_bpf(int cmd, union bpf_attr *attr, unsigned int size) {
        long r;

        r = syscall(__NR_bpf, cmd, attr, size);

        return (int)r;
}

/*
 * n_acd_bpf_map_create() - create the eBPF hash map used by the ACD filter
 * @mapfdp:      output argument, receives the map file descriptor on success
 * @max_entries: number of entries the map is created with
 *
 * The map is a BPF_MAP_TYPE_HASH keyed by a 32-bit value. Each key carries a
 * one-byte value that is never used, but a value size must be set.
 *
 * Return: 0 on success (with *@mapfdp set), -EINVAL if @max_entries does not
 *         fit the 32-bit attribute field, otherwise the negated errno of the
 *         failed bpf call. *@mapfdp is left untouched on failure.
 */
int n_acd_bpf_map_create(int *mapfdp, size_t max_entries) {
        union bpf_attr attr;
        int mapfd;

        /* The attribute field is 32 bits wide; refuse rather than truncate. */
        if (max_entries > UINT32_MAX)
                return -EINVAL;

        /*
         * Zero the whole union first and then assign the fields one by one.
         * Assigning a compound literal to the union would leave the bytes
         * outside the initialized member unspecified (and recent compilers do
         * not clear them), but the kernel requires unused bytes to be zero.
         */
        memset(&attr, 0, sizeof(attr));
        attr.map_type    = BPF_MAP_TYPE_HASH;
        attr.key_size    = sizeof(uint32_t);
        attr.value_size  = sizeof(uint8_t); /* values are never used, but must be set */
        attr.max_entries = (uint32_t)max_entries;

        mapfd = n_acd_syscall_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
        if (mapfd < 0)
                return -errno;

        *mapfdp = mapfd;
        return 0;
}

/*
 * n_acd_bpf_map_add() - insert an address into the filter map
 * @mapfd: file descriptor of the map
 * @addrp: address to insert; s_addr is converted with be32toh() before it is
 *         used as the map key
 *
 * The element is inserted with BPF_NOEXIST and a dummy one-byte value.
 *
 * Return: 0 on success, otherwise the negated errno of the failed bpf call.
 */
int n_acd_bpf_map_add(int mapfd, struct in_addr *addrp) {
        union bpf_attr attr;
        uint32_t addr = be32toh(addrp->s_addr);
        uint8_t _dummy = 0;
        int r;

        /*
         * Zero the whole union first and then assign the fields one by one.
         * Assigning a compound literal to the union would leave the bytes
         * outside the initialized member unspecified (and recent compilers do
         * not clear them), but the kernel requires unused bytes to be zero.
         */
        memset(&attr, 0, sizeof(attr));
        attr.map_fd = mapfd;
        attr.key    = (uint64_t)(unsigned long)&addr;
        attr.value  = (uint64_t)(unsigned long)&_dummy;
        attr.flags  = BPF_NOEXIST;

        r = n_acd_syscall_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
        if (r < 0)
                return -errno;

        return 0;
}

/*
 * n_acd_bpf_map_remove() - delete an address from the filter map
 * @mapfd: file descriptor of the map
 * @addrp: address to delete; s_addr is converted with be32toh() before it is
 *         used as the map key
 *
 * Return: 0 on success, otherwise the negated errno of the failed bpf call.
 */
int n_acd_bpf_map_remove(int mapfd, struct in_addr *addrp) {
        uint32_t addr = be32toh(addrp->s_addr);
        union bpf_attr attr;
        int r;

        /*
         * Zero the whole union first and then assign the fields one by one.
         * Assigning a compound literal to the union would leave the bytes
         * outside the initialized member unspecified (and recent compilers do
         * not clear them), but the kernel requires unused bytes to be zero.
         */
        memset(&attr, 0, sizeof(attr));
        attr.map_fd = mapfd;
        attr.key    = (uint64_t)(unsigned long)&addr;

        r = n_acd_syscall_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
        if (r < 0)
                return -errno;

        return 0;
}

/*
 * n_acd_bpf_compile() - assemble and load the ACD socket filter program
 * @progfdp: output argument, receives the program file descriptor on success
 * @mapfd:   file descriptor of the address map the program looks keys up in
 * @macp:    local hardware address; packets sent from it are dropped
 *
 * The program is loaded as BPF_PROG_TYPE_SOCKET_FILTER. It returns 0 (drop)
 * unless the packet is an Ethernet/IPv4 ARP packet of at least
 * sizeof(struct ether_arp) bytes that was not sent from @macp and that is
 * either a conflict (request or reply with a non-zero sender address) or a
 * probe (request with a zero sender address) for an address found in the map.
 * For accepted packets it returns sizeof(struct ether_arp).
 *
 * Return: 0 on success (with *@progfdp set), otherwise the negated errno of
 *         the failed bpf call. *@progfdp is left untouched on failure.
 */
int n_acd_bpf_compile(int *progfdp, int mapfd, struct ether_addr *macp) {
        /*
         * Copy of the hardware address that can be read back as one 32-bit
         * and one 16-bit chunk, for comparison against the packet loads below.
         */
        const union {
                uint8_t u8[6];
                uint16_t u16[3];
                uint32_t u32[1];
        } mac = {
                .u8 = {
                        macp->ether_addr_octet[0],
                        macp->ether_addr_octet[1],
                        macp->ether_addr_octet[2],
                        macp->ether_addr_octet[3],
                        macp->ether_addr_octet[4],
                        macp->ether_addr_octet[5],
                },
        };
        struct bpf_insn prog[] = {
                /* for using BPF_LD_ABS r6 must point to the skb, currently in r1 */
                BPF_MOV_REG(6, 1),                                              /* r6 = r1 */

                /* drop the packet if it is too short */
                BPF_LDX_MEM(BPF_W, 0, 6, offsetof(struct __sk_buff, len)),      /* r0 = skb->len */
                BPF_JMP_IMM(BPF_JGE, 0, sizeof(struct ether_arp), 2),           /* if (r0 >= sizeof(ether_arp)) skip 2 */
                BPF_MOV_IMM(0, 0),                                              /* r0 = 0 */
                BPF_EXIT_INSN(),                                                /* return */

                /* drop the packet if the header is not as expected */
                BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_hrd)),         /* r0 = header type */
                BPF_JMP_IMM(BPF_JEQ, 0, ARPHRD_ETHER, 2),                       /* if (r0 == ethernet) skip 2 */
                BPF_MOV_IMM(0, 0),                                              /* r0 = 0 */
                BPF_EXIT_INSN(),                                                /* return */

                BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_pro)),         /* r0 = protocol */
                BPF_JMP_IMM(BPF_JEQ, 0, ETHERTYPE_IP, 2),                       /* if (r0 == IP) skip 2 */
                BPF_MOV_IMM(0, 0),                                              /* r0 = 0 */
                BPF_EXIT_INSN(),                                                /* return */

                BPF_LD_ABS(BPF_B, offsetof(struct ether_arp, arp_hln)),         /* r0 = hw addr length */
                BPF_JMP_IMM(BPF_JEQ, 0, sizeof(struct ether_addr), 2),          /* if (r0 == sizeof(ether_addr)) skip 2 */
                BPF_MOV_IMM(0, 0),                                              /* r0 = 0 */
                BPF_EXIT_INSN(),                                                /* return */

                BPF_LD_ABS(BPF_B, offsetof(struct ether_arp, arp_pln)),         /* r0 = protocol addr length */
                BPF_JMP_IMM(BPF_JEQ, 0, sizeof(struct in_addr), 2),             /* if (r0 == sizeof(in_addr)) skip 2 */
                BPF_MOV_IMM(0, 0),                                              /* r0 = 0 */
                BPF_EXIT_INSN(),                                                /* return */

                /* drop packets from our own mac address */
                BPF_LD_ABS(BPF_W, offsetof(struct ether_arp, arp_sha)),         /* r0 = first four bytes of packet mac address */
                BPF_JMP_IMM(BPF_JNE, 0, be32toh(mac.u32[0]), 4),                /* if (r0 != first four bytes of our mac address) skip 4 */
                BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_sha) + 4),     /* r0 = last two bytes of packet mac address */
                BPF_JMP_IMM(BPF_JNE, 0, be16toh(mac.u16[2]), 2),                /* if (r0 != last two bytes of our mac address) skip 2 */
                BPF_MOV_IMM(0, 0),                                              /* r0 = 0 */
                BPF_EXIT_INSN(),                                                /* return */

                /*
                 * We listen for two kinds of packets:
                 *  Conflicts)
                 *    These are requests or replies with the sender address not set to INADDR_ANY. The
                 *    conflicted address is the sender address, remember this in r7.
                 *  Probes)
                 *    These are requests with the sender address set to INADDR_ANY. The probed address
                 *    is the target address, remember this in r7.
                 *  Any other packets are dropped.
                 */
                BPF_LD_ABS(BPF_W, offsetof(struct ether_arp, arp_spa)),         /* r0 = sender ip address */
                BPF_JMP_IMM(BPF_JEQ, 0, 0, 7),                                  /* if (r0 == 0) skip 7 */
                BPF_MOV_REG(7, 0),                                              /* r7 = r0 */
                BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_op)),          /* r0 = operation */
                BPF_JMP_IMM(BPF_JEQ, 0, ARPOP_REQUEST, 3),                      /* if (r0 == request) skip 3 */
                BPF_JMP_IMM(BPF_JEQ, 0, ARPOP_REPLY, 2),                        /* if (r0 == reply) skip 2 */
                BPF_MOV_IMM(0, 0),                                              /* r0 = 0 */
                BPF_EXIT_INSN(),                                                /* return */
                BPF_JMP_IMM(BPF_JA, 0, 0, 6),                                   /* skip 6 */
                BPF_LD_ABS(BPF_W, offsetof(struct ether_arp, arp_tpa)),         /* r0 = target ip address */
                BPF_MOV_REG(7, 0),                                              /* r7 = r0 */
                BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_op)),          /* r0 = operation */
                BPF_JMP_IMM(BPF_JEQ, 0, ARPOP_REQUEST, 2),                      /* if (r0 == request) skip 2 */
                BPF_MOV_IMM(0, 0),                                              /* r0 = 0 */
                BPF_EXIT_INSN(),                                                /* return */

                /* check if the probe or conflict is for an address we are monitoring */
                BPF_STX_MEM(BPF_W, 10, 7, -4),                                  /* *(uint32_t*)(fp - 4) = r7 */
                BPF_MOV_REG(2, 10),                                             /* r2 = fp */
                BPF_ALU_IMM(BPF_ADD, 2, -4),                                    /* r2 -= 4 */
                BPF_LD_MAP_FD(1, mapfd),                                        /* r1 = mapfd */
                BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),                        /* r0 = map_lookup_elem(r1, r2) */
                BPF_JMP_IMM(BPF_JNE, 0, 0, 2),                                  /* if (r0 != NULL) skip 2 */
                BPF_MOV_IMM(0, 0),                                              /* r0 = 0 */
                BPF_EXIT_INSN(),                                                /* return */

                /* return exactly the packet length */
                BPF_MOV_IMM(0, sizeof(struct ether_arp)),                       /* r0 = sizeof(struct ether_arp) */
                BPF_EXIT_INSN(),                                                /* return */
        };
        union bpf_attr attr;
        int progfd;

        /*
         * Zero the whole union first and then assign the fields one by one.
         * Assigning a compound literal to the union would leave the bytes
         * outside the initialized member unspecified (and recent compilers do
         * not clear them), but the kernel requires unused bytes to be zero.
         */
        memset(&attr, 0, sizeof(attr));
        attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
        attr.insns     = (uint64_t)(unsigned long)prog;
        attr.insn_cnt  = sizeof(prog) / sizeof(*prog);
        attr.license   = (uint64_t)(unsigned long)"ASL";

        progfd = n_acd_syscall_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
        if (progfd < 0)
                return -errno;

        *progfdp = progfd;
        return 0;
}