/* * IPv4 Address Conflict Detection * * This file implements the probe object. A probe is basically the * state-machine of a single ACD run. It takes an address to probe for, checks * for conflicts and then defends it once configured. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "n-acd.h" #include "n-acd-private.h" /* * These parameters and timing intervals are specified in RFC-5227. The * original values are: * * PROBE_NUM 3 * PROBE_WAIT 1s * PROBE_MIN 1s * PROBE_MAX 3s * ANNOUNCE_NUM 3 * ANNOUNCE_WAIT 2s * ANNOUNCE_INTERVAL 2s * MAX_CONFLICTS 10 * RATE_LIMIT_INTERVAL 60s * DEFEND_INTERVAL 10s * * If we assume a best-case and worst-case scenario for non-conflicted runs, we * end up with a runtime between 4s and 9s to finish the probe. Then it still * takes a fixed 4s to finish the announcements. * * RFC 5227 section 1.1: * [...] (Note that the values listed here are fixed constants; they are * not intended to be modifiable by implementers, operators, or end users. * These constants are given symbolic names here to facilitate the writing * of future standards that may want to reference this document with * different values for these named constants; however, at the present time * no such future standards exist.) [...] * * Unfortunately, no-one ever stepped up to write a "future standard" to revise * the timings. A 9s timeout for successful link setups is not acceptable today. * Hence, we will just go forward and ignore the proposed values. On both * wired and wireless local links round-trip latencies of below 3ms are common. * We require the caller to set a timeout multiplier, where 1 corresponds to a * total probe time between 0.5 ms and 1.0 ms. On modern networks a multiplier * of about 100 should be a reasonable default. To comply with the RFC select a * multiplier of 9000. */ #define N_ACD_RFC_PROBE_NUM (3) #define N_ACD_RFC_PROBE_WAIT_NSEC (UINT64_C(111111)) /* 1/9 ms */ #define N_ACD_RFC_PROBE_MIN_NSEC (UINT64_C(111111)) /* 1/9 ms */ #define N_ACD_RFC_PROBE_MAX_NSEC (UINT64_C(333333)) /* 3/9 ms */ #define N_ACD_RFC_ANNOUNCE_NUM (3) #define N_ACD_RFC_ANNOUNCE_WAIT_NSEC (UINT64_C(222222)) /* 2/9 ms */ #define N_ACD_RFC_ANNOUNCE_INTERVAL_NSEC (UINT64_C(222222)) /* 2/9 ms */ #define N_ACD_RFC_MAX_CONFLICTS (10) #define N_ACD_RFC_RATE_LIMIT_INTERVAL_NSEC (UINT64_C(60000000000)) /* 60s */ #define N_ACD_RFC_DEFEND_INTERVAL_NSEC (UINT64_C(10000000000)) /* 10s */ /** * n_acd_probe_config_new() - create probe configuration * @configp: output argument for new probe configuration * * This creates a new probe configuration. It will be returned in @configp to * the caller, which upon return fully owns the object. * * A probe configuration collects parameters for probes. It never validates the * input, but this is left to the consumer of the configuration to do. * * Return: 0 on success, negative error code on failure. */ _c_public_ int n_acd_probe_config_new(NAcdProbeConfig **configp) { _c_cleanup_(n_acd_probe_config_freep) NAcdProbeConfig *config = NULL; config = malloc(sizeof(*config)); if (!config) return -ENOMEM; *config = (NAcdProbeConfig)N_ACD_PROBE_CONFIG_NULL(*config); *configp = config; config = NULL; return 0; } /** * n_acd_probe_config_free() - destroy probe configuration * @config: configuration to operate on, or NULL * * This destroys the probe configuration and all associated objects. If @config * is NULL, this is a no-op. * * Return: NULL is returned. */ _c_public_ NAcdProbeConfig *n_acd_probe_config_free(NAcdProbeConfig *config) { if (!config) return NULL; free(config); return NULL; } /** * n_acd_probe_config_set_ip() - set ip property * @config: configuration to operate on * @ip: ip to set * * This sets the IP property to the value `ip`. The address is copied into the * configuration object. No validation is performed. * * The IP property selects the IP address that a probe checks for. It is the * caller's responsibility to guarantee the address is valid and can be used. */ _c_public_ void n_acd_probe_config_set_ip(NAcdProbeConfig *config, struct in_addr ip) { config->ip = ip; } /** * n_acd_probe_config_set_timeout() - set timeout property * @config: configuration to operate on * @msecs: timeout to set, in milliseconds * * This sets the timeout to use for a conflict detection probe. The * specification default is provided as `N_ACD_TIMEOUT_RFC5227` and corresponds * to 9 seconds. * * If set to 0, conflict detection is skipped and the address is immediately * advertised and defended. * * Depending on the transport used, the API user should select a suitable * timeout. Since `ACD` only operates on the link layer, timeouts in the * hundreds of milliseconds range should be more than enough for any modern * network. Note that increasing this value directly affects the time it takes * to connect to a network, since an address should not be used unless conflict * detection finishes. * * Using the specification default is **discouraged**. It is way too slow and * not appropriate for modern networks. * * Default value is `N_ACD_TIMEOUT_RFC5227`. */ _c_public_ void n_acd_probe_config_set_timeout(NAcdProbeConfig *config, uint64_t msecs) { config->timeout_msecs = msecs; } static void n_acd_probe_schedule(NAcdProbe *probe, uint64_t n_timeout, unsigned int n_jitter) { uint64_t n_time; timer_now(&probe->acd->timer, &n_time); n_time += n_timeout; /* * ACD specifies jitter values to reduce packet storms on the local * link. This call accepts the maximum relative jitter value in * nanoseconds as @n_jitter. We then use rand_r(3p) to get a * pseudo-random jitter on top of the real timeout given as @n_timeout. */ if (n_jitter) { uint64_t random; random = ((uint64_t)rand_r(&probe->acd->seed) << 32) | (uint64_t)rand_r(&probe->acd->seed); n_time += random % n_jitter; } timeout_schedule(&probe->timeout, &probe->acd->timer, n_time); } static void n_acd_probe_unschedule(NAcdProbe *probe) { timeout_unschedule(&probe->timeout); } static bool n_acd_probe_is_unique(NAcdProbe *probe) { NAcdProbe *sibling; if (!c_rbnode_is_linked(&probe->ip_node)) return false; sibling = c_rbnode_entry(c_rbnode_next(&probe->ip_node), NAcdProbe, ip_node); if (sibling && sibling->ip.s_addr == probe->ip.s_addr) return false; sibling = c_rbnode_entry(c_rbnode_prev(&probe->ip_node), NAcdProbe, ip_node); if (sibling && sibling->ip.s_addr == probe->ip.s_addr) return false; return true; } static int n_acd_probe_link(NAcdProbe *probe) { int r; /* * Make sure the kernel bpf map has space for at least one more * entry. */ r = n_acd_ensure_bpf_map_space(probe->acd); if (r) return r; /* * Link entry into context, indexed by its IP. Note that we allow * duplicates just fine. It is up to you to decide whether to avoid * duplicates, if you don't want them. Duplicates on the same context * do not conflict with each other, though. */ { CRBNode **slot, *parent; NAcdProbe *other; slot = &probe->acd->ip_tree.root; parent = NULL; while (*slot) { other = c_rbnode_entry(*slot, NAcdProbe, ip_node); parent = *slot; if (probe->ip.s_addr < other->ip.s_addr) slot = &(*slot)->left; else slot = &(*slot)->right; } c_rbtree_add(&probe->acd->ip_tree, parent, slot, &probe->ip_node); } /* * Add the ip address to the map, if it is not already there. */ if (n_acd_probe_is_unique(probe)) { r = n_acd_bpf_map_add(probe->acd->fd_bpf_map, &probe->ip); if (r) { /* * Make sure the IP address is linked in userspace iff * it is linked in the kernel. */ c_rbnode_unlink(&probe->ip_node); return r; } ++probe->acd->n_bpf_map; } return 0; } static void n_acd_probe_unlink(NAcdProbe *probe) { int r; /* * If this is the only probe for a given IP, remove the IP from the * kernel BPF map. */ if (n_acd_probe_is_unique(probe)) { r = n_acd_bpf_map_remove(probe->acd->fd_bpf_map, &probe->ip); c_assert(r >= 0); --probe->acd->n_bpf_map; } c_rbnode_unlink(&probe->ip_node); } int n_acd_probe_new(NAcdProbe **probep, NAcd *acd, NAcdProbeConfig *config) { _c_cleanup_(n_acd_probe_freep) NAcdProbe *probe = NULL; int r; if (!config->ip.s_addr) return N_ACD_E_INVALID_ARGUMENT; probe = malloc(sizeof(*probe)); if (!probe) return -ENOMEM; *probe = (NAcdProbe)N_ACD_PROBE_NULL(*probe); probe->acd = n_acd_ref(acd); probe->ip = config->ip; /* * We use the provided timeout-length as multiplier for all our * timeouts. The provided timeout defines the maximum length of an * entire probe-interval until the first announcement. Given the * spec-provided parameters, this ends up as: * * PROBE_WAIT + PROBE_MAX + PROBE_MAX + ANNOUNCE_WAIT * = 1s + 3s + 3s + 2s * = 9s * * Hence, the default value for this timeout is 9000ms, which just * ends up matching the spec-provided values. * * What we now semantically do is divide this timeout by 1ns/1000000. * This first turns it into nanoseconds, then strips the unit by * turning it into a multiplier. However, rather than performing the * division here, we multiplier all our timeouts by 1000000 statically * at compile time. Therefore, we can use the user-provided timeout as * unmodified multiplier. No conversion necessary. */ probe->timeout_multiplier = config->timeout_msecs; r = n_acd_probe_link(probe); if (r) return r; /* * Now that everything is set up, we have to send the first probe. This * is done after ~PROBE_WAIT seconds, hence we schedule our timer. * In case no timeout-multiplier is set, we pretend we already sent all * probes successfully and schedule the timer so we proceed with the * announcements. We must schedule a fake timer there, since we are not * allowed to advance the state machine outside of n_acd_dispatch(). */ if (probe->timeout_multiplier) { probe->n_iteration = 0; n_acd_probe_schedule(probe, 0, probe->timeout_multiplier * N_ACD_RFC_PROBE_WAIT_NSEC); } else { probe->n_iteration = N_ACD_RFC_PROBE_NUM; n_acd_probe_schedule(probe, 0, 0); } *probep = probe; probe = NULL; return 0; } /** * n_acd_probe_free() - destroy a probe * @probe: probe to operate on, or NULL * * This destroys the probe specified by @probe. All operations are immediately * ceded and all associated objects are released. * * If @probe is NULL, this is a no-op. * * This function will flush all events associated with @probe from the event * queue. That is, no events will be returned for this @probe anymore. * * Return: NULL is returned. */ _c_public_ NAcdProbe *n_acd_probe_free(NAcdProbe *probe) { NAcdEventNode *node, *t_node; if (!probe) return NULL; c_list_for_each_entry_safe(node, t_node, &probe->event_list, probe_link) n_acd_event_node_free(node); n_acd_probe_unschedule(probe); n_acd_probe_unlink(probe); probe->acd = n_acd_unref(probe->acd); free(probe); return NULL; } int n_acd_probe_raise(NAcdProbe *probe, NAcdEventNode **nodep, unsigned int event) { _c_cleanup_(n_acd_event_node_freep) NAcdEventNode *node = NULL; int r; r = n_acd_raise(probe->acd, &node, event); if (r) return r; switch (event) { case N_ACD_EVENT_READY: node->event.ready.probe = probe; break; case N_ACD_EVENT_USED: node->event.used.probe = probe; break; case N_ACD_EVENT_DEFENDED: node->event.defended.probe = probe; break; case N_ACD_EVENT_CONFLICT: node->event.conflict.probe = probe; break; default: c_assert(0); return -ENOTRECOVERABLE; } c_list_link_tail(&probe->event_list, &node->probe_link); if (nodep) *nodep = node; node = NULL; return 0; } int n_acd_probe_handle_timeout(NAcdProbe *probe) { int r; switch (probe->state) { case N_ACD_PROBE_STATE_PROBING: /* * We are still PROBING. We send 3 probes with a random timeout * scheduled between each. If, after a fixed timeout, we did * not receive any conflict we consider the probing successful. */ if (probe->n_iteration < N_ACD_RFC_PROBE_NUM) { /* * We have not sent all 3 probes, yet. A timer fired, * so we are ready to send the next probe. If this is * the third probe, schedule a timer for ANNOUNCE_WAIT * to give other peers a chance to answer. If this is * not the third probe, wait between PROBE_MIN and * PROBE_MAX for the next probe. */ r = n_acd_send(probe->acd, &probe->ip, NULL); if (r) { if (r != -N_ACD_E_DROPPED) return r; /* * Packet was dropped, and we know about it. It * never reached the network. Reasons are * manifold, and n_acd_send() raises events if * necessary. * From a probe-perspective, we simply pretend * we never sent the probe and schedule a * timeout for the next probe, effectively * doubling a single probe-interval. */ } else { /* Successfully sent, so advance counter. */ ++probe->n_iteration; } if (probe->n_iteration < N_ACD_RFC_PROBE_NUM) n_acd_probe_schedule(probe, probe->timeout_multiplier * N_ACD_RFC_PROBE_MIN_NSEC, probe->timeout_multiplier * (N_ACD_RFC_PROBE_MAX_NSEC - N_ACD_RFC_PROBE_MIN_NSEC)); else n_acd_probe_schedule(probe, probe->timeout_multiplier * N_ACD_RFC_ANNOUNCE_WAIT_NSEC, 0); } else { /* * All 3 probes succeeded and we waited enough to * consider this address usable by now. Do not announce * the address, yet. We must first give the caller a * chance to configure the address (so they can answer * ARP requests), before announcing it. */ r = n_acd_probe_raise(probe, NULL, N_ACD_EVENT_READY); if (r) return r; probe->state = N_ACD_PROBE_STATE_CONFIGURING; } break; case N_ACD_PROBE_STATE_ANNOUNCING: /* * We are ANNOUNCING, meaning the caller configured the address * on the interface and is actively using it. We send 3 * announcements out, in a short interval, and then just * perform passive conflict detection. * Note that once all 3 announcements are sent, we no longer * schedule a timer, so this part should not trigger, anymore. */ r = n_acd_send(probe->acd, &probe->ip, &probe->ip); if (r) { if (r != -N_ACD_E_DROPPED) return r; /* * See above in STATE_PROBING for details. We know the * packet was never sent, so we simply try again after * extending the timer. */ } else { /* Successfully sent, so advance counter. */ ++probe->n_iteration; } if (probe->n_iteration < N_ACD_RFC_ANNOUNCE_NUM) { /* * Announcements are always scheduled according to the * time-intervals specified in the spec. We always use * the RFC5227-mandated multiplier. * If you reconsider this, note that timeout_multiplier * might be 0 here. */ n_acd_probe_schedule(probe, N_ACD_TIMEOUT_RFC5227 * N_ACD_RFC_ANNOUNCE_INTERVAL_NSEC, 0); } break; case N_ACD_PROBE_STATE_CONFIGURING: case N_ACD_PROBE_STATE_FAILED: default: /* * There are no timeouts in these states. If we trigger one, * something is fishy. */ c_assert(0); return -ENOTRECOVERABLE; } return 0; } int n_acd_probe_handle_packet(NAcdProbe *probe, struct ether_arp *packet, bool hard_conflict) { NAcdEventNode *node; uint64_t now; int r; timer_now(&probe->acd->timer, &now); switch (probe->state) { case N_ACD_PROBE_STATE_PROBING: /* * Regardless whether this is a hard or soft conflict, we must * treat this as a probe failure. That is, notify the caller of * the conflict and wait for further instructions. We do not * react to this, until the caller tells us what to do, but we * do stop sending further probes. */ r = n_acd_probe_raise(probe, &node, N_ACD_EVENT_USED); if (r) return r; node->event.used.sender = node->sender; node->event.used.n_sender = ETH_ALEN; memcpy(node->sender, packet->arp_sha, ETH_ALEN); n_acd_probe_unschedule(probe); n_acd_probe_unlink(probe); probe->state = N_ACD_PROBE_STATE_FAILED; break; case N_ACD_PROBE_STATE_CONFIGURING: /* * We are waiting for the caller to configure the interface and * start ANNOUNCING. In this state, we cannot defend the * address as that would indicate that it is ready to be used, * and we cannot signal CONFLICT or USED as the caller may * already have started to use the address (and may have * configured the engine to always defend it, which means they * should be able to rely on never losing it after READY). * Simply drop the event, and rely on the anticipated ANNOUNCE * to trigger it again. */ break; case N_ACD_PROBE_STATE_ANNOUNCING: { /* * We were already instructed to announce the address, which * means the address is configured and in use. Hence, the * caller is responsible to serve regular ARP queries. Meaning, * we can ignore any soft conflicts (other peers doing ACD). * * But if we see a hard-conflict, we either defend the address * according to the caller's instructions, or we report the * conflict and bail out. */ bool conflict = false, rate_limited = false; if (!hard_conflict) break; rate_limited = now < probe->last_defend + N_ACD_RFC_DEFEND_INTERVAL_NSEC; switch (probe->defend) { case N_ACD_DEFEND_NEVER: conflict = true; break; case N_ACD_DEFEND_ONCE: if (rate_limited) { conflict = true; break; } /* fallthrough */ case N_ACD_DEFEND_ALWAYS: if (!rate_limited) { r = n_acd_send(probe->acd, &probe->ip, &probe->ip); if (r) { if (r != -N_ACD_E_DROPPED) return r; if (probe->defend == N_ACD_DEFEND_ONCE) { conflict = true; break; } } if (r != -N_ACD_E_DROPPED) probe->last_defend = now; } r = n_acd_probe_raise(probe, &node, N_ACD_EVENT_DEFENDED); if (r) return r; node->event.defended.sender = node->sender; node->event.defended.n_sender = ETH_ALEN; memcpy(node->sender, packet->arp_sha, ETH_ALEN); break; } if (conflict) { r = n_acd_probe_raise(probe, &node, N_ACD_EVENT_CONFLICT); if (r) return r; node->event.conflict.sender = node->sender; node->event.conflict.n_sender = ETH_ALEN; memcpy(node->sender, packet->arp_sha, ETH_ALEN); n_acd_probe_unschedule(probe); n_acd_probe_unlink(probe); probe->state = N_ACD_PROBE_STATE_FAILED; } break; } case N_ACD_PROBE_STATE_FAILED: default: /* * We are not listening for packets in these states. If we receive one, * something is fishy. */ c_assert(0); return -ENOTRECOVERABLE; } return 0; } /** * n_acd_probe_set_userdata - set userdata * @probe: probe to operate on * @userdata: userdata pointer * * This can be used to set a caller-controlled user-data pointer on @probe. The * value of the pointer is never inspected or used by `n-acd` and is fully * under control of the caller. * * The default value is NULL. */ _c_public_ void n_acd_probe_set_userdata(NAcdProbe *probe, void *userdata) { probe->userdata = userdata; } /** * n_acd_probe_get_userdata - get userdata * @probe: probe to operate on * * This queries the userdata pointer that was previously set through * n_acd_probe_set_userdata(). * * The default value is NULL. * * Return: The stored userdata pointer is returned. */ _c_public_ void n_acd_probe_get_userdata(NAcdProbe *probe, void **userdatap) { *userdatap = probe->userdata; } /** * n_acd_probe_announce() - announce the configured IP address * @probe: probe to operate on * @defend: defense policy * * Announce the IP address on the local link, and start defending it according * to the given policy, which mut be one of N_ACD_DEFEND_ONCE, * N_ACD_DEFEND_NEVER, or N_ACD_DEFEND_ALWAYS. * * This must be called in response to an N_ACD_EVENT_READY event, and only * after the given address has been configured on the given network interface. * * Return: 0 on success, N_ACD_E_INVALID_ARGUMENT in case the defense policy * is invalid, negative error code on failure. */ _c_public_ int n_acd_probe_announce(NAcdProbe *probe, unsigned int defend) { if (defend >= _N_ACD_DEFEND_N) return N_ACD_E_INVALID_ARGUMENT; probe->state = N_ACD_PROBE_STATE_ANNOUNCING; probe->defend = defend; probe->n_iteration = 0; /* * We must schedule a fake-timeout, since we are not allowed to * advance the state-machine outside of n_acd_dispatch(). */ n_acd_probe_schedule(probe, 0, 0); return 0; }