/*
 * IPv4 Address Conflict Detection
 *
 * This file contains the main context initialization and management functions,
 * as well as a bunch of utilities used throughout the n-acd modules.
 */

/**
 * DOC: IPv4 Address Conflict Detection
 *
 * The `n-acd` project implements the IPv4 Address Conflict Detection protocol
 * as defined in RFC-5227. The protocol originates in the IPv4 Link Local
 * Address selection but was later on generalized and resulted in `ACD`. The
 * idea is to use `ARP` to query a link for an address to see whether it
 * already exists on the network, as well as defending an address that is in
 * use on a network interface. Furthermore, `ACD` provides passive diagnostics
 * for administrators, as it will detect address conflicts automatically, which
 * then can be logged or shown to a user.
 *
 * The main context object of `n-acd` is the `NAcd` structure. It is a passive
 * ref-counted context object which drives `ACD` probes running on it. A
 * context is specific to a linux network device and transport. If multiple
 * network devices are used, then separate `NAcd` contexts must be deployed.
 *
 * The `NAcdProbe` object drives a single `ACD` state-machine. A probe is
 * created on an `NAcd` context by providing an address to probe for. The probe
 * will then raise notifications whether the address conflict detection found
 * something, or whether the address is ready to be used. Optionally, the probe
 * will then enter into passive mode and defend the address as long as it is
 * kept active.
 *
 * Note that the `n-acd` project only implements the networking protocol. It
 * never queries or modifies network interfaces. It completely relies on the
 * API user to react to notifications and update network interfaces
 * respectively. `n-acd` uses an event-mechanism on every context object. All
 * events raised by any probe or operation on a given context are queued on
 * that context object. The event-queue can then be drained by the
 * API user.
All events are properly asynchronous and designed in a way that no * synchronous reaction to any event is required. That is, the events are * carefully designed to allow forwarding via IPC (or even networks) to a * controller that handles them and specifies how to react. Furthermore, none * of the function calls of `n-acd` require synchronous error handling. * Instead, functions only ever return values on fatal errors. Everything else * is queued as events, thus guaranteeing that synchronous handling of return * values is not required. Exceptions are functions that do not affect internal * state or do not have an associated context object. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "n-acd.h" #include "n-acd-private.h" enum { N_ACD_EPOLL_TIMER, N_ACD_EPOLL_SOCKET, }; static int n_acd_get_random(unsigned int *random) { uint8_t hash_seed[] = { 0x3a, 0x0c, 0xa6, 0xdd, 0x44, 0xef, 0x5f, 0x7a, 0x5e, 0xd7, 0x25, 0x37, 0xbf, 0x4e, 0x80, 0xa1, }; CSipHash hash = C_SIPHASH_NULL; struct timespec ts; const uint8_t *p; int r; /* * We need random jitter for all timeouts when handling ARP probes. Use * AT_RANDOM to get a seed for rand_r(3p), if available (should always * be available on linux). See the time-out scheduler for details. * Additionally, we include the current time in the seed. This avoids * using the same jitter in case you run multiple ACD engines in the * same process. Lastly, the seed is hashed with SipHash24 to avoid * exposing the value of AT_RANDOM on the network. 
*/ c_siphash_init(&hash, hash_seed); p = (const uint8_t *)getauxval(AT_RANDOM); if (p) c_siphash_append(&hash, p, 16); r = clock_gettime(CLOCK_MONOTONIC, &ts); if (r < 0) return -c_errno(); c_siphash_append(&hash, (const uint8_t *)&ts.tv_sec, sizeof(ts.tv_sec)); c_siphash_append(&hash, (const uint8_t *)&ts.tv_nsec, sizeof(ts.tv_nsec)); *random = c_siphash_finalize(&hash); return 0; } static int n_acd_socket_new(int *fdp, int fd_bpf_prog, NAcdConfig *config) { const struct sockaddr_ll address = { .sll_family = AF_PACKET, .sll_protocol = htobe16(ETH_P_ARP), .sll_ifindex = config->ifindex, .sll_halen = ETH_ALEN, .sll_addr = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, }; int r, s = -1; s = socket(PF_PACKET, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0); if (s < 0) { r = -c_errno(); goto error; } if (fd_bpf_prog >= 0) { r = setsockopt(s, SOL_SOCKET, SO_ATTACH_BPF, &fd_bpf_prog, sizeof(fd_bpf_prog)); if (r < 0) { r = -c_errno(); goto error; } } r = bind(s, (struct sockaddr *)&address, sizeof(address)); if (r < 0) { r = -c_errno(); goto error; } *fdp = s; s = -1; return 0; error: if (s >= 0) close(s); return r; } /** * n_acd_config_new() - create configuration object * @configp: output argument for new configuration * * This creates a new configuration object and provides it to the caller. The * object is fully owned by the caller upon function return. * * A configuration object is a passive structure that is used to collect * information that is then passed to a constructor or other function. A * configuration never validates the data, but it is up to the consumer of a * configuration to do that. * * Return: 0 on success, negative error code on failure. 
*/ _c_public_ int n_acd_config_new(NAcdConfig **configp) { _c_cleanup_(n_acd_config_freep) NAcdConfig *config = NULL; config = malloc(sizeof(*config)); if (!config) return -ENOMEM; *config = (NAcdConfig)N_ACD_CONFIG_NULL(*config); *configp = config; config = NULL; return 0; } /** * n_acd_config_free() - destroy configuration object * @config: configuration to operate on, or NULL * * This destroys the configuration object @config. If @config is NULL, this is * a no-op. * * Return: NULL is returned. */ _c_public_ NAcdConfig *n_acd_config_free(NAcdConfig *config) { if (!config) return NULL; free(config); return NULL; } /** * n_acd_config_set_ifindex() - set ifindex property * @config: configuration to operate on * @ifindex: ifindex to set * * This sets the @ifindex property of the configuration object. Any previous * value is overwritten. * * A valid ifindex is a 32bit integer greater than 0. Any other value is * treated as unspecified. * * The ifindex corresponds to the interface index provided by the linux kernel. * It specifies the network device to be used. */ _c_public_ void n_acd_config_set_ifindex(NAcdConfig *config, int ifindex) { config->ifindex = ifindex; } /** * n_acd_config_set_transport() - set transport property * @config: configuration to operate on * @transport: transport to set * * This specifies the transport to use. A transport must be one of the * `N_ACD_TRANSPORT_*` identifiers. It selects which transport protocol `n-acd` * will run on. */ _c_public_ void n_acd_config_set_transport(NAcdConfig *config, unsigned int transport) { config->transport = transport; } /** * n_acd_config_set_mac() - set mac property * @config: configuration to operate on * @mac: mac to set * * This specifies the hardware address (also referred to as `MAC Address`) to * use. Any hardware address can be specified. It is the caller's * responsibility to make sure the address can actually be used. * * The address in @mac is copied into @config. 
It does not have to be retained * by the caller. */ _c_public_ void n_acd_config_set_mac(NAcdConfig *config, const uint8_t *mac, size_t n_mac) { /* * We truncate the address at the maximum we support. We still remember * the original length, so any consumer of this configuration can then * complain about an unsupported address length. This allows us to * avoid a memory allocation here and having to return `int`. */ config->n_mac = n_mac; memcpy(config->mac, mac, n_mac > ETH_ALEN ? ETH_ALEN : n_mac); } int n_acd_event_node_new(NAcdEventNode **nodep) { NAcdEventNode *node; node = malloc(sizeof(*node)); if (!node) return -ENOMEM; *node = (NAcdEventNode)N_ACD_EVENT_NODE_NULL(*node); *nodep = node; return 0; } NAcdEventNode *n_acd_event_node_free(NAcdEventNode *node) { if (!node) return NULL; c_list_unlink(&node->probe_link); c_list_unlink(&node->acd_link); free(node); return NULL; } int n_acd_ensure_bpf_map_space(NAcd *acd) { NAcdProbe *probe; _c_cleanup_(c_closep) int fd_map = -1, fd_prog = -1; size_t max_map; int r; if (acd->n_bpf_map < acd->max_bpf_map) return 0; max_map = 2 * acd->max_bpf_map; r = n_acd_bpf_map_create(&fd_map, max_map); if (r) return r; c_rbtree_for_each_entry(probe, &acd->ip_tree, ip_node) { r = n_acd_bpf_map_add(fd_map, &probe->ip); if (r) return r; } r = n_acd_bpf_compile(&fd_prog, fd_map, (struct ether_addr*) acd->mac); if (r) return r; if (fd_prog >= 0) { r = setsockopt(acd->fd_socket, SOL_SOCKET, SO_ATTACH_BPF, &fd_prog, sizeof(fd_prog)); if (r) return -c_errno(); } if (acd->fd_bpf_map >= 0) close(acd->fd_bpf_map); acd->fd_bpf_map = fd_map; fd_map = -1; acd->max_bpf_map = max_map; return 0; } /** * n_acd_new() - create a new ACD context * @acdp: output argument for new context object * @config: configuration parameters * * Create a new ACD context and return it in @acdp. 
The configuration @config * must be initialized by the caller and must specify a valid network * interface, transport mechanism, as well as hardware address compatible with * the selected transport. The configuration is copied into the context. The * @config object thus does not have to be retained by the caller. * * Return: 0 on success, negative error code on failure. */ _c_public_ int n_acd_new(NAcd **acdp, NAcdConfig *config) { _c_cleanup_(n_acd_unrefp) NAcd *acd = NULL; _c_cleanup_(c_closep) int fd_bpf_prog = -1; int r; if (config->ifindex <= 0 || config->transport != N_ACD_TRANSPORT_ETHERNET || config->n_mac != ETH_ALEN || !memcmp(config->mac, (uint8_t[ETH_ALEN]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, ETH_ALEN)) return N_ACD_E_INVALID_ARGUMENT; acd = malloc(sizeof(*acd)); if (!acd) return -ENOMEM; *acd = (NAcd)N_ACD_NULL(*acd); acd->ifindex = config->ifindex; memcpy(acd->mac, config->mac, ETH_ALEN); r = n_acd_get_random(&acd->seed); if (r) return r; acd->fd_epoll = epoll_create1(EPOLL_CLOEXEC); if (acd->fd_epoll < 0) return -c_errno(); r = timer_init(&acd->timer); if (r < 0) return r; acd->max_bpf_map = 8; r = n_acd_bpf_map_create(&acd->fd_bpf_map, acd->max_bpf_map); if (r) return r; r = n_acd_bpf_compile(&fd_bpf_prog, acd->fd_bpf_map, (struct ether_addr*) acd->mac); if (r) return r; r = n_acd_socket_new(&acd->fd_socket, fd_bpf_prog, config); if (r) return r; r = epoll_ctl(acd->fd_epoll, EPOLL_CTL_ADD, acd->timer.fd, &(struct epoll_event){ .events = EPOLLIN, .data.u32 = N_ACD_EPOLL_TIMER, }); if (r < 0) return -c_errno(); r = epoll_ctl(acd->fd_epoll, EPOLL_CTL_ADD, acd->fd_socket, &(struct epoll_event){ .events = EPOLLIN, .data.u32 = N_ACD_EPOLL_SOCKET, }); if (r < 0) return -c_errno(); *acdp = acd; acd = NULL; return 0; } static void n_acd_free_internal(NAcd *acd) { NAcdEventNode *node, *t_node; if (!acd) return; c_list_for_each_entry_safe(node, t_node, &acd->event_list, acd_link) n_acd_event_node_free(node); c_assert(c_rbtree_is_empty(&acd->ip_tree)); if 
(acd->fd_socket >= 0) { c_assert(acd->fd_epoll >= 0); epoll_ctl(acd->fd_epoll, EPOLL_CTL_DEL, acd->fd_socket, NULL); close(acd->fd_socket); acd->fd_socket = -1; } if (acd->fd_bpf_map >= 0) { close(acd->fd_bpf_map); acd->fd_bpf_map = -1; } if (acd->timer.fd >= 0) { c_assert(acd->fd_epoll >= 0); epoll_ctl(acd->fd_epoll, EPOLL_CTL_DEL, acd->timer.fd, NULL); timer_deinit(&acd->timer); } if (acd->fd_epoll >= 0) { close(acd->fd_epoll); acd->fd_epoll = -1; } free(acd); } /** * n_acd_ref() - acquire reference * @acd: context to operate on, or NULL * * This acquires a single reference to the context specified as @acd. If @acd * is NULL, this is a no-op. * * Return: @acd is returned. */ _c_public_ NAcd *n_acd_ref(NAcd *acd) { if (acd) ++acd->n_refs; return acd; } /** * n_acd_unref() - release reference * @acd: context to operate on, or NULL * * This releases a single reference to the context @acd. If this is the last * reference, the context is torn down and deallocated. * * Return: NULL is returned. 
*/ _c_public_ NAcd *n_acd_unref(NAcd *acd) { if (acd && !--acd->n_refs) n_acd_free_internal(acd); return NULL; } int n_acd_raise(NAcd *acd, NAcdEventNode **nodep, unsigned int event) { NAcdEventNode *node; int r; r = n_acd_event_node_new(&node); if (r) return r; node->event.event = event; c_list_link_tail(&acd->event_list, &node->acd_link); if (nodep) *nodep = node; return 0; } int n_acd_send(NAcd *acd, const struct in_addr *tpa, const struct in_addr *spa) { struct sockaddr_ll address = { .sll_family = AF_PACKET, .sll_protocol = htobe16(ETH_P_ARP), .sll_ifindex = acd->ifindex, .sll_halen = ETH_ALEN, .sll_addr = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, }; struct ether_arp arp = { .ea_hdr = { .ar_hrd = htobe16(ARPHRD_ETHER), .ar_pro = htobe16(ETHERTYPE_IP), .ar_hln = sizeof(acd->mac), .ar_pln = sizeof(uint32_t), .ar_op = htobe16(ARPOP_REQUEST), }, }; ssize_t l; int r; memcpy(arp.arp_sha, acd->mac, sizeof(acd->mac)); memcpy(arp.arp_tpa, &tpa->s_addr, sizeof(uint32_t)); if (spa) memcpy(arp.arp_spa, &spa->s_addr, sizeof(spa->s_addr)); l = sendto(acd->fd_socket, &arp, sizeof(arp), MSG_NOSIGNAL, (struct sockaddr *)&address, sizeof(address)); if (l < 0) { if (errno == EAGAIN || errno == ENOBUFS) { /* * We never maintain outgoing queues. We rely on the * network device to do that for us. In case the queues * are full, or the kernel refuses to queue the packet * for other reasons, we must tell our caller that the * packet was dropped. */ return N_ACD_E_DROPPED; } else if (errno == ENETDOWN || errno == ENXIO) { /* * These errors happen if the network device went down * or was actually removed. We always propagate this as * event, so the user can react accordingly (similarly * to the recvmmsg(2) handler). In case the user does * not immediately react, we also tell our caller that * the packet was dropped, so we don't erroneously * treat this as success. */ r = n_acd_raise(acd, NULL, N_ACD_EVENT_DOWN); if (r) return r; return N_ACD_E_DROPPED; } /* * Random network error. 
We treat this as fatal and propagate * the error, so it is noticed and can be investigated. */ return -c_errno(); } else if (l != (ssize_t)sizeof(arp)) { /* * Ugh, the kernel modified the packet. This is unexpected. We * consider the packet lost. */ return N_ACD_E_DROPPED; } return 0; } /** * n_acd_get_fd() - get pollable file descriptor * @acd: context object to operate on * @fdp: output argument for file descriptor * * This returns the backing file-descriptor of the context object @acd. The * file-descriptor is owned by @acd and valid as long as @acd is. The * file-descriptor never changes, so it can be cached by the caller as long as * they hold a reference to @acd. * * The file-descriptor is internal to the @acd context and should not be * modified by the caller. It is only exposed to allow the caller to poll on * it. Whenever the file-descriptor polls readable, n_acd_dispatch() should be * called. * * Currently, the file-descriptor is an epoll-fd. */ _c_public_ void n_acd_get_fd(NAcd *acd, int *fdp) { *fdp = acd->fd_epoll; } static int n_acd_handle_timeout(NAcd *acd) { NAcdProbe *probe; uint64_t now; int r; /* * Read the current time once, and handle all timouts that triggered * before the current time. Rereading the current time in each loop * might risk creating a live-lock, and the fact that we read the * time after reading the timer guarantees that the timeout which * woke us up is hanlded. * * When there are no more timeouts to handle at the given time, we * rearm the timer to potentially wake us up again in the future. */ timer_now(&acd->timer, &now); for (;;) { Timeout *timeout; r = timer_pop_timeout(&acd->timer, now, &timeout); if (r < 0) { return r; } else if (!timeout) { /* * There are no more timeouts pending before @now. Rearm * the timer to fire again at the next timeout. 
*/ timer_rearm(&acd->timer); break; } probe = (void *)timeout - offsetof(NAcdProbe, timeout); r = n_acd_probe_handle_timeout(probe); if (r) return r; } return 0; } static int n_acd_handle_packet(NAcd *acd, struct ether_arp *packet) { bool hard_conflict; NAcdProbe *probe; uint32_t addr; CRBNode *node; int r; /* * We are interested in 2 kinds of ARP messages: * * 1) Someone who is *NOT* us sends *ANY* ARP message with our IP * address as sender. This is never good, because it implies an * address conflict. * We call this a hard-conflict. * * 2) Someone who is *NOT* us sends an ARP REQUEST without any sender * IP, but our IP as target. This implies someone else performs an * ARP Probe with our address. This also implies a conflict, but * one that can be resolved by responding to the probe. * We call this a soft-conflict. * * We are never interested in any other ARP message. The kernel already * deals with everything else, hence, we can silently ignore those. * * Now, we simply check whether a sender-address is set. This allows us * to distinguish both cases. We then check further conditions, so we * can bail out early if neither is the case. * * Lastly, we perform a lookup in our probe-set to check whether the * address actually matches, so we can let these probes dispatch the * message. Note that we allow duplicate probes, so we need to dispatch * each matching probe, not just one. */ if (memcmp(packet->arp_spa, (uint8_t[4]){ }, sizeof(packet->arp_spa))) { memcpy(&addr, packet->arp_spa, sizeof(addr)); hard_conflict = true; } else if (packet->ea_hdr.ar_op == htobe16(ARPOP_REQUEST)) { memcpy(&addr, packet->arp_tpa, sizeof(addr)); hard_conflict = false; } else { /* * The BPF filter will not let through any other packet. */ return -EIO; } /* Find top-most node that matches @addr. 
*/ node = acd->ip_tree.root; while (node) { probe = c_rbnode_entry(node, NAcdProbe, ip_node); if (addr < probe->ip.s_addr) node = node->left; else if (addr > probe->ip.s_addr) node = node->right; else break; } /* * If the address is unknown, we drop the package. This might happen if * the kernel queued the packet and passed the BPF filter, but we * modified the set before dequeuing the message. */ if (!node) return 0; /* Forward to left-most child that still matches @addr. */ while (node->left && addr == c_rbnode_entry(node->left, NAcdProbe, ip_node)->ip.s_addr) node = node->left; /* Iterate all matching entries in-order. */ do { probe = c_rbnode_entry(node, NAcdProbe, ip_node); r = n_acd_probe_handle_packet(probe, packet, hard_conflict); if (r) return r; node = c_rbnode_next(node); } while (node && addr == c_rbnode_entry(node, NAcdProbe, ip_node)->ip.s_addr); return 0; } static int n_acd_dispatch_timer(NAcd *acd, struct epoll_event *event) { int r; if (event->events & (EPOLLHUP | EPOLLERR)) { /* * There is no way to handle either gracefully. If we ignored * them, we would busy-loop, so lets rather forward the error * to the caller. */ return -EIO; } if (event->events & EPOLLIN) { r = timer_read(&acd->timer); if (r <= 0) return r; c_assert(r == TIMER_E_TRIGGERED); /* * A timer triggered, handle all pending timeouts at a given * point in time. There can only be a finite number of pending * timeouts, any new ones will be in the future, so not handled * now, but guaranteed to wake us up again when they do trigger. */ r = n_acd_handle_timeout(acd); if (r) return r; } return 0; } static bool n_acd_packet_is_valid(NAcd *acd, void *packet, size_t n_packet) { struct ether_arp *arp; /* * The eBPF filter will ensure that this function always returns true, however, * this allows the eBPF filter to be an optional optimization which is necessary * on older kernels. * * See comments in n-acd-bpf.c for details. 
*/ if (n_packet != sizeof(*arp)) return false; arp = packet; if (arp->arp_hrd != htobe16(ARPHRD_ETHER)) return false; if (arp->arp_pro != htobe16(ETHERTYPE_IP)) return false; if (arp->arp_hln != sizeof(struct ether_addr)) return false; if (arp->arp_pln != sizeof(struct in_addr)) return false; if (!memcmp(arp->arp_sha, acd->mac, sizeof(struct ether_addr))) return false; if (memcmp(arp->arp_spa, &((struct in_addr) { INADDR_ANY }), sizeof(struct in_addr))) { if (arp->arp_op != htobe16(ARPOP_REQUEST) && arp->arp_op != htobe16(ARPOP_REPLY)) return false; } else if (arp->arp_op != htobe16(ARPOP_REQUEST)) { return false; } return true; } static int n_acd_dispatch_socket(NAcd *acd, struct epoll_event *event) { const size_t n_batch = 8; struct mmsghdr msgs[n_batch]; struct iovec iovecs[n_batch]; struct ether_arp data[n_batch]; size_t i; int r, n; for (i = 0; i < n_batch; ++i) { iovecs[i].iov_base = data + i; iovecs[i].iov_len = sizeof(data[i]); msgs[i].msg_hdr = (struct msghdr){ .msg_iov = iovecs + i, .msg_iovlen = 1, }; } /* * We always directly call into recvmmsg(2), regardless which EPOLL* * event is signalled. On sockets, the recv(2)-family of syscalls does * a suitable job of handling all possible scenarios and telling us * about it. Hence, lets take the easy route and always ask the kernel * about the current state. */ n = recvmmsg(acd->fd_socket, msgs, n_batch, 0, NULL); if (n < 0) { if (errno == ENETDOWN) { /* * We get ENETDOWN if the network-device goes down or * is removed. This error is temporary and only queued * once. Subsequent reads will simply return EAGAIN * until the device is up again and has data queued. * Usually, the caller should tear down all probes when * an interface goes down, but we leave it up to the * caller to decide what to do. We propagate the code * and continue. */ return n_acd_raise(acd, NULL, N_ACD_EVENT_DOWN); } else if (errno == EAGAIN) { /* * There is no more data queued and we did not get * preempted. Everything is good to go. 
* As a safety-net against busy-looping, we do check * for HUP/ERR. Neither should be set, since they imply * error-dequeue behavior on all socket calls. Lets * fail hard if we trigger it, so we can investigate. */ if (event->events & (EPOLLHUP | EPOLLERR)) return -EIO; return 0; } else { /* * Something went wrong. Propagate the error-code, so * this can be investigated. */ return -c_errno(); } } else if (n >= (ssize_t)n_batch) { /* * If all buffers were filled with data, we cannot be sure that * there is nothing left to read. But to avoid starvation, we * cannot loop on this condition. Instead, we mark the context * as preempted so the caller can call us again. * Note that in level-triggered event-loops this condition can * be neglected, but in edge-triggered event-loops it is * crucial to forward this information. * * On the other hand, there are several conditions where the * kernel might return less batches than requested, but was * still preempted. However, all of those cases require the * preemption to have triggered a wakeup *after* we entered * recvmmsg(). Hence, even if we did not recognize the * preemption, an edge must have triggered and as such we will * handle the event on the next turn. */ acd->preempted = true; } for (i = 0; (ssize_t)i < n; ++i) { if (!n_acd_packet_is_valid(acd, data + i, msgs[i].msg_len)) continue; /* * Handle the packet. Bail out if something went wrong. Note * that this must be fatal errors, since we discard all other * packets that follow. */ r = n_acd_handle_packet(acd, data + i); if (r) return r; } return 0; } /** * n_acd_dispatch() - dispatch context * @acd: context object to operate on * * This dispatches the internal state-machine of all probes and operations * running on the context @acd. * * Any outside effect or event triggered by this dispatcher will be queued on * the event-queue of @acd. Whenever the dispatcher returns, the caller is * required to drain the event-queue via n_acd_pop_event() until it is empty. 
*
 * This function dispatches as many events as possible up to a static limit to
 * prevent stalling execution. If the static limit is reached, this function
 * will return with N_ACD_E_PREEMPTED, otherwise 0 is returned. In most cases
 * preemption can be ignored, because level-triggered event notification
 * handles it automatically. However, in case of edge-triggered event
 * mechanisms, the caller must make sure to call the dispatcher again.
 *
 * Return: 0 on success, N_ACD_E_PREEMPTED on preemption, negative error code
 *         on failure.
 */
_c_public_ int n_acd_dispatch(NAcd *acd) {
        struct epoll_event events[2];
        int n_ready, idx, r;

        /* Poll without blocking; at most one event per registered source. */
        n_ready = epoll_wait(acd->fd_epoll, events, sizeof(events) / sizeof(*events), 0);
        if (n_ready < 0) {
                /* Linux never returns EINTR if `timeout == 0'. */
                return -c_errno();
        }

        /* The socket handler sets this again if it filled a whole batch. */
        acd->preempted = false;

        for (idx = 0; idx < n_ready; ++idx) {
                struct epoll_event *ev = &events[idx];

                if (ev->data.u32 == N_ACD_EPOLL_TIMER) {
                        r = n_acd_dispatch_timer(acd, ev);
                } else if (ev->data.u32 == N_ACD_EPOLL_SOCKET) {
                        r = n_acd_dispatch_socket(acd, ev);
                } else {
                        /* Only the two tags above are ever registered. */
                        c_assert(0);
                        r = 0;
                }

                if (r)
                        return r;
        }

        if (acd->preempted)
                return N_ACD_E_PREEMPTED;

        return 0;
}

/**
 * n_acd_pop_event() - get the next pending event
 * @acd:        context object to operate on
 * @eventp:     output argument for the event
 *
 * Returns a pointer to the next pending event. The event is still owned by
 * the context, and is only valid until the next call to n_acd_pop_event()
 * or until the owning object is freed (either the ACD context or the indicated
 * probe object).
 *
 * An event either originates on the ACD context, or one of the configured
 * probes. If the event-type has a 'probe' pointer, it originated on the
 * indicated probe (which is *never* NULL), otherwise it originated on the
 * context.
 *
 * Users must call this function repeatedly until either an error is returned,
 * or the event-pointer is NULL. Wakeups on the epoll-fd are only guaranteed
 * for each batch of events.
Hence, it is the caller's responsibility to drain
 * the event-queue somehow after each call to n_acd_dispatch(). Note that
 * events can only be added by n_acd_dispatch(), hence, you cannot live-lock
 * when draining the event queue.
 *
 * The possible events are:
 *  * N_ACD_EVENT_READY:    A configured IP address was probed successfully
 *                          and is ready to be used. Once configured on the
 *                          interface, the caller must call n_acd_announce()
 *                          to announce and start defending the address.
 *  * N_ACD_EVENT_USED:     Someone is already using the IP address being
 *                          probed. The probe is put into stopped state and
 *                          should be freed by the caller.
 *  * N_ACD_EVENT_DEFENDED: A conflict was detected for an announced IP
 *                          address, and the engine attempted to defend it.
 *                          This is purely informational, and no action is
 *                          required by the caller.
 *  * N_ACD_EVENT_CONFLICT: A conflict was detected for an announced IP
 *                          address, and the probe was not able to defend
 *                          it (according to the configured policy). The
 *                          probe halted, the caller must stop using
 *                          the address immediately, and should free the probe.
 *  * N_ACD_EVENT_DOWN:     The specified network interface was put down. The
 *                          user is recommended to free *ALL* probes and
 *                          recreate them as soon as the interface is up again.
 *                          Note that this event is purely informational. The
 *                          probes will continue running, but all packets will
 *                          be blackholed, and no network packets are received,
 *                          until the network is back up again. Hence, from an
 *                          operational perspective, the legitimacy of the ACD
 *                          probes is lost and the user better re-probes all
 *                          addresses.
 *
 * Returns: 0 on success, negative error code on failure. The popped event is
 *          returned in @eventp. If no event is pending, NULL is placed in
 *          @eventp and 0 is returned. If an error is returned, @eventp is left
 *          untouched.
*/ _c_public_ int n_acd_pop_event(NAcd *acd, NAcdEvent **eventp) { NAcdEventNode *node, *t_node; c_list_for_each_entry_safe(node, t_node, &acd->event_list, acd_link) { if (node->is_public) { n_acd_event_node_free(node); continue; } node->is_public = true; *eventp = &node->event; return 0; } *eventp = NULL; return 0; } /** * n_acd_probe() - start new probe * @acd: context object to operate on * @probep: output argument for new probe * @config: probe configuration * * This creates a new probe on the context @acd and returns the probe in * @probep. The configuration @config must provide valid probe parameters. At * least a valid IP address must be provided through the configuration. * * This function does not reject duplicate probes for the same address. It is * the caller's decision whether duplicates are allowed or not. But note that * duplicate probes on the same context will not conflict each other. That is, * running a probe for the same address twice on the same context will not * cause them to consider each other a duplicate. * * Probes are rather lightweight objects. They do not create any * file-descriptors or other kernel objects. Probes always re-use the * infrastructure provided by the context object @acd. This allows running many * probes simultaneously without exhausting resources. * * Return: 0 on success, N_ACD_E_INVALID_ARGUMENT on invalid configuration * parameters, negative error code on failure. */ _c_public_ int n_acd_probe(NAcd *acd, NAcdProbe **probep, NAcdProbeConfig *config) { return n_acd_probe_new(probep, acd, config); }