diff options
Diffstat (limited to 'epoll.c')
-rw-r--r-- | epoll.c | 369 |
1 files changed, 369 insertions, 0 deletions
diff --git a/epoll.c b/epoll.c new file mode 100644 index 00000000..19a88a1f --- /dev/null +++ b/epoll.c @@ -0,0 +1,369 @@ +/* + * Copyright 2000-2003 Niels Provos <provos@citi.umich.edu> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdint.h> +#include <sys/types.h> +#include <sys/resource.h> +#ifdef HAVE_SYS_TIME_H +#include <sys/time.h> +#else +#include <sys/_time.h> +#endif +#include <sys/queue.h> +#include <sys/epoll.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#endif + +#include "event.h" +#include "evsignal.h" +#include "log.h" + +extern volatile sig_atomic_t evsignal_caught; + +/* due to limitations in the epoll interface, we need to keep track of + * all file descriptors outself. + */ +struct evepoll { + struct event *evread; + struct event *evwrite; +}; + +struct epollop { + struct evepoll *fds; + int nfds; + struct epoll_event *events; + int nevents; + int epfd; + sigset_t evsigmask; +}; + +void *epoll_init (void); +int epoll_add (void *, struct event *); +int epoll_del (void *, struct event *); +int epoll_recalc (struct event_base *, void *, int); +int epoll_dispatch (struct event_base *, void *, struct timeval *); +void epoll_dealloc (void *); + +struct eventop epollops = { + "epoll", + epoll_init, + epoll_add, + epoll_del, + epoll_recalc, + epoll_dispatch, + epoll_dealloc +}; + +#ifdef HAVE_SETFD +#define FD_CLOSEONEXEC(x) do { \ + if (fcntl(x, F_SETFD, 1) == -1) \ + event_warn("fcntl(%d, F_SETFD)", x); \ +} while (0) +#else +#define FD_CLOSEONEXEC(x) +#endif + +#define NEVENT 32000 + +void * +epoll_init(void) +{ + int epfd, nfiles = NEVENT; + struct rlimit rl; + struct epollop *epollop; + + /* Disable epollueue when this environment variable is set */ + if (getenv("EVENT_NOEPOLL")) + return (NULL); + + if (getrlimit(RLIMIT_NOFILE, &rl) == 0 && + rl.rlim_cur != RLIM_INFINITY) { + /* + * Solaris is somewhat retarded - it's important to drop + * backwards compatibility when making changes. So, don't + * dare to put rl.rlim_cur here. + */ + nfiles = rl.rlim_cur - 1; + } + + /* Initalize the kernel queue */ + + if ((epfd = epoll_create(nfiles)) == -1) { + event_warn("epoll_create"); + return (NULL); + } + + FD_CLOSEONEXEC(epfd); + + if (!(epollop = calloc(1, sizeof(struct epollop)))) + return (NULL); + + epollop->epfd = epfd; + + /* Initalize fields */ + epollop->events = malloc(nfiles * sizeof(struct epoll_event)); + if (epollop->events == NULL) { + free(epollop); + return (NULL); + } + epollop->nevents = nfiles; + + epollop->fds = calloc(nfiles, sizeof(struct evepoll)); + if (epollop->fds == NULL) { + free(epollop->events); + free(epollop); + return (NULL); + } + epollop->nfds = nfiles; + + evsignal_init(&epollop->evsigmask); + + return (epollop); +} + +int +epoll_recalc(struct event_base *base, void *arg, int max) +{ + struct epollop *epollop = arg; + + if (max > epollop->nfds) { + struct evepoll *fds; + int nfds; + + nfds = epollop->nfds; + while (nfds < max) + nfds <<= 1; + + fds = realloc(epollop->fds, nfds * sizeof(struct evepoll)); + if (fds == NULL) { + event_warn("realloc"); + return (-1); + } + epollop->fds = fds; + memset(fds + epollop->nfds, 0, + (nfds - epollop->nfds) * sizeof(struct evepoll)); + epollop->nfds = nfds; + } + + return (evsignal_recalc(&epollop->evsigmask)); +} + +int +epoll_dispatch(struct event_base *base, void *arg, struct timeval *tv) +{ + struct epollop *epollop = arg; + struct epoll_event *events = epollop->events; + struct evepoll *evep; + int i, res, timeout; + + if (evsignal_deliver(&epollop->evsigmask) == -1) + return (-1); + + timeout = tv->tv_sec * 1000 + (tv->tv_usec + 999) / 1000; + res = epoll_wait(epollop->epfd, events, epollop->nevents, timeout); + + if (evsignal_recalc(&epollop->evsigmask) == -1) + return (-1); + + if (res == -1) { + if (errno != EINTR) { + event_warn("epoll_wait"); + return (-1); + } + + evsignal_process(); + return (0); + } else if (evsignal_caught) + evsignal_process(); + + event_debug(("%s: epoll_wait reports %d", __func__, res)); + + for (i = 0; i < res; i++) { + int which = 0; + int what = events[i].events; + struct event *evread = NULL, *evwrite = NULL; + + evep = (struct evepoll *)events[i].data.ptr; + + if (what & EPOLLHUP) + what |= EPOLLIN | EPOLLOUT; + else if (what & EPOLLERR) + what |= EPOLLIN | EPOLLOUT; + + if (what & EPOLLIN) { + evread = evep->evread; + which |= EV_READ; + } + + if (what & EPOLLOUT) { + evwrite = evep->evwrite; + which |= EV_WRITE; + } + + if (!which) + continue; + + if (evread != NULL && !(evread->ev_events & EV_PERSIST)) + event_del(evread); + if (evwrite != NULL && evwrite != evread && + !(evwrite->ev_events & EV_PERSIST)) + event_del(evwrite); + + if (evread != NULL) + event_active(evread, EV_READ, 1); + if (evwrite != NULL) + event_active(evwrite, EV_WRITE, 1); + } + + return (0); +} + + +int +epoll_add(void *arg, struct event *ev) +{ + struct epollop *epollop = arg; + struct epoll_event epev = {0, {0}}; + struct evepoll *evep; + int fd, op, events; + + if (ev->ev_events & EV_SIGNAL) + return (evsignal_add(&epollop->evsigmask, ev)); + + fd = ev->ev_fd; + if (fd >= epollop->nfds) { + /* Extent the file descriptor array as necessary */ + if (epoll_recalc(ev->ev_base, epollop, fd) == -1) + return (-1); + } + evep = &epollop->fds[fd]; + op = EPOLL_CTL_ADD; + events = 0; + if (evep->evread != NULL) { + events |= EPOLLIN; + op = EPOLL_CTL_MOD; + } + if (evep->evwrite != NULL) { + events |= EPOLLOUT; + op = EPOLL_CTL_MOD; + } + + if (ev->ev_events & EV_READ) + events |= EPOLLIN; + if (ev->ev_events & EV_WRITE) + events |= EPOLLOUT; + + epev.data.ptr = evep; + epev.events = events; + if (epoll_ctl(epollop->epfd, op, ev->ev_fd, &epev) == -1) + return (-1); + + /* Update events responsible */ + if (ev->ev_events & EV_READ) + evep->evread = ev; + if (ev->ev_events & EV_WRITE) + evep->evwrite = ev; + + return (0); +} + +int +epoll_del(void *arg, struct event *ev) +{ + struct epollop *epollop = arg; + struct epoll_event epev = {0, {0}}; + struct evepoll *evep; + int fd, events, op; + int needwritedelete = 1, needreaddelete = 1; + + if (ev->ev_events & EV_SIGNAL) + return (evsignal_del(&epollop->evsigmask, ev)); + + fd = ev->ev_fd; + if (fd >= epollop->nfds) + return (0); + evep = &epollop->fds[fd]; + + op = EPOLL_CTL_DEL; + events = 0; + + if (ev->ev_events & EV_READ) + events |= EPOLLIN; + if (ev->ev_events & EV_WRITE) + events |= EPOLLOUT; + + if ((events & (EPOLLIN|EPOLLOUT)) != (EPOLLIN|EPOLLOUT)) { + if ((events & EPOLLIN) && evep->evwrite != NULL) { + needwritedelete = 0; + events = EPOLLOUT; + op = EPOLL_CTL_MOD; + } else if ((events & EPOLLOUT) && evep->evread != NULL) { + needreaddelete = 0; + events = EPOLLIN; + op = EPOLL_CTL_MOD; + } + } + + epev.events = events; + epev.data.ptr = evep; + + if (needreaddelete) + evep->evread = NULL; + if (needwritedelete) + evep->evwrite = NULL; + + if (epoll_ctl(epollop->epfd, op, fd, &epev) == -1) + return (-1); + + return (0); +} + +void +epoll_dealloc(void *arg) +{ + struct epollop *epollop = arg; + + if (epollop->fds) + free(epollop->fds); + if (epollop->events) + free(epollop->events); + if (epollop->epfd >= 0) + close(epollop->epfd); + + memset(epollop, 0, sizeof(struct epollop)); + free(epollop); +} |