summary refs log tree commit diff stats
path: root/src/lib/ioloop-epoll.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/lib/ioloop-epoll.c 230
1 files changed, 230 insertions, 0 deletions
diff --git a/src/lib/ioloop-epoll.c b/src/lib/ioloop-epoll.c
new file mode 100644
index 0000000..ad41008
--- /dev/null
+++ b/src/lib/ioloop-epoll.c
@@ -0,0 +1,230 @@
+/* Copyright (c) 2004-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "array.h"
+#include "sleep.h"
+#include "ioloop-private.h"
+#include "ioloop-iolist.h"
+
+#ifdef IOLOOP_EPOLL
+
+#include <sys/epoll.h>
+#include <unistd.h>
+
+struct ioloop_handler_context { /* state the epoll backend keeps per ioloop */
+ int epfd; /* descriptor returned by epoll_create() */
+
+ unsigned int deleted_count; /* spare "events" slots left behind by removed fds */
+ ARRAY(struct io_list *) fd_index; /* io_list pointer per descriptor, indexed by fd number */
+ ARRAY(struct epoll_event) events; /* result buffer handed to epoll_wait() */
+};
+
+/* Prepare the epoll backend for the given ioloop.
+
+   Allocates the handler context, initializes both bookkeeping arrays with
+   room for initial_fd_count entries and creates the epoll descriptor, which
+   is then marked close-on-exec. A failing epoll_create() is reported through
+   i_fatal(); for EMFILE the message additionally points at the
+   max_user_instances sysctl. */
+void io_loop_handler_init(struct ioloop *ioloop, unsigned int initial_fd_count)
+{
+	struct ioloop_handler_context *ctx =
+		i_new(struct ioloop_handler_context, 1);
+
+	ioloop->handler_context = ctx;
+
+	i_array_init(&ctx->events, initial_fd_count);
+	i_array_init(&ctx->fd_index, initial_fd_count);
+
+	ctx->epfd = epoll_create(initial_fd_count);
+	if (ctx->epfd < 0) {
+		if (errno == EMFILE) {
+			i_fatal("epoll_create(): %m (you may need to increase "
+				"/proc/sys/fs/epoll/max_user_instances)");
+		} else {
+			i_fatal("epoll_create(): %m");
+		}
+	}
+	fd_close_on_exec(ctx->epfd, TRUE);
+}
+
+/* Release everything the epoll backend owns for this ioloop.
+
+   Every io_list stored in fd_index is freed, the epoll descriptor is closed
+   (a close() failure is only logged), and finally both arrays and the
+   handler context itself are freed. */
+void io_loop_handler_deinit(struct ioloop *ioloop)
+{
+	struct ioloop_handler_context *ctx = ioloop->handler_context;
+	struct io_list **lists;
+	unsigned int idx, lists_count;
+
+	lists = array_get_modifiable(&ctx->fd_index, &lists_count);
+	idx = 0;
+	while (idx < lists_count) {
+		i_free(lists[idx]);
+		idx++;
+	}
+
+	if (close(ctx->epfd) < 0)
+		i_error("close(epoll) failed: %m");
+
+	array_free(&ctx->fd_index);
+	array_free(&ctx->events);
+	/* free through the ioloop field rather than the local alias */
+	i_free(ioloop->handler_context);
+}
+
+#define IO_EPOLL_ERROR (EPOLLERR | EPOLLHUP) /* error and hang-up bits; requested for IO_ERROR */
+#define IO_EPOLL_INPUT (EPOLLIN | EPOLLPRI | IO_EPOLL_ERROR) /* bits requested for IO_READ */
+#define IO_EPOLL_OUTPUT (EPOLLOUT | IO_EPOLL_ERROR) /* bits requested for IO_WRITE */
+
+/* Compute the epoll event bits needed by all I/Os currently attached to one
+   descriptor's io_list. Empty slots are skipped; the result is 0 when no I/O
+   is attached. */
+static int epoll_event_mask(struct io_list *list)
+{
+	int mask = 0;
+	int slot;
+
+	for (slot = 0; slot < IOLOOP_IOLIST_IOS_PER_FD; slot++) {
+		const struct io_file *cur = list->ios[slot];
+
+		if (cur != NULL) {
+			if ((cur->io.condition & IO_READ) != 0)
+				mask |= IO_EPOLL_INPUT;
+			if ((cur->io.condition & IO_WRITE) != 0)
+				mask |= IO_EPOLL_OUTPUT;
+			if ((cur->io.condition & IO_ERROR) != 0)
+				mask |= IO_EPOLL_ERROR;
+		}
+	}
+
+	return mask;
+}
+
+/* Register an I/O with the epoll backend of its ioloop.
+
+   The io is attached to the io_list of its descriptor (the list is created
+   on demand). The descriptor is then either added to the epoll set, when
+   this is its first I/O, or its event mask is updated. Any epoll_ctl()
+   failure ends in i_panic(). */
+void io_loop_handle_add(struct io_file *io)
+{
+	struct ioloop_handler_context *ctx = io->io.ioloop->handler_context;
+	struct io_list **slot;
+	struct epoll_event ev;
+	bool first;
+	int op;
+
+	slot = array_idx_get_space(&ctx->fd_index, io->fd);
+	if (*slot == NULL)
+		*slot = i_new(struct io_list, 1);
+	first = ioloop_iolist_add(*slot, io);
+
+	i_zero(&ev);
+	ev.events = epoll_event_mask(*slot);
+	ev.data.ptr = *slot;
+
+	if (first)
+		op = EPOLL_CTL_ADD;
+	else
+		op = EPOLL_CTL_MOD;
+
+	if (epoll_ctl(ctx->epfd, op, io->fd, &ev) < 0) {
+		if (errno == EPERM && op == EPOLL_CTL_ADD) {
+			i_panic("epoll_ctl(add, %d) failed: %m "
+				"(fd doesn't support epoll%s)", io->fd,
+				io->fd != STDIN_FILENO ? "" :
+				" - instead of '<file', try 'cat file|'");
+		}
+		i_panic("epoll_ctl(%s, %d) failed: %m",
+			op == EPOLL_CTL_ADD ? "add" : "mod", io->fd);
+	}
+
+	if (!first)
+		return;
+
+	/* The events array keeps one slot per registered descriptor so that
+	   epoll_wait() can report all of them at once. Reuse a slot left by
+	   a removed descriptor when there is one, otherwise grow the array. */
+	if (ctx->deleted_count == 0)
+		array_append_zero(&ctx->events);
+	else
+		ctx->deleted_count--;
+}
+
+/* Unregister an I/O from the epoll backend of its ioloop and free it.
+
+   io:     the I/O to detach from the io_list of its descriptor.
+   closed: when TRUE, no epoll_ctl() call is made for the descriptor
+           (presumably the caller has already closed it - confirm against
+           callers).
+
+   When the descriptor is still open it is either deleted from the epoll set
+   (last I/O removed) or its event mask is updated. An epoll_ctl() failure
+   with ENOSPC or ENOMEM is only logged; any other failure panics. */
+void io_loop_handle_remove(struct io_file *io, bool closed)
+{
+	struct ioloop_handler_context *ctx = io->io.ioloop->handler_context;
+	struct io_list **list;
+	struct epoll_event event;
+	int op, saved_errno;
+	bool last;
+
+	list = array_idx_modifiable(&ctx->fd_index, io->fd);
+	last = ioloop_iolist_del(*list, io);
+
+	if (!closed) {
+		i_zero(&event);
+		event.data.ptr = *list;
+		event.events = epoll_event_mask(*list);
+
+		op = last ? EPOLL_CTL_DEL : EPOLL_CTL_MOD;
+
+		if (epoll_ctl(ctx->epfd, op, io->fd, &event) < 0) {
+			const char *errstr;
+
+			/* Remember the failure reason before calling anything
+			   else: formatting the message may itself change
+			   errno, which would misclassify the error below. */
+			saved_errno = errno;
+			errstr = t_strdup_printf(
+				"epoll_ctl(%s, %d) failed: %m",
+				op == EPOLL_CTL_DEL ? "del" : "mod", io->fd);
+			if (saved_errno != ENOSPC && saved_errno != ENOMEM)
+				i_panic("%s", errstr);
+			else
+				i_error("%s", errstr);
+		}
+	}
+	if (last) {
+		/* since we're not freeing memory in any case, just increase
+		   deleted counter so next handle_add() can just decrease it
+		   instead of appending to the events array */
+		ctx->deleted_count++;
+	}
+	i_free(io);
+}
+
+/* Run one iteration of the epoll-based loop.
+
+   Waits in epoll_wait() for at most the time until the next timeout (or
+   just sleeps that long when no descriptor is registered), runs the timeout
+   handlers and then calls the handlers of every I/O whose condition matches
+   a reported event. Returns early as soon as the ioloop is no longer
+   running. An epoll_wait() failure other than EINTR is fatal. */
+void io_loop_handler_run_internal(struct ioloop *ioloop)
+{
+	struct ioloop_handler_context *ctx = ioloop->handler_context;
+	struct epoll_event *events;
+	struct epoll_event event;
+	struct io_list *list;
+	struct io_file *io;
+	struct timeval tv;
+	unsigned int events_count;
+	int msecs, ret, i, j;
+	bool call;
+
+	i_assert(ctx != NULL);
+
+	/* get the time left for next timeout task */
+	msecs = io_loop_run_get_wait_time(ioloop, &tv);
+
+	events = array_get_modifiable(&ctx->events, &events_count);
+	if (ioloop->io_files != NULL && events_count > ctx->deleted_count) {
+		ret = epoll_wait(ctx->epfd, events, events_count, msecs);
+		if (ret < 0 && errno != EINTR)
+			i_fatal("epoll_wait(): %m");
+	} else {
+		/* no I/Os, but we should have some timeouts.
+		   just wait for them. */
+		i_assert(msecs >= 0);
+		i_sleep_intr_msecs(msecs);
+		ret = 0;
+	}
+
+	/* execute timeout handlers */
+	io_loop_handle_timeouts(ioloop);
+
+	if (!ioloop->running)
+		return;
+
+	for (i = 0; i < ret; i++) {
+		/* io_loop_handle_add() may cause events array reallocation,
+		   so we have to look the entry up with array_idx() on every
+		   round. A handler called below may trigger such a
+		   reallocation as well, so work on a private copy of the
+		   event instead of keeping a pointer into the array across
+		   handler calls. */
+		event = *array_idx(&ctx->events, i);
+		list = event.data.ptr;
+
+		for (j = 0; j < IOLOOP_IOLIST_IOS_PER_FD; j++) {
+			io = list->ios[j];
+			if (io == NULL)
+				continue;
+
+			/* errors and hang-ups are delivered to every I/O of
+			   the descriptor, whatever it is waiting for */
+			call = FALSE;
+			if ((event.events & (EPOLLHUP | EPOLLERR)) != 0)
+				call = TRUE;
+			else if ((io->io.condition & IO_READ) != 0)
+				call = (event.events & EPOLLIN) != 0;
+			else if ((io->io.condition & IO_WRITE) != 0)
+				call = (event.events & EPOLLOUT) != 0;
+			else if ((io->io.condition & IO_ERROR) != 0)
+				call = (event.events & IO_EPOLL_ERROR) != 0;
+
+			if (call) {
+				io_loop_call_io(&io->io);
+				if (!ioloop->running)
+					return;
+			}
+		}
+	}
+}
+
+#endif /* IOLOOP_EPOLL */