diff options
Diffstat (limited to '')
-rw-r--r-- | src/libsystemd/sd-journal/mmap-cache.c | 596 |
1 files changed, 596 insertions, 0 deletions
diff --git a/src/libsystemd/sd-journal/mmap-cache.c b/src/libsystemd/sd-journal/mmap-cache.c new file mode 100644 index 0000000..82407f9 --- /dev/null +++ b/src/libsystemd/sd-journal/mmap-cache.c @@ -0,0 +1,596 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <stdlib.h> +#include <sys/mman.h> + +#include "alloc-util.h" +#include "errno-util.h" +#include "fd-util.h" +#include "hashmap.h" +#include "list.h" +#include "log.h" +#include "macro.h" +#include "memory-util.h" +#include "mmap-cache.h" +#include "sigbus.h" + +typedef struct Window Window; +typedef struct Context Context; + +struct Window { + MMapCache *cache; + + bool invalidated:1; + bool keep_always:1; + bool in_unused:1; + + void *ptr; + uint64_t offset; + size_t size; + + MMapFileDescriptor *fd; + + LIST_FIELDS(Window, by_fd); + LIST_FIELDS(Window, unused); + + LIST_HEAD(Context, contexts); +}; + +struct Context { + Window *window; + + LIST_FIELDS(Context, by_window); +}; + +struct MMapFileDescriptor { + MMapCache *cache; + int fd; + int prot; + bool sigbus; + LIST_HEAD(Window, windows); +}; + +struct MMapCache { + unsigned n_ref; + unsigned n_windows; + + unsigned n_context_cache_hit, n_window_list_hit, n_missed; + + Hashmap *fds; + + LIST_HEAD(Window, unused); + Window *last_unused; + + Context contexts[MMAP_CACHE_MAX_CONTEXTS]; +}; + +#define WINDOWS_MIN 64 + +#if ENABLE_DEBUG_MMAP_CACHE +/* Tiny windows increase mmap activity and the chance of exposing unsafe use. */ +# define WINDOW_SIZE (page_size()) +#else +# define WINDOW_SIZE (8ULL*1024ULL*1024ULL) +#endif + +MMapCache* mmap_cache_new(void) { + MMapCache *m; + + m = new0(MMapCache, 1); + if (!m) + return NULL; + + m->n_ref = 1; + return m; +} + +static void window_unlink(Window *w) { + + assert(w); + + if (w->ptr) + munmap(w->ptr, w->size); + + if (w->fd) + LIST_REMOVE(by_fd, w->fd->windows, w); + + if (w->in_unused) { + if (w->cache->last_unused == w) + w->cache->last_unused = w->unused_prev; + + LIST_REMOVE(unused, w->cache->unused, w); + } + + LIST_FOREACH(by_window, c, w->contexts) { + assert(c->window == w); + c->window = NULL; + } +} + +static void window_invalidate(Window *w) { + assert(w); + assert(w->fd); + + if (w->invalidated) + return; + + /* Replace the window with anonymous pages. This is useful + * when we hit a SIGBUS and want to make sure the file cannot + * trigger any further SIGBUS, possibly overrunning the sigbus + * queue. */ + + assert_se(mmap(w->ptr, w->size, w->fd->prot, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == w->ptr); + w->invalidated = true; +} + +static void window_free(Window *w) { + assert(w); + + window_unlink(w); + w->cache->n_windows--; + free(w); +} + +_pure_ static bool window_matches(Window *w, uint64_t offset, size_t size) { + assert(w); + assert(size > 0); + + return + offset >= w->offset && + offset + size <= w->offset + w->size; +} + +_pure_ static bool window_matches_fd(Window *w, MMapFileDescriptor *f, uint64_t offset, size_t size) { + assert(w); + assert(f); + + return + w->fd == f && + window_matches(w, offset, size); +} + +static Window *window_add(MMapCache *m, MMapFileDescriptor *f, bool keep_always, uint64_t offset, size_t size, void *ptr) { + Window *w; + + assert(m); + assert(f); + + if (!m->last_unused || m->n_windows <= WINDOWS_MIN) { + + /* Allocate a new window */ + w = new(Window, 1); + if (!w) + return NULL; + m->n_windows++; + } else { + + /* Reuse an existing one */ + w = m->last_unused; + window_unlink(w); + } + + *w = (Window) { + .cache = m, + .fd = f, + .keep_always = keep_always, + .offset = offset, + .size = size, + .ptr = ptr, + }; + + LIST_PREPEND(by_fd, f->windows, w); + + return w; +} + +static void context_detach_window(MMapCache *m, Context *c) { + Window *w; + + assert(m); + assert(c); + + if (!c->window) + return; + + w = TAKE_PTR(c->window); + LIST_REMOVE(by_window, w->contexts, c); + + if (!w->contexts && !w->keep_always) { + /* Not used anymore? */ +#if ENABLE_DEBUG_MMAP_CACHE + /* Unmap unused windows immediately to expose use-after-unmap + * by SIGSEGV. */ + window_free(w); +#else + LIST_PREPEND(unused, m->unused, w); + if (!m->last_unused) + m->last_unused = w; + + w->in_unused = true; +#endif + } +} + +static void context_attach_window(MMapCache *m, Context *c, Window *w) { + assert(m); + assert(c); + assert(w); + + if (c->window == w) + return; + + context_detach_window(m, c); + + if (w->in_unused) { + /* Used again? */ + if (m->last_unused == w) + m->last_unused = w->unused_prev; + LIST_REMOVE(unused, m->unused, w); + + w->in_unused = false; + } + + c->window = w; + LIST_PREPEND(by_window, w->contexts, c); +} + +static MMapCache *mmap_cache_free(MMapCache *m) { + assert(m); + + for (int i = 0; i < MMAP_CACHE_MAX_CONTEXTS; i++) + context_detach_window(m, &m->contexts[i]); + + hashmap_free(m->fds); + + while (m->unused) + window_free(m->unused); + + return mfree(m); +} + +DEFINE_TRIVIAL_REF_UNREF_FUNC(MMapCache, mmap_cache, mmap_cache_free); + +static int make_room(MMapCache *m) { + assert(m); + + if (!m->last_unused) + return 0; + + window_free(m->last_unused); + return 1; +} + +static int try_context( + MMapFileDescriptor *f, + Context *c, + bool keep_always, + uint64_t offset, + size_t size, + void **ret) { + + assert(f); + assert(f->cache); + assert(f->cache->n_ref > 0); + assert(c); + assert(size > 0); + assert(ret); + + if (!c->window) + return 0; + + if (!window_matches_fd(c->window, f, offset, size)) { + + /* Drop the reference to the window, since it's unnecessary now */ + context_detach_window(f->cache, c); + return 0; + } + + if (c->window->fd->sigbus) + return -EIO; + + c->window->keep_always = c->window->keep_always || keep_always; + + *ret = (uint8_t*) c->window->ptr + (offset - c->window->offset); + f->cache->n_context_cache_hit++; + + return 1; +} + +static int find_mmap( + MMapFileDescriptor *f, + Context *c, + bool keep_always, + uint64_t offset, + size_t size, + void **ret) { + + Window *found = NULL; + + assert(f); + assert(f->cache); + assert(f->cache->n_ref > 0); + assert(c); + assert(size > 0); + + if (f->sigbus) + return -EIO; + + LIST_FOREACH(by_fd, w, f->windows) + if (window_matches(w, offset, size)) { + found = w; + break; + } + + if (!found) + return 0; + + context_attach_window(f->cache, c, found); + found->keep_always = found->keep_always || keep_always; + + *ret = (uint8_t*) found->ptr + (offset - found->offset); + f->cache->n_window_list_hit++; + + return 1; +} + +static int mmap_try_harder(MMapFileDescriptor *f, void *addr, int flags, uint64_t offset, size_t size, void **res) { + void *ptr; + + assert(f); + assert(res); + + for (;;) { + int r; + + ptr = mmap(addr, size, f->prot, flags, f->fd, offset); + if (ptr != MAP_FAILED) + break; + if (errno != ENOMEM) + return negative_errno(); + + r = make_room(f->cache); + if (r < 0) + return r; + if (r == 0) + return -ENOMEM; + } + + *res = ptr; + return 0; +} + +static int add_mmap( + MMapFileDescriptor *f, + Context *c, + bool keep_always, + uint64_t offset, + size_t size, + struct stat *st, + void **ret) { + + uint64_t woffset, wsize; + Window *w; + void *d; + int r; + + assert(f); + assert(f->cache); + assert(f->cache->n_ref > 0); + assert(c); + assert(size > 0); + assert(ret); + + woffset = offset & ~((uint64_t) page_size() - 1ULL); + wsize = size + (offset - woffset); + wsize = PAGE_ALIGN(wsize); + + if (wsize < WINDOW_SIZE) { + uint64_t delta; + + delta = PAGE_ALIGN((WINDOW_SIZE - wsize) / 2); + + if (delta > offset) + woffset = 0; + else + woffset -= delta; + + wsize = WINDOW_SIZE; + } + + if (st) { + /* Memory maps that are larger then the files + underneath have undefined behavior. Hence, clamp + things to the file size if we know it */ + + if (woffset >= (uint64_t) st->st_size) + return -EADDRNOTAVAIL; + + if (woffset + wsize > (uint64_t) st->st_size) + wsize = PAGE_ALIGN(st->st_size - woffset); + } + + r = mmap_try_harder(f, NULL, MAP_SHARED, woffset, wsize, &d); + if (r < 0) + return r; + + w = window_add(f->cache, f, keep_always, woffset, wsize, d); + if (!w) + goto outofmem; + + context_attach_window(f->cache, c, w); + + *ret = (uint8_t*) w->ptr + (offset - w->offset); + + return 1; + +outofmem: + (void) munmap(d, wsize); + return -ENOMEM; +} + +int mmap_cache_fd_get( + MMapFileDescriptor *f, + unsigned context, + bool keep_always, + uint64_t offset, + size_t size, + struct stat *st, + void **ret) { + + Context *c; + int r; + + assert(f); + assert(f->cache); + assert(f->cache->n_ref > 0); + assert(size > 0); + assert(ret); + assert(context < MMAP_CACHE_MAX_CONTEXTS); + + c = &f->cache->contexts[context]; + + /* Check whether the current context is the right one already */ + r = try_context(f, c, keep_always, offset, size, ret); + if (r != 0) + return r; + + /* Search for a matching mmap */ + r = find_mmap(f, c, keep_always, offset, size, ret); + if (r != 0) + return r; + + f->cache->n_missed++; + + /* Create a new mmap */ + return add_mmap(f, c, keep_always, offset, size, st, ret); +} + +void mmap_cache_stats_log_debug(MMapCache *m) { + assert(m); + + log_debug("mmap cache statistics: %u context cache hit, %u window list hit, %u miss", m->n_context_cache_hit, m->n_window_list_hit, m->n_missed); +} + +static void mmap_cache_process_sigbus(MMapCache *m) { + bool found = false; + MMapFileDescriptor *f; + int r; + + assert(m); + + /* Iterate through all triggered pages and mark their files as + * invalidated */ + for (;;) { + bool ours; + void *addr; + + r = sigbus_pop(&addr); + if (_likely_(r == 0)) + break; + if (r < 0) { + log_error_errno(r, "SIGBUS handling failed: %m"); + abort(); + } + + ours = false; + HASHMAP_FOREACH(f, m->fds) { + LIST_FOREACH(by_fd, w, f->windows) { + if ((uint8_t*) addr >= (uint8_t*) w->ptr && + (uint8_t*) addr < (uint8_t*) w->ptr + w->size) { + found = ours = f->sigbus = true; + break; + } + } + + if (ours) + break; + } + + /* Didn't find a matching window, give up */ + if (!ours) { + log_error("Unknown SIGBUS page, aborting."); + abort(); + } + } + + /* The list of triggered pages is now empty. Now, let's remap + * all windows of the triggered file to anonymous maps, so + * that no page of the file in question is triggered again, so + * that we can be sure not to hit the queue size limit. */ + if (_likely_(!found)) + return; + + HASHMAP_FOREACH(f, m->fds) { + if (!f->sigbus) + continue; + + LIST_FOREACH(by_fd, w, f->windows) + window_invalidate(w); + } +} + +bool mmap_cache_fd_got_sigbus(MMapFileDescriptor *f) { + assert(f); + + mmap_cache_process_sigbus(f->cache); + + return f->sigbus; +} + +MMapFileDescriptor* mmap_cache_add_fd(MMapCache *m, int fd, int prot) { + MMapFileDescriptor *f; + int r; + + assert(m); + assert(fd >= 0); + + f = hashmap_get(m->fds, FD_TO_PTR(fd)); + if (f) + return f; + + r = hashmap_ensure_allocated(&m->fds, NULL); + if (r < 0) + return NULL; + + f = new0(MMapFileDescriptor, 1); + if (!f) + return NULL; + + r = hashmap_put(m->fds, FD_TO_PTR(fd), f); + if (r < 0) + return mfree(f); + + f->cache = mmap_cache_ref(m); + f->fd = fd; + f->prot = prot; + + return f; +} + +void mmap_cache_fd_free(MMapFileDescriptor *f) { + assert(f); + assert(f->cache); + + /* Make sure that any queued SIGBUS are first dispatched, so + * that we don't end up with a SIGBUS entry we cannot relate + * to any existing memory map */ + + mmap_cache_process_sigbus(f->cache); + + while (f->windows) + window_free(f->windows); + + if (f->cache) { + assert_se(hashmap_remove(f->cache->fds, FD_TO_PTR(f->fd))); + f->cache = mmap_cache_unref(f->cache); + } + + free(f); +} + +MMapCache* mmap_cache_fd_cache(MMapFileDescriptor *f) { + assert(f); + + return f->cache; +} |