Diffstat (limited to 'src/lib/file-cache.c')
-rw-r--r--    src/lib/file-cache.c    336
1 file changed, 336 insertions, 0 deletions
diff --git a/src/lib/file-cache.c b/src/lib/file-cache.c
new file mode 100644
index 0000000..008021e
--- /dev/null
+++ b/src/lib/file-cache.c
@@ -0,0 +1,336 @@
+/* Copyright (c) 2004-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "buffer.h"
+#include "mmap-util.h"
+#include "file-cache.h"
+
+#include <sys/stat.h>
+
+struct file_cache {
+        int fd;
+        char *path;
+        buffer_t *page_bitmask;
+
+        void *mmap_base;
+        size_t mmap_length;
+        size_t read_highwater;
+};
+
+struct file_cache *file_cache_new(int fd)
+{
+        return file_cache_new_path(fd, "");
+}
+
+struct file_cache *file_cache_new_path(int fd, const char *path)
+{
+        struct file_cache *cache;
+
+        cache = i_new(struct file_cache, 1);
+        cache->fd = fd;
+        cache->path = i_strdup(path);
+        cache->page_bitmask = buffer_create_dynamic(default_pool, 128);
+        return cache;
+}
+
+void file_cache_free(struct file_cache **_cache)
+{
+        struct file_cache *cache = *_cache;
+
+        *_cache = NULL;
+
+        if (cache->mmap_base != NULL) {
+                if (munmap_anon(cache->mmap_base, cache->mmap_length) < 0)
+                        i_error("munmap_anon(%s) failed: %m", cache->path);
+        }
+        buffer_free(&cache->page_bitmask);
+        i_free(cache->path);
+        i_free(cache);
+}
+
+void file_cache_set_fd(struct file_cache *cache, int fd)
+{
+        cache->fd = fd;
+        file_cache_invalidate(cache, 0, cache->mmap_length);
+}
+
+int file_cache_set_size(struct file_cache *cache, uoff_t size)
+{
+        size_t page_size = mmap_get_page_size();
+        uoff_t diff;
+        void *new_base;
+
+        i_assert(page_size > 0);
+
+        diff = size % page_size;
+        if (diff != 0)
+                size += page_size - diff;
+
+        i_assert((size % page_size) == 0);
+        if (size <= cache->mmap_length)
+                return 0;
+
+        if (size > SIZE_MAX) {
+                i_error("file_cache_set_size(%s, %"PRIuUOFF_T"): size too large",
+                        cache->path, size);
+                return -1;
+        }
+
+        /* grow the anonymous mmap */
+        if (cache->mmap_base == NULL) {
+                cache->mmap_base = mmap_anon(size);
+                if (cache->mmap_base == MAP_FAILED) {
+                        i_error("mmap_anon(%s, %"PRIuUOFF_T") failed: %m",
+                                cache->path, size);
+                        cache->mmap_base = NULL;
+                        cache->mmap_length = 0;
+                        return -1;
+                }
+        } else {
+                new_base = mremap_anon(cache->mmap_base, cache->mmap_length,
+                                       size, MREMAP_MAYMOVE);
+                if (new_base == MAP_FAILED) {
+                        i_error("mremap_anon(%s, %"PRIuUOFF_T") failed: %m",
+                                cache->path, size);
+                        return -1;
+                }
+
+                cache->mmap_base = new_base;
+        }
+        cache->mmap_length = size;
+        return 0;
+}
+
+ssize_t file_cache_read(struct file_cache *cache, uoff_t offset, size_t size)
+{
+        size_t page_size = mmap_get_page_size();
+        size_t poffset, psize, dest_offset, dest_size;
+        unsigned char *bits, *dest;
+        ssize_t ret;
+
+        i_assert(page_size > 0);
+
+        if (size > SSIZE_T_MAX) {
+                /* make sure our calculations won't overflow. most likely
+                   we'll be reading less data, but allow it anyway so the
+                   caller doesn't have to deal with any extra checks. */
+                size = SSIZE_T_MAX;
+        }
+        if (offset >= UOFF_T_MAX - size)
+                size = UOFF_T_MAX - offset;
+
+        if (offset + size > cache->mmap_length &&
+            offset + size - cache->mmap_length > 1024*1024) {
+                /* growing by more than a megabyte. make sure that the
+                   file is actually large enough, so we don't allocate
+                   more memory than needed */
+                struct stat st;
+
+                if (fstat(cache->fd, &st) < 0) {
+                        if (errno != ESTALE)
+                                i_error("fstat(%s) failed: %m", cache->path);
+                        return -1;
+                }
+
+                if (offset + size > (uoff_t)st.st_size) {
+                        if (offset >= (uoff_t)st.st_size)
+                                return 0;
+                        size = (uoff_t)st.st_size - offset;
+                }
+        }
+
+        if (file_cache_set_size(cache, offset + size) < 0)
+                return -1;
+
+        poffset = offset / page_size;
+        psize = (offset + size + page_size-1) / page_size - poffset;
+        i_assert(psize > 0);
+
+        bits = buffer_get_space_unsafe(cache->page_bitmask, 0,
+                                       (poffset + psize + CHAR_BIT - 1) /
+                                       CHAR_BIT);
+
+        dest_offset = poffset * page_size;
+        dest = PTR_OFFSET(cache->mmap_base, dest_offset);
+        dest_size = page_size;
+
+        while (psize > 0) {
+                if ((bits[poffset / CHAR_BIT] & (1 << (poffset % CHAR_BIT))) != 0) {
+                        /* page is already in cache */
+                        dest_offset += page_size;
+                        if (dest_offset <= cache->read_highwater) {
+                                psize--; poffset++;
+                                dest += page_size;
+                                continue;
+                        }
+
+                        /* this is the last, partially cached block.
+                           use the cached copy only if we don't want to
+                           read past read_highwater */
+                        if (offset + size <= cache->read_highwater) {
+                                i_assert(psize == 1);
+                                break;
+                        }
+
+                        /* mark the block noncached again and
+                           read it */
+                        bits[poffset / CHAR_BIT] &=
+                                ~(1 << (poffset % CHAR_BIT));
+                        dest_offset -= page_size;
+                }
+
+                ret = pread(cache->fd, dest, dest_size, dest_offset);
+                if (ret <= 0) {
+                        if (ret < 0)
+                                return -1;
+
+                        /* EOF. mark the last block as cached even if it
+                           isn't complete. read_highwater tells us how far
+                           we've actually gotten. */
+                        if (dest_offset == cache->read_highwater) {
+                                i_assert(poffset ==
+                                         cache->read_highwater / page_size);
+                                bits[poffset / CHAR_BIT] |=
+                                        1 << (poffset % CHAR_BIT);
+                        }
+                        return dest_offset <= offset ? 0 :
+                                dest_offset - offset < size ?
+                                dest_offset - offset : size;
+                }
+
+                dest += ret;
+                dest_offset += ret;
+
+                if (cache->read_highwater < dest_offset) {
+                        unsigned int high_poffset =
+                                cache->read_highwater / page_size;
+
+                        /* read_highwater needs to be updated. if we didn't
+                           just read that block, we can't trust anymore that
+                           we have it cached */
+                        bits[high_poffset / CHAR_BIT] &=
+                                ~(1 << (high_poffset % CHAR_BIT));
+                        cache->read_highwater = dest_offset;
+                }
+
+                if ((size_t)ret != dest_size) {
+                        /* partial read - probably EOF, but make sure. */
+                        dest_size -= ret;
+                        continue;
+                }
+
+                bits[poffset / CHAR_BIT] |= 1 << (poffset % CHAR_BIT);
+                dest_size = page_size;
+                psize--; poffset++;
+        }
+
+        return size;
+}
+
+const void *file_cache_get_map(struct file_cache *cache, size_t *size_r)
+{
+        *size_r = cache->read_highwater;
+        return cache->mmap_base;
+}
+
+void file_cache_write(struct file_cache *cache, const void *data, size_t size,
+                      uoff_t offset)
+{
+        size_t page_size = mmap_get_page_size();
+        unsigned char *bits;
+        unsigned int first_page, last_page;
+
+        i_assert(page_size > 0);
+        i_assert(UOFF_T_MAX - offset > size);
+
+        if (file_cache_set_size(cache, offset + size) < 0) {
+                /* couldn't grow the mapping. just make sure the written
+                   memory area is invalidated then. */
+                file_cache_invalidate(cache, offset, size);
+                return;
+        }
+
+        memcpy(PTR_OFFSET(cache->mmap_base, offset), data, size);
+
+        if (cache->read_highwater < offset + size) {
+                unsigned int page = cache->read_highwater / page_size;
+
+                bits = buffer_get_space_unsafe(cache->page_bitmask,
+                                               page / CHAR_BIT, 1);
+                *bits &= ~(1 << (page % CHAR_BIT));
+                cache->read_highwater = offset + size;
+        }
+
+        /* mark fully written pages cached */
+        if (size >= page_size) {
+                first_page = offset / page_size;
+                last_page = (offset + size) / page_size;
+                if ((offset % page_size) != 0)
+                        first_page++;
+
+                bits = buffer_get_space_unsafe(cache->page_bitmask, 0,
+                                               last_page / CHAR_BIT + 1);
+                for (; first_page < last_page; first_page++) {
+                        bits[first_page / CHAR_BIT] |=
+                                1 << (first_page % CHAR_BIT);
+                }
+        }
+}
+
+void file_cache_invalidate(struct file_cache *cache, uoff_t offset, uoff_t size)
+{
+        size_t page_size = mmap_get_page_size();
+        unsigned char *bits, mask;
+        unsigned int i;
+
+        if (offset >= cache->read_highwater || size == 0)
+                return;
+
+        i_assert(page_size > 0);
+
+        if (size > cache->read_highwater - offset) {
+                /* ignore anything after the read highwater */
+                size = cache->read_highwater - offset;
+        }
+        if (size >= cache->read_highwater) {
+                /* we're invalidating everything up to the read highwater.
+                   drop the highwater position. */
+                cache->read_highwater = offset & ~(page_size-1);
+        }
+
+        size = (offset + size + page_size-1) / page_size;
+        offset /= page_size;
+        i_assert(size > offset);
+        size -= offset;
+
+        if (size != 1) {
+                /* tell the operating system that we don't need the memory
+                   anymore and it may free it. don't bother doing it for
+                   single pages, there's a good chance that they get
+                   re-read back immediately. */
+                (void)madvise(PTR_OFFSET(cache->mmap_base, offset * page_size),
+                              size * page_size, MADV_DONTNEED);
+        }
+
+        bits = buffer_get_space_unsafe(cache->page_bitmask, offset / CHAR_BIT,
+                                       1 + (size + CHAR_BIT - 1) / CHAR_BIT);
+
+        /* clear the bits in the first byte */
+        for (i = offset % CHAR_BIT, mask = 0; i < CHAR_BIT && size > 0; i++) {
+                mask |= 1 << i;
+                size--;
+        }
+        *bits++ &= ~mask;
+
+        /* clear the middle bytes */
+        memset(bits, 0, size / CHAR_BIT);
+        bits += size / CHAR_BIT;
+        size %= CHAR_BIT;
+
+        /* clear the bits in the last byte */
+        if (size > 0) {
+                for (i = 0, mask = 0; i < size; i++)
+                        mask |= 1 << i;
+                *bits &= ~mask;
+        }
+}
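For context, a minimal usage sketch of the API this commit implements follows. It is a hypothetical example, not part of the commit: it assumes compilation inside the Dovecot source tree (so that lib.h and file-cache.h resolve), and read_header_via_cache is an invented name. All file_cache_*() calls are the ones defined in the diff above.

#include "lib.h"
#include "file-cache.h"

#include <fcntl.h>
#include <unistd.h>

static void read_header_via_cache(const char *path)
{
        struct file_cache *cache;
        const void *map;
        size_t size;
        ssize_t ret;
        int fd;

        fd = open(path, O_RDONLY);
        if (fd == -1)
                i_fatal("open(%s) failed: %m", path);

        cache = file_cache_new_path(fd, path);

        /* pull the first 4 kB through the cache: uncached pages are
           pread() into the anonymous mmap and flagged in page_bitmask */
        ret = file_cache_read(cache, 0, 4096);
        if (ret < 0)
                i_error("file_cache_read(%s) failed: %m", path);
        else {
                /* the returned map is valid up to read_highwater bytes */
                map = file_cache_get_map(cache, &size);
                i_debug("%zu bytes cached at %p", size, map);
        }

        /* a repeated read of an already-cached range is normally served
           from the anonymous mmap without touching the fd again */
        (void)file_cache_read(cache, 0, 4096);

        file_cache_free(&cache);
        if (close(fd) < 0)
                i_error("close(%s) failed: %m", path);
}

The read-then-map pattern lets a caller treat a pread()-backed file much like an mmap()ed one: file_cache_read() faults the range in, file_cache_get_map() exposes the backing memory.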
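Separately, the page arithmetic shared by file_cache_read() and file_cache_invalidate() can be checked with a standalone program. This demo is not from the commit; it hard-codes PAGE_SIZE as 4096 purely for illustration, whereas the real code queries mmap_get_page_size().

#include <limits.h>
#include <stdio.h>

#define PAGE_SIZE 4096

int main(void)
{
        unsigned char bits[8] = { 0 };
        size_t offset = 5000, size = 10000;

        /* first page touched and page count, as in file_cache_read() */
        size_t poffset = offset / PAGE_SIZE;
        size_t psize = (offset + size + PAGE_SIZE - 1) / PAGE_SIZE - poffset;

        printf("pages %zu..%zu\n", poffset, poffset + psize - 1); /* 1..3 */

        /* mark them cached, one bit per page */
        for (size_t p = poffset; p < poffset + psize; p++)
                bits[p / CHAR_BIT] |= 1 << (p % CHAR_BIT);

        /* test a page, as the read loop does */
        if ((bits[2 / CHAR_BIT] & (1 << (2 % CHAR_BIT))) != 0)
                printf("page 2 is cached\n");
        return 0;
}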