From 2c3c1048746a4622d8c89a29670120dc8fab93c4 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sun, 7 Apr 2024 20:49:45 +0200
Subject: Adding upstream version 6.1.76.

Signed-off-by: Daniel Baumann
---
 fs/cachefiles/Kconfig        |  40 ++
 fs/cachefiles/Makefile       |  21 ++
 fs/cachefiles/cache.c        | 383 +++++++++++++++++++
 fs/cachefiles/daemon.c       | 814 ++++++++++++++++++++++++++++++++++++++++
 fs/cachefiles/error_inject.c |  46 +++
 fs/cachefiles/interface.c    | 447 ++++++++++++++++++++++
 fs/cachefiles/internal.h     | 477 ++++++++++++++++++++++++
 fs/cachefiles/io.c           | 651 ++++++++++++++++++++++++++++++++
 fs/cachefiles/key.c          | 138 +++++++
 fs/cachefiles/main.c         |  93 +++++
 fs/cachefiles/namei.c        | 862 +++++++++++++++++++++++++++++++++++++++++++
 fs/cachefiles/ondemand.c     | 514 ++++++++++++++++++++++++++
 fs/cachefiles/security.c     | 112 ++++++
 fs/cachefiles/volume.c       | 139 +++++++
 fs/cachefiles/xattr.c        | 276 ++++++++++++++
 15 files changed, 5013 insertions(+)
 create mode 100644 fs/cachefiles/Kconfig
 create mode 100644 fs/cachefiles/Makefile
 create mode 100644 fs/cachefiles/cache.c
 create mode 100644 fs/cachefiles/daemon.c
 create mode 100644 fs/cachefiles/error_inject.c
 create mode 100644 fs/cachefiles/interface.c
 create mode 100644 fs/cachefiles/internal.h
 create mode 100644 fs/cachefiles/io.c
 create mode 100644 fs/cachefiles/key.c
 create mode 100644 fs/cachefiles/main.c
 create mode 100644 fs/cachefiles/namei.c
 create mode 100644 fs/cachefiles/ondemand.c
 create mode 100644 fs/cachefiles/security.c
 create mode 100644 fs/cachefiles/volume.c
 create mode 100644 fs/cachefiles/xattr.c

diff --git a/fs/cachefiles/Kconfig b/fs/cachefiles/Kconfig
new file mode 100644
index 000000000..8df715640
--- /dev/null
+++ b/fs/cachefiles/Kconfig
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config CACHEFILES
+	tristate "Filesystem caching on files"
+	depends on FSCACHE && BLOCK
+	help
+	  This permits use of a mounted filesystem as a cache for other
+	  filesystems - primarily networking filesystems - thus allowing fast
+	  local disk to enhance the speed of slower devices.
+
+	  See Documentation/filesystems/caching/cachefiles.rst for more
+	  information.
+
+config CACHEFILES_DEBUG
+	bool "Debug CacheFiles"
+	depends on CACHEFILES
+	help
+	  This permits debugging to be dynamically enabled in the filesystem
+	  caching on files module. If this is set, the debugging output may be
+	  enabled by setting bits in /sys/module/cachefiles/parameters/debug or
+	  by including a debugging specifier in /etc/cachefilesd.conf.
+
+config CACHEFILES_ERROR_INJECTION
+	bool "Provide error injection for cachefiles"
+	depends on CACHEFILES && SYSCTL
+	help
+	  This permits error injection to be enabled in cachefiles whilst a
+	  cache is in service.
+
+config CACHEFILES_ONDEMAND
+	bool "Support for on-demand read"
+	depends on CACHEFILES
+	default n
+	help
+	  This permits userspace to enable the cachefiles on-demand read mode.
+	  In this mode, when a cache miss occurs, responsibility for fetching
+	  the data lies with the cachefiles backend instead of with the netfs
+	  and is delegated to userspace.
+
+	  If unsure, say N.
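For context before the code: the Kconfig help above refers to /etc/cachefilesd.conf, and the control protocol that cachefilesd speaks is implemented by fs/cachefiles/daemon.c later in this patch (commands are plain text written to /dev/cachefiles). The sketch below is not part of the patch and is not the real cachefilesd; it only illustrates the minimal userspace bind sequence against the daemon interface, and the cache directory path and tag name are assumptions for the example.

/* Minimal sketch of a userspace cache controller.  Requires
 * CAP_SYS_ADMIN; each write() is parsed as one command by
 * cachefiles_daemon_write().  Closing the fd withdraws the cache.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int cachefiles_cmd(int fd, const char *cmd)
{
	if (write(fd, cmd, strlen(cmd)) < 0) {
		perror(cmd);
		return -1;
	}
	return 0;
}

int main(void)
{
	int fd = open("/dev/cachefiles", O_RDWR); /* cachefiles_daemon_open() */

	if (fd < 0) {
		perror("/dev/cachefiles");
		return 1;
	}
	if (cachefiles_cmd(fd, "dir /var/cache/fscache") || /* assumed path */
	    cachefiles_cmd(fd, "tag mycache") ||            /* assumed tag */
	    cachefiles_cmd(fd, "bind"))                     /* or "bind ondemand" */
		return 1;
	pause();	/* keep the fd open; the cache dies with it */
	return 0;
}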
diff --git a/fs/cachefiles/Makefile b/fs/cachefiles/Makefile
new file mode 100644
index 000000000..c37a7a9af
--- /dev/null
+++ b/fs/cachefiles/Makefile
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for caching in a mounted filesystem
+#
+
+cachefiles-y := \
+	cache.o \
+	daemon.o \
+	interface.o \
+	io.o \
+	key.o \
+	main.o \
+	namei.o \
+	security.o \
+	volume.o \
+	xattr.o
+
+cachefiles-$(CONFIG_CACHEFILES_ERROR_INJECTION) += error_inject.o
+cachefiles-$(CONFIG_CACHEFILES_ONDEMAND) += ondemand.o
+
+obj-$(CONFIG_CACHEFILES) := cachefiles.o

diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c
new file mode 100644
index 000000000..7077f72e6
--- /dev/null
+++ b/fs/cachefiles/cache.c
@@ -0,0 +1,383 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Manage high-level VFS aspects of a cache.
+ *
+ * Copyright (C) 2007, 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/slab.h>
+#include <linux/statfs.h>
+#include <linux/namei.h>
+#include "internal.h"
+
+/*
+ * Bring a cache online.
+ */
+int cachefiles_add_cache(struct cachefiles_cache *cache)
+{
+	struct fscache_cache *cache_cookie;
+	struct path path;
+	struct kstatfs stats;
+	struct dentry *graveyard, *cachedir, *root;
+	const struct cred *saved_cred;
+	int ret;
+
+	_enter("");
+
+	cache_cookie = fscache_acquire_cache(cache->tag);
+	if (IS_ERR(cache_cookie))
+		return PTR_ERR(cache_cookie);
+
+	/* we want to work under the module's security ID */
+	ret = cachefiles_get_security_ID(cache);
+	if (ret < 0)
+		goto error_getsec;
+
+	cachefiles_begin_secure(cache, &saved_cred);
+
+	/* look up the directory at the root of the cache */
+	ret = kern_path(cache->rootdirname, LOOKUP_DIRECTORY, &path);
+	if (ret < 0)
+		goto error_open_root;
+
+	cache->mnt = path.mnt;
+	root = path.dentry;
+
+	ret = -EINVAL;
+	if (is_idmapped_mnt(path.mnt)) {
+		pr_warn("File cache on idmapped mounts not supported");
+		goto error_unsupported;
+	}
+
+	/* Check features of the backing filesystem:
+	 * - Directories must support looking up and directory creation
+	 * - We create tmpfiles to handle invalidation
+	 * - We use xattrs to store metadata
+	 * - We need to be able to query the amount of space available
+	 * - We want to be able to sync the filesystem when stopping the cache
+	 * - We use DIO to/from pages, so the blocksize mustn't be too big.
+ */ + ret = -EOPNOTSUPP; + if (d_is_negative(root) || + !d_backing_inode(root)->i_op->lookup || + !d_backing_inode(root)->i_op->mkdir || + !d_backing_inode(root)->i_op->tmpfile || + !(d_backing_inode(root)->i_opflags & IOP_XATTR) || + !root->d_sb->s_op->statfs || + !root->d_sb->s_op->sync_fs || + root->d_sb->s_blocksize > PAGE_SIZE) + goto error_unsupported; + + ret = -EROFS; + if (sb_rdonly(root->d_sb)) + goto error_unsupported; + + /* determine the security of the on-disk cache as this governs + * security ID of files we create */ + ret = cachefiles_determine_cache_security(cache, root, &saved_cred); + if (ret < 0) + goto error_unsupported; + + /* get the cache size and blocksize */ + ret = vfs_statfs(&path, &stats); + if (ret < 0) + goto error_unsupported; + + ret = -ERANGE; + if (stats.f_bsize <= 0) + goto error_unsupported; + + ret = -EOPNOTSUPP; + if (stats.f_bsize > PAGE_SIZE) + goto error_unsupported; + + cache->bsize = stats.f_bsize; + cache->bshift = ilog2(stats.f_bsize); + + _debug("blksize %u (shift %u)", + cache->bsize, cache->bshift); + + _debug("size %llu, avail %llu", + (unsigned long long) stats.f_blocks, + (unsigned long long) stats.f_bavail); + + /* set up caching limits */ + do_div(stats.f_files, 100); + cache->fstop = stats.f_files * cache->fstop_percent; + cache->fcull = stats.f_files * cache->fcull_percent; + cache->frun = stats.f_files * cache->frun_percent; + + _debug("limits {%llu,%llu,%llu} files", + (unsigned long long) cache->frun, + (unsigned long long) cache->fcull, + (unsigned long long) cache->fstop); + + do_div(stats.f_blocks, 100); + cache->bstop = stats.f_blocks * cache->bstop_percent; + cache->bcull = stats.f_blocks * cache->bcull_percent; + cache->brun = stats.f_blocks * cache->brun_percent; + + _debug("limits {%llu,%llu,%llu} blocks", + (unsigned long long) cache->brun, + (unsigned long long) cache->bcull, + (unsigned long long) cache->bstop); + + /* get the cache directory and check its type */ + cachedir = cachefiles_get_directory(cache, root, "cache", NULL); + if (IS_ERR(cachedir)) { + ret = PTR_ERR(cachedir); + goto error_unsupported; + } + + cache->store = cachedir; + + /* get the graveyard directory */ + graveyard = cachefiles_get_directory(cache, root, "graveyard", NULL); + if (IS_ERR(graveyard)) { + ret = PTR_ERR(graveyard); + goto error_unsupported; + } + + cache->graveyard = graveyard; + cache->cache = cache_cookie; + + ret = fscache_add_cache(cache_cookie, &cachefiles_cache_ops, cache); + if (ret < 0) + goto error_add_cache; + + /* done */ + set_bit(CACHEFILES_READY, &cache->flags); + dput(root); + + pr_info("File cache on %s registered\n", cache_cookie->name); + + /* check how much space the cache has */ + cachefiles_has_space(cache, 0, 0, cachefiles_has_space_check); + cachefiles_end_secure(cache, saved_cred); + _leave(" = 0 [%px]", cache->cache); + return 0; + +error_add_cache: + cachefiles_put_directory(cache->graveyard); + cache->graveyard = NULL; +error_unsupported: + cachefiles_put_directory(cache->store); + cache->store = NULL; + mntput(cache->mnt); + cache->mnt = NULL; + dput(root); +error_open_root: + cachefiles_end_secure(cache, saved_cred); +error_getsec: + fscache_relinquish_cache(cache_cookie); + cache->cache = NULL; + pr_err("Failed to register: %d\n", ret); + return ret; +} + +/* + * See if we have space for a number of pages and/or a number of files in the + * cache + */ +int cachefiles_has_space(struct cachefiles_cache *cache, + unsigned fnr, unsigned bnr, + enum cachefiles_has_space_for reason) +{ + struct kstatfs stats; 
+ u64 b_avail, b_writing; + int ret; + + struct path path = { + .mnt = cache->mnt, + .dentry = cache->mnt->mnt_root, + }; + + //_enter("{%llu,%llu,%llu,%llu,%llu,%llu},%u,%u", + // (unsigned long long) cache->frun, + // (unsigned long long) cache->fcull, + // (unsigned long long) cache->fstop, + // (unsigned long long) cache->brun, + // (unsigned long long) cache->bcull, + // (unsigned long long) cache->bstop, + // fnr, bnr); + + /* find out how many pages of blockdev are available */ + memset(&stats, 0, sizeof(stats)); + + ret = vfs_statfs(&path, &stats); + if (ret < 0) { + trace_cachefiles_vfs_error(NULL, d_inode(path.dentry), ret, + cachefiles_trace_statfs_error); + if (ret == -EIO) + cachefiles_io_error(cache, "statfs failed"); + _leave(" = %d", ret); + return ret; + } + + b_avail = stats.f_bavail; + b_writing = atomic_long_read(&cache->b_writing); + if (b_avail > b_writing) + b_avail -= b_writing; + else + b_avail = 0; + + //_debug("avail %llu,%llu", + // (unsigned long long)stats.f_ffree, + // (unsigned long long)b_avail); + + /* see if there is sufficient space */ + if (stats.f_ffree > fnr) + stats.f_ffree -= fnr; + else + stats.f_ffree = 0; + + if (b_avail > bnr) + b_avail -= bnr; + else + b_avail = 0; + + ret = -ENOBUFS; + if (stats.f_ffree < cache->fstop || + b_avail < cache->bstop) + goto stop_and_begin_cull; + + ret = 0; + if (stats.f_ffree < cache->fcull || + b_avail < cache->bcull) + goto begin_cull; + + if (test_bit(CACHEFILES_CULLING, &cache->flags) && + stats.f_ffree >= cache->frun && + b_avail >= cache->brun && + test_and_clear_bit(CACHEFILES_CULLING, &cache->flags) + ) { + _debug("cease culling"); + cachefiles_state_changed(cache); + } + + //_leave(" = 0"); + return 0; + +stop_and_begin_cull: + switch (reason) { + case cachefiles_has_space_for_write: + fscache_count_no_write_space(); + break; + case cachefiles_has_space_for_create: + fscache_count_no_create_space(); + break; + default: + break; + } +begin_cull: + if (!test_and_set_bit(CACHEFILES_CULLING, &cache->flags)) { + _debug("### CULL CACHE ###"); + cachefiles_state_changed(cache); + } + + _leave(" = %d", ret); + return ret; +} + +/* + * Mark all the objects as being out of service and queue them all for cleanup. + */ +static void cachefiles_withdraw_objects(struct cachefiles_cache *cache) +{ + struct cachefiles_object *object; + unsigned int count = 0; + + _enter(""); + + spin_lock(&cache->object_list_lock); + + while (!list_empty(&cache->object_list)) { + object = list_first_entry(&cache->object_list, + struct cachefiles_object, cache_link); + cachefiles_see_object(object, cachefiles_obj_see_withdrawal); + list_del_init(&object->cache_link); + fscache_withdraw_cookie(object->cookie); + count++; + if ((count & 63) == 0) { + spin_unlock(&cache->object_list_lock); + cond_resched(); + spin_lock(&cache->object_list_lock); + } + } + + spin_unlock(&cache->object_list_lock); + _leave(" [%u objs]", count); +} + +/* + * Withdraw volumes. + */ +static void cachefiles_withdraw_volumes(struct cachefiles_cache *cache) +{ + _enter(""); + + for (;;) { + struct cachefiles_volume *volume = NULL; + + spin_lock(&cache->object_list_lock); + if (!list_empty(&cache->volumes)) { + volume = list_first_entry(&cache->volumes, + struct cachefiles_volume, cache_link); + list_del_init(&volume->cache_link); + } + spin_unlock(&cache->object_list_lock); + if (!volume) + break; + + cachefiles_withdraw_volume(volume); + } + + _leave(""); +} + +/* + * Sync a cache to backing disk. 
+ */
+static void cachefiles_sync_cache(struct cachefiles_cache *cache)
+{
+	const struct cred *saved_cred;
+	int ret;
+
+	_enter("%s", cache->cache->name);
+
+	/* make sure all pages pinned by operations on behalf of the netfs are
+	 * written to disc */
+	cachefiles_begin_secure(cache, &saved_cred);
+	down_read(&cache->mnt->mnt_sb->s_umount);
+	ret = sync_filesystem(cache->mnt->mnt_sb);
+	up_read(&cache->mnt->mnt_sb->s_umount);
+	cachefiles_end_secure(cache, saved_cred);
+
+	if (ret == -EIO)
+		cachefiles_io_error(cache,
+				    "Attempt to sync backing fs superblock returned error %d",
+				    ret);
+}
+
+/*
+ * Withdraw cache objects.
+ */
+void cachefiles_withdraw_cache(struct cachefiles_cache *cache)
+{
+	struct fscache_cache *fscache = cache->cache;
+
+	pr_info("File cache on %s unregistering\n", fscache->name);
+
+	fscache_withdraw_cache(fscache);
+
+	/* we now have to destroy all the active objects pertaining to this
+	 * cache - which we do by passing them off to thread pool to be
+	 * disposed of */
+	cachefiles_withdraw_objects(cache);
+	fscache_wait_for_objects(fscache);
+
+	cachefiles_withdraw_volumes(cache);
+	cachefiles_sync_cache(cache);
+	cache->cache = NULL;
+	fscache_relinquish_cache(fscache);
+}

diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
new file mode 100644
index 000000000..aa4efcabb
--- /dev/null
+++ b/fs/cachefiles/daemon.c
@@ -0,0 +1,814 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Daemon interface
+ *
+ * Copyright (C) 2007, 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/completion.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/namei.h>
+#include <linux/poll.h>
+#include <linux/mount.h>
+#include <linux/statfs.h>
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/fs_struct.h>
+#include "internal.h"
+
+static int cachefiles_daemon_open(struct inode *, struct file *);
+static int cachefiles_daemon_release(struct inode *, struct file *);
+static ssize_t cachefiles_daemon_read(struct file *, char __user *, size_t,
+				      loff_t *);
+static ssize_t cachefiles_daemon_write(struct file *, const char __user *,
+				       size_t, loff_t *);
+static __poll_t cachefiles_daemon_poll(struct file *,
+				       struct poll_table_struct *);
+static int cachefiles_daemon_frun(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_fcull(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_fstop(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_brun(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_bcull(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_bstop(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_cull(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_debug(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_dir(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_inuse(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_secctx(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_tag(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_bind(struct cachefiles_cache *, char *);
+static void cachefiles_daemon_unbind(struct cachefiles_cache *);
+
+static unsigned long cachefiles_open;
+
+const struct file_operations cachefiles_daemon_fops = {
+	.owner		= THIS_MODULE,
+	.open		= cachefiles_daemon_open,
+	.release	= cachefiles_daemon_release,
+	.read		= cachefiles_daemon_read,
+	.write		= cachefiles_daemon_write,
+	.poll		= cachefiles_daemon_poll,
+	.llseek		= noop_llseek,
+};
+
+struct cachefiles_daemon_cmd {
+	char name[8];
+	int
(*handler)(struct cachefiles_cache *cache, char *args); +}; + +static const struct cachefiles_daemon_cmd cachefiles_daemon_cmds[] = { + { "bind", cachefiles_daemon_bind }, + { "brun", cachefiles_daemon_brun }, + { "bcull", cachefiles_daemon_bcull }, + { "bstop", cachefiles_daemon_bstop }, + { "cull", cachefiles_daemon_cull }, + { "debug", cachefiles_daemon_debug }, + { "dir", cachefiles_daemon_dir }, + { "frun", cachefiles_daemon_frun }, + { "fcull", cachefiles_daemon_fcull }, + { "fstop", cachefiles_daemon_fstop }, + { "inuse", cachefiles_daemon_inuse }, + { "secctx", cachefiles_daemon_secctx }, + { "tag", cachefiles_daemon_tag }, +#ifdef CONFIG_CACHEFILES_ONDEMAND + { "copen", cachefiles_ondemand_copen }, +#endif + { "", NULL } +}; + + +/* + * Prepare a cache for caching. + */ +static int cachefiles_daemon_open(struct inode *inode, struct file *file) +{ + struct cachefiles_cache *cache; + + _enter(""); + + /* only the superuser may do this */ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + /* the cachefiles device may only be open once at a time */ + if (xchg(&cachefiles_open, 1) == 1) + return -EBUSY; + + /* allocate a cache record */ + cache = kzalloc(sizeof(struct cachefiles_cache), GFP_KERNEL); + if (!cache) { + cachefiles_open = 0; + return -ENOMEM; + } + + mutex_init(&cache->daemon_mutex); + init_waitqueue_head(&cache->daemon_pollwq); + INIT_LIST_HEAD(&cache->volumes); + INIT_LIST_HEAD(&cache->object_list); + spin_lock_init(&cache->object_list_lock); + refcount_set(&cache->unbind_pincount, 1); + xa_init_flags(&cache->reqs, XA_FLAGS_ALLOC); + xa_init_flags(&cache->ondemand_ids, XA_FLAGS_ALLOC1); + + /* set default caching limits + * - limit at 1% free space and/or free files + * - cull below 5% free space and/or free files + * - cease culling above 7% free space and/or free files + */ + cache->frun_percent = 7; + cache->fcull_percent = 5; + cache->fstop_percent = 1; + cache->brun_percent = 7; + cache->bcull_percent = 5; + cache->bstop_percent = 1; + + file->private_data = cache; + cache->cachefilesd = file; + return 0; +} + +static void cachefiles_flush_reqs(struct cachefiles_cache *cache) +{ + struct xarray *xa = &cache->reqs; + struct cachefiles_req *req; + unsigned long index; + + /* + * Make sure the following two operations won't be reordered. + * 1) set CACHEFILES_DEAD bit + * 2) flush requests in the xarray + * Otherwise the request may be enqueued after xarray has been + * flushed, leaving the orphan request never being completed. + * + * CPU 1 CPU 2 + * ===== ===== + * flush requests in the xarray + * test CACHEFILES_DEAD bit + * enqueue the request + * set CACHEFILES_DEAD bit + */ + smp_mb(); + + xa_lock(xa); + xa_for_each(xa, index, req) { + req->error = -EIO; + complete(&req->done); + } + xa_unlock(xa); + + xa_destroy(&cache->reqs); + xa_destroy(&cache->ondemand_ids); +} + +void cachefiles_put_unbind_pincount(struct cachefiles_cache *cache) +{ + if (refcount_dec_and_test(&cache->unbind_pincount)) { + cachefiles_daemon_unbind(cache); + cachefiles_open = 0; + kfree(cache); + } +} + +void cachefiles_get_unbind_pincount(struct cachefiles_cache *cache) +{ + refcount_inc(&cache->unbind_pincount); +} + +/* + * Release a cache. 
+ */ +static int cachefiles_daemon_release(struct inode *inode, struct file *file) +{ + struct cachefiles_cache *cache = file->private_data; + + _enter(""); + + ASSERT(cache); + + set_bit(CACHEFILES_DEAD, &cache->flags); + + if (cachefiles_in_ondemand_mode(cache)) + cachefiles_flush_reqs(cache); + + /* clean up the control file interface */ + cache->cachefilesd = NULL; + file->private_data = NULL; + + cachefiles_put_unbind_pincount(cache); + + _leave(""); + return 0; +} + +static ssize_t cachefiles_do_daemon_read(struct cachefiles_cache *cache, + char __user *_buffer, size_t buflen) +{ + unsigned long long b_released; + unsigned f_released; + char buffer[256]; + int n; + + /* check how much space the cache has */ + cachefiles_has_space(cache, 0, 0, cachefiles_has_space_check); + + /* summarise */ + f_released = atomic_xchg(&cache->f_released, 0); + b_released = atomic_long_xchg(&cache->b_released, 0); + clear_bit(CACHEFILES_STATE_CHANGED, &cache->flags); + + n = snprintf(buffer, sizeof(buffer), + "cull=%c" + " frun=%llx" + " fcull=%llx" + " fstop=%llx" + " brun=%llx" + " bcull=%llx" + " bstop=%llx" + " freleased=%x" + " breleased=%llx", + test_bit(CACHEFILES_CULLING, &cache->flags) ? '1' : '0', + (unsigned long long) cache->frun, + (unsigned long long) cache->fcull, + (unsigned long long) cache->fstop, + (unsigned long long) cache->brun, + (unsigned long long) cache->bcull, + (unsigned long long) cache->bstop, + f_released, + b_released); + + if (n > buflen) + return -EMSGSIZE; + + if (copy_to_user(_buffer, buffer, n) != 0) + return -EFAULT; + + return n; +} + +/* + * Read the cache state. + */ +static ssize_t cachefiles_daemon_read(struct file *file, char __user *_buffer, + size_t buflen, loff_t *pos) +{ + struct cachefiles_cache *cache = file->private_data; + + //_enter(",,%zu,", buflen); + + if (!test_bit(CACHEFILES_READY, &cache->flags)) + return 0; + + if (cachefiles_in_ondemand_mode(cache)) + return cachefiles_ondemand_daemon_read(cache, _buffer, buflen); + else + return cachefiles_do_daemon_read(cache, _buffer, buflen); +} + +/* + * Take a command from cachefilesd, parse it and act on it. 
+ */
+static ssize_t cachefiles_daemon_write(struct file *file,
+				       const char __user *_data,
+				       size_t datalen,
+				       loff_t *pos)
+{
+	const struct cachefiles_daemon_cmd *cmd;
+	struct cachefiles_cache *cache = file->private_data;
+	ssize_t ret;
+	char *data, *args, *cp;
+
+	//_enter(",,%zu,", datalen);
+
+	ASSERT(cache);
+
+	if (test_bit(CACHEFILES_DEAD, &cache->flags))
+		return -EIO;
+
+	if (datalen > PAGE_SIZE - 1)
+		return -EOPNOTSUPP;
+
+	/* drag the command string into the kernel so we can parse it */
+	data = memdup_user_nul(_data, datalen);
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	ret = -EINVAL;
+	if (memchr(data, '\0', datalen))
+		goto error;
+
+	/* strip any newline */
+	cp = memchr(data, '\n', datalen);
+	if (cp) {
+		if (cp == data)
+			goto error;
+
+		*cp = '\0';
+	}
+
+	/* parse the command */
+	ret = -EOPNOTSUPP;
+
+	for (args = data; *args; args++)
+		if (isspace(*args))
+			break;
+	if (*args) {
+		if (args == data)
+			goto error;
+		*args = '\0';
+		args = skip_spaces(++args);
+	}
+
+	/* run the appropriate command handler */
+	for (cmd = cachefiles_daemon_cmds; cmd->name[0]; cmd++)
+		if (strcmp(cmd->name, data) == 0)
+			goto found_command;
+
+error:
+	kfree(data);
+	//_leave(" = %zd", ret);
+	return ret;
+
+found_command:
+	mutex_lock(&cache->daemon_mutex);
+
+	ret = -EIO;
+	if (!test_bit(CACHEFILES_DEAD, &cache->flags))
+		ret = cmd->handler(cache, args);
+
+	mutex_unlock(&cache->daemon_mutex);
+
+	if (ret == 0)
+		ret = datalen;
+	goto error;
+}
+
+/*
+ * Poll for culling state
+ * - use EPOLLOUT to indicate culling state
+ */
+static __poll_t cachefiles_daemon_poll(struct file *file,
+				       struct poll_table_struct *poll)
+{
+	struct cachefiles_cache *cache = file->private_data;
+	__poll_t mask;
+
+	poll_wait(file, &cache->daemon_pollwq, poll);
+	mask = 0;
+
+	if (cachefiles_in_ondemand_mode(cache)) {
+		if (!xa_empty(&cache->reqs))
+			mask |= EPOLLIN;
+	} else {
+		if (test_bit(CACHEFILES_STATE_CHANGED, &cache->flags))
+			mask |= EPOLLIN;
+	}
+
+	if (test_bit(CACHEFILES_CULLING, &cache->flags))
+		mask |= EPOLLOUT;
+
+	return mask;
+}
+
+/*
+ * Give a range error for cache space constraints
+ * - can be tail-called
+ */
+static int cachefiles_daemon_range_error(struct cachefiles_cache *cache,
+					 char *args)
+{
+	pr_err("Free space limits must be in range 0%%<=stop<cull<run<100%%\n");
+	return -EINVAL;
+}
+
+/*
+ * Set the percentage of files at which to stop culling
+ * - command: "frun <N>%"
+ */
+static int cachefiles_daemon_frun(struct cachefiles_cache *cache, char *args)
+{
+	unsigned long frun;
+
+	_enter(",%s", args);
+
+	if (!*args)
+		return -EINVAL;
+
+	frun = simple_strtoul(args, &args, 10);
+	if (args[0] != '%' || args[1] != '\0')
+		return -EINVAL;
+
+	if (frun <= cache->fcull_percent || frun >= 100)
+		return cachefiles_daemon_range_error(cache, args);
+
+	cache->frun_percent = frun;
+	return 0;
+}
+
+/*
+ * Set the percentage of files at which to start culling
+ * - command: "fcull <N>%"
+ */
+static int cachefiles_daemon_fcull(struct cachefiles_cache *cache, char *args)
+{
+	unsigned long fcull;
+
+	_enter(",%s", args);
+
+	if (!*args)
+		return -EINVAL;
+
+	fcull = simple_strtoul(args, &args, 10);
+	if (args[0] != '%' || args[1] != '\0')
+		return -EINVAL;
+
+	if (fcull <= cache->fstop_percent || fcull >= cache->frun_percent)
+		return cachefiles_daemon_range_error(cache, args);
+
+	cache->fcull_percent = fcull;
+	return 0;
+}
+
+/*
+ * Set the percentage of files at which to stop allocating
+ * - command: "fstop <N>%"
+ */
+static int cachefiles_daemon_fstop(struct cachefiles_cache *cache, char *args)
+{
+	unsigned long fstop;
+
+	_enter(",%s", args);
+
+	if (!*args)
+		return -EINVAL;
+
+	fstop = simple_strtoul(args, &args, 10);
+	if (args[0] != '%' || args[1] != '\0')
+		return -EINVAL;
+
+	if (fstop >= cache->fcull_percent)
+		return cachefiles_daemon_range_error(cache, args);
+
+	cache->fstop_percent = fstop;
+	return 0;
+}
+
+/*
+ * Set the percentage of blocks at which to stop culling
+ * - command: "brun <N>%"
+ */
+static int cachefiles_daemon_brun(struct cachefiles_cache *cache, char *args)
+{
+	unsigned long brun;
+
+	_enter(",%s", args);
+
+	if (!*args)
+		return -EINVAL;
+
+	brun = simple_strtoul(args, &args, 10);
+	if (args[0] != '%' || args[1] != '\0')
+		return -EINVAL;
+
+	if (brun <= cache->bcull_percent || brun >= 100)
+		return cachefiles_daemon_range_error(cache, args);
+
+	cache->brun_percent = brun;
+	return 0;
+}
+
+/*
+ * Set the percentage of blocks at which to start culling
+ * - command: "bcull <N>%"
+ */
+static int cachefiles_daemon_bcull(struct cachefiles_cache *cache, char *args)
+{
+	unsigned long bcull;
+
+	_enter(",%s", args);
+
+	if (!*args)
+		return -EINVAL;
+
+	bcull = simple_strtoul(args, &args, 10);
+	if (args[0] != '%' || args[1] != '\0')
+		return -EINVAL;
+
+	if (bcull <= cache->bstop_percent || bcull >= cache->brun_percent)
+		return cachefiles_daemon_range_error(cache, args);
+
+	cache->bcull_percent = bcull;
+	return 0;
+}
+
+/*
+ * Set the percentage of blocks at which to stop allocating
+ * - command: "bstop <N>%"
+ */
+static int cachefiles_daemon_bstop(struct cachefiles_cache *cache, char *args)
+{
+	unsigned long bstop;
+
+	_enter(",%s", args);
+
+	if (!*args)
+		return -EINVAL;
+
+	bstop = simple_strtoul(args, &args, 10);
+	if (args[0] != '%' || args[1] != '\0')
+		return -EINVAL;
+
+	if (bstop >= cache->bcull_percent)
+		return cachefiles_daemon_range_error(cache, args);
+
+	cache->bstop_percent = bstop;
+	return 0;
+}
+
+/*
+ * Set the cache directory
+ * - command: "dir <path>"
+ */
+static int cachefiles_daemon_dir(struct cachefiles_cache *cache, char *args)
+{
+	char *dir;
+
+	_enter(",%s", args);
+
+	if (!*args) {
+		pr_err("Empty directory specified\n");
+		return -EINVAL;
+	}
+
+	if (cache->rootdirname) {
+		pr_err("Second cache directory specified\n");
+		return -EEXIST;
+	}
+
+	dir = kstrdup(args, GFP_KERNEL);
+	if (!dir)
+		return -ENOMEM;
+
+	cache->rootdirname = dir;
+	return 0;
+}
+
+/*
+ * Set the cache security context
+ * - command: "secctx <ctx>"
+ */
+static int cachefiles_daemon_secctx(struct cachefiles_cache *cache, char *args)
+{
+	char *secctx;
+
+	_enter(",%s", args);
+
+	if (!*args) {
+		pr_err("Empty security context specified\n");
+		return -EINVAL;
+	}
+
+	if (cache->secctx) {
+		pr_err("Second security context specified\n");
+		return -EINVAL;
+	}
+
+	secctx = kstrdup(args, GFP_KERNEL);
+	if (!secctx)
+		return -ENOMEM;
+
+	cache->secctx = secctx;
+	return 0;
+}
+
+/*
+ * Set the cache tag
+ * - command: "tag <name>"
+ */
+static int cachefiles_daemon_tag(struct cachefiles_cache *cache, char *args)
+{
+	char *tag;
+
+	_enter(",%s", args);
+
+	if (!*args) {
+		pr_err("Empty tag specified\n");
+		return -EINVAL;
+	}
+
+	if (cache->tag)
+		return -EEXIST;
+
+	tag = kstrdup(args, GFP_KERNEL);
+	if (!tag)
+		return -ENOMEM;
+
+	cache->tag = tag;
+	return 0;
+}
+
+/*
+ * Request a node in the cache be culled from the current working directory
+ * - command: "cull <name>"
+ */
+static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args)
+{
+	struct path path;
+	const struct cred *saved_cred;
+	int ret;
+
+	_enter(",%s", args);
+
+	if (strchr(args, '/'))
+		goto inval;
+
+	if (!test_bit(CACHEFILES_READY, &cache->flags)) {
+		pr_err("cull applied to unready cache\n");
+		return -EIO;
+	}
+
+	if (test_bit(CACHEFILES_DEAD, &cache->flags)) {
+		pr_err("cull applied to dead cache\n");
+		return -EIO;
+	}
+
+	get_fs_pwd(current->fs, &path);
+
+	if (!d_can_lookup(path.dentry))
+		goto notdir;
+
+	cachefiles_begin_secure(cache, &saved_cred);
+	ret = cachefiles_cull(cache, path.dentry, args);
+	cachefiles_end_secure(cache, saved_cred);
+
+	path_put(&path);
+	_leave(" = %d", ret);
+	return ret;
+
+notdir:
+	path_put(&path);
+	pr_err("cull command requires dirfd to be a directory\n");
+	return -ENOTDIR;
+
+inval:
+	pr_err("cull command requires dirfd and filename\n");
+	return -EINVAL;
+}
+
+/*
+ * Set debugging mode
+ * - command: "debug <mask>"
+ */
+static int cachefiles_daemon_debug(struct cachefiles_cache *cache, char *args)
+{
+	unsigned long mask;
+
+	_enter(",%s", args);
+
+	mask = simple_strtoul(args, &args, 0);
+	if (args[0] != '\0')
+		goto inval;
+
+	cachefiles_debug = mask;
+	_leave(" = 0");
+	return 0;
+
+inval:
+	pr_err("debug command requires mask\n");
+	return -EINVAL;
+}
+
+/*
+ * Find out whether an object in the current working directory is in use or not
+ * - command: "inuse <name>"
+ */
+static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args)
+{
+	struct path path;
+	const struct cred *saved_cred;
+	int ret;
+
+	//_enter(",%s", args);
+
+	if (strchr(args, '/'))
+		goto inval;
+
+	if (!test_bit(CACHEFILES_READY, &cache->flags)) {
+		pr_err("inuse applied to unready cache\n");
+		return -EIO;
+	}
+
+	if (test_bit(CACHEFILES_DEAD, &cache->flags)) {
+		pr_err("inuse applied to dead cache\n");
+		return -EIO;
+	}
+
+	get_fs_pwd(current->fs, &path);
+
+	if (!d_can_lookup(path.dentry))
+		goto notdir;
+
+	cachefiles_begin_secure(cache, &saved_cred);
+	ret = cachefiles_check_in_use(cache, path.dentry, args);
+	cachefiles_end_secure(cache, saved_cred);
+
+	path_put(&path);
+	//_leave(" = %d", ret);
+	return ret;
+
+notdir:
+	path_put(&path);
+	pr_err("inuse command requires dirfd to be a directory\n");
+	return -ENOTDIR;
+
+inval:
+	pr_err("inuse command requires dirfd and filename\n");
+	return -EINVAL;
+}
+
+/*
+ * Bind a directory as a cache
+ */
+static int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args)
+{
+	_enter("{%u,%u,%u,%u,%u,%u},%s",
+	       cache->frun_percent,
+	       cache->fcull_percent,
+	       cache->fstop_percent,
+	       cache->brun_percent,
+	       cache->bcull_percent,
+	       cache->bstop_percent,
+	       args);
+
+	if (cache->fstop_percent >= cache->fcull_percent ||
+	    cache->fcull_percent >= cache->frun_percent ||
+	    cache->frun_percent >= 100)
+		return -ERANGE;
+
+	if (cache->bstop_percent >= cache->bcull_percent ||
+	    cache->bcull_percent >= cache->brun_percent ||
+	    cache->brun_percent >= 100)
+		return -ERANGE;
+
+	if (!cache->rootdirname) {
+		pr_err("No cache directory specified\n");
+		return -EINVAL;
+	}
+
+	/* Don't permit already bound caches to be re-bound */
+	if (test_bit(CACHEFILES_READY, &cache->flags)) {
+		pr_err("Cache already bound\n");
+		return -EBUSY;
+	}
+
+	if (IS_ENABLED(CONFIG_CACHEFILES_ONDEMAND)) {
+		if (!strcmp(args, "ondemand")) {
+			set_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags);
+		} else if (*args) {
+			pr_err("Invalid argument to the 'bind' command\n");
+			return -EINVAL;
+		}
+	} else if (*args) {
+		pr_err("'bind' command doesn't take an argument\n");
+		return -EINVAL;
+	}
+
+	/* Make sure we have copies of the tag string */
+	if (!cache->tag) {
+		/*
+		 * The tag string is released by the fops->release()
+		 * function, so we don't release it on error here
+		 */
+		cache->tag = kstrdup("CacheFiles", GFP_KERNEL);
+		if (!cache->tag)
+			return -ENOMEM;
+	}
+
+	return cachefiles_add_cache(cache);
+}
+
+/*
+ * Unbind a cache.
+ */
+static void cachefiles_daemon_unbind(struct cachefiles_cache *cache)
+{
+	_enter("");
+
+	if (test_bit(CACHEFILES_READY, &cache->flags))
+		cachefiles_withdraw_cache(cache);
+
+	cachefiles_put_directory(cache->graveyard);
+	cachefiles_put_directory(cache->store);
+	mntput(cache->mnt);
+
+	kfree(cache->rootdirname);
+	kfree(cache->secctx);
+	kfree(cache->tag);
+
+	_leave("");
+}

diff --git a/fs/cachefiles/error_inject.c b/fs/cachefiles/error_inject.c
new file mode 100644
index 000000000..58f8aec96
--- /dev/null
+++ b/fs/cachefiles/error_inject.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Error injection handling.
+ *
+ * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/sysctl.h>
+#include "internal.h"
+
+unsigned int cachefiles_error_injection_state;
+
+static struct ctl_table_header *cachefiles_sysctl;
+static struct ctl_table cachefiles_sysctls[] = {
+	{
+		.procname	= "error_injection",
+		.data		= &cachefiles_error_injection_state,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_douintvec,
+	},
+	{}
+};
+
+static struct ctl_table cachefiles_sysctls_root[] = {
+	{
+		.procname	= "cachefiles",
+		.mode		= 0555,
+		.child		= cachefiles_sysctls,
+	},
+	{}
+};
+
+int __init cachefiles_register_error_injection(void)
+{
+	cachefiles_sysctl = register_sysctl_table(cachefiles_sysctls_root);
+	if (!cachefiles_sysctl)
+		return -ENOMEM;
+	return 0;
+}
+
+void cachefiles_unregister_error_injection(void)
+{
+	unregister_sysctl_table(cachefiles_sysctl);
+}

diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
new file mode 100644
index 000000000..a69073a1d
--- /dev/null
+++ b/fs/cachefiles/interface.c
@@ -0,0 +1,447 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* FS-Cache interface to CacheFiles
+ *
+ * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/slab.h>
+#include <linux/mount.h>
+#include <linux/xattr.h>
+#include <linux/file.h>
+#include <linux/falloc.h>
+#include <trace/events/fscache.h>
+#include "internal.h"
+
+static atomic_t cachefiles_object_debug_id;
+
+/*
+ * Allocate a cache object record.
+ */
+static
+struct cachefiles_object *cachefiles_alloc_object(struct fscache_cookie *cookie)
+{
+	struct fscache_volume *vcookie = cookie->volume;
+	struct cachefiles_volume *volume = vcookie->cache_priv;
+	struct cachefiles_object *object;
+
+	_enter("{%s},%x,", vcookie->key, cookie->debug_id);
+
+	object = kmem_cache_zalloc(cachefiles_object_jar, GFP_KERNEL);
+	if (!object)
+		return NULL;
+
+	refcount_set(&object->ref, 1);
+
+	spin_lock_init(&object->lock);
+	INIT_LIST_HEAD(&object->cache_link);
+	object->volume = volume;
+	object->debug_id = atomic_inc_return(&cachefiles_object_debug_id);
+	object->cookie = fscache_get_cookie(cookie, fscache_cookie_get_attach_object);
+
+	fscache_count_object(vcookie->cache);
+	trace_cachefiles_ref(object->debug_id, cookie->debug_id, 1,
+			     cachefiles_obj_new);
+	return object;
+}
+
+/*
+ * Note that an object has been seen.
+ */ +void cachefiles_see_object(struct cachefiles_object *object, + enum cachefiles_obj_ref_trace why) +{ + trace_cachefiles_ref(object->debug_id, object->cookie->debug_id, + refcount_read(&object->ref), why); +} + +/* + * Increment the usage count on an object; + */ +struct cachefiles_object *cachefiles_grab_object(struct cachefiles_object *object, + enum cachefiles_obj_ref_trace why) +{ + int r; + + __refcount_inc(&object->ref, &r); + trace_cachefiles_ref(object->debug_id, object->cookie->debug_id, r, why); + return object; +} + +/* + * dispose of a reference to an object + */ +void cachefiles_put_object(struct cachefiles_object *object, + enum cachefiles_obj_ref_trace why) +{ + unsigned int object_debug_id = object->debug_id; + unsigned int cookie_debug_id = object->cookie->debug_id; + struct fscache_cache *cache; + bool done; + int r; + + done = __refcount_dec_and_test(&object->ref, &r); + trace_cachefiles_ref(object_debug_id, cookie_debug_id, r, why); + if (done) { + _debug("- kill object OBJ%x", object_debug_id); + + ASSERTCMP(object->file, ==, NULL); + + kfree(object->d_name); + + cache = object->volume->cache->cache; + fscache_put_cookie(object->cookie, fscache_cookie_put_object); + object->cookie = NULL; + kmem_cache_free(cachefiles_object_jar, object); + fscache_uncount_object(cache); + } + + _leave(""); +} + +/* + * Adjust the size of a cache file if necessary to match the DIO size. We keep + * the EOF marker a multiple of DIO blocks so that we don't fall back to doing + * non-DIO for a partial block straddling the EOF, but we also have to be + * careful of someone expanding the file and accidentally accreting the + * padding. + */ +static int cachefiles_adjust_size(struct cachefiles_object *object) +{ + struct iattr newattrs; + struct file *file = object->file; + uint64_t ni_size; + loff_t oi_size; + int ret; + + ni_size = object->cookie->object_size; + ni_size = round_up(ni_size, CACHEFILES_DIO_BLOCK_SIZE); + + _enter("{OBJ%x},[%llu]", + object->debug_id, (unsigned long long) ni_size); + + if (!file) + return -ENOBUFS; + + oi_size = i_size_read(file_inode(file)); + if (oi_size == ni_size) + return 0; + + inode_lock(file_inode(file)); + + /* if there's an extension to a partial page at the end of the backing + * file, we need to discard the partial page so that we pick up new + * data after it */ + if (oi_size & ~PAGE_MASK && ni_size > oi_size) { + _debug("discard tail %llx", oi_size); + newattrs.ia_valid = ATTR_SIZE; + newattrs.ia_size = oi_size & PAGE_MASK; + ret = cachefiles_inject_remove_error(); + if (ret == 0) + ret = notify_change(&init_user_ns, file->f_path.dentry, + &newattrs, NULL); + if (ret < 0) + goto truncate_failed; + } + + newattrs.ia_valid = ATTR_SIZE; + newattrs.ia_size = ni_size; + ret = cachefiles_inject_write_error(); + if (ret == 0) + ret = notify_change(&init_user_ns, file->f_path.dentry, + &newattrs, NULL); + +truncate_failed: + inode_unlock(file_inode(file)); + + if (ret < 0) + trace_cachefiles_io_error(NULL, file_inode(file), ret, + cachefiles_trace_notify_change_error); + if (ret == -EIO) { + cachefiles_io_error_obj(object, "Size set failed"); + ret = -ENOBUFS; + } + + _leave(" = %d", ret); + return ret; +} + +/* + * Attempt to look up the nominated node in this cache + */ +static bool cachefiles_lookup_cookie(struct fscache_cookie *cookie) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache = cookie->volume->cache->cache_priv; + const struct cred *saved_cred; + bool success; + + object = cachefiles_alloc_object(cookie); + if 
(!object) + goto fail; + + _enter("{OBJ%x}", object->debug_id); + + if (!cachefiles_cook_key(object)) + goto fail_put; + + cookie->cache_priv = object; + + cachefiles_begin_secure(cache, &saved_cred); + + success = cachefiles_look_up_object(object); + if (!success) + goto fail_withdraw; + + cachefiles_see_object(object, cachefiles_obj_see_lookup_cookie); + + spin_lock(&cache->object_list_lock); + list_add(&object->cache_link, &cache->object_list); + spin_unlock(&cache->object_list_lock); + cachefiles_adjust_size(object); + + cachefiles_end_secure(cache, saved_cred); + _leave(" = t"); + return true; + +fail_withdraw: + cachefiles_end_secure(cache, saved_cred); + cachefiles_see_object(object, cachefiles_obj_see_lookup_failed); + fscache_caching_failed(cookie); + _debug("failed c=%08x o=%08x", cookie->debug_id, object->debug_id); + /* The caller holds an access count on the cookie, so we need them to + * drop it before we can withdraw the object. + */ + return false; + +fail_put: + cachefiles_put_object(object, cachefiles_obj_put_alloc_fail); +fail: + return false; +} + +/* + * Shorten the backing object to discard any dirty data and free up + * any unused granules. + */ +static bool cachefiles_shorten_object(struct cachefiles_object *object, + struct file *file, loff_t new_size) +{ + struct cachefiles_cache *cache = object->volume->cache; + struct inode *inode = file_inode(file); + loff_t i_size, dio_size; + int ret; + + dio_size = round_up(new_size, CACHEFILES_DIO_BLOCK_SIZE); + i_size = i_size_read(inode); + + trace_cachefiles_trunc(object, inode, i_size, dio_size, + cachefiles_trunc_shrink); + ret = cachefiles_inject_remove_error(); + if (ret == 0) + ret = vfs_truncate(&file->f_path, dio_size); + if (ret < 0) { + trace_cachefiles_io_error(object, file_inode(file), ret, + cachefiles_trace_trunc_error); + cachefiles_io_error_obj(object, "Trunc-to-size failed %d", ret); + cachefiles_remove_object_xattr(cache, object, file->f_path.dentry); + return false; + } + + if (new_size < dio_size) { + trace_cachefiles_trunc(object, inode, dio_size, new_size, + cachefiles_trunc_dio_adjust); + ret = cachefiles_inject_write_error(); + if (ret == 0) + ret = vfs_fallocate(file, FALLOC_FL_ZERO_RANGE, + new_size, dio_size - new_size); + if (ret < 0) { + trace_cachefiles_io_error(object, file_inode(file), ret, + cachefiles_trace_fallocate_error); + cachefiles_io_error_obj(object, "Trunc-to-dio-size failed %d", ret); + cachefiles_remove_object_xattr(cache, object, file->f_path.dentry); + return false; + } + } + + return true; +} + +/* + * Resize the backing object. + */ +static void cachefiles_resize_cookie(struct netfs_cache_resources *cres, + loff_t new_size) +{ + struct cachefiles_object *object = cachefiles_cres_object(cres); + struct cachefiles_cache *cache = object->volume->cache; + struct fscache_cookie *cookie = object->cookie; + const struct cred *saved_cred; + struct file *file = cachefiles_cres_file(cres); + loff_t old_size = cookie->object_size; + + _enter("%llu->%llu", old_size, new_size); + + if (new_size < old_size) { + cachefiles_begin_secure(cache, &saved_cred); + cachefiles_shorten_object(object, file, new_size); + cachefiles_end_secure(cache, saved_cred); + object->cookie->object_size = new_size; + return; + } + + /* The file is being expanded. We don't need to do anything + * particularly. cookie->initial_size doesn't change and so the point + * at which we have to download before doesn't change. + */ + cookie->object_size = new_size; +} + +/* + * Commit changes to the object as we drop it. 
+ */ +static void cachefiles_commit_object(struct cachefiles_object *object, + struct cachefiles_cache *cache) +{ + bool update = false; + + if (test_and_clear_bit(FSCACHE_COOKIE_LOCAL_WRITE, &object->cookie->flags)) + update = true; + if (test_and_clear_bit(FSCACHE_COOKIE_NEEDS_UPDATE, &object->cookie->flags)) + update = true; + if (update) + cachefiles_set_object_xattr(object); + + if (test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags)) + cachefiles_commit_tmpfile(cache, object); +} + +/* + * Finalise and object and close the VFS structs that we have. + */ +static void cachefiles_clean_up_object(struct cachefiles_object *object, + struct cachefiles_cache *cache) +{ + if (test_bit(FSCACHE_COOKIE_RETIRED, &object->cookie->flags)) { + if (!test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags)) { + cachefiles_see_object(object, cachefiles_obj_see_clean_delete); + _debug("- inval object OBJ%x", object->debug_id); + cachefiles_delete_object(object, FSCACHE_OBJECT_WAS_RETIRED); + } else { + cachefiles_see_object(object, cachefiles_obj_see_clean_drop_tmp); + _debug("- inval object OBJ%x tmpfile", object->debug_id); + } + } else { + cachefiles_see_object(object, cachefiles_obj_see_clean_commit); + cachefiles_commit_object(object, cache); + } + + cachefiles_unmark_inode_in_use(object, object->file); + if (object->file) { + fput(object->file); + object->file = NULL; + } +} + +/* + * Withdraw caching for a cookie. + */ +static void cachefiles_withdraw_cookie(struct fscache_cookie *cookie) +{ + struct cachefiles_object *object = cookie->cache_priv; + struct cachefiles_cache *cache = object->volume->cache; + const struct cred *saved_cred; + + _enter("o=%x", object->debug_id); + cachefiles_see_object(object, cachefiles_obj_see_withdraw_cookie); + + if (!list_empty(&object->cache_link)) { + spin_lock(&cache->object_list_lock); + cachefiles_see_object(object, cachefiles_obj_see_withdrawal); + list_del_init(&object->cache_link); + spin_unlock(&cache->object_list_lock); + } + + cachefiles_ondemand_clean_object(object); + + if (object->file) { + cachefiles_begin_secure(cache, &saved_cred); + cachefiles_clean_up_object(object, cache); + cachefiles_end_secure(cache, saved_cred); + } + + cookie->cache_priv = NULL; + cachefiles_put_object(object, cachefiles_obj_put_detach); +} + +/* + * Invalidate the storage associated with a cookie. 
+ */ +static bool cachefiles_invalidate_cookie(struct fscache_cookie *cookie) +{ + struct cachefiles_object *object = cookie->cache_priv; + struct file *new_file, *old_file; + bool old_tmpfile; + + _enter("o=%x,[%llu]", object->debug_id, object->cookie->object_size); + + old_tmpfile = test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags); + + if (!object->file) { + fscache_resume_after_invalidation(cookie); + _leave(" = t [light]"); + return true; + } + + new_file = cachefiles_create_tmpfile(object); + if (IS_ERR(new_file)) + goto failed; + + /* Substitute the VFS target */ + _debug("sub"); + spin_lock(&object->lock); + + old_file = object->file; + object->file = new_file; + object->content_info = CACHEFILES_CONTENT_NO_DATA; + set_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags); + set_bit(FSCACHE_COOKIE_NEEDS_UPDATE, &object->cookie->flags); + + spin_unlock(&object->lock); + _debug("subbed"); + + /* Allow I/O to take place again */ + fscache_resume_after_invalidation(cookie); + + if (old_file) { + if (!old_tmpfile) { + struct cachefiles_volume *volume = object->volume; + struct dentry *fan = volume->fanout[(u8)cookie->key_hash]; + + inode_lock_nested(d_inode(fan), I_MUTEX_PARENT); + cachefiles_bury_object(volume->cache, object, fan, + old_file->f_path.dentry, + FSCACHE_OBJECT_INVALIDATED); + } + fput(old_file); + } + + _leave(" = t"); + return true; + +failed: + _leave(" = f"); + return false; +} + +const struct fscache_cache_ops cachefiles_cache_ops = { + .name = "cachefiles", + .acquire_volume = cachefiles_acquire_volume, + .free_volume = cachefiles_free_volume, + .lookup_cookie = cachefiles_lookup_cookie, + .withdraw_cookie = cachefiles_withdraw_cookie, + .invalidate_cookie = cachefiles_invalidate_cookie, + .begin_operation = cachefiles_begin_operation, + .resize_cookie = cachefiles_resize_cookie, + .prepare_to_write = cachefiles_prepare_to_write, +}; diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h new file mode 100644 index 000000000..2ad58c465 --- /dev/null +++ b/fs/cachefiles/internal.h @@ -0,0 +1,477 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* General netfs cache on cache files internal defs + * + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#ifdef pr_fmt +#undef pr_fmt +#endif + +#define pr_fmt(fmt) "CacheFiles: " fmt + + +#include +#include +#include +#include +#include + +#define CACHEFILES_DIO_BLOCK_SIZE 4096 + +struct cachefiles_cache; +struct cachefiles_object; + +enum cachefiles_content { + /* These values are saved on disk */ + CACHEFILES_CONTENT_NO_DATA = 0, /* No content stored */ + CACHEFILES_CONTENT_SINGLE = 1, /* Content is monolithic, all is present */ + CACHEFILES_CONTENT_ALL = 2, /* Content is all present, no map */ + CACHEFILES_CONTENT_BACKFS_MAP = 3, /* Content is piecemeal, mapped through backing fs */ + CACHEFILES_CONTENT_DIRTY = 4, /* Content is dirty (only seen on disk) */ + nr__cachefiles_content +}; + +/* + * Cached volume representation. + */ +struct cachefiles_volume { + struct cachefiles_cache *cache; + struct list_head cache_link; /* Link in cache->volumes */ + struct fscache_volume *vcookie; /* The netfs's representation */ + struct dentry *dentry; /* The volume dentry */ + struct dentry *fanout[256]; /* Fanout subdirs */ +}; + +/* + * Backing file state. 
+ */ +struct cachefiles_object { + struct fscache_cookie *cookie; /* Netfs data storage object cookie */ + struct cachefiles_volume *volume; /* Cache volume that holds this object */ + struct list_head cache_link; /* Link in cache->*_list */ + struct file *file; /* The file representing this object */ + char *d_name; /* Backing file name */ + int debug_id; + spinlock_t lock; + refcount_t ref; + u8 d_name_len; /* Length of filename */ + enum cachefiles_content content_info:8; /* Info about content presence */ + unsigned long flags; +#define CACHEFILES_OBJECT_USING_TMPFILE 0 /* Have an unlinked tmpfile */ +#ifdef CONFIG_CACHEFILES_ONDEMAND + int ondemand_id; +#endif +}; + +#define CACHEFILES_ONDEMAND_ID_CLOSED -1 + +/* + * Cache files cache definition + */ +struct cachefiles_cache { + struct fscache_cache *cache; /* Cache cookie */ + struct vfsmount *mnt; /* mountpoint holding the cache */ + struct dentry *store; /* Directory into which live objects go */ + struct dentry *graveyard; /* directory into which dead objects go */ + struct file *cachefilesd; /* manager daemon handle */ + struct list_head volumes; /* List of volume objects */ + struct list_head object_list; /* List of active objects */ + spinlock_t object_list_lock; /* Lock for volumes and object_list */ + const struct cred *cache_cred; /* security override for accessing cache */ + struct mutex daemon_mutex; /* command serialisation mutex */ + wait_queue_head_t daemon_pollwq; /* poll waitqueue for daemon */ + atomic_t gravecounter; /* graveyard uniquifier */ + atomic_t f_released; /* number of objects released lately */ + atomic_long_t b_released; /* number of blocks released lately */ + atomic_long_t b_writing; /* Number of blocks being written */ + unsigned frun_percent; /* when to stop culling (% files) */ + unsigned fcull_percent; /* when to start culling (% files) */ + unsigned fstop_percent; /* when to stop allocating (% files) */ + unsigned brun_percent; /* when to stop culling (% blocks) */ + unsigned bcull_percent; /* when to start culling (% blocks) */ + unsigned bstop_percent; /* when to stop allocating (% blocks) */ + unsigned bsize; /* cache's block size */ + unsigned bshift; /* ilog2(bsize) */ + uint64_t frun; /* when to stop culling */ + uint64_t fcull; /* when to start culling */ + uint64_t fstop; /* when to stop allocating */ + sector_t brun; /* when to stop culling */ + sector_t bcull; /* when to start culling */ + sector_t bstop; /* when to stop allocating */ + unsigned long flags; +#define CACHEFILES_READY 0 /* T if cache prepared */ +#define CACHEFILES_DEAD 1 /* T if cache dead */ +#define CACHEFILES_CULLING 2 /* T if cull engaged */ +#define CACHEFILES_STATE_CHANGED 3 /* T if state changed (poll trigger) */ +#define CACHEFILES_ONDEMAND_MODE 4 /* T if in on-demand read mode */ + char *rootdirname; /* name of cache root directory */ + char *secctx; /* LSM security context */ + char *tag; /* cache binding tag */ + refcount_t unbind_pincount;/* refcount to do daemon unbind */ + struct xarray reqs; /* xarray of pending on-demand requests */ + unsigned long req_id_next; + struct xarray ondemand_ids; /* xarray for ondemand_id allocation */ + u32 ondemand_id_next; +}; + +static inline bool cachefiles_in_ondemand_mode(struct cachefiles_cache *cache) +{ + return IS_ENABLED(CONFIG_CACHEFILES_ONDEMAND) && + test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags); +} + +struct cachefiles_req { + struct cachefiles_object *object; + struct completion done; + int error; + struct cachefiles_msg msg; +}; + +#define CACHEFILES_REQ_NEW 
XA_MARK_1 + +#include + +static inline +struct file *cachefiles_cres_file(struct netfs_cache_resources *cres) +{ + return cres->cache_priv2; +} + +static inline +struct cachefiles_object *cachefiles_cres_object(struct netfs_cache_resources *cres) +{ + return fscache_cres_cookie(cres)->cache_priv; +} + +/* + * note change of state for daemon + */ +static inline void cachefiles_state_changed(struct cachefiles_cache *cache) +{ + set_bit(CACHEFILES_STATE_CHANGED, &cache->flags); + wake_up_all(&cache->daemon_pollwq); +} + +/* + * cache.c + */ +extern int cachefiles_add_cache(struct cachefiles_cache *cache); +extern void cachefiles_withdraw_cache(struct cachefiles_cache *cache); + +enum cachefiles_has_space_for { + cachefiles_has_space_check, + cachefiles_has_space_for_write, + cachefiles_has_space_for_create, +}; +extern int cachefiles_has_space(struct cachefiles_cache *cache, + unsigned fnr, unsigned bnr, + enum cachefiles_has_space_for reason); + +/* + * daemon.c + */ +extern const struct file_operations cachefiles_daemon_fops; +extern void cachefiles_get_unbind_pincount(struct cachefiles_cache *cache); +extern void cachefiles_put_unbind_pincount(struct cachefiles_cache *cache); + +/* + * error_inject.c + */ +#ifdef CONFIG_CACHEFILES_ERROR_INJECTION +extern unsigned int cachefiles_error_injection_state; +extern int cachefiles_register_error_injection(void); +extern void cachefiles_unregister_error_injection(void); + +#else +#define cachefiles_error_injection_state 0 + +static inline int cachefiles_register_error_injection(void) +{ + return 0; +} + +static inline void cachefiles_unregister_error_injection(void) +{ +} +#endif + + +static inline int cachefiles_inject_read_error(void) +{ + return cachefiles_error_injection_state & 2 ? -EIO : 0; +} + +static inline int cachefiles_inject_write_error(void) +{ + return cachefiles_error_injection_state & 2 ? -EIO : + cachefiles_error_injection_state & 1 ? -ENOSPC : + 0; +} + +static inline int cachefiles_inject_remove_error(void) +{ + return cachefiles_error_injection_state & 2 ? 
-EIO : 0; +} + +/* + * interface.c + */ +extern const struct fscache_cache_ops cachefiles_cache_ops; +extern void cachefiles_see_object(struct cachefiles_object *object, + enum cachefiles_obj_ref_trace why); +extern struct cachefiles_object *cachefiles_grab_object(struct cachefiles_object *object, + enum cachefiles_obj_ref_trace why); +extern void cachefiles_put_object(struct cachefiles_object *object, + enum cachefiles_obj_ref_trace why); + +/* + * io.c + */ +extern bool cachefiles_begin_operation(struct netfs_cache_resources *cres, + enum fscache_want_state want_state); +extern int __cachefiles_prepare_write(struct cachefiles_object *object, + struct file *file, + loff_t *_start, size_t *_len, + bool no_space_allocated_yet); +extern int __cachefiles_write(struct cachefiles_object *object, + struct file *file, + loff_t start_pos, + struct iov_iter *iter, + netfs_io_terminated_t term_func, + void *term_func_priv); + +/* + * key.c + */ +extern bool cachefiles_cook_key(struct cachefiles_object *object); + +/* + * main.c + */ +extern struct kmem_cache *cachefiles_object_jar; + +/* + * namei.c + */ +extern void cachefiles_unmark_inode_in_use(struct cachefiles_object *object, + struct file *file); +extern int cachefiles_bury_object(struct cachefiles_cache *cache, + struct cachefiles_object *object, + struct dentry *dir, + struct dentry *rep, + enum fscache_why_object_killed why); +extern int cachefiles_delete_object(struct cachefiles_object *object, + enum fscache_why_object_killed why); +extern bool cachefiles_look_up_object(struct cachefiles_object *object); +extern struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, + struct dentry *dir, + const char *name, + bool *_is_new); +extern void cachefiles_put_directory(struct dentry *dir); + +extern int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, + char *filename); + +extern int cachefiles_check_in_use(struct cachefiles_cache *cache, + struct dentry *dir, char *filename); +extern struct file *cachefiles_create_tmpfile(struct cachefiles_object *object); +extern bool cachefiles_commit_tmpfile(struct cachefiles_cache *cache, + struct cachefiles_object *object); + +/* + * ondemand.c + */ +#ifdef CONFIG_CACHEFILES_ONDEMAND +extern ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + char __user *_buffer, size_t buflen); + +extern int cachefiles_ondemand_copen(struct cachefiles_cache *cache, + char *args); + +extern int cachefiles_ondemand_init_object(struct cachefiles_object *object); +extern void cachefiles_ondemand_clean_object(struct cachefiles_object *object); + +extern int cachefiles_ondemand_read(struct cachefiles_object *object, + loff_t pos, size_t len); + +#else +static inline ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + char __user *_buffer, size_t buflen) +{ + return -EOPNOTSUPP; +} + +static inline int cachefiles_ondemand_init_object(struct cachefiles_object *object) +{ + return 0; +} + +static inline void cachefiles_ondemand_clean_object(struct cachefiles_object *object) +{ +} + +static inline int cachefiles_ondemand_read(struct cachefiles_object *object, + loff_t pos, size_t len) +{ + return -EOPNOTSUPP; +} +#endif + +/* + * security.c + */ +extern int cachefiles_get_security_ID(struct cachefiles_cache *cache); +extern int cachefiles_determine_cache_security(struct cachefiles_cache *cache, + struct dentry *root, + const struct cred **_saved_cred); + +static inline void cachefiles_begin_secure(struct cachefiles_cache *cache, + const struct cred 
**_saved_cred) +{ + *_saved_cred = override_creds(cache->cache_cred); +} + +static inline void cachefiles_end_secure(struct cachefiles_cache *cache, + const struct cred *saved_cred) +{ + revert_creds(saved_cred); +} + +/* + * volume.c + */ +void cachefiles_acquire_volume(struct fscache_volume *volume); +void cachefiles_free_volume(struct fscache_volume *volume); +void cachefiles_withdraw_volume(struct cachefiles_volume *volume); + +/* + * xattr.c + */ +extern int cachefiles_set_object_xattr(struct cachefiles_object *object); +extern int cachefiles_check_auxdata(struct cachefiles_object *object, + struct file *file); +extern int cachefiles_remove_object_xattr(struct cachefiles_cache *cache, + struct cachefiles_object *object, + struct dentry *dentry); +extern void cachefiles_prepare_to_write(struct fscache_cookie *cookie); +extern bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume); +extern int cachefiles_check_volume_xattr(struct cachefiles_volume *volume); + +/* + * Error handling + */ +#define cachefiles_io_error(___cache, FMT, ...) \ +do { \ + pr_err("I/O Error: " FMT"\n", ##__VA_ARGS__); \ + fscache_io_error((___cache)->cache); \ + set_bit(CACHEFILES_DEAD, &(___cache)->flags); \ +} while (0) + +#define cachefiles_io_error_obj(object, FMT, ...) \ +do { \ + struct cachefiles_cache *___cache; \ + \ + ___cache = (object)->volume->cache; \ + cachefiles_io_error(___cache, FMT " [o=%08x]", ##__VA_ARGS__, \ + (object)->debug_id); \ +} while (0) + + +/* + * Debug tracing + */ +extern unsigned cachefiles_debug; +#define CACHEFILES_DEBUG_KENTER 1 +#define CACHEFILES_DEBUG_KLEAVE 2 +#define CACHEFILES_DEBUG_KDEBUG 4 + +#define dbgprintk(FMT, ...) \ + printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) + +#define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) +#define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) +#define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__) + + +#if defined(__KDEBUG) +#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__) +#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__) +#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__) + +#elif defined(CONFIG_CACHEFILES_DEBUG) +#define _enter(FMT, ...) \ +do { \ + if (cachefiles_debug & CACHEFILES_DEBUG_KENTER) \ + kenter(FMT, ##__VA_ARGS__); \ +} while (0) + +#define _leave(FMT, ...) \ +do { \ + if (cachefiles_debug & CACHEFILES_DEBUG_KLEAVE) \ + kleave(FMT, ##__VA_ARGS__); \ +} while (0) + +#define _debug(FMT, ...) \ +do { \ + if (cachefiles_debug & CACHEFILES_DEBUG_KDEBUG) \ + kdebug(FMT, ##__VA_ARGS__); \ +} while (0) + +#else +#define _enter(FMT, ...) no_printk("==> %s("FMT")", __func__, ##__VA_ARGS__) +#define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__) +#define _debug(FMT, ...) 
no_printk(FMT, ##__VA_ARGS__) +#endif + +#if 1 /* defined(__KDEBUGALL) */ + +#define ASSERT(X) \ +do { \ + if (unlikely(!(X))) { \ + pr_err("\n"); \ + pr_err("Assertion failed\n"); \ + BUG(); \ + } \ +} while (0) + +#define ASSERTCMP(X, OP, Y) \ +do { \ + if (unlikely(!((X) OP (Y)))) { \ + pr_err("\n"); \ + pr_err("Assertion failed\n"); \ + pr_err("%lx " #OP " %lx is false\n", \ + (unsigned long)(X), (unsigned long)(Y)); \ + BUG(); \ + } \ +} while (0) + +#define ASSERTIF(C, X) \ +do { \ + if (unlikely((C) && !(X))) { \ + pr_err("\n"); \ + pr_err("Assertion failed\n"); \ + BUG(); \ + } \ +} while (0) + +#define ASSERTIFCMP(C, X, OP, Y) \ +do { \ + if (unlikely((C) && !((X) OP (Y)))) { \ + pr_err("\n"); \ + pr_err("Assertion failed\n"); \ + pr_err("%lx " #OP " %lx is false\n", \ + (unsigned long)(X), (unsigned long)(Y)); \ + BUG(); \ + } \ +} while (0) + +#else + +#define ASSERT(X) do {} while (0) +#define ASSERTCMP(X, OP, Y) do {} while (0) +#define ASSERTIF(C, X) do {} while (0) +#define ASSERTIFCMP(C, X, OP, Y) do {} while (0) + +#endif diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c new file mode 100644 index 000000000..000a28f46 --- /dev/null +++ b/fs/cachefiles/io.c @@ -0,0 +1,651 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* kiocb-using read/write + * + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +struct cachefiles_kiocb { + struct kiocb iocb; + refcount_t ki_refcnt; + loff_t start; + union { + size_t skipped; + size_t len; + }; + struct cachefiles_object *object; + netfs_io_terminated_t term_func; + void *term_func_priv; + bool was_async; + unsigned int inval_counter; /* Copy of cookie->inval_counter */ + u64 b_writing; +}; + +static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki) +{ + if (refcount_dec_and_test(&ki->ki_refcnt)) { + cachefiles_put_object(ki->object, cachefiles_obj_put_ioreq); + fput(ki->iocb.ki_filp); + kfree(ki); + } +} + +/* + * Handle completion of a read from the cache. + */ +static void cachefiles_read_complete(struct kiocb *iocb, long ret) +{ + struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb); + struct inode *inode = file_inode(ki->iocb.ki_filp); + + _enter("%ld", ret); + + if (ret < 0) + trace_cachefiles_io_error(ki->object, inode, ret, + cachefiles_trace_read_error); + + if (ki->term_func) { + if (ret >= 0) { + if (ki->object->cookie->inval_counter == ki->inval_counter) + ki->skipped += ret; + else + ret = -ESTALE; + } + + ki->term_func(ki->term_func_priv, ret, ki->was_async); + } + + cachefiles_put_kiocb(ki); +} + +/* + * Initiate a read from the cache. 
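+ * The read is issued as an asynchronous DIO kiocb against the backing file.
+ * If the caller asked for holes to be sought, any gap before the data is
+ * zero-filled in the output iterator first, and completion is signalled
+ * through term_func, either directly on the submission path or from
+ * cachefiles_read_complete().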
+ */ +static int cachefiles_read(struct netfs_cache_resources *cres, + loff_t start_pos, + struct iov_iter *iter, + enum netfs_read_from_hole read_hole, + netfs_io_terminated_t term_func, + void *term_func_priv) +{ + struct cachefiles_object *object; + struct cachefiles_kiocb *ki; + struct file *file; + unsigned int old_nofs; + ssize_t ret = -ENOBUFS; + size_t len = iov_iter_count(iter), skipped = 0; + + if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ)) + goto presubmission_error; + + fscache_count_read(); + object = cachefiles_cres_object(cres); + file = cachefiles_cres_file(cres); + + _enter("%pD,%li,%llx,%zx/%llx", + file, file_inode(file)->i_ino, start_pos, len, + i_size_read(file_inode(file))); + + /* If the caller asked us to seek for data before doing the read, then + * we should do that now. If we find a gap, we fill it with zeros. + */ + if (read_hole != NETFS_READ_HOLE_IGNORE) { + loff_t off = start_pos, off2; + + off2 = cachefiles_inject_read_error(); + if (off2 == 0) + off2 = vfs_llseek(file, off, SEEK_DATA); + if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO && off2 != -ENXIO) { + skipped = 0; + ret = off2; + goto presubmission_error; + } + + if (off2 == -ENXIO || off2 >= start_pos + len) { + /* The region is beyond the EOF or there's no more data + * in the region, so clear the rest of the buffer and + * return success. + */ + ret = -ENODATA; + if (read_hole == NETFS_READ_HOLE_FAIL) + goto presubmission_error; + + iov_iter_zero(len, iter); + skipped = len; + ret = 0; + goto presubmission_error; + } + + skipped = off2 - off; + iov_iter_zero(skipped, iter); + } + + ret = -ENOMEM; + ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL); + if (!ki) + goto presubmission_error; + + refcount_set(&ki->ki_refcnt, 2); + ki->iocb.ki_filp = file; + ki->iocb.ki_pos = start_pos + skipped; + ki->iocb.ki_flags = IOCB_DIRECT; + ki->iocb.ki_ioprio = get_current_ioprio(); + ki->skipped = skipped; + ki->object = object; + ki->inval_counter = cres->inval_counter; + ki->term_func = term_func; + ki->term_func_priv = term_func_priv; + ki->was_async = true; + + if (ki->term_func) + ki->iocb.ki_complete = cachefiles_read_complete; + + get_file(ki->iocb.ki_filp); + cachefiles_grab_object(object, cachefiles_obj_get_ioreq); + + trace_cachefiles_read(object, file_inode(file), ki->iocb.ki_pos, len - skipped); + old_nofs = memalloc_nofs_save(); + ret = cachefiles_inject_read_error(); + if (ret == 0) + ret = vfs_iocb_iter_read(file, &ki->iocb, iter); + memalloc_nofs_restore(old_nofs); + switch (ret) { + case -EIOCBQUEUED: + goto in_progress; + + case -ERESTARTSYS: + case -ERESTARTNOINTR: + case -ERESTARTNOHAND: + case -ERESTART_RESTARTBLOCK: + /* There's no easy way to restart the syscall since other AIO's + * may be already running. Just fail this IO with EINTR. + */ + ret = -EINTR; + fallthrough; + default: + ki->was_async = false; + cachefiles_read_complete(&ki->iocb, ret); + if (ret > 0) + ret = 0; + break; + } + +in_progress: + cachefiles_put_kiocb(ki); + _leave(" = %zd", ret); + return ret; + +presubmission_error: + if (term_func) + term_func(term_func_priv, ret < 0 ? ret : skipped, false); + return ret; +} + +/* + * Query the occupancy of the cache in a region, returning where the next chunk + * of data starts and how long it is. 
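+ * This is implemented with a SEEK_DATA/SEEK_HOLE pair on the backing file,
+ * the resulting extent being rounded inwards to the cache block granularity
+ * so that partially-cached blocks aren't reported as present.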
+ */ +static int cachefiles_query_occupancy(struct netfs_cache_resources *cres, + loff_t start, size_t len, size_t granularity, + loff_t *_data_start, size_t *_data_len) +{ + struct cachefiles_object *object; + struct file *file; + loff_t off, off2; + + *_data_start = -1; + *_data_len = 0; + + if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ)) + return -ENOBUFS; + + object = cachefiles_cres_object(cres); + file = cachefiles_cres_file(cres); + granularity = max_t(size_t, object->volume->cache->bsize, granularity); + + _enter("%pD,%li,%llx,%zx/%llx", + file, file_inode(file)->i_ino, start, len, + i_size_read(file_inode(file))); + + off = cachefiles_inject_read_error(); + if (off == 0) + off = vfs_llseek(file, start, SEEK_DATA); + if (off == -ENXIO) + return -ENODATA; /* Beyond EOF */ + if (off < 0 && off >= (loff_t)-MAX_ERRNO) + return -ENOBUFS; /* Error. */ + if (round_up(off, granularity) >= start + len) + return -ENODATA; /* No data in range */ + + off2 = cachefiles_inject_read_error(); + if (off2 == 0) + off2 = vfs_llseek(file, off, SEEK_HOLE); + if (off2 == -ENXIO) + return -ENODATA; /* Beyond EOF */ + if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO) + return -ENOBUFS; /* Error. */ + + /* Round away partial blocks */ + off = round_up(off, granularity); + off2 = round_down(off2, granularity); + if (off2 <= off) + return -ENODATA; + + *_data_start = off; + if (off2 > start + len) + *_data_len = len; + else + *_data_len = off2 - off; + return 0; +} + +/* + * Handle completion of a write to the cache. + */ +static void cachefiles_write_complete(struct kiocb *iocb, long ret) +{ + struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb); + struct cachefiles_object *object = ki->object; + struct inode *inode = file_inode(ki->iocb.ki_filp); + + _enter("%ld", ret); + + /* Tell lockdep we inherited freeze protection from submission thread */ + __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE); + __sb_end_write(inode->i_sb, SB_FREEZE_WRITE); + + if (ret < 0) + trace_cachefiles_io_error(object, inode, ret, + cachefiles_trace_write_error); + + atomic_long_sub(ki->b_writing, &object->volume->cache->b_writing); + set_bit(FSCACHE_COOKIE_HAVE_DATA, &object->cookie->flags); + if (ki->term_func) + ki->term_func(ki->term_func_priv, ret, ki->was_async); + cachefiles_put_kiocb(ki); +} + +/* + * Initiate a write to the cache. 
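+ * As with reads, the write is issued as an asynchronous DIO kiocb.  Freeze
+ * protection on the backing superblock is taken here and handed over to the
+ * completion handler, and the number of cache blocks covered by the write
+ * is tracked in cache->b_writing until the write completes.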
+ */ +int __cachefiles_write(struct cachefiles_object *object, + struct file *file, + loff_t start_pos, + struct iov_iter *iter, + netfs_io_terminated_t term_func, + void *term_func_priv) +{ + struct cachefiles_cache *cache; + struct cachefiles_kiocb *ki; + struct inode *inode; + unsigned int old_nofs; + ssize_t ret; + size_t len = iov_iter_count(iter); + + fscache_count_write(); + cache = object->volume->cache; + + _enter("%pD,%li,%llx,%zx/%llx", + file, file_inode(file)->i_ino, start_pos, len, + i_size_read(file_inode(file))); + + ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL); + if (!ki) { + if (term_func) + term_func(term_func_priv, -ENOMEM, false); + return -ENOMEM; + } + + refcount_set(&ki->ki_refcnt, 2); + ki->iocb.ki_filp = file; + ki->iocb.ki_pos = start_pos; + ki->iocb.ki_flags = IOCB_DIRECT | IOCB_WRITE; + ki->iocb.ki_ioprio = get_current_ioprio(); + ki->object = object; + ki->start = start_pos; + ki->len = len; + ki->term_func = term_func; + ki->term_func_priv = term_func_priv; + ki->was_async = true; + ki->b_writing = (len + (1 << cache->bshift) - 1) >> cache->bshift; + + if (ki->term_func) + ki->iocb.ki_complete = cachefiles_write_complete; + atomic_long_add(ki->b_writing, &cache->b_writing); + + /* Open-code file_start_write here to grab freeze protection, which + * will be released by another thread in aio_complete_rw(). Fool + * lockdep by telling it the lock got released so that it doesn't + * complain about the held lock when we return to userspace. + */ + inode = file_inode(file); + __sb_start_write(inode->i_sb, SB_FREEZE_WRITE); + __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE); + + get_file(ki->iocb.ki_filp); + cachefiles_grab_object(object, cachefiles_obj_get_ioreq); + + trace_cachefiles_write(object, inode, ki->iocb.ki_pos, len); + old_nofs = memalloc_nofs_save(); + ret = cachefiles_inject_write_error(); + if (ret == 0) + ret = vfs_iocb_iter_write(file, &ki->iocb, iter); + memalloc_nofs_restore(old_nofs); + switch (ret) { + case -EIOCBQUEUED: + goto in_progress; + + case -ERESTARTSYS: + case -ERESTARTNOINTR: + case -ERESTARTNOHAND: + case -ERESTART_RESTARTBLOCK: + /* There's no easy way to restart the syscall since other AIO's + * may be already running. Just fail this IO with EINTR. + */ + ret = -EINTR; + fallthrough; + default: + ki->was_async = false; + cachefiles_write_complete(&ki->iocb, ret); + if (ret > 0) + ret = 0; + break; + } + +in_progress: + cachefiles_put_kiocb(ki); + _leave(" = %zd", ret); + return ret; +} + +static int cachefiles_write(struct netfs_cache_resources *cres, + loff_t start_pos, + struct iov_iter *iter, + netfs_io_terminated_t term_func, + void *term_func_priv) +{ + if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) { + if (term_func) + term_func(term_func_priv, -ENOBUFS, false); + return -ENOBUFS; + } + + return __cachefiles_write(cachefiles_cres_object(cres), + cachefiles_cres_file(cres), + start_pos, iter, + term_func, term_func_priv); +} + +/* + * Prepare a read operation, shortening it to a cached/uncached + * boundary as appropriate. 
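+ * The backing file is probed with SEEK_DATA/SEEK_HOLE to decide the source:
+ * a region beyond the EOF is simply filled with zeroes, a hole means a
+ * download from the server (marked for copying into the cache) and a
+ * populated region is read from the cache, with the subrequest shortened to
+ * the data/hole boundary in the latter two cases.  In on-demand mode a hole
+ * instead triggers a READ request to the daemon, after which the probe is
+ * retried.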
+ */ +static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest *subreq, + loff_t i_size) +{ + enum cachefiles_prepare_read_trace why; + struct netfs_io_request *rreq = subreq->rreq; + struct netfs_cache_resources *cres = &rreq->cache_resources; + struct cachefiles_object *object; + struct cachefiles_cache *cache; + struct fscache_cookie *cookie = fscache_cres_cookie(cres); + const struct cred *saved_cred; + struct file *file = cachefiles_cres_file(cres); + enum netfs_io_source ret = NETFS_DOWNLOAD_FROM_SERVER; + loff_t off, to; + ino_t ino = file ? file_inode(file)->i_ino : 0; + int rc; + + _enter("%zx @%llx/%llx", subreq->len, subreq->start, i_size); + + if (subreq->start >= i_size) { + ret = NETFS_FILL_WITH_ZEROES; + why = cachefiles_trace_read_after_eof; + goto out_no_object; + } + + if (test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags)) { + __set_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); + why = cachefiles_trace_read_no_data; + if (!test_bit(NETFS_SREQ_ONDEMAND, &subreq->flags)) + goto out_no_object; + } + + /* The object and the file may be being created in the background. */ + if (!file) { + why = cachefiles_trace_read_no_file; + if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ)) + goto out_no_object; + file = cachefiles_cres_file(cres); + if (!file) + goto out_no_object; + ino = file_inode(file)->i_ino; + } + + object = cachefiles_cres_object(cres); + cache = object->volume->cache; + cachefiles_begin_secure(cache, &saved_cred); +retry: + off = cachefiles_inject_read_error(); + if (off == 0) + off = vfs_llseek(file, subreq->start, SEEK_DATA); + if (off < 0 && off >= (loff_t)-MAX_ERRNO) { + if (off == (loff_t)-ENXIO) { + why = cachefiles_trace_read_seek_nxio; + goto download_and_store; + } + trace_cachefiles_io_error(object, file_inode(file), off, + cachefiles_trace_seek_error); + why = cachefiles_trace_read_seek_error; + goto out; + } + + if (off >= subreq->start + subreq->len) { + why = cachefiles_trace_read_found_hole; + goto download_and_store; + } + + if (off > subreq->start) { + off = round_up(off, cache->bsize); + subreq->len = off - subreq->start; + why = cachefiles_trace_read_found_part; + goto download_and_store; + } + + to = cachefiles_inject_read_error(); + if (to == 0) + to = vfs_llseek(file, subreq->start, SEEK_HOLE); + if (to < 0 && to >= (loff_t)-MAX_ERRNO) { + trace_cachefiles_io_error(object, file_inode(file), to, + cachefiles_trace_seek_error); + why = cachefiles_trace_read_seek_error; + goto out; + } + + if (to < subreq->start + subreq->len) { + if (subreq->start + subreq->len >= i_size) + to = round_up(to, cache->bsize); + else + to = round_down(to, cache->bsize); + subreq->len = to - subreq->start; + } + + why = cachefiles_trace_read_have_data; + ret = NETFS_READ_FROM_CACHE; + goto out; + +download_and_store: + __set_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); + if (test_bit(NETFS_SREQ_ONDEMAND, &subreq->flags)) { + rc = cachefiles_ondemand_read(object, subreq->start, + subreq->len); + if (!rc) { + __clear_bit(NETFS_SREQ_ONDEMAND, &subreq->flags); + goto retry; + } + ret = NETFS_INVALID_READ; + } +out: + cachefiles_end_secure(cache, saved_cred); +out_no_object: + trace_cachefiles_prep_read(subreq, ret, why, ino); + return ret; +} + +/* + * Prepare for a write to occur. 
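+ * The region is first expanded to DIO alignment: on a machine with 4KiB
+ * pages, for example, a 2000-byte write at file position 3000 becomes an
+ * 8192-byte write at position 0.  If no space has been allocated yet the
+ * disk space check is performed directly; otherwise the backing file is
+ * probed and the write allowed if the region is already fully allocated or
+ * if there's room to overwrite it, whilst a partially-allocated region
+ * that can't be accommodated is punched out instead.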
+ */ +int __cachefiles_prepare_write(struct cachefiles_object *object, + struct file *file, + loff_t *_start, size_t *_len, + bool no_space_allocated_yet) +{ + struct cachefiles_cache *cache = object->volume->cache; + loff_t start = *_start, pos; + size_t len = *_len, down; + int ret; + + /* Round to DIO size */ + down = start - round_down(start, PAGE_SIZE); + *_start = start - down; + *_len = round_up(down + len, PAGE_SIZE); + + /* We need to work out whether there's sufficient disk space to perform + * the write - but we can skip that check if we have space already + * allocated. + */ + if (no_space_allocated_yet) + goto check_space; + + pos = cachefiles_inject_read_error(); + if (pos == 0) + pos = vfs_llseek(file, *_start, SEEK_DATA); + if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) { + if (pos == -ENXIO) + goto check_space; /* Unallocated tail */ + trace_cachefiles_io_error(object, file_inode(file), pos, + cachefiles_trace_seek_error); + return pos; + } + if ((u64)pos >= (u64)*_start + *_len) + goto check_space; /* Unallocated region */ + + /* We have a block that's at least partially filled - if we're low on + * space, we need to see if it's fully allocated. If it's not, we may + * want to cull it. + */ + if (cachefiles_has_space(cache, 0, *_len / PAGE_SIZE, + cachefiles_has_space_check) == 0) + return 0; /* Enough space to simply overwrite the whole block */ + + pos = cachefiles_inject_read_error(); + if (pos == 0) + pos = vfs_llseek(file, *_start, SEEK_HOLE); + if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) { + trace_cachefiles_io_error(object, file_inode(file), pos, + cachefiles_trace_seek_error); + return pos; + } + if ((u64)pos >= (u64)*_start + *_len) + return 0; /* Fully allocated */ + + /* Partially allocated, but insufficient space: cull. */ + fscache_count_no_write_space(); + ret = cachefiles_inject_remove_error(); + if (ret == 0) + ret = vfs_fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + *_start, *_len); + if (ret < 0) { + trace_cachefiles_io_error(object, file_inode(file), ret, + cachefiles_trace_fallocate_error); + cachefiles_io_error_obj(object, + "CacheFiles: fallocate failed (%d)\n", ret); + ret = -EIO; + } + + return ret; + +check_space: + return cachefiles_has_space(cache, 0, *_len / PAGE_SIZE, + cachefiles_has_space_for_write); +} + +static int cachefiles_prepare_write(struct netfs_cache_resources *cres, + loff_t *_start, size_t *_len, loff_t i_size, + bool no_space_allocated_yet) +{ + struct cachefiles_object *object = cachefiles_cres_object(cres); + struct cachefiles_cache *cache = object->volume->cache; + const struct cred *saved_cred; + int ret; + + if (!cachefiles_cres_file(cres)) { + if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) + return -ENOBUFS; + if (!cachefiles_cres_file(cres)) + return -ENOBUFS; + } + + cachefiles_begin_secure(cache, &saved_cred); + ret = __cachefiles_prepare_write(object, cachefiles_cres_file(cres), + _start, _len, + no_space_allocated_yet); + cachefiles_end_secure(cache, saved_cred); + return ret; +} + +/* + * Clean up an operation. 
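+ * The ref on the backing file, if any, is dropped and the cookie unpinned
+ * now that the I/O access on it has ended.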
+ */ +static void cachefiles_end_operation(struct netfs_cache_resources *cres) +{ + struct file *file = cachefiles_cres_file(cres); + + if (file) + fput(file); + fscache_end_cookie_access(fscache_cres_cookie(cres), fscache_access_io_end); +} + +static const struct netfs_cache_ops cachefiles_netfs_cache_ops = { + .end_operation = cachefiles_end_operation, + .read = cachefiles_read, + .write = cachefiles_write, + .prepare_read = cachefiles_prepare_read, + .prepare_write = cachefiles_prepare_write, + .query_occupancy = cachefiles_query_occupancy, +}; + +/* + * Open the cache file when beginning a cache operation. + */ +bool cachefiles_begin_operation(struct netfs_cache_resources *cres, + enum fscache_want_state want_state) +{ + struct cachefiles_object *object = cachefiles_cres_object(cres); + + if (!cachefiles_cres_file(cres)) { + cres->ops = &cachefiles_netfs_cache_ops; + if (object->file) { + spin_lock(&object->lock); + if (!cres->cache_priv2 && object->file) + cres->cache_priv2 = get_file(object->file); + spin_unlock(&object->lock); + } + } + + if (!cachefiles_cres_file(cres) && want_state != FSCACHE_WANT_PARAMS) { + pr_err("failed to get cres->file\n"); + return false; + } + + return true; +} diff --git a/fs/cachefiles/key.c b/fs/cachefiles/key.c new file mode 100644 index 000000000..bf935e25b --- /dev/null +++ b/fs/cachefiles/key.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Key to pathname encoder + * + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include +#include "internal.h" + +static const char cachefiles_charmap[64] = + "0123456789" /* 0 - 9 */ + "abcdefghijklmnopqrstuvwxyz" /* 10 - 35 */ + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" /* 36 - 61 */ + "_-" /* 62 - 63 */ + ; + +static const char cachefiles_filecharmap[256] = { + /* we skip space and tab and control chars */ + [33 ... 46] = 1, /* '!' -> '.' */ + /* we skip '/' as it's significant to pathwalk */ + [48 ... 127] = 1, /* '0' -> '~' */ +}; + +static inline unsigned int how_many_hex_digits(unsigned int x) +{ + return x ? round_up(ilog2(x) + 1, 4) / 4 : 0; +} + +/* + * turn the raw key into something cooked + * - the key may be up to NAME_MAX in length (including the length word) + * - "base64" encode the strange keys, mapping 3 bytes of raw to four of + * cooked + * - need to cut the cooked key into 252 char lengths (189 raw bytes) + */ +bool cachefiles_cook_key(struct cachefiles_object *object) +{ + const u8 *key = fscache_get_key(object->cookie), *kend; + unsigned char ch; + unsigned int acc, i, n, nle, nbe, keylen = object->cookie->key_len; + unsigned int b64len, len, print, pad; + char *name, sep; + + _enter(",%u,%*phN", keylen, keylen, key); + + BUG_ON(keylen > NAME_MAX - 3); + + print = 1; + for (i = 0; i < keylen; i++) { + ch = key[i]; + print &= cachefiles_filecharmap[ch]; + } + + /* If the path is usable ASCII, then we render it directly */ + if (print) { + len = 1 + keylen; + name = kmalloc(len + 1, GFP_KERNEL); + if (!name) + return false; + + name[0] = 'D'; /* Data object type, string encoding */ + memcpy(name + 1, key, keylen); + goto success; + } + + /* See if it makes sense to encode it as "hex,hex,hex" for each 32-bit + * chunk. We rely on the key having been padded out to a whole number + * of 32-bit words. 
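+ * For instance, the four-byte key 01 02 03 04 would cost eight characters
+ * rendered as big-endian hex ("S1020304") against ten rendered as base64,
+ * so the hex form would be preferred.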
+ */ + n = round_up(keylen, 4); + nbe = nle = 0; + for (i = 0; i < n; i += 4) { + u32 be = be32_to_cpu(*(__be32 *)(key + i)); + u32 le = le32_to_cpu(*(__le32 *)(key + i)); + + nbe += 1 + how_many_hex_digits(be); + nle += 1 + how_many_hex_digits(le); + } + + b64len = DIV_ROUND_UP(keylen, 3); + pad = b64len * 3 - keylen; + b64len = 2 + b64len * 4; /* Length if we base64-encode it */ + _debug("len=%u nbe=%u nle=%u b64=%u", keylen, nbe, nle, b64len); + if (nbe < b64len || nle < b64len) { + unsigned int nlen = min(nbe, nle) + 1; + name = kmalloc(nlen, GFP_KERNEL); + if (!name) + return false; + sep = (nbe <= nle) ? 'S' : 'T'; /* Encoding indicator */ + len = 0; + for (i = 0; i < n; i += 4) { + u32 x; + if (nbe <= nle) + x = be32_to_cpu(*(__be32 *)(key + i)); + else + x = le32_to_cpu(*(__le32 *)(key + i)); + name[len++] = sep; + if (x != 0) + len += snprintf(name + len, nlen - len, "%x", x); + sep = ','; + } + goto success; + } + + /* We need to base64-encode it */ + name = kmalloc(b64len + 1, GFP_KERNEL); + if (!name) + return false; + + name[0] = 'E'; + name[1] = '0' + pad; + len = 2; + kend = key + keylen; + do { + acc = *key++; + if (key < kend) { + acc |= *key++ << 8; + if (key < kend) + acc |= *key++ << 16; + } + + name[len++] = cachefiles_charmap[acc & 63]; + acc >>= 6; + name[len++] = cachefiles_charmap[acc & 63]; + acc >>= 6; + name[len++] = cachefiles_charmap[acc & 63]; + acc >>= 6; + name[len++] = cachefiles_charmap[acc & 63]; + } while (key < kend); + +success: + name[len] = 0; + object->d_name = name; + object->d_name_len = len; + _leave(" = %s", object->d_name); + return true; +} diff --git a/fs/cachefiles/main.c b/fs/cachefiles/main.c new file mode 100644 index 000000000..3f369c6f8 --- /dev/null +++ b/fs/cachefiles/main.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Network filesystem caching backend to use cache files on a premounted + * filesystem + * + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. 
+ * Written by David Howells (dhowells@redhat.com) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define CREATE_TRACE_POINTS +#include "internal.h" + +unsigned cachefiles_debug; +module_param_named(debug, cachefiles_debug, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(cachefiles_debug, "CacheFiles debugging mask"); + +MODULE_DESCRIPTION("Mounted-filesystem based cache"); +MODULE_AUTHOR("Red Hat, Inc."); +MODULE_LICENSE("GPL"); + +struct kmem_cache *cachefiles_object_jar; + +static struct miscdevice cachefiles_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "cachefiles", + .fops = &cachefiles_daemon_fops, +}; + +/* + * initialise the fs caching module + */ +static int __init cachefiles_init(void) +{ + int ret; + + ret = cachefiles_register_error_injection(); + if (ret < 0) + goto error_einj; + ret = misc_register(&cachefiles_dev); + if (ret < 0) + goto error_dev; + + /* create an object jar */ + ret = -ENOMEM; + cachefiles_object_jar = + kmem_cache_create("cachefiles_object_jar", + sizeof(struct cachefiles_object), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!cachefiles_object_jar) { + pr_notice("Failed to allocate an object jar\n"); + goto error_object_jar; + } + + pr_info("Loaded\n"); + return 0; + +error_object_jar: + misc_deregister(&cachefiles_dev); +error_dev: + cachefiles_unregister_error_injection(); +error_einj: + pr_err("failed to register: %d\n", ret); + return ret; +} + +fs_initcall(cachefiles_init); + +/* + * clean up on module removal + */ +static void __exit cachefiles_exit(void) +{ + pr_info("Unloading\n"); + + kmem_cache_destroy(cachefiles_object_jar); + misc_deregister(&cachefiles_dev); + cachefiles_unregister_error_injection(); +} + +module_exit(cachefiles_exit); diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c new file mode 100644 index 000000000..50b2ee163 --- /dev/null +++ b/fs/cachefiles/namei.c @@ -0,0 +1,862 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* CacheFiles path walking and related routines + * + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include +#include +#include "internal.h" + +/* + * Mark the backing file as being a cache file if it's not already in use. The + * mark tells the culling request command that it's not allowed to cull the + * file or directory. The caller must hold the inode lock. + */ +static bool __cachefiles_mark_inode_in_use(struct cachefiles_object *object, + struct inode *inode) +{ + bool can_use = false; + + if (!(inode->i_flags & S_KERNEL_FILE)) { + inode->i_flags |= S_KERNEL_FILE; + trace_cachefiles_mark_active(object, inode); + can_use = true; + } else { + trace_cachefiles_mark_failed(object, inode); + } + + return can_use; +} + +static bool cachefiles_mark_inode_in_use(struct cachefiles_object *object, + struct inode *inode) +{ + bool can_use; + + inode_lock(inode); + can_use = __cachefiles_mark_inode_in_use(object, inode); + inode_unlock(inode); + return can_use; +} + +/* + * Unmark a backing inode. The caller must hold the inode lock. 
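+ * Clearing S_KERNEL_FILE makes the inode eligible for culling again.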
+ */ +static void __cachefiles_unmark_inode_in_use(struct cachefiles_object *object, + struct inode *inode) +{ + inode->i_flags &= ~S_KERNEL_FILE; + trace_cachefiles_mark_inactive(object, inode); +} + +static void cachefiles_do_unmark_inode_in_use(struct cachefiles_object *object, + struct inode *inode) +{ + inode_lock(inode); + __cachefiles_unmark_inode_in_use(object, inode); + inode_unlock(inode); +} + +/* + * Unmark a backing inode and tell cachefilesd that there's something that can + * be culled. + */ +void cachefiles_unmark_inode_in_use(struct cachefiles_object *object, + struct file *file) +{ + struct cachefiles_cache *cache = object->volume->cache; + struct inode *inode = file_inode(file); + + cachefiles_do_unmark_inode_in_use(object, inode); + + if (!test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags)) { + atomic_long_add(inode->i_blocks, &cache->b_released); + if (atomic_inc_return(&cache->f_released)) + cachefiles_state_changed(cache); + } +} + +/* + * get a subdirectory + */ +struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, + struct dentry *dir, + const char *dirname, + bool *_is_new) +{ + struct dentry *subdir; + struct path path; + int ret; + + _enter(",,%s", dirname); + + /* search the current directory for the element name */ + inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); + +retry: + ret = cachefiles_inject_read_error(); + if (ret == 0) + subdir = lookup_one_len(dirname, dir, strlen(dirname)); + else + subdir = ERR_PTR(ret); + trace_cachefiles_lookup(NULL, dir, subdir); + if (IS_ERR(subdir)) { + trace_cachefiles_vfs_error(NULL, d_backing_inode(dir), + PTR_ERR(subdir), + cachefiles_trace_lookup_error); + if (PTR_ERR(subdir) == -ENOMEM) + goto nomem_d_alloc; + goto lookup_error; + } + + _debug("subdir -> %pd %s", + subdir, d_backing_inode(subdir) ? 
"positive" : "negative"); + + /* we need to create the subdir if it doesn't exist yet */ + if (d_is_negative(subdir)) { + ret = cachefiles_has_space(cache, 1, 0, + cachefiles_has_space_for_create); + if (ret < 0) + goto mkdir_error; + + _debug("attempt mkdir"); + + path.mnt = cache->mnt; + path.dentry = dir; + ret = security_path_mkdir(&path, subdir, 0700); + if (ret < 0) + goto mkdir_error; + ret = cachefiles_inject_write_error(); + if (ret == 0) + ret = vfs_mkdir(&init_user_ns, d_inode(dir), subdir, 0700); + if (ret < 0) { + trace_cachefiles_vfs_error(NULL, d_inode(dir), ret, + cachefiles_trace_mkdir_error); + goto mkdir_error; + } + trace_cachefiles_mkdir(dir, subdir); + + if (unlikely(d_unhashed(subdir))) { + cachefiles_put_directory(subdir); + goto retry; + } + ASSERT(d_backing_inode(subdir)); + + _debug("mkdir -> %pd{ino=%lu}", + subdir, d_backing_inode(subdir)->i_ino); + if (_is_new) + *_is_new = true; + } + + /* Tell rmdir() it's not allowed to delete the subdir */ + inode_lock(d_inode(subdir)); + inode_unlock(d_inode(dir)); + + if (!__cachefiles_mark_inode_in_use(NULL, d_inode(subdir))) { + pr_notice("cachefiles: Inode already in use: %pd (B=%lx)\n", + subdir, d_inode(subdir)->i_ino); + goto mark_error; + } + + inode_unlock(d_inode(subdir)); + + /* we need to make sure the subdir is a directory */ + ASSERT(d_backing_inode(subdir)); + + if (!d_can_lookup(subdir)) { + pr_err("%s is not a directory\n", dirname); + ret = -EIO; + goto check_error; + } + + ret = -EPERM; + if (!(d_backing_inode(subdir)->i_opflags & IOP_XATTR) || + !d_backing_inode(subdir)->i_op->lookup || + !d_backing_inode(subdir)->i_op->mkdir || + !d_backing_inode(subdir)->i_op->rename || + !d_backing_inode(subdir)->i_op->rmdir || + !d_backing_inode(subdir)->i_op->unlink) + goto check_error; + + _leave(" = [%lu]", d_backing_inode(subdir)->i_ino); + return subdir; + +check_error: + cachefiles_put_directory(subdir); + _leave(" = %d [check]", ret); + return ERR_PTR(ret); + +mark_error: + inode_unlock(d_inode(subdir)); + dput(subdir); + return ERR_PTR(-EBUSY); + +mkdir_error: + inode_unlock(d_inode(dir)); + dput(subdir); + pr_err("mkdir %s failed with error %d\n", dirname, ret); + return ERR_PTR(ret); + +lookup_error: + inode_unlock(d_inode(dir)); + ret = PTR_ERR(subdir); + pr_err("Lookup %s failed with error %d\n", dirname, ret); + return ERR_PTR(ret); + +nomem_d_alloc: + inode_unlock(d_inode(dir)); + _leave(" = -ENOMEM"); + return ERR_PTR(-ENOMEM); +} + +/* + * Put a subdirectory. + */ +void cachefiles_put_directory(struct dentry *dir) +{ + if (dir) { + cachefiles_do_unmark_inode_in_use(NULL, d_inode(dir)); + dput(dir); + } +} + +/* + * Remove a regular file from the cache. 
+ */ +static int cachefiles_unlink(struct cachefiles_cache *cache, + struct cachefiles_object *object, + struct dentry *dir, struct dentry *dentry, + enum fscache_why_object_killed why) +{ + struct path path = { + .mnt = cache->mnt, + .dentry = dir, + }; + int ret; + + trace_cachefiles_unlink(object, d_inode(dentry)->i_ino, why); + ret = security_path_unlink(&path, dentry); + if (ret < 0) { + cachefiles_io_error(cache, "Unlink security error"); + return ret; + } + + ret = cachefiles_inject_remove_error(); + if (ret == 0) { + ret = vfs_unlink(&init_user_ns, d_backing_inode(dir), dentry, NULL); + if (ret == -EIO) + cachefiles_io_error(cache, "Unlink failed"); + } + if (ret != 0) + trace_cachefiles_vfs_error(object, d_backing_inode(dir), ret, + cachefiles_trace_unlink_error); + return ret; +} + +/* + * Delete an object representation from the cache + * - File backed objects are unlinked + * - Directory backed objects are stuffed into the graveyard for userspace to + * delete + */ +int cachefiles_bury_object(struct cachefiles_cache *cache, + struct cachefiles_object *object, + struct dentry *dir, + struct dentry *rep, + enum fscache_why_object_killed why) +{ + struct dentry *grave, *trap; + struct path path, path_to_graveyard; + char nbuffer[8 + 8 + 1]; + int ret; + + _enter(",'%pd','%pd'", dir, rep); + + if (rep->d_parent != dir) { + inode_unlock(d_inode(dir)); + _leave(" = -ESTALE"); + return -ESTALE; + } + + /* non-directories can just be unlinked */ + if (!d_is_dir(rep)) { + dget(rep); /* Stop the dentry being negated if it's only pinned + * by a file struct. + */ + ret = cachefiles_unlink(cache, object, dir, rep, why); + dput(rep); + + inode_unlock(d_inode(dir)); + _leave(" = %d", ret); + return ret; + } + + /* directories have to be moved to the graveyard */ + _debug("move stale object to graveyard"); + inode_unlock(d_inode(dir)); + +try_again: + /* first step is to make up a grave dentry in the graveyard */ + sprintf(nbuffer, "%08x%08x", + (uint32_t) ktime_get_real_seconds(), + (uint32_t) atomic_inc_return(&cache->gravecounter)); + + /* do the multiway lock magic */ + trap = lock_rename(cache->graveyard, dir); + + /* do some checks before getting the grave dentry */ + if (rep->d_parent != dir || IS_DEADDIR(d_inode(rep))) { + /* the entry was probably culled when we dropped the parent dir + * lock */ + unlock_rename(cache->graveyard, dir); + _leave(" = 0 [culled?]"); + return 0; + } + + if (!d_can_lookup(cache->graveyard)) { + unlock_rename(cache->graveyard, dir); + cachefiles_io_error(cache, "Graveyard no longer a directory"); + return -EIO; + } + + if (trap == rep) { + unlock_rename(cache->graveyard, dir); + cachefiles_io_error(cache, "May not make directory loop"); + return -EIO; + } + + if (d_mountpoint(rep)) { + unlock_rename(cache->graveyard, dir); + cachefiles_io_error(cache, "Mountpoint in cache"); + return -EIO; + } + + grave = lookup_one_len(nbuffer, cache->graveyard, strlen(nbuffer)); + if (IS_ERR(grave)) { + unlock_rename(cache->graveyard, dir); + trace_cachefiles_vfs_error(object, d_inode(cache->graveyard), + PTR_ERR(grave), + cachefiles_trace_lookup_error); + + if (PTR_ERR(grave) == -ENOMEM) { + _leave(" = -ENOMEM"); + return -ENOMEM; + } + + cachefiles_io_error(cache, "Lookup error %ld", PTR_ERR(grave)); + return -EIO; + } + + if (d_is_positive(grave)) { + unlock_rename(cache->graveyard, dir); + dput(grave); + grave = NULL; + cond_resched(); + goto try_again; + } + + if (d_mountpoint(grave)) { + unlock_rename(cache->graveyard, dir); + dput(grave); + 
cachefiles_io_error(cache, "Mountpoint in graveyard"); + return -EIO; + } + + /* target should not be an ancestor of source */ + if (trap == grave) { + unlock_rename(cache->graveyard, dir); + dput(grave); + cachefiles_io_error(cache, "May not make directory loop"); + return -EIO; + } + + /* attempt the rename */ + path.mnt = cache->mnt; + path.dentry = dir; + path_to_graveyard.mnt = cache->mnt; + path_to_graveyard.dentry = cache->graveyard; + ret = security_path_rename(&path, rep, &path_to_graveyard, grave, 0); + if (ret < 0) { + cachefiles_io_error(cache, "Rename security error %d", ret); + } else { + struct renamedata rd = { + .old_mnt_userns = &init_user_ns, + .old_dir = d_inode(dir), + .old_dentry = rep, + .new_mnt_userns = &init_user_ns, + .new_dir = d_inode(cache->graveyard), + .new_dentry = grave, + }; + trace_cachefiles_rename(object, d_inode(rep)->i_ino, why); + ret = cachefiles_inject_read_error(); + if (ret == 0) + ret = vfs_rename(&rd); + if (ret != 0) + trace_cachefiles_vfs_error(object, d_inode(dir), ret, + cachefiles_trace_rename_error); + if (ret != 0 && ret != -ENOMEM) + cachefiles_io_error(cache, + "Rename failed with error %d", ret); + } + + __cachefiles_unmark_inode_in_use(object, d_inode(rep)); + unlock_rename(cache->graveyard, dir); + dput(grave); + _leave(" = 0"); + return 0; +} + +/* + * Delete a cache file. + */ +int cachefiles_delete_object(struct cachefiles_object *object, + enum fscache_why_object_killed why) +{ + struct cachefiles_volume *volume = object->volume; + struct dentry *dentry = object->file->f_path.dentry; + struct dentry *fan = volume->fanout[(u8)object->cookie->key_hash]; + int ret; + + _enter(",OBJ%x{%pD}", object->debug_id, object->file); + + /* Stop the dentry being negated if it's only pinned by a file struct. */ + dget(dentry); + + inode_lock_nested(d_backing_inode(fan), I_MUTEX_PARENT); + ret = cachefiles_unlink(volume->cache, object, fan, dentry, why); + inode_unlock(d_backing_inode(fan)); + dput(dentry); + return ret; +} + +/* + * Create a temporary file and leave it unattached and un-xattr'd until the + * time comes to discard the object from memory. 
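+ * The tmpfile is opened O_DIRECT in the appropriate fanout directory and,
+ * if the netfs declared an object size, is expanded to that size (rounded
+ * up to the DIO block size) before being returned.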
+ */ +struct file *cachefiles_create_tmpfile(struct cachefiles_object *object) +{ + struct cachefiles_volume *volume = object->volume; + struct cachefiles_cache *cache = volume->cache; + const struct cred *saved_cred; + struct dentry *fan = volume->fanout[(u8)object->cookie->key_hash]; + struct file *file; + const struct path parentpath = { .mnt = cache->mnt, .dentry = fan }; + uint64_t ni_size; + long ret; + + + cachefiles_begin_secure(cache, &saved_cred); + + ret = cachefiles_inject_write_error(); + if (ret == 0) { + file = vfs_tmpfile_open(&init_user_ns, &parentpath, S_IFREG, + O_RDWR | O_LARGEFILE | O_DIRECT, + cache->cache_cred); + ret = PTR_ERR_OR_ZERO(file); + } + if (ret) { + trace_cachefiles_vfs_error(object, d_inode(fan), ret, + cachefiles_trace_tmpfile_error); + if (ret == -EIO) + cachefiles_io_error_obj(object, "Failed to create tmpfile"); + goto err; + } + + trace_cachefiles_tmpfile(object, file_inode(file)); + + /* This is a newly created file with no other possible user */ + if (!cachefiles_mark_inode_in_use(object, file_inode(file))) + WARN_ON(1); + + ret = cachefiles_ondemand_init_object(object); + if (ret < 0) + goto err_unuse; + + ni_size = object->cookie->object_size; + ni_size = round_up(ni_size, CACHEFILES_DIO_BLOCK_SIZE); + + if (ni_size > 0) { + trace_cachefiles_trunc(object, file_inode(file), 0, ni_size, + cachefiles_trunc_expand_tmpfile); + ret = cachefiles_inject_write_error(); + if (ret == 0) + ret = vfs_truncate(&file->f_path, ni_size); + if (ret < 0) { + trace_cachefiles_vfs_error( + object, file_inode(file), ret, + cachefiles_trace_trunc_error); + goto err_unuse; + } + } + + ret = -EINVAL; + if (unlikely(!file->f_op->read_iter) || + unlikely(!file->f_op->write_iter)) { + fput(file); + pr_notice("Cache does not support read_iter and write_iter\n"); + goto err_unuse; + } +out: + cachefiles_end_secure(cache, saved_cred); + return file; + +err_unuse: + cachefiles_do_unmark_inode_in_use(object, file_inode(file)); + fput(file); +err: + file = ERR_PTR(ret); + goto out; +} + +/* + * Create a new file. + */ +static bool cachefiles_create_file(struct cachefiles_object *object) +{ + struct file *file; + int ret; + + ret = cachefiles_has_space(object->volume->cache, 1, 0, + cachefiles_has_space_for_create); + if (ret < 0) + return false; + + file = cachefiles_create_tmpfile(object); + if (IS_ERR(file)) + return false; + + set_bit(FSCACHE_COOKIE_NEEDS_UPDATE, &object->cookie->flags); + set_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags); + _debug("create -> %pD{ino=%lu}", file, file_inode(file)->i_ino); + object->file = file; + return true; +} + +/* + * Open an existing file, checking its attributes and replacing it if it is + * stale. + */ +static bool cachefiles_open_file(struct cachefiles_object *object, + struct dentry *dentry) +{ + struct cachefiles_cache *cache = object->volume->cache; + struct file *file; + struct path path; + int ret; + + _enter("%pd", dentry); + + if (!cachefiles_mark_inode_in_use(object, d_inode(dentry))) { + pr_notice("cachefiles: Inode already in use: %pd (B=%lx)\n", + dentry, d_inode(dentry)->i_ino); + return false; + } + + /* We need to open a file interface onto a data file now as we can't do + * it on demand because writeback called from do_exit() sees + * current->fs == NULL - which breaks d_path() called from ext4 open. 
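+ * Opening the file up front with an explicitly supplied path sidesteps
+ * that problem for all subsequent kiocb-based I/O on the object.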
+ */ + path.mnt = cache->mnt; + path.dentry = dentry; + file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT, + d_backing_inode(dentry), cache->cache_cred); + if (IS_ERR(file)) { + trace_cachefiles_vfs_error(object, d_backing_inode(dentry), + PTR_ERR(file), + cachefiles_trace_open_error); + goto error; + } + + if (unlikely(!file->f_op->read_iter) || + unlikely(!file->f_op->write_iter)) { + pr_notice("Cache does not support read_iter and write_iter\n"); + goto error_fput; + } + _debug("file -> %pd positive", dentry); + + ret = cachefiles_ondemand_init_object(object); + if (ret < 0) + goto error_fput; + + ret = cachefiles_check_auxdata(object, file); + if (ret < 0) + goto check_failed; + + clear_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &object->cookie->flags); + + object->file = file; + + /* Always update the atime on an object we've just looked up (this is + * used to keep track of culling, and atimes are only updated by read, + * write and readdir but not lookup or open). + */ + touch_atime(&file->f_path); + dput(dentry); + return true; + +check_failed: + fscache_cookie_lookup_negative(object->cookie); + cachefiles_unmark_inode_in_use(object, file); + fput(file); + dput(dentry); + if (ret == -ESTALE) + return cachefiles_create_file(object); + return false; + +error_fput: + fput(file); +error: + cachefiles_do_unmark_inode_in_use(object, d_inode(dentry)); + dput(dentry); + return false; +} + +/* + * walk from the parent object to the child object through the backing + * filesystem, creating directories as we go + */ +bool cachefiles_look_up_object(struct cachefiles_object *object) +{ + struct cachefiles_volume *volume = object->volume; + struct dentry *dentry, *fan = volume->fanout[(u8)object->cookie->key_hash]; + int ret; + + _enter("OBJ%x,%s,", object->debug_id, object->d_name); + + /* Look up path "cache/vol/fanout/file". */ + ret = cachefiles_inject_read_error(); + if (ret == 0) + dentry = lookup_positive_unlocked(object->d_name, fan, + object->d_name_len); + else + dentry = ERR_PTR(ret); + trace_cachefiles_lookup(object, fan, dentry); + if (IS_ERR(dentry)) { + if (dentry == ERR_PTR(-ENOENT)) + goto new_file; + if (dentry == ERR_PTR(-EIO)) + cachefiles_io_error_obj(object, "Lookup failed"); + return false; + } + + if (!d_is_reg(dentry)) { + pr_err("%pd is not a file\n", dentry); + inode_lock_nested(d_inode(fan), I_MUTEX_PARENT); + ret = cachefiles_bury_object(volume->cache, object, fan, dentry, + FSCACHE_OBJECT_IS_WEIRD); + dput(dentry); + if (ret < 0) + return false; + goto new_file; + } + + if (!cachefiles_open_file(object, dentry)) + return false; + + _leave(" = t [%lu]", file_inode(object->file)->i_ino); + return true; + +new_file: + fscache_cookie_lookup_negative(object->cookie); + return cachefiles_create_file(object); +} + +/* + * Attempt to link a temporary file into its rightful place in the cache. 
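+ * Any extant file already occupying the cooked key name is unlinked first
+ * (unless it is in fact our own backing inode), then the tmpfile is
+ * hard-linked into the fanout directory under that name.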
+ */ +bool cachefiles_commit_tmpfile(struct cachefiles_cache *cache, + struct cachefiles_object *object) +{ + struct cachefiles_volume *volume = object->volume; + struct dentry *dentry, *fan = volume->fanout[(u8)object->cookie->key_hash]; + bool success = false; + int ret; + + _enter(",%pD", object->file); + + inode_lock_nested(d_inode(fan), I_MUTEX_PARENT); + ret = cachefiles_inject_read_error(); + if (ret == 0) + dentry = lookup_one_len(object->d_name, fan, object->d_name_len); + else + dentry = ERR_PTR(ret); + if (IS_ERR(dentry)) { + trace_cachefiles_vfs_error(object, d_inode(fan), PTR_ERR(dentry), + cachefiles_trace_lookup_error); + _debug("lookup fail %ld", PTR_ERR(dentry)); + goto out_unlock; + } + + if (!d_is_negative(dentry)) { + if (d_backing_inode(dentry) == file_inode(object->file)) { + success = true; + goto out_dput; + } + + ret = cachefiles_unlink(volume->cache, object, fan, dentry, + FSCACHE_OBJECT_IS_STALE); + if (ret < 0) + goto out_dput; + + dput(dentry); + ret = cachefiles_inject_read_error(); + if (ret == 0) + dentry = lookup_one_len(object->d_name, fan, object->d_name_len); + else + dentry = ERR_PTR(ret); + if (IS_ERR(dentry)) { + trace_cachefiles_vfs_error(object, d_inode(fan), PTR_ERR(dentry), + cachefiles_trace_lookup_error); + _debug("lookup fail %ld", PTR_ERR(dentry)); + goto out_unlock; + } + } + + ret = cachefiles_inject_read_error(); + if (ret == 0) + ret = vfs_link(object->file->f_path.dentry, &init_user_ns, + d_inode(fan), dentry, NULL); + if (ret < 0) { + trace_cachefiles_vfs_error(object, d_inode(fan), ret, + cachefiles_trace_link_error); + _debug("link fail %d", ret); + } else { + trace_cachefiles_link(object, file_inode(object->file)); + spin_lock(&object->lock); + /* TODO: Do we want to switch the file pointer to the new dentry? */ + clear_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags); + spin_unlock(&object->lock); + success = true; + } + +out_dput: + dput(dentry); +out_unlock: + inode_unlock(d_inode(fan)); + _leave(" = %u", success); + return success; +} + +/* + * Look up an inode to be checked or culled. Return -EBUSY if the inode is + * marked in use. 
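+ * On success the parent directory is left locked so that the victim can't
+ * be swapped out from under the caller; the error paths unlock it.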
+ */ +static struct dentry *cachefiles_lookup_for_cull(struct cachefiles_cache *cache, + struct dentry *dir, + char *filename) +{ + struct dentry *victim; + int ret = -ENOENT; + + inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); + + victim = lookup_one_len(filename, dir, strlen(filename)); + if (IS_ERR(victim)) + goto lookup_error; + if (d_is_negative(victim)) + goto lookup_put; + if (d_inode(victim)->i_flags & S_KERNEL_FILE) + goto lookup_busy; + return victim; + +lookup_busy: + ret = -EBUSY; +lookup_put: + inode_unlock(d_inode(dir)); + dput(victim); + return ERR_PTR(ret); + +lookup_error: + inode_unlock(d_inode(dir)); + ret = PTR_ERR(victim); + if (ret == -ENOENT) + return ERR_PTR(-ESTALE); /* Probably got retired by the netfs */ + + if (ret == -EIO) { + cachefiles_io_error(cache, "Lookup failed"); + } else if (ret != -ENOMEM) { + pr_err("Internal error: %d\n", ret); + ret = -EIO; + } + + return ERR_PTR(ret); +} + +/* + * Cull an object if it's not in use + * - called only by cache manager daemon + */ +int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, + char *filename) +{ + struct dentry *victim; + struct inode *inode; + int ret; + + _enter(",%pd/,%s", dir, filename); + + victim = cachefiles_lookup_for_cull(cache, dir, filename); + if (IS_ERR(victim)) + return PTR_ERR(victim); + + /* check to see if someone is using this object */ + inode = d_inode(victim); + inode_lock(inode); + if (inode->i_flags & S_KERNEL_FILE) { + ret = -EBUSY; + } else { + /* Stop the cache from picking it back up */ + inode->i_flags |= S_KERNEL_FILE; + ret = 0; + } + inode_unlock(inode); + if (ret < 0) + goto error_unlock; + + ret = cachefiles_bury_object(cache, NULL, dir, victim, + FSCACHE_OBJECT_WAS_CULLED); + if (ret < 0) + goto error; + + fscache_count_culled(); + dput(victim); + _leave(" = 0"); + return 0; + +error_unlock: + inode_unlock(d_inode(dir)); +error: + dput(victim); + if (ret == -ENOENT) + return -ESTALE; /* Probably got retired by the netfs */ + + if (ret != -ENOMEM) { + pr_err("Internal error: %d\n", ret); + ret = -EIO; + } + + _leave(" = %d", ret); + return ret; +} + +/* + * Find out if an object is in use or not + * - called only by cache manager daemon + * - returns -EBUSY or 0 to indicate whether an object is in use or not + */ +int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir, + char *filename) +{ + struct dentry *victim; + int ret = 0; + + victim = cachefiles_lookup_for_cull(cache, dir, filename); + if (IS_ERR(victim)) + return PTR_ERR(victim); + + inode_unlock(d_inode(dir)); + dput(victim); + return ret; +} diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c new file mode 100644 index 000000000..0254ed39f --- /dev/null +++ b/fs/cachefiles/ondemand.c @@ -0,0 +1,514 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include +#include +#include +#include "internal.h" + +static int cachefiles_ondemand_fd_release(struct inode *inode, + struct file *file) +{ + struct cachefiles_object *object = file->private_data; + struct cachefiles_cache *cache = object->volume->cache; + int object_id = object->ondemand_id; + struct cachefiles_req *req; + XA_STATE(xas, &cache->reqs, 0); + + xa_lock(&cache->reqs); + object->ondemand_id = CACHEFILES_ONDEMAND_ID_CLOSED; + + /* + * Flush all pending READ requests since their completion depends on + * anon_fd. 
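+ * Such requests are errored with -EIO and completed here rather than
+ * being left to block their waiters indefinitely.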
+ */ + xas_for_each(&xas, req, ULONG_MAX) { + if (req->msg.object_id == object_id && + req->msg.opcode == CACHEFILES_OP_READ) { + req->error = -EIO; + complete(&req->done); + xas_store(&xas, NULL); + } + } + xa_unlock(&cache->reqs); + + xa_erase(&cache->ondemand_ids, object_id); + trace_cachefiles_ondemand_fd_release(object, object_id); + cachefiles_put_object(object, cachefiles_obj_put_ondemand_fd); + cachefiles_put_unbind_pincount(cache); + return 0; +} + +static ssize_t cachefiles_ondemand_fd_write_iter(struct kiocb *kiocb, + struct iov_iter *iter) +{ + struct cachefiles_object *object = kiocb->ki_filp->private_data; + struct cachefiles_cache *cache = object->volume->cache; + struct file *file = object->file; + size_t len = iter->count; + loff_t pos = kiocb->ki_pos; + const struct cred *saved_cred; + int ret; + + if (!file) + return -ENOBUFS; + + cachefiles_begin_secure(cache, &saved_cred); + ret = __cachefiles_prepare_write(object, file, &pos, &len, true); + cachefiles_end_secure(cache, saved_cred); + if (ret < 0) + return ret; + + trace_cachefiles_ondemand_fd_write(object, file_inode(file), pos, len); + ret = __cachefiles_write(object, file, pos, iter, NULL, NULL); + if (!ret) + ret = len; + + return ret; +} + +static loff_t cachefiles_ondemand_fd_llseek(struct file *filp, loff_t pos, + int whence) +{ + struct cachefiles_object *object = filp->private_data; + struct file *file = object->file; + + if (!file) + return -ENOBUFS; + + return vfs_llseek(file, pos, whence); +} + +static long cachefiles_ondemand_fd_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) +{ + struct cachefiles_object *object = filp->private_data; + struct cachefiles_cache *cache = object->volume->cache; + struct cachefiles_req *req; + unsigned long id; + + if (ioctl != CACHEFILES_IOC_READ_COMPLETE) + return -EINVAL; + + if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags)) + return -EOPNOTSUPP; + + id = arg; + req = xa_erase(&cache->reqs, id); + if (!req) + return -EINVAL; + + trace_cachefiles_ondemand_cread(object, id); + complete(&req->done); + return 0; +} + +static const struct file_operations cachefiles_ondemand_fd_fops = { + .owner = THIS_MODULE, + .release = cachefiles_ondemand_fd_release, + .write_iter = cachefiles_ondemand_fd_write_iter, + .llseek = cachefiles_ondemand_fd_llseek, + .unlocked_ioctl = cachefiles_ondemand_fd_ioctl, +}; + +/* + * OPEN request Completion (copen) + * - command: "copen ," + * indicates the object size if >=0, error code if negative + */ +int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) +{ + struct cachefiles_req *req; + struct fscache_cookie *cookie; + char *pid, *psize; + unsigned long id; + long size; + int ret; + + if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags)) + return -EOPNOTSUPP; + + if (!*args) { + pr_err("Empty id specified\n"); + return -EINVAL; + } + + pid = args; + psize = strchr(args, ','); + if (!psize) { + pr_err("Cache size is not specified\n"); + return -EINVAL; + } + + *psize = 0; + psize++; + + ret = kstrtoul(pid, 0, &id); + if (ret) + return ret; + + req = xa_erase(&cache->reqs, id); + if (!req) + return -EINVAL; + + /* fail OPEN request if copen format is invalid */ + ret = kstrtol(psize, 0, &size); + if (ret) { + req->error = ret; + goto out; + } + + /* fail OPEN request if daemon reports an error */ + if (size < 0) { + if (!IS_ERR_VALUE(size)) { + req->error = -EINVAL; + ret = -EINVAL; + } else { + req->error = size; + ret = 0; + } + goto out; + } + + cookie = req->object->cookie; + cookie->object_size = 
size; + if (size) + clear_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags); + else + set_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags); + trace_cachefiles_ondemand_copen(req->object, id, size); + +out: + complete(&req->done); + return ret; +} + +static int cachefiles_ondemand_get_fd(struct cachefiles_req *req) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + struct cachefiles_open *load; + struct file *file; + u32 object_id; + int ret, fd; + + object = cachefiles_grab_object(req->object, + cachefiles_obj_get_ondemand_fd); + cache = object->volume->cache; + + ret = xa_alloc_cyclic(&cache->ondemand_ids, &object_id, NULL, + XA_LIMIT(1, INT_MAX), + &cache->ondemand_id_next, GFP_KERNEL); + if (ret < 0) + goto err; + + fd = get_unused_fd_flags(O_WRONLY); + if (fd < 0) { + ret = fd; + goto err_free_id; + } + + file = anon_inode_getfile("[cachefiles]", &cachefiles_ondemand_fd_fops, + object, O_WRONLY); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + goto err_put_fd; + } + + file->f_mode |= FMODE_PWRITE | FMODE_LSEEK; + fd_install(fd, file); + + load = (void *)req->msg.data; + load->fd = fd; + req->msg.object_id = object_id; + object->ondemand_id = object_id; + + cachefiles_get_unbind_pincount(cache); + trace_cachefiles_ondemand_open(object, &req->msg, load); + return 0; + +err_put_fd: + put_unused_fd(fd); +err_free_id: + xa_erase(&cache->ondemand_ids, object_id); +err: + cachefiles_put_object(object, cachefiles_obj_put_ondemand_fd); + return ret; +} + +ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + char __user *_buffer, size_t buflen) +{ + struct cachefiles_req *req; + struct cachefiles_msg *msg; + unsigned long id = 0; + size_t n; + int ret = 0; + XA_STATE(xas, &cache->reqs, cache->req_id_next); + + /* + * Cyclically search for a request that has not ever been processed, + * to prevent requests from being processed repeatedly, and make + * request distribution fair. 
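+ * The scan resumes from just after the last request handed out and wraps
+ * back to the start of the xarray if nothing new is found beyond it.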
+ */ + xa_lock(&cache->reqs); + req = xas_find_marked(&xas, UINT_MAX, CACHEFILES_REQ_NEW); + if (!req && cache->req_id_next > 0) { + xas_set(&xas, 0); + req = xas_find_marked(&xas, cache->req_id_next - 1, CACHEFILES_REQ_NEW); + } + if (!req) { + xa_unlock(&cache->reqs); + return 0; + } + + msg = &req->msg; + n = msg->len; + + if (n > buflen) { + xa_unlock(&cache->reqs); + return -EMSGSIZE; + } + + xas_clear_mark(&xas, CACHEFILES_REQ_NEW); + cache->req_id_next = xas.xa_index + 1; + xa_unlock(&cache->reqs); + + id = xas.xa_index; + msg->msg_id = id; + + if (msg->opcode == CACHEFILES_OP_OPEN) { + ret = cachefiles_ondemand_get_fd(req); + if (ret) + goto error; + } + + if (copy_to_user(_buffer, msg, n) != 0) { + ret = -EFAULT; + goto err_put_fd; + } + + /* CLOSE request has no reply */ + if (msg->opcode == CACHEFILES_OP_CLOSE) { + xa_erase(&cache->reqs, id); + complete(&req->done); + } + + return n; + +err_put_fd: + if (msg->opcode == CACHEFILES_OP_OPEN) + close_fd(((struct cachefiles_open *)msg->data)->fd); +error: + xa_erase(&cache->reqs, id); + req->error = ret; + complete(&req->done); + return ret; +} + +typedef int (*init_req_fn)(struct cachefiles_req *req, void *private); + +static int cachefiles_ondemand_send_req(struct cachefiles_object *object, + enum cachefiles_opcode opcode, + size_t data_len, + init_req_fn init_req, + void *private) +{ + struct cachefiles_cache *cache = object->volume->cache; + struct cachefiles_req *req; + XA_STATE(xas, &cache->reqs, 0); + int ret; + + if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags)) + return 0; + + if (test_bit(CACHEFILES_DEAD, &cache->flags)) + return -EIO; + + req = kzalloc(sizeof(*req) + data_len, GFP_KERNEL); + if (!req) + return -ENOMEM; + + req->object = object; + init_completion(&req->done); + req->msg.opcode = opcode; + req->msg.len = sizeof(struct cachefiles_msg) + data_len; + + ret = init_req(req, private); + if (ret) + goto out; + + do { + /* + * Stop enqueuing the request when daemon is dying. The + * following two operations need to be atomic as a whole. + * 1) check cache state, and + * 2) enqueue request if cache is alive. + * Otherwise the request may be enqueued after xarray has been + * flushed, leaving the orphan request never being completed. 
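+ * The problematic interleaving would look like this: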
+		 *
+		 * CPU 1			CPU 2
+		 * =====			=====
+		 *				test CACHEFILES_DEAD bit
+		 * set CACHEFILES_DEAD bit
+		 * flush requests in the xarray
+		 *				enqueue the request
+		 */
+		xas_lock(&xas);
+
+		if (test_bit(CACHEFILES_DEAD, &cache->flags)) {
+			xas_unlock(&xas);
+			ret = -EIO;
+			goto out;
+		}
+
+		/* coupled with the barrier in cachefiles_flush_reqs() */
+		smp_mb();
+
+		if (opcode != CACHEFILES_OP_OPEN && object->ondemand_id <= 0) {
+			WARN_ON_ONCE(object->ondemand_id == 0);
+			xas_unlock(&xas);
+			ret = -EIO;
+			goto out;
+		}
+
+		xas.xa_index = 0;
+		xas_find_marked(&xas, UINT_MAX, XA_FREE_MARK);
+		if (xas.xa_node == XAS_RESTART)
+			xas_set_err(&xas, -EBUSY);
+		xas_store(&xas, req);
+		xas_clear_mark(&xas, XA_FREE_MARK);
+		xas_set_mark(&xas, CACHEFILES_REQ_NEW);
+		xas_unlock(&xas);
+	} while (xas_nomem(&xas, GFP_KERNEL));
+
+	ret = xas_error(&xas);
+	if (ret)
+		goto out;
+
+	wake_up_all(&cache->daemon_pollwq);
+	wait_for_completion(&req->done);
+	ret = req->error;
+out:
+	kfree(req);
+	return ret;
+}
+
+static int cachefiles_ondemand_init_open_req(struct cachefiles_req *req,
+					     void *private)
+{
+	struct cachefiles_object *object = req->object;
+	struct fscache_cookie *cookie = object->cookie;
+	struct fscache_volume *volume = object->volume->vcookie;
+	struct cachefiles_open *load = (void *)req->msg.data;
+	size_t volume_key_size, cookie_key_size;
+	void *volume_key, *cookie_key;
+
+	/*
+	 * Volume key is a NUL-terminated string. key[0] stores strlen() of the
+	 * string, followed by the content of the string (excluding '\0').
+	 */
+	volume_key_size = volume->key[0] + 1;
+	volume_key = volume->key + 1;
+
+	/* Cookie key is binary data, which is netfs specific. */
+	cookie_key_size = cookie->key_len;
+	cookie_key = fscache_get_key(cookie);
+
+	if (!(object->cookie->advice & FSCACHE_ADV_WANT_CACHE_SIZE)) {
+		pr_err("WANT_CACHE_SIZE is needed for on-demand mode\n");
+		return -EINVAL;
+	}
+
+	load->volume_key_size = volume_key_size;
+	load->cookie_key_size = cookie_key_size;
+	memcpy(load->data, volume_key, volume_key_size);
+	memcpy(load->data + volume_key_size, cookie_key, cookie_key_size);
+
+	return 0;
+}
+
+static int cachefiles_ondemand_init_close_req(struct cachefiles_req *req,
+					      void *private)
+{
+	struct cachefiles_object *object = req->object;
+	int object_id = object->ondemand_id;
+
+	/*
+	 * It's possible that the object ID is still 0 if the cookie lookup
+	 * phase failed before an OPEN request was ever sent. Also avoid
+	 * sending a CLOSE request for CACHEFILES_ONDEMAND_ID_CLOSED, which
+	 * means the anon_fd has already been closed.
+	 */
+	if (object_id <= 0)
+		return -ENOENT;
+
+	req->msg.object_id = object_id;
+	trace_cachefiles_ondemand_close(object, &req->msg);
+	return 0;
+}
+
+struct cachefiles_read_ctx {
+	loff_t off;
+	size_t len;
+};
+
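For orientation, the consuming side of these messages lives in userspace. The sketch below is illustrative only and not part of this patch: it assumes the UAPI definitions exported in <linux/cachefiles.h> (struct cachefiles_msg, struct cachefiles_open, struct cachefiles_read, CACHEFILES_IOC_READ_COMPLETE), treats fetch_from_server() and the object size as hypothetical, and elides error handling and multi-object bookkeeping.

/* Illustrative userspace handler for one on-demand request. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <linux/cachefiles.h>

static int anon_fd = -1;		/* fd delivered by CACHEFILES_OP_OPEN */
static char data[131072];		/* assumes requested ranges fit in here */

/* Hypothetical: fetch [off, off+len) of the object from the server. */
extern ssize_t fetch_from_server(void *buf, size_t len, off_t off);

int serve_one(int devfd)
{
	char buf[4096];
	struct cachefiles_msg *msg = (struct cachefiles_msg *)buf;

	if (read(devfd, buf, sizeof(buf)) <= 0)	/* one whole request per read() */
		return -1;

	switch (msg->opcode) {
	case CACHEFILES_OP_OPEN: {
		struct cachefiles_open *o = (void *)msg->data;
		char cmd[64];

		anon_fd = o->fd;
		/* Ack with "copen <msg_id>,<object size>"; a real daemon
		 * derives the size from its own metadata (0 here). */
		snprintf(cmd, sizeof(cmd), "copen %u,%llu", msg->msg_id, 0ULL);
		write(devfd, cmd, strlen(cmd));
		break;
	}
	case CACHEFILES_OP_READ: {
		struct cachefiles_read *rd = (void *)msg->data;

		/* Fill the missing range through the anonymous fd, then
		 * complete the kernel-side wait. */
		fetch_from_server(data, rd->len, rd->off);
		pwrite(anon_fd, data, rd->len, rd->off);
		ioctl(anon_fd, CACHEFILES_IOC_READ_COMPLETE, msg->msg_id);
		break;
	}
	case CACHEFILES_OP_CLOSE:
		close(anon_fd);
		anon_fd = -1;
		break;
	}
	return 0;
}

The READ branch corresponds to the initialiser below: the kernel supplies {off, len} and blocks in cachefiles_ondemand_send_req() until the ioctl completes the request.

+static int cachefiles_ondemand_init_read_req(struct cachefiles_req *req,
+					     void *private)
+{
+	struct cachefiles_object *object = req->object;
+	struct cachefiles_read *load = (void *)req->msg.data;
+	struct cachefiles_read_ctx *read_ctx = private;
+	int object_id = object->ondemand_id;
+
+	/* Stop enqueuing requests when daemon has closed anon_fd. */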
+	if (object_id <= 0) {
+		WARN_ON_ONCE(object_id == 0);
+		pr_info_once("READ: anonymous fd closed prematurely.\n");
+		return -EIO;
+	}
+
+	req->msg.object_id = object_id;
+	load->off = read_ctx->off;
+	load->len = read_ctx->len;
+	trace_cachefiles_ondemand_read(object, &req->msg, load);
+	return 0;
+}
+
+int cachefiles_ondemand_init_object(struct cachefiles_object *object)
+{
+	struct fscache_cookie *cookie = object->cookie;
+	struct fscache_volume *volume = object->volume->vcookie;
+	size_t volume_key_size, cookie_key_size, data_len;
+
+	/*
+	 * CacheFiles will first check the cache file under the root cache
+	 * directory. If the coherency check fails, it will fall back to
+	 * creating a new tmpfile as the cache file. Reuse the previously
+	 * allocated object ID if any.
+	 */
+	if (object->ondemand_id > 0)
+		return 0;
+
+	volume_key_size = volume->key[0] + 1;
+	cookie_key_size = cookie->key_len;
+	data_len = sizeof(struct cachefiles_open) +
+		   volume_key_size + cookie_key_size;
+
+	return cachefiles_ondemand_send_req(object, CACHEFILES_OP_OPEN,
+			data_len, cachefiles_ondemand_init_open_req, NULL);
+}
+
+void cachefiles_ondemand_clean_object(struct cachefiles_object *object)
+{
+	cachefiles_ondemand_send_req(object, CACHEFILES_OP_CLOSE, 0,
+			cachefiles_ondemand_init_close_req, NULL);
+}
+
+int cachefiles_ondemand_read(struct cachefiles_object *object,
+			     loff_t pos, size_t len)
+{
+	struct cachefiles_read_ctx read_ctx = {pos, len};
+
+	return cachefiles_ondemand_send_req(object, CACHEFILES_OP_READ,
+			sizeof(struct cachefiles_read),
+			cachefiles_ondemand_init_read_req, &read_ctx);
+}
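To round the protocol off, everything above is driven from a simple poll loop in the daemon. Again a hedged sketch rather than a reference implementation: serve_one() is the hypothetical handler sketched earlier, and the cache root and tag are example values, written to /dev/cachefiles the same way cachefilesd(8) configures a cache.

/* Illustrative daemon skeleton for the on-demand mode. */
#include <fcntl.h>
#include <poll.h>
#include <string.h>
#include <unistd.h>

extern int serve_one(int devfd);	/* hypothetical handler sketched above */

int main(void)
{
	static const char *setup[] = {
		"dir /var/cache/fscache",	/* example cache root */
		"tag mycache",			/* example tag */
		"bind ondemand",		/* plain "bind" selects the classic mode */
	};
	int devfd = open("/dev/cachefiles", O_RDWR);
	struct pollfd p = { .fd = devfd, .events = POLLIN };

	for (size_t i = 0; i < sizeof(setup) / sizeof(setup[0]); i++)
		write(devfd, setup[i], strlen(setup[i]));

	for (;;)
		if (poll(&p, 1, -1) > 0 && (p.revents & POLLIN))
			serve_one(devfd);
}

diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c
new file mode 100644
index 000000000..fe777164f
--- /dev/null
+++ b/fs/cachefiles/security.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* CacheFiles security management
+ *
+ * Copyright (C) 2007, 2021 Red Hat, Inc. All Rights Reserved.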
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/fs.h>
+#include <linux/cred.h>
+#include "internal.h"
+
+/*
+ * determine the security context within which we access the cache from within
+ * the kernel
+ */
+int cachefiles_get_security_ID(struct cachefiles_cache *cache)
+{
+	struct cred *new;
+	int ret;
+
+	_enter("{%s}", cache->secctx);
+
+	new = prepare_kernel_cred(current);
+	if (!new) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	if (cache->secctx) {
+		ret = set_security_override_from_ctx(new, cache->secctx);
+		if (ret < 0) {
+			put_cred(new);
+			pr_err("Security denies permission to nominate security context: error %d\n",
+			       ret);
+			goto error;
+		}
+	}
+
+	cache->cache_cred = new;
+	ret = 0;
+error:
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * see if mkdir and create can be performed in the root directory
+ */
+static int cachefiles_check_cache_dir(struct cachefiles_cache *cache,
+				      struct dentry *root)
+{
+	int ret;
+
+	ret = security_inode_mkdir(d_backing_inode(root), root, 0);
+	if (ret < 0) {
+		pr_err("Security denies permission to make dirs: error %d\n",
+		       ret);
+		return ret;
+	}
+
+	ret = security_inode_create(d_backing_inode(root), root, 0);
+	if (ret < 0)
+		pr_err("Security denies permission to create files: error %d\n",
+		       ret);
+
+	return ret;
+}
+
+/*
+ * check the security details of the on-disk cache
+ * - must be called with security override in force
+ * - must return with a security override in force - even in the case of an
+ *   error
+ */
+int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
+					struct dentry *root,
+					const struct cred **_saved_cred)
+{
+	struct cred *new;
+	int ret;
+
+	_enter("");
+
+	/* duplicate the cache creds for COW (the override is currently in
+	 * force, so we can use prepare_creds() to do this) */
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
+
+	cachefiles_end_secure(cache, *_saved_cred);
+
+	/* use the cache root dir's security context as the basis with
+	 * which to create files */
+	ret = set_create_files_as(new, d_backing_inode(root));
+	if (ret < 0) {
+		abort_creds(new);
+		cachefiles_begin_secure(cache, _saved_cred);
+		_leave(" = %d [cfa]", ret);
+		return ret;
+	}
+
+	put_cred(cache->cache_cred);
+	cache->cache_cred = new;
+
+	cachefiles_begin_secure(cache, _saved_cred);
+	ret = cachefiles_check_cache_dir(cache, root);
+
+	if (ret == -EOPNOTSUPP)
+		ret = 0;
+	_leave(" = %d", ret);
+	return ret;
+}
diff --git a/fs/cachefiles/volume.c b/fs/cachefiles/volume.c
new file mode 100644
index 000000000..89df0ba8b
--- /dev/null
+++ b/fs/cachefiles/volume.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Volume handling.
+ *
+ * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include "internal.h"
+#include <trace/events/fscache.h>
+
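The function that follows materialises a volume as an "I<key>" directory carrying 256 "@xx" fanout subdirectories. As a standalone illustration of that naming scheme, not part of the patch (the length-prefixed key follows the convention used by the code below; the example key is hypothetical):

/* Illustrative: print the on-disk names derived from a volume key. */
#include <stdio.h>
#include <string.h>

static void print_volume_names(const unsigned char *key)
{
	size_t len = key[0];		/* key[0] holds strlen() of the key */
	char name[258];

	name[0] = 'I';			/* volume directories are "I<key>" */
	memcpy(name + 1, key + 1, len);
	name[len + 1] = '\0';
	printf("volume dir: %s\n", name);

	/* Each volume gets 256 fanout subdirectories, "@00" to "@ff";
	 * print a sample of them. */
	for (int i = 0; i < 256; i += 85)
		printf("fanout dir: %s/@%02x\n", name, i);
}

int main(void)
{
	/* "example", length-prefixed as in the vcookie key used below. */
	const unsigned char key[] = { 7, 'e', 'x', 'a', 'm', 'p', 'l', 'e' };

	print_volume_names(key);
	return 0;
}

+/*
+ * Allocate and set up a volume representation. We make sure all the fanout
+ * directories are created and pinned.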
+ */ +void cachefiles_acquire_volume(struct fscache_volume *vcookie) +{ + struct cachefiles_volume *volume; + struct cachefiles_cache *cache = vcookie->cache->cache_priv; + const struct cred *saved_cred; + struct dentry *vdentry, *fan; + size_t len; + char *name; + bool is_new = false; + int ret, n_accesses, i; + + _enter(""); + + volume = kzalloc(sizeof(struct cachefiles_volume), GFP_KERNEL); + if (!volume) + return; + volume->vcookie = vcookie; + volume->cache = cache; + INIT_LIST_HEAD(&volume->cache_link); + + cachefiles_begin_secure(cache, &saved_cred); + + len = vcookie->key[0]; + name = kmalloc(len + 3, GFP_NOFS); + if (!name) + goto error_vol; + name[0] = 'I'; + memcpy(name + 1, vcookie->key + 1, len); + name[len + 1] = 0; + +retry: + vdentry = cachefiles_get_directory(cache, cache->store, name, &is_new); + if (IS_ERR(vdentry)) + goto error_name; + volume->dentry = vdentry; + + if (is_new) { + if (!cachefiles_set_volume_xattr(volume)) + goto error_dir; + } else { + ret = cachefiles_check_volume_xattr(volume); + if (ret < 0) { + if (ret != -ESTALE) + goto error_dir; + inode_lock_nested(d_inode(cache->store), I_MUTEX_PARENT); + cachefiles_bury_object(cache, NULL, cache->store, vdentry, + FSCACHE_VOLUME_IS_WEIRD); + cachefiles_put_directory(volume->dentry); + cond_resched(); + goto retry; + } + } + + for (i = 0; i < 256; i++) { + sprintf(name, "@%02x", i); + fan = cachefiles_get_directory(cache, vdentry, name, NULL); + if (IS_ERR(fan)) + goto error_fan; + volume->fanout[i] = fan; + } + + cachefiles_end_secure(cache, saved_cred); + + vcookie->cache_priv = volume; + n_accesses = atomic_inc_return(&vcookie->n_accesses); /* Stop wakeups on dec-to-0 */ + trace_fscache_access_volume(vcookie->debug_id, 0, + refcount_read(&vcookie->ref), + n_accesses, fscache_access_cache_pin); + + spin_lock(&cache->object_list_lock); + list_add(&volume->cache_link, &volume->cache->volumes); + spin_unlock(&cache->object_list_lock); + + kfree(name); + return; + +error_fan: + for (i = 0; i < 256; i++) + cachefiles_put_directory(volume->fanout[i]); +error_dir: + cachefiles_put_directory(volume->dentry); +error_name: + kfree(name); +error_vol: + kfree(volume); + cachefiles_end_secure(cache, saved_cred); +} + +/* + * Release a volume representation. + */ +static void __cachefiles_free_volume(struct cachefiles_volume *volume) +{ + int i; + + _enter(""); + + volume->vcookie->cache_priv = NULL; + + for (i = 0; i < 256; i++) + cachefiles_put_directory(volume->fanout[i]); + cachefiles_put_directory(volume->dentry); + kfree(volume); +} + +void cachefiles_free_volume(struct fscache_volume *vcookie) +{ + struct cachefiles_volume *volume = vcookie->cache_priv; + + if (volume) { + spin_lock(&volume->cache->object_list_lock); + list_del_init(&volume->cache_link); + spin_unlock(&volume->cache->object_list_lock); + __cachefiles_free_volume(volume); + } +} + +void cachefiles_withdraw_volume(struct cachefiles_volume *volume) +{ + fscache_withdraw_volume(volume->vcookie); + cachefiles_set_volume_xattr(volume); + __cachefiles_free_volume(volume); +} diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c new file mode 100644 index 000000000..00b087c14 --- /dev/null +++ b/fs/cachefiles/xattr.c @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* CacheFiles extended attribute management + * + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. 
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/fsnotify.h>
+#include <linux/quotaops.h>
+#include <linux/xattr.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+#define CACHEFILES_COOKIE_TYPE_DATA 1
+
+struct cachefiles_xattr {
+	__be64	object_size;	/* Actual size of the object */
+	__be64	zero_point;	/* Size after which server has no data not written by us */
+	__u8	type;		/* Type of object */
+	__u8	content;	/* Content presence (enum cachefiles_content) */
+	__u8	data[];		/* netfs coherency data */
+} __packed;
+
+static const char cachefiles_xattr_cache[] =
+	XATTR_USER_PREFIX "CacheFiles.cache";
+
+struct cachefiles_vol_xattr {
+	__be32	reserved;	/* Reserved, should be 0 */
+	__u8	data[];		/* netfs volume coherency data */
+} __packed;
+
+/*
+ * set the state xattr on a cache file
+ */
+int cachefiles_set_object_xattr(struct cachefiles_object *object)
+{
+	struct cachefiles_xattr *buf;
+	struct dentry *dentry;
+	struct file *file = object->file;
+	unsigned int len = object->cookie->aux_len;
+	int ret;
+
+	if (!file)
+		return -ESTALE;
+	dentry = file->f_path.dentry;
+
+	_enter("%x,#%d", object->debug_id, len);
+
+	buf = kmalloc(sizeof(struct cachefiles_xattr) + len, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	buf->object_size	= cpu_to_be64(object->cookie->object_size);
+	buf->zero_point		= 0;
+	buf->type		= CACHEFILES_COOKIE_TYPE_DATA;
+	buf->content		= object->content_info;
+	if (test_bit(FSCACHE_COOKIE_LOCAL_WRITE, &object->cookie->flags))
+		buf->content	= CACHEFILES_CONTENT_DIRTY;
+	if (len > 0)
+		memcpy(buf->data, fscache_get_aux(object->cookie), len);
+
+	ret = cachefiles_inject_write_error();
+	if (ret == 0)
+		ret = vfs_setxattr(&init_user_ns, dentry, cachefiles_xattr_cache,
+				   buf, sizeof(struct cachefiles_xattr) + len, 0);
+	if (ret < 0) {
+		trace_cachefiles_vfs_error(object, file_inode(file), ret,
+					   cachefiles_trace_setxattr_error);
+		trace_cachefiles_coherency(object, file_inode(file)->i_ino,
+					   buf->content,
+					   cachefiles_coherency_set_fail);
+		if (ret != -ENOMEM)
+			cachefiles_io_error_obj(
+				object,
+				"Failed to set xattr with error %d", ret);
+	} else {
+		trace_cachefiles_coherency(object, file_inode(file)->i_ino,
+					   buf->content,
+					   cachefiles_coherency_set_ok);
+	}
+
+	kfree(buf);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * check the consistency between the backing cache and the FS-Cache cookie
+ */
+int cachefiles_check_auxdata(struct cachefiles_object *object, struct file *file)
+{
+	struct cachefiles_xattr *buf;
+	struct dentry *dentry = file->f_path.dentry;
+	unsigned int len = object->cookie->aux_len, tlen;
+	const void *p = fscache_get_aux(object->cookie);
+	enum cachefiles_coherency_trace why;
+	ssize_t xlen;
+	int ret = -ESTALE;
+
+	tlen = sizeof(struct cachefiles_xattr) + len;
+	buf = kmalloc(tlen, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	xlen = cachefiles_inject_read_error();
+	if (xlen == 0)
+		xlen = vfs_getxattr(&init_user_ns, dentry, cachefiles_xattr_cache, buf, tlen);
+	if (xlen != tlen) {
+		if (xlen < 0)
+			trace_cachefiles_vfs_error(object, file_inode(file), xlen,
+						   cachefiles_trace_getxattr_error);
+		if (xlen == -EIO)
+			cachefiles_io_error_obj(
+				object,
+				"Failed to read aux with error %zd", xlen);
+		why = cachefiles_coherency_check_xattr;
+	} else if (buf->type != CACHEFILES_COOKIE_TYPE_DATA) {
+		why = cachefiles_coherency_check_type;
+	} else if (memcmp(buf->data, p, len) != 0) {
+		why = cachefiles_coherency_check_aux;
+	} else if (be64_to_cpu(buf->object_size) != object->cookie->object_size) {
+		why = cachefiles_coherency_check_objsize;
+	} else if (buf->content == CACHEFILES_CONTENT_DIRTY) {
+		// TODO: Begin conflict resolution
+		pr_warn("Dirty object in cache\n");
+		why = cachefiles_coherency_check_dirty;
+	} else {
+		why = cachefiles_coherency_check_ok;
+		ret = 0;
+	}
+
+	trace_cachefiles_coherency(object, file_inode(file)->i_ino,
+				   buf->content, why);
+	kfree(buf);
+	return ret;
+}
+
+/*
+ * remove the object's xattr to mark it stale
+ */
+int cachefiles_remove_object_xattr(struct cachefiles_cache *cache,
+				   struct cachefiles_object *object,
+				   struct dentry *dentry)
+{
+	int ret;
+
+	ret = cachefiles_inject_remove_error();
+	if (ret == 0)
+		ret = vfs_removexattr(&init_user_ns, dentry, cachefiles_xattr_cache);
+	if (ret < 0) {
+		trace_cachefiles_vfs_error(object, d_inode(dentry), ret,
+					   cachefiles_trace_remxattr_error);
+		if (ret == -ENOENT || ret == -ENODATA)
+			ret = 0;
+		else if (ret != -ENOMEM)
+			cachefiles_io_error(cache,
+					    "Can't remove xattr from %lu"
+					    " (error %d)",
+					    d_backing_inode(dentry)->i_ino, -ret);
+	}
+
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * Stick a marker on the cache object to indicate that it's dirty.
+ */
+void cachefiles_prepare_to_write(struct fscache_cookie *cookie)
+{
+	const struct cred *saved_cred;
+	struct cachefiles_object *object = cookie->cache_priv;
+	struct cachefiles_cache *cache = object->volume->cache;
+
+	_enter("c=%08x", object->cookie->debug_id);
+
+	if (!test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags)) {
+		cachefiles_begin_secure(cache, &saved_cred);
+		cachefiles_set_object_xattr(object);
+		cachefiles_end_secure(cache, saved_cred);
+	}
+}
+
+/*
+ * Set the state xattr on a volume directory.
+ */
+bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume)
+{
+	struct cachefiles_vol_xattr *buf;
+	unsigned int len = volume->vcookie->coherency_len;
+	const void *p = volume->vcookie->coherency;
+	struct dentry *dentry = volume->dentry;
+	int ret;
+
+	_enter("%x,#%d", volume->vcookie->debug_id, len);
+
+	len += sizeof(*buf);
+	buf = kmalloc(len, GFP_KERNEL);
+	if (!buf)
+		return false;
+	buf->reserved = cpu_to_be32(0);
+	memcpy(buf->data, p, volume->vcookie->coherency_len);
+
+	ret = cachefiles_inject_write_error();
+	if (ret == 0)
+		ret = vfs_setxattr(&init_user_ns, dentry, cachefiles_xattr_cache,
+				   buf, len, 0);
+	if (ret < 0) {
+		trace_cachefiles_vfs_error(NULL, d_inode(dentry), ret,
+					   cachefiles_trace_setxattr_error);
+		trace_cachefiles_vol_coherency(volume, d_inode(dentry)->i_ino,
+					       cachefiles_coherency_vol_set_fail);
+		if (ret != -ENOMEM)
+			cachefiles_io_error(
+				volume->cache, "Failed to set xattr with error %d", ret);
+	} else {
+		trace_cachefiles_vol_coherency(volume, d_inode(dentry)->i_ino,
+					       cachefiles_coherency_vol_set_ok);
+	}
+
+	kfree(buf);
+	_leave(" = %d", ret);
+	return ret == 0;
+}
+
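Because both the object and volume state share the "user.CacheFiles.cache" name, coherency problems can be inspected offline with getxattr(2). A sketch for the volume side that the next function checks, illustrative only (the path argument, an "I..." volume directory inside the cache, is hypothetical):

/* Illustrative: dump the volume coherency xattr of an "I..." directory. */
#include <stdio.h>
#include <sys/types.h>
#include <sys/xattr.h>

int main(int argc, char *argv[])
{
	unsigned char buf[256];
	ssize_t n;

	if (argc < 2)
		return 2;
	n = getxattr(argv[1], "user.CacheFiles.cache", buf, sizeof(buf));
	if (n < 4)
		return 1;
	/* Leading __be32 is the reserved field (must be zero); the remainder
	 * is the netfs volume coherency blob that the check below memcmp()s. */
	printf("reserved=%02x%02x%02x%02x coherency_len=%zd\n",
	       buf[0], buf[1], buf[2], buf[3], n - 4);
	return 0;
}

+/*
+ * Check the consistency between the backing cache and the volume cookie.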
+ */ +int cachefiles_check_volume_xattr(struct cachefiles_volume *volume) +{ + struct cachefiles_vol_xattr *buf; + struct dentry *dentry = volume->dentry; + unsigned int len = volume->vcookie->coherency_len; + const void *p = volume->vcookie->coherency; + enum cachefiles_coherency_trace why; + ssize_t xlen; + int ret = -ESTALE; + + _enter(""); + + len += sizeof(*buf); + buf = kmalloc(len, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + xlen = cachefiles_inject_read_error(); + if (xlen == 0) + xlen = vfs_getxattr(&init_user_ns, dentry, cachefiles_xattr_cache, buf, len); + if (xlen != len) { + if (xlen < 0) { + trace_cachefiles_vfs_error(NULL, d_inode(dentry), xlen, + cachefiles_trace_getxattr_error); + if (xlen == -EIO) + cachefiles_io_error( + volume->cache, + "Failed to read xattr with error %zd", xlen); + } + why = cachefiles_coherency_vol_check_xattr; + } else if (buf->reserved != cpu_to_be32(0)) { + why = cachefiles_coherency_vol_check_resv; + } else if (memcmp(buf->data, p, len - sizeof(*buf)) != 0) { + why = cachefiles_coherency_vol_check_cmp; + } else { + why = cachefiles_coherency_vol_check_ok; + ret = 0; + } + + trace_cachefiles_vol_coherency(volume, d_inode(dentry)->i_ino, why); + kfree(buf); + _leave(" = %d", ret); + return ret; +} -- cgit v1.2.3
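The data-file xattr declared at the top of this file can be dumped the same way, complementing cachefiles_check_auxdata() above. Illustrative only: the struct below mirrors struct cachefiles_xattr and the path argument is hypothetical.

/* Illustrative: dump the object coherency xattr of a cache data file. */
#include <stdio.h>
#include <stdint.h>
#include <endian.h>
#include <sys/types.h>
#include <sys/xattr.h>

struct xattr_hdr {			/* mirrors struct cachefiles_xattr */
	uint64_t object_size;		/* stored big-endian */
	uint64_t zero_point;		/* stored big-endian */
	uint8_t type;
	uint8_t content;
} __attribute__((packed));

int main(int argc, char *argv[])
{
	unsigned char buf[512];
	struct xattr_hdr *h = (void *)buf;
	ssize_t n;

	if (argc < 2)
		return 2;
	n = getxattr(argv[1], "user.CacheFiles.cache", buf, sizeof(buf));
	if (n < (ssize_t)sizeof(*h))
		return 1;
	printf("size=%llu zero_point=%llu type=%u content=%u aux_len=%zd\n",
	       (unsigned long long)be64toh(h->object_size),
	       (unsigned long long)be64toh(h->zero_point),
	       h->type, h->content, n - (ssize_t)sizeof(*h));
	return 0;
}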