summaryrefslogtreecommitdiffstats
path: root/fs/cachefiles
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:49:45 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:49:45 +0000
commit2c3c1048746a4622d8c89a29670120dc8fab93c4 (patch)
tree848558de17fb3008cdf4d861b01ac7781903ce39 /fs/cachefiles
parentInitial commit. (diff)
downloadlinux-2c3c1048746a4622d8c89a29670120dc8fab93c4.tar.xz
linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.zip
Adding upstream version 6.1.76.upstream/6.1.76upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'fs/cachefiles')
-rw-r--r--fs/cachefiles/Kconfig40
-rw-r--r--fs/cachefiles/Makefile21
-rw-r--r--fs/cachefiles/cache.c383
-rw-r--r--fs/cachefiles/daemon.c814
-rw-r--r--fs/cachefiles/error_inject.c46
-rw-r--r--fs/cachefiles/interface.c447
-rw-r--r--fs/cachefiles/internal.h477
-rw-r--r--fs/cachefiles/io.c651
-rw-r--r--fs/cachefiles/key.c138
-rw-r--r--fs/cachefiles/main.c93
-rw-r--r--fs/cachefiles/namei.c862
-rw-r--r--fs/cachefiles/ondemand.c514
-rw-r--r--fs/cachefiles/security.c112
-rw-r--r--fs/cachefiles/volume.c139
-rw-r--r--fs/cachefiles/xattr.c276
15 files changed, 5013 insertions, 0 deletions
diff --git a/fs/cachefiles/Kconfig b/fs/cachefiles/Kconfig
new file mode 100644
index 000000000..8df715640
--- /dev/null
+++ b/fs/cachefiles/Kconfig
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config CACHEFILES
+ tristate "Filesystem caching on files"
+ depends on FSCACHE && BLOCK
+ help
+ This permits use of a mounted filesystem as a cache for other
+ filesystems - primarily networking filesystems - thus allowing fast
+ local disk to enhance the speed of slower devices.
+
+ See Documentation/filesystems/caching/cachefiles.rst for more
+ information.
+
+config CACHEFILES_DEBUG
+ bool "Debug CacheFiles"
+ depends on CACHEFILES
+ help
+ This permits debugging to be dynamically enabled in the filesystem
+ caching on files module. If this is set, the debugging output may be
+ enabled by setting bits in /sys/modules/cachefiles/parameter/debug or
+ by including a debugging specifier in /etc/cachefilesd.conf.
+
+config CACHEFILES_ERROR_INJECTION
+ bool "Provide error injection for cachefiles"
+ depends on CACHEFILES && SYSCTL
+ help
+ This permits error injection to be enabled in cachefiles whilst a
+ cache is in service.
+
+config CACHEFILES_ONDEMAND
+ bool "Support for on-demand read"
+ depends on CACHEFILES
+ default n
+ help
+ This permits userspace to enable the cachefiles on-demand read mode.
+ In this mode, when a cache miss occurs, responsibility for fetching
+ the data lies with the cachefiles backend instead of with the netfs
+ and is delegated to userspace.
+
+ If unsure, say N.
diff --git a/fs/cachefiles/Makefile b/fs/cachefiles/Makefile
new file mode 100644
index 000000000..c37a7a9af
--- /dev/null
+++ b/fs/cachefiles/Makefile
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for caching in a mounted filesystem
+#
+
+cachefiles-y := \
+ cache.o \
+ daemon.o \
+ interface.o \
+ io.o \
+ key.o \
+ main.o \
+ namei.o \
+ security.o \
+ volume.o \
+ xattr.o
+
+cachefiles-$(CONFIG_CACHEFILES_ERROR_INJECTION) += error_inject.o
+cachefiles-$(CONFIG_CACHEFILES_ONDEMAND) += ondemand.o
+
+obj-$(CONFIG_CACHEFILES) := cachefiles.o
diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c
new file mode 100644
index 000000000..7077f72e6
--- /dev/null
+++ b/fs/cachefiles/cache.c
@@ -0,0 +1,383 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Manage high-level VFS aspects of a cache.
+ *
+ * Copyright (C) 2007, 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/slab.h>
+#include <linux/statfs.h>
+#include <linux/namei.h>
+#include "internal.h"
+
+/*
+ * Bring a cache online.
+ */
+int cachefiles_add_cache(struct cachefiles_cache *cache)
+{
+ struct fscache_cache *cache_cookie;
+ struct path path;
+ struct kstatfs stats;
+ struct dentry *graveyard, *cachedir, *root;
+ const struct cred *saved_cred;
+ int ret;
+
+ _enter("");
+
+ cache_cookie = fscache_acquire_cache(cache->tag);
+ if (IS_ERR(cache_cookie))
+ return PTR_ERR(cache_cookie);
+
+ /* we want to work under the module's security ID */
+ ret = cachefiles_get_security_ID(cache);
+ if (ret < 0)
+ goto error_getsec;
+
+ cachefiles_begin_secure(cache, &saved_cred);
+
+ /* look up the directory at the root of the cache */
+ ret = kern_path(cache->rootdirname, LOOKUP_DIRECTORY, &path);
+ if (ret < 0)
+ goto error_open_root;
+
+ cache->mnt = path.mnt;
+ root = path.dentry;
+
+ ret = -EINVAL;
+ if (is_idmapped_mnt(path.mnt)) {
+ pr_warn("File cache on idmapped mounts not supported");
+ goto error_unsupported;
+ }
+
+ /* Check features of the backing filesystem:
+ * - Directories must support looking up and directory creation
+ * - We create tmpfiles to handle invalidation
+ * - We use xattrs to store metadata
+ * - We need to be able to query the amount of space available
+ * - We want to be able to sync the filesystem when stopping the cache
+ * - We use DIO to/from pages, so the blocksize mustn't be too big.
+ */
+ ret = -EOPNOTSUPP;
+ if (d_is_negative(root) ||
+ !d_backing_inode(root)->i_op->lookup ||
+ !d_backing_inode(root)->i_op->mkdir ||
+ !d_backing_inode(root)->i_op->tmpfile ||
+ !(d_backing_inode(root)->i_opflags & IOP_XATTR) ||
+ !root->d_sb->s_op->statfs ||
+ !root->d_sb->s_op->sync_fs ||
+ root->d_sb->s_blocksize > PAGE_SIZE)
+ goto error_unsupported;
+
+ ret = -EROFS;
+ if (sb_rdonly(root->d_sb))
+ goto error_unsupported;
+
+ /* determine the security of the on-disk cache as this governs
+ * security ID of files we create */
+ ret = cachefiles_determine_cache_security(cache, root, &saved_cred);
+ if (ret < 0)
+ goto error_unsupported;
+
+ /* get the cache size and blocksize */
+ ret = vfs_statfs(&path, &stats);
+ if (ret < 0)
+ goto error_unsupported;
+
+ ret = -ERANGE;
+ if (stats.f_bsize <= 0)
+ goto error_unsupported;
+
+ ret = -EOPNOTSUPP;
+ if (stats.f_bsize > PAGE_SIZE)
+ goto error_unsupported;
+
+ cache->bsize = stats.f_bsize;
+ cache->bshift = ilog2(stats.f_bsize);
+
+ _debug("blksize %u (shift %u)",
+ cache->bsize, cache->bshift);
+
+ _debug("size %llu, avail %llu",
+ (unsigned long long) stats.f_blocks,
+ (unsigned long long) stats.f_bavail);
+
+ /* set up caching limits */
+ do_div(stats.f_files, 100);
+ cache->fstop = stats.f_files * cache->fstop_percent;
+ cache->fcull = stats.f_files * cache->fcull_percent;
+ cache->frun = stats.f_files * cache->frun_percent;
+
+ _debug("limits {%llu,%llu,%llu} files",
+ (unsigned long long) cache->frun,
+ (unsigned long long) cache->fcull,
+ (unsigned long long) cache->fstop);
+
+ do_div(stats.f_blocks, 100);
+ cache->bstop = stats.f_blocks * cache->bstop_percent;
+ cache->bcull = stats.f_blocks * cache->bcull_percent;
+ cache->brun = stats.f_blocks * cache->brun_percent;
+
+ _debug("limits {%llu,%llu,%llu} blocks",
+ (unsigned long long) cache->brun,
+ (unsigned long long) cache->bcull,
+ (unsigned long long) cache->bstop);
+
+ /* get the cache directory and check its type */
+ cachedir = cachefiles_get_directory(cache, root, "cache", NULL);
+ if (IS_ERR(cachedir)) {
+ ret = PTR_ERR(cachedir);
+ goto error_unsupported;
+ }
+
+ cache->store = cachedir;
+
+ /* get the graveyard directory */
+ graveyard = cachefiles_get_directory(cache, root, "graveyard", NULL);
+ if (IS_ERR(graveyard)) {
+ ret = PTR_ERR(graveyard);
+ goto error_unsupported;
+ }
+
+ cache->graveyard = graveyard;
+ cache->cache = cache_cookie;
+
+ ret = fscache_add_cache(cache_cookie, &cachefiles_cache_ops, cache);
+ if (ret < 0)
+ goto error_add_cache;
+
+ /* done */
+ set_bit(CACHEFILES_READY, &cache->flags);
+ dput(root);
+
+ pr_info("File cache on %s registered\n", cache_cookie->name);
+
+ /* check how much space the cache has */
+ cachefiles_has_space(cache, 0, 0, cachefiles_has_space_check);
+ cachefiles_end_secure(cache, saved_cred);
+ _leave(" = 0 [%px]", cache->cache);
+ return 0;
+
+error_add_cache:
+ cachefiles_put_directory(cache->graveyard);
+ cache->graveyard = NULL;
+error_unsupported:
+ cachefiles_put_directory(cache->store);
+ cache->store = NULL;
+ mntput(cache->mnt);
+ cache->mnt = NULL;
+ dput(root);
+error_open_root:
+ cachefiles_end_secure(cache, saved_cred);
+error_getsec:
+ fscache_relinquish_cache(cache_cookie);
+ cache->cache = NULL;
+ pr_err("Failed to register: %d\n", ret);
+ return ret;
+}
+
+/*
+ * See if we have space for a number of pages and/or a number of files in the
+ * cache
+ */
+int cachefiles_has_space(struct cachefiles_cache *cache,
+ unsigned fnr, unsigned bnr,
+ enum cachefiles_has_space_for reason)
+{
+ struct kstatfs stats;
+ u64 b_avail, b_writing;
+ int ret;
+
+ struct path path = {
+ .mnt = cache->mnt,
+ .dentry = cache->mnt->mnt_root,
+ };
+
+ //_enter("{%llu,%llu,%llu,%llu,%llu,%llu},%u,%u",
+ // (unsigned long long) cache->frun,
+ // (unsigned long long) cache->fcull,
+ // (unsigned long long) cache->fstop,
+ // (unsigned long long) cache->brun,
+ // (unsigned long long) cache->bcull,
+ // (unsigned long long) cache->bstop,
+ // fnr, bnr);
+
+ /* find out how many pages of blockdev are available */
+ memset(&stats, 0, sizeof(stats));
+
+ ret = vfs_statfs(&path, &stats);
+ if (ret < 0) {
+ trace_cachefiles_vfs_error(NULL, d_inode(path.dentry), ret,
+ cachefiles_trace_statfs_error);
+ if (ret == -EIO)
+ cachefiles_io_error(cache, "statfs failed");
+ _leave(" = %d", ret);
+ return ret;
+ }
+
+ b_avail = stats.f_bavail;
+ b_writing = atomic_long_read(&cache->b_writing);
+ if (b_avail > b_writing)
+ b_avail -= b_writing;
+ else
+ b_avail = 0;
+
+ //_debug("avail %llu,%llu",
+ // (unsigned long long)stats.f_ffree,
+ // (unsigned long long)b_avail);
+
+ /* see if there is sufficient space */
+ if (stats.f_ffree > fnr)
+ stats.f_ffree -= fnr;
+ else
+ stats.f_ffree = 0;
+
+ if (b_avail > bnr)
+ b_avail -= bnr;
+ else
+ b_avail = 0;
+
+ ret = -ENOBUFS;
+ if (stats.f_ffree < cache->fstop ||
+ b_avail < cache->bstop)
+ goto stop_and_begin_cull;
+
+ ret = 0;
+ if (stats.f_ffree < cache->fcull ||
+ b_avail < cache->bcull)
+ goto begin_cull;
+
+ if (test_bit(CACHEFILES_CULLING, &cache->flags) &&
+ stats.f_ffree >= cache->frun &&
+ b_avail >= cache->brun &&
+ test_and_clear_bit(CACHEFILES_CULLING, &cache->flags)
+ ) {
+ _debug("cease culling");
+ cachefiles_state_changed(cache);
+ }
+
+ //_leave(" = 0");
+ return 0;
+
+stop_and_begin_cull:
+ switch (reason) {
+ case cachefiles_has_space_for_write:
+ fscache_count_no_write_space();
+ break;
+ case cachefiles_has_space_for_create:
+ fscache_count_no_create_space();
+ break;
+ default:
+ break;
+ }
+begin_cull:
+ if (!test_and_set_bit(CACHEFILES_CULLING, &cache->flags)) {
+ _debug("### CULL CACHE ###");
+ cachefiles_state_changed(cache);
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Mark all the objects as being out of service and queue them all for cleanup.
+ */
+static void cachefiles_withdraw_objects(struct cachefiles_cache *cache)
+{
+ struct cachefiles_object *object;
+ unsigned int count = 0;
+
+ _enter("");
+
+ spin_lock(&cache->object_list_lock);
+
+ while (!list_empty(&cache->object_list)) {
+ object = list_first_entry(&cache->object_list,
+ struct cachefiles_object, cache_link);
+ cachefiles_see_object(object, cachefiles_obj_see_withdrawal);
+ list_del_init(&object->cache_link);
+ fscache_withdraw_cookie(object->cookie);
+ count++;
+ if ((count & 63) == 0) {
+ spin_unlock(&cache->object_list_lock);
+ cond_resched();
+ spin_lock(&cache->object_list_lock);
+ }
+ }
+
+ spin_unlock(&cache->object_list_lock);
+ _leave(" [%u objs]", count);
+}
+
+/*
+ * Withdraw volumes.
+ */
+static void cachefiles_withdraw_volumes(struct cachefiles_cache *cache)
+{
+ _enter("");
+
+ for (;;) {
+ struct cachefiles_volume *volume = NULL;
+
+ spin_lock(&cache->object_list_lock);
+ if (!list_empty(&cache->volumes)) {
+ volume = list_first_entry(&cache->volumes,
+ struct cachefiles_volume, cache_link);
+ list_del_init(&volume->cache_link);
+ }
+ spin_unlock(&cache->object_list_lock);
+ if (!volume)
+ break;
+
+ cachefiles_withdraw_volume(volume);
+ }
+
+ _leave("");
+}
+
+/*
+ * Sync a cache to backing disk.
+ */
+static void cachefiles_sync_cache(struct cachefiles_cache *cache)
+{
+ const struct cred *saved_cred;
+ int ret;
+
+ _enter("%s", cache->cache->name);
+
+ /* make sure all pages pinned by operations on behalf of the netfs are
+ * written to disc */
+ cachefiles_begin_secure(cache, &saved_cred);
+ down_read(&cache->mnt->mnt_sb->s_umount);
+ ret = sync_filesystem(cache->mnt->mnt_sb);
+ up_read(&cache->mnt->mnt_sb->s_umount);
+ cachefiles_end_secure(cache, saved_cred);
+
+ if (ret == -EIO)
+ cachefiles_io_error(cache,
+ "Attempt to sync backing fs superblock returned error %d",
+ ret);
+}
+
+/*
+ * Withdraw cache objects.
+ */
+void cachefiles_withdraw_cache(struct cachefiles_cache *cache)
+{
+ struct fscache_cache *fscache = cache->cache;
+
+ pr_info("File cache on %s unregistering\n", fscache->name);
+
+ fscache_withdraw_cache(fscache);
+
+ /* we now have to destroy all the active objects pertaining to this
+ * cache - which we do by passing them off to thread pool to be
+ * disposed of */
+ cachefiles_withdraw_objects(cache);
+ fscache_wait_for_objects(fscache);
+
+ cachefiles_withdraw_volumes(cache);
+ cachefiles_sync_cache(cache);
+ cache->cache = NULL;
+ fscache_relinquish_cache(fscache);
+}
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
new file mode 100644
index 000000000..aa4efcabb
--- /dev/null
+++ b/fs/cachefiles/daemon.c
@@ -0,0 +1,814 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Daemon interface
+ *
+ * Copyright (C) 2007, 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/completion.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/namei.h>
+#include <linux/poll.h>
+#include <linux/mount.h>
+#include <linux/statfs.h>
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/fs_struct.h>
+#include "internal.h"
+
+static int cachefiles_daemon_open(struct inode *, struct file *);
+static int cachefiles_daemon_release(struct inode *, struct file *);
+static ssize_t cachefiles_daemon_read(struct file *, char __user *, size_t,
+ loff_t *);
+static ssize_t cachefiles_daemon_write(struct file *, const char __user *,
+ size_t, loff_t *);
+static __poll_t cachefiles_daemon_poll(struct file *,
+ struct poll_table_struct *);
+static int cachefiles_daemon_frun(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_fcull(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_fstop(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_brun(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_bcull(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_bstop(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_cull(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_debug(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_dir(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_inuse(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_secctx(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_tag(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_bind(struct cachefiles_cache *, char *);
+static void cachefiles_daemon_unbind(struct cachefiles_cache *);
+
+static unsigned long cachefiles_open;
+
+const struct file_operations cachefiles_daemon_fops = {
+ .owner = THIS_MODULE,
+ .open = cachefiles_daemon_open,
+ .release = cachefiles_daemon_release,
+ .read = cachefiles_daemon_read,
+ .write = cachefiles_daemon_write,
+ .poll = cachefiles_daemon_poll,
+ .llseek = noop_llseek,
+};
+
+struct cachefiles_daemon_cmd {
+ char name[8];
+ int (*handler)(struct cachefiles_cache *cache, char *args);
+};
+
+static const struct cachefiles_daemon_cmd cachefiles_daemon_cmds[] = {
+ { "bind", cachefiles_daemon_bind },
+ { "brun", cachefiles_daemon_brun },
+ { "bcull", cachefiles_daemon_bcull },
+ { "bstop", cachefiles_daemon_bstop },
+ { "cull", cachefiles_daemon_cull },
+ { "debug", cachefiles_daemon_debug },
+ { "dir", cachefiles_daemon_dir },
+ { "frun", cachefiles_daemon_frun },
+ { "fcull", cachefiles_daemon_fcull },
+ { "fstop", cachefiles_daemon_fstop },
+ { "inuse", cachefiles_daemon_inuse },
+ { "secctx", cachefiles_daemon_secctx },
+ { "tag", cachefiles_daemon_tag },
+#ifdef CONFIG_CACHEFILES_ONDEMAND
+ { "copen", cachefiles_ondemand_copen },
+#endif
+ { "", NULL }
+};
+
+
+/*
+ * Prepare a cache for caching.
+ */
+static int cachefiles_daemon_open(struct inode *inode, struct file *file)
+{
+ struct cachefiles_cache *cache;
+
+ _enter("");
+
+ /* only the superuser may do this */
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ /* the cachefiles device may only be open once at a time */
+ if (xchg(&cachefiles_open, 1) == 1)
+ return -EBUSY;
+
+ /* allocate a cache record */
+ cache = kzalloc(sizeof(struct cachefiles_cache), GFP_KERNEL);
+ if (!cache) {
+ cachefiles_open = 0;
+ return -ENOMEM;
+ }
+
+ mutex_init(&cache->daemon_mutex);
+ init_waitqueue_head(&cache->daemon_pollwq);
+ INIT_LIST_HEAD(&cache->volumes);
+ INIT_LIST_HEAD(&cache->object_list);
+ spin_lock_init(&cache->object_list_lock);
+ refcount_set(&cache->unbind_pincount, 1);
+ xa_init_flags(&cache->reqs, XA_FLAGS_ALLOC);
+ xa_init_flags(&cache->ondemand_ids, XA_FLAGS_ALLOC1);
+
+ /* set default caching limits
+ * - limit at 1% free space and/or free files
+ * - cull below 5% free space and/or free files
+ * - cease culling above 7% free space and/or free files
+ */
+ cache->frun_percent = 7;
+ cache->fcull_percent = 5;
+ cache->fstop_percent = 1;
+ cache->brun_percent = 7;
+ cache->bcull_percent = 5;
+ cache->bstop_percent = 1;
+
+ file->private_data = cache;
+ cache->cachefilesd = file;
+ return 0;
+}
+
+static void cachefiles_flush_reqs(struct cachefiles_cache *cache)
+{
+ struct xarray *xa = &cache->reqs;
+ struct cachefiles_req *req;
+ unsigned long index;
+
+ /*
+ * Make sure the following two operations won't be reordered.
+ * 1) set CACHEFILES_DEAD bit
+ * 2) flush requests in the xarray
+ * Otherwise the request may be enqueued after xarray has been
+ * flushed, leaving the orphan request never being completed.
+ *
+ * CPU 1 CPU 2
+ * ===== =====
+ * flush requests in the xarray
+ * test CACHEFILES_DEAD bit
+ * enqueue the request
+ * set CACHEFILES_DEAD bit
+ */
+ smp_mb();
+
+ xa_lock(xa);
+ xa_for_each(xa, index, req) {
+ req->error = -EIO;
+ complete(&req->done);
+ }
+ xa_unlock(xa);
+
+ xa_destroy(&cache->reqs);
+ xa_destroy(&cache->ondemand_ids);
+}
+
+void cachefiles_put_unbind_pincount(struct cachefiles_cache *cache)
+{
+ if (refcount_dec_and_test(&cache->unbind_pincount)) {
+ cachefiles_daemon_unbind(cache);
+ cachefiles_open = 0;
+ kfree(cache);
+ }
+}
+
+void cachefiles_get_unbind_pincount(struct cachefiles_cache *cache)
+{
+ refcount_inc(&cache->unbind_pincount);
+}
+
+/*
+ * Release a cache.
+ */
+static int cachefiles_daemon_release(struct inode *inode, struct file *file)
+{
+ struct cachefiles_cache *cache = file->private_data;
+
+ _enter("");
+
+ ASSERT(cache);
+
+ set_bit(CACHEFILES_DEAD, &cache->flags);
+
+ if (cachefiles_in_ondemand_mode(cache))
+ cachefiles_flush_reqs(cache);
+
+ /* clean up the control file interface */
+ cache->cachefilesd = NULL;
+ file->private_data = NULL;
+
+ cachefiles_put_unbind_pincount(cache);
+
+ _leave("");
+ return 0;
+}
+
+static ssize_t cachefiles_do_daemon_read(struct cachefiles_cache *cache,
+ char __user *_buffer, size_t buflen)
+{
+ unsigned long long b_released;
+ unsigned f_released;
+ char buffer[256];
+ int n;
+
+ /* check how much space the cache has */
+ cachefiles_has_space(cache, 0, 0, cachefiles_has_space_check);
+
+ /* summarise */
+ f_released = atomic_xchg(&cache->f_released, 0);
+ b_released = atomic_long_xchg(&cache->b_released, 0);
+ clear_bit(CACHEFILES_STATE_CHANGED, &cache->flags);
+
+ n = snprintf(buffer, sizeof(buffer),
+ "cull=%c"
+ " frun=%llx"
+ " fcull=%llx"
+ " fstop=%llx"
+ " brun=%llx"
+ " bcull=%llx"
+ " bstop=%llx"
+ " freleased=%x"
+ " breleased=%llx",
+ test_bit(CACHEFILES_CULLING, &cache->flags) ? '1' : '0',
+ (unsigned long long) cache->frun,
+ (unsigned long long) cache->fcull,
+ (unsigned long long) cache->fstop,
+ (unsigned long long) cache->brun,
+ (unsigned long long) cache->bcull,
+ (unsigned long long) cache->bstop,
+ f_released,
+ b_released);
+
+ if (n > buflen)
+ return -EMSGSIZE;
+
+ if (copy_to_user(_buffer, buffer, n) != 0)
+ return -EFAULT;
+
+ return n;
+}
+
+/*
+ * Read the cache state.
+ */
+static ssize_t cachefiles_daemon_read(struct file *file, char __user *_buffer,
+ size_t buflen, loff_t *pos)
+{
+ struct cachefiles_cache *cache = file->private_data;
+
+ //_enter(",,%zu,", buflen);
+
+ if (!test_bit(CACHEFILES_READY, &cache->flags))
+ return 0;
+
+ if (cachefiles_in_ondemand_mode(cache))
+ return cachefiles_ondemand_daemon_read(cache, _buffer, buflen);
+ else
+ return cachefiles_do_daemon_read(cache, _buffer, buflen);
+}
+
+/*
+ * Take a command from cachefilesd, parse it and act on it.
+ */
+static ssize_t cachefiles_daemon_write(struct file *file,
+ const char __user *_data,
+ size_t datalen,
+ loff_t *pos)
+{
+ const struct cachefiles_daemon_cmd *cmd;
+ struct cachefiles_cache *cache = file->private_data;
+ ssize_t ret;
+ char *data, *args, *cp;
+
+ //_enter(",,%zu,", datalen);
+
+ ASSERT(cache);
+
+ if (test_bit(CACHEFILES_DEAD, &cache->flags))
+ return -EIO;
+
+ if (datalen > PAGE_SIZE - 1)
+ return -EOPNOTSUPP;
+
+ /* drag the command string into the kernel so we can parse it */
+ data = memdup_user_nul(_data, datalen);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ ret = -EINVAL;
+ if (memchr(data, '\0', datalen))
+ goto error;
+
+ /* strip any newline */
+ cp = memchr(data, '\n', datalen);
+ if (cp) {
+ if (cp == data)
+ goto error;
+
+ *cp = '\0';
+ }
+
+ /* parse the command */
+ ret = -EOPNOTSUPP;
+
+ for (args = data; *args; args++)
+ if (isspace(*args))
+ break;
+ if (*args) {
+ if (args == data)
+ goto error;
+ *args = '\0';
+ args = skip_spaces(++args);
+ }
+
+ /* run the appropriate command handler */
+ for (cmd = cachefiles_daemon_cmds; cmd->name[0]; cmd++)
+ if (strcmp(cmd->name, data) == 0)
+ goto found_command;
+
+error:
+ kfree(data);
+ //_leave(" = %zd", ret);
+ return ret;
+
+found_command:
+ mutex_lock(&cache->daemon_mutex);
+
+ ret = -EIO;
+ if (!test_bit(CACHEFILES_DEAD, &cache->flags))
+ ret = cmd->handler(cache, args);
+
+ mutex_unlock(&cache->daemon_mutex);
+
+ if (ret == 0)
+ ret = datalen;
+ goto error;
+}
+
+/*
+ * Poll for culling state
+ * - use EPOLLOUT to indicate culling state
+ */
+static __poll_t cachefiles_daemon_poll(struct file *file,
+ struct poll_table_struct *poll)
+{
+ struct cachefiles_cache *cache = file->private_data;
+ __poll_t mask;
+
+ poll_wait(file, &cache->daemon_pollwq, poll);
+ mask = 0;
+
+ if (cachefiles_in_ondemand_mode(cache)) {
+ if (!xa_empty(&cache->reqs))
+ mask |= EPOLLIN;
+ } else {
+ if (test_bit(CACHEFILES_STATE_CHANGED, &cache->flags))
+ mask |= EPOLLIN;
+ }
+
+ if (test_bit(CACHEFILES_CULLING, &cache->flags))
+ mask |= EPOLLOUT;
+
+ return mask;
+}
+
+/*
+ * Give a range error for cache space constraints
+ * - can be tail-called
+ */
+static int cachefiles_daemon_range_error(struct cachefiles_cache *cache,
+ char *args)
+{
+ pr_err("Free space limits must be in range 0%%<=stop<cull<run<100%%\n");
+
+ return -EINVAL;
+}
+
+/*
+ * Set the percentage of files at which to stop culling
+ * - command: "frun <N>%"
+ */
+static int cachefiles_daemon_frun(struct cachefiles_cache *cache, char *args)
+{
+ unsigned long frun;
+
+ _enter(",%s", args);
+
+ if (!*args)
+ return -EINVAL;
+
+ frun = simple_strtoul(args, &args, 10);
+ if (args[0] != '%' || args[1] != '\0')
+ return -EINVAL;
+
+ if (frun <= cache->fcull_percent || frun >= 100)
+ return cachefiles_daemon_range_error(cache, args);
+
+ cache->frun_percent = frun;
+ return 0;
+}
+
+/*
+ * Set the percentage of files at which to start culling
+ * - command: "fcull <N>%"
+ */
+static int cachefiles_daemon_fcull(struct cachefiles_cache *cache, char *args)
+{
+ unsigned long fcull;
+
+ _enter(",%s", args);
+
+ if (!*args)
+ return -EINVAL;
+
+ fcull = simple_strtoul(args, &args, 10);
+ if (args[0] != '%' || args[1] != '\0')
+ return -EINVAL;
+
+ if (fcull <= cache->fstop_percent || fcull >= cache->frun_percent)
+ return cachefiles_daemon_range_error(cache, args);
+
+ cache->fcull_percent = fcull;
+ return 0;
+}
+
+/*
+ * Set the percentage of files at which to stop allocating
+ * - command: "fstop <N>%"
+ */
+static int cachefiles_daemon_fstop(struct cachefiles_cache *cache, char *args)
+{
+ unsigned long fstop;
+
+ _enter(",%s", args);
+
+ if (!*args)
+ return -EINVAL;
+
+ fstop = simple_strtoul(args, &args, 10);
+ if (args[0] != '%' || args[1] != '\0')
+ return -EINVAL;
+
+ if (fstop >= cache->fcull_percent)
+ return cachefiles_daemon_range_error(cache, args);
+
+ cache->fstop_percent = fstop;
+ return 0;
+}
+
+/*
+ * Set the percentage of blocks at which to stop culling
+ * - command: "brun <N>%"
+ */
+static int cachefiles_daemon_brun(struct cachefiles_cache *cache, char *args)
+{
+ unsigned long brun;
+
+ _enter(",%s", args);
+
+ if (!*args)
+ return -EINVAL;
+
+ brun = simple_strtoul(args, &args, 10);
+ if (args[0] != '%' || args[1] != '\0')
+ return -EINVAL;
+
+ if (brun <= cache->bcull_percent || brun >= 100)
+ return cachefiles_daemon_range_error(cache, args);
+
+ cache->brun_percent = brun;
+ return 0;
+}
+
+/*
+ * Set the percentage of blocks at which to start culling
+ * - command: "bcull <N>%"
+ */
+static int cachefiles_daemon_bcull(struct cachefiles_cache *cache, char *args)
+{
+ unsigned long bcull;
+
+ _enter(",%s", args);
+
+ if (!*args)
+ return -EINVAL;
+
+ bcull = simple_strtoul(args, &args, 10);
+ if (args[0] != '%' || args[1] != '\0')
+ return -EINVAL;
+
+ if (bcull <= cache->bstop_percent || bcull >= cache->brun_percent)
+ return cachefiles_daemon_range_error(cache, args);
+
+ cache->bcull_percent = bcull;
+ return 0;
+}
+
+/*
+ * Set the percentage of blocks at which to stop allocating
+ * - command: "bstop <N>%"
+ */
+static int cachefiles_daemon_bstop(struct cachefiles_cache *cache, char *args)
+{
+ unsigned long bstop;
+
+ _enter(",%s", args);
+
+ if (!*args)
+ return -EINVAL;
+
+ bstop = simple_strtoul(args, &args, 10);
+ if (args[0] != '%' || args[1] != '\0')
+ return -EINVAL;
+
+ if (bstop >= cache->bcull_percent)
+ return cachefiles_daemon_range_error(cache, args);
+
+ cache->bstop_percent = bstop;
+ return 0;
+}
+
+/*
+ * Set the cache directory
+ * - command: "dir <name>"
+ */
+static int cachefiles_daemon_dir(struct cachefiles_cache *cache, char *args)
+{
+ char *dir;
+
+ _enter(",%s", args);
+
+ if (!*args) {
+ pr_err("Empty directory specified\n");
+ return -EINVAL;
+ }
+
+ if (cache->rootdirname) {
+ pr_err("Second cache directory specified\n");
+ return -EEXIST;
+ }
+
+ dir = kstrdup(args, GFP_KERNEL);
+ if (!dir)
+ return -ENOMEM;
+
+ cache->rootdirname = dir;
+ return 0;
+}
+
+/*
+ * Set the cache security context
+ * - command: "secctx <ctx>"
+ */
+static int cachefiles_daemon_secctx(struct cachefiles_cache *cache, char *args)
+{
+ char *secctx;
+
+ _enter(",%s", args);
+
+ if (!*args) {
+ pr_err("Empty security context specified\n");
+ return -EINVAL;
+ }
+
+ if (cache->secctx) {
+ pr_err("Second security context specified\n");
+ return -EINVAL;
+ }
+
+ secctx = kstrdup(args, GFP_KERNEL);
+ if (!secctx)
+ return -ENOMEM;
+
+ cache->secctx = secctx;
+ return 0;
+}
+
+/*
+ * Set the cache tag
+ * - command: "tag <name>"
+ */
+static int cachefiles_daemon_tag(struct cachefiles_cache *cache, char *args)
+{
+ char *tag;
+
+ _enter(",%s", args);
+
+ if (!*args) {
+ pr_err("Empty tag specified\n");
+ return -EINVAL;
+ }
+
+ if (cache->tag)
+ return -EEXIST;
+
+ tag = kstrdup(args, GFP_KERNEL);
+ if (!tag)
+ return -ENOMEM;
+
+ cache->tag = tag;
+ return 0;
+}
+
+/*
+ * Request a node in the cache be culled from the current working directory
+ * - command: "cull <name>"
+ */
+static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args)
+{
+ struct path path;
+ const struct cred *saved_cred;
+ int ret;
+
+ _enter(",%s", args);
+
+ if (strchr(args, '/'))
+ goto inval;
+
+ if (!test_bit(CACHEFILES_READY, &cache->flags)) {
+ pr_err("cull applied to unready cache\n");
+ return -EIO;
+ }
+
+ if (test_bit(CACHEFILES_DEAD, &cache->flags)) {
+ pr_err("cull applied to dead cache\n");
+ return -EIO;
+ }
+
+ get_fs_pwd(current->fs, &path);
+
+ if (!d_can_lookup(path.dentry))
+ goto notdir;
+
+ cachefiles_begin_secure(cache, &saved_cred);
+ ret = cachefiles_cull(cache, path.dentry, args);
+ cachefiles_end_secure(cache, saved_cred);
+
+ path_put(&path);
+ _leave(" = %d", ret);
+ return ret;
+
+notdir:
+ path_put(&path);
+ pr_err("cull command requires dirfd to be a directory\n");
+ return -ENOTDIR;
+
+inval:
+ pr_err("cull command requires dirfd and filename\n");
+ return -EINVAL;
+}
+
+/*
+ * Set debugging mode
+ * - command: "debug <mask>"
+ */
+static int cachefiles_daemon_debug(struct cachefiles_cache *cache, char *args)
+{
+ unsigned long mask;
+
+ _enter(",%s", args);
+
+ mask = simple_strtoul(args, &args, 0);
+ if (args[0] != '\0')
+ goto inval;
+
+ cachefiles_debug = mask;
+ _leave(" = 0");
+ return 0;
+
+inval:
+ pr_err("debug command requires mask\n");
+ return -EINVAL;
+}
+
+/*
+ * Find out whether an object in the current working directory is in use or not
+ * - command: "inuse <name>"
+ */
+static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args)
+{
+ struct path path;
+ const struct cred *saved_cred;
+ int ret;
+
+ //_enter(",%s", args);
+
+ if (strchr(args, '/'))
+ goto inval;
+
+ if (!test_bit(CACHEFILES_READY, &cache->flags)) {
+ pr_err("inuse applied to unready cache\n");
+ return -EIO;
+ }
+
+ if (test_bit(CACHEFILES_DEAD, &cache->flags)) {
+ pr_err("inuse applied to dead cache\n");
+ return -EIO;
+ }
+
+ get_fs_pwd(current->fs, &path);
+
+ if (!d_can_lookup(path.dentry))
+ goto notdir;
+
+ cachefiles_begin_secure(cache, &saved_cred);
+ ret = cachefiles_check_in_use(cache, path.dentry, args);
+ cachefiles_end_secure(cache, saved_cred);
+
+ path_put(&path);
+ //_leave(" = %d", ret);
+ return ret;
+
+notdir:
+ path_put(&path);
+ pr_err("inuse command requires dirfd to be a directory\n");
+ return -ENOTDIR;
+
+inval:
+ pr_err("inuse command requires dirfd and filename\n");
+ return -EINVAL;
+}
+
+/*
+ * Bind a directory as a cache
+ */
+static int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args)
+{
+ _enter("{%u,%u,%u,%u,%u,%u},%s",
+ cache->frun_percent,
+ cache->fcull_percent,
+ cache->fstop_percent,
+ cache->brun_percent,
+ cache->bcull_percent,
+ cache->bstop_percent,
+ args);
+
+ if (cache->fstop_percent >= cache->fcull_percent ||
+ cache->fcull_percent >= cache->frun_percent ||
+ cache->frun_percent >= 100)
+ return -ERANGE;
+
+ if (cache->bstop_percent >= cache->bcull_percent ||
+ cache->bcull_percent >= cache->brun_percent ||
+ cache->brun_percent >= 100)
+ return -ERANGE;
+
+ if (!cache->rootdirname) {
+ pr_err("No cache directory specified\n");
+ return -EINVAL;
+ }
+
+ /* Don't permit already bound caches to be re-bound */
+ if (test_bit(CACHEFILES_READY, &cache->flags)) {
+ pr_err("Cache already bound\n");
+ return -EBUSY;
+ }
+
+ if (IS_ENABLED(CONFIG_CACHEFILES_ONDEMAND)) {
+ if (!strcmp(args, "ondemand")) {
+ set_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags);
+ } else if (*args) {
+ pr_err("Invalid argument to the 'bind' command\n");
+ return -EINVAL;
+ }
+ } else if (*args) {
+ pr_err("'bind' command doesn't take an argument\n");
+ return -EINVAL;
+ }
+
+ /* Make sure we have copies of the tag string */
+ if (!cache->tag) {
+ /*
+ * The tag string is released by the fops->release()
+ * function, so we don't release it on error here
+ */
+ cache->tag = kstrdup("CacheFiles", GFP_KERNEL);
+ if (!cache->tag)
+ return -ENOMEM;
+ }
+
+ return cachefiles_add_cache(cache);
+}
+
+/*
+ * Unbind a cache.
+ */
+static void cachefiles_daemon_unbind(struct cachefiles_cache *cache)
+{
+ _enter("");
+
+ if (test_bit(CACHEFILES_READY, &cache->flags))
+ cachefiles_withdraw_cache(cache);
+
+ cachefiles_put_directory(cache->graveyard);
+ cachefiles_put_directory(cache->store);
+ mntput(cache->mnt);
+
+ kfree(cache->rootdirname);
+ kfree(cache->secctx);
+ kfree(cache->tag);
+
+ _leave("");
+}
diff --git a/fs/cachefiles/error_inject.c b/fs/cachefiles/error_inject.c
new file mode 100644
index 000000000..58f8aec96
--- /dev/null
+++ b/fs/cachefiles/error_inject.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Error injection handling.
+ *
+ * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/sysctl.h>
+#include "internal.h"
+
+unsigned int cachefiles_error_injection_state;
+
+static struct ctl_table_header *cachefiles_sysctl;
+static struct ctl_table cachefiles_sysctls[] = {
+ {
+ .procname = "error_injection",
+ .data = &cachefiles_error_injection_state,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_douintvec,
+ },
+ {}
+};
+
+static struct ctl_table cachefiles_sysctls_root[] = {
+ {
+ .procname = "cachefiles",
+ .mode = 0555,
+ .child = cachefiles_sysctls,
+ },
+ {}
+};
+
+int __init cachefiles_register_error_injection(void)
+{
+ cachefiles_sysctl = register_sysctl_table(cachefiles_sysctls_root);
+ if (!cachefiles_sysctl)
+ return -ENOMEM;
+ return 0;
+
+}
+
+void cachefiles_unregister_error_injection(void)
+{
+ unregister_sysctl_table(cachefiles_sysctl);
+}
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
new file mode 100644
index 000000000..a69073a1d
--- /dev/null
+++ b/fs/cachefiles/interface.c
@@ -0,0 +1,447 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* FS-Cache interface to CacheFiles
+ *
+ * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/slab.h>
+#include <linux/mount.h>
+#include <linux/xattr.h>
+#include <linux/file.h>
+#include <linux/falloc.h>
+#include <trace/events/fscache.h>
+#include "internal.h"
+
+static atomic_t cachefiles_object_debug_id;
+
+/*
+ * Allocate a cache object record.
+ */
+static
+struct cachefiles_object *cachefiles_alloc_object(struct fscache_cookie *cookie)
+{
+ struct fscache_volume *vcookie = cookie->volume;
+ struct cachefiles_volume *volume = vcookie->cache_priv;
+ struct cachefiles_object *object;
+
+ _enter("{%s},%x,", vcookie->key, cookie->debug_id);
+
+ object = kmem_cache_zalloc(cachefiles_object_jar, GFP_KERNEL);
+ if (!object)
+ return NULL;
+
+ refcount_set(&object->ref, 1);
+
+ spin_lock_init(&object->lock);
+ INIT_LIST_HEAD(&object->cache_link);
+ object->volume = volume;
+ object->debug_id = atomic_inc_return(&cachefiles_object_debug_id);
+ object->cookie = fscache_get_cookie(cookie, fscache_cookie_get_attach_object);
+
+ fscache_count_object(vcookie->cache);
+ trace_cachefiles_ref(object->debug_id, cookie->debug_id, 1,
+ cachefiles_obj_new);
+ return object;
+}
+
+/*
+ * Note that an object has been seen.
+ */
+void cachefiles_see_object(struct cachefiles_object *object,
+ enum cachefiles_obj_ref_trace why)
+{
+ trace_cachefiles_ref(object->debug_id, object->cookie->debug_id,
+ refcount_read(&object->ref), why);
+}
+
+/*
+ * Increment the usage count on an object;
+ */
+struct cachefiles_object *cachefiles_grab_object(struct cachefiles_object *object,
+ enum cachefiles_obj_ref_trace why)
+{
+ int r;
+
+ __refcount_inc(&object->ref, &r);
+ trace_cachefiles_ref(object->debug_id, object->cookie->debug_id, r, why);
+ return object;
+}
+
+/*
+ * dispose of a reference to an object
+ */
+void cachefiles_put_object(struct cachefiles_object *object,
+ enum cachefiles_obj_ref_trace why)
+{
+ unsigned int object_debug_id = object->debug_id;
+ unsigned int cookie_debug_id = object->cookie->debug_id;
+ struct fscache_cache *cache;
+ bool done;
+ int r;
+
+ done = __refcount_dec_and_test(&object->ref, &r);
+ trace_cachefiles_ref(object_debug_id, cookie_debug_id, r, why);
+ if (done) {
+ _debug("- kill object OBJ%x", object_debug_id);
+
+ ASSERTCMP(object->file, ==, NULL);
+
+ kfree(object->d_name);
+
+ cache = object->volume->cache->cache;
+ fscache_put_cookie(object->cookie, fscache_cookie_put_object);
+ object->cookie = NULL;
+ kmem_cache_free(cachefiles_object_jar, object);
+ fscache_uncount_object(cache);
+ }
+
+ _leave("");
+}
+
+/*
+ * Adjust the size of a cache file if necessary to match the DIO size. We keep
+ * the EOF marker a multiple of DIO blocks so that we don't fall back to doing
+ * non-DIO for a partial block straddling the EOF, but we also have to be
+ * careful of someone expanding the file and accidentally accreting the
+ * padding.
+ */
+static int cachefiles_adjust_size(struct cachefiles_object *object)
+{
+ struct iattr newattrs;
+ struct file *file = object->file;
+ uint64_t ni_size;
+ loff_t oi_size;
+ int ret;
+
+ ni_size = object->cookie->object_size;
+ ni_size = round_up(ni_size, CACHEFILES_DIO_BLOCK_SIZE);
+
+ _enter("{OBJ%x},[%llu]",
+ object->debug_id, (unsigned long long) ni_size);
+
+ if (!file)
+ return -ENOBUFS;
+
+ oi_size = i_size_read(file_inode(file));
+ if (oi_size == ni_size)
+ return 0;
+
+ inode_lock(file_inode(file));
+
+ /* if there's an extension to a partial page at the end of the backing
+ * file, we need to discard the partial page so that we pick up new
+ * data after it */
+ if (oi_size & ~PAGE_MASK && ni_size > oi_size) {
+ _debug("discard tail %llx", oi_size);
+ newattrs.ia_valid = ATTR_SIZE;
+ newattrs.ia_size = oi_size & PAGE_MASK;
+ ret = cachefiles_inject_remove_error();
+ if (ret == 0)
+ ret = notify_change(&init_user_ns, file->f_path.dentry,
+ &newattrs, NULL);
+ if (ret < 0)
+ goto truncate_failed;
+ }
+
+ newattrs.ia_valid = ATTR_SIZE;
+ newattrs.ia_size = ni_size;
+ ret = cachefiles_inject_write_error();
+ if (ret == 0)
+ ret = notify_change(&init_user_ns, file->f_path.dentry,
+ &newattrs, NULL);
+
+truncate_failed:
+ inode_unlock(file_inode(file));
+
+ if (ret < 0)
+ trace_cachefiles_io_error(NULL, file_inode(file), ret,
+ cachefiles_trace_notify_change_error);
+ if (ret == -EIO) {
+ cachefiles_io_error_obj(object, "Size set failed");
+ ret = -ENOBUFS;
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Attempt to look up the nominated node in this cache
+ */
+static bool cachefiles_lookup_cookie(struct fscache_cookie *cookie)
+{
+ struct cachefiles_object *object;
+ struct cachefiles_cache *cache = cookie->volume->cache->cache_priv;
+ const struct cred *saved_cred;
+ bool success;
+
+ object = cachefiles_alloc_object(cookie);
+ if (!object)
+ goto fail;
+
+ _enter("{OBJ%x}", object->debug_id);
+
+ if (!cachefiles_cook_key(object))
+ goto fail_put;
+
+ cookie->cache_priv = object;
+
+ cachefiles_begin_secure(cache, &saved_cred);
+
+ success = cachefiles_look_up_object(object);
+ if (!success)
+ goto fail_withdraw;
+
+ cachefiles_see_object(object, cachefiles_obj_see_lookup_cookie);
+
+ spin_lock(&cache->object_list_lock);
+ list_add(&object->cache_link, &cache->object_list);
+ spin_unlock(&cache->object_list_lock);
+ cachefiles_adjust_size(object);
+
+ cachefiles_end_secure(cache, saved_cred);
+ _leave(" = t");
+ return true;
+
+fail_withdraw:
+ cachefiles_end_secure(cache, saved_cred);
+ cachefiles_see_object(object, cachefiles_obj_see_lookup_failed);
+ fscache_caching_failed(cookie);
+ _debug("failed c=%08x o=%08x", cookie->debug_id, object->debug_id);
+ /* The caller holds an access count on the cookie, so we need them to
+ * drop it before we can withdraw the object.
+ */
+ return false;
+
+fail_put:
+ cachefiles_put_object(object, cachefiles_obj_put_alloc_fail);
+fail:
+ return false;
+}
+
+/*
+ * Shorten the backing object to discard any dirty data and free up
+ * any unused granules.
+ */
+static bool cachefiles_shorten_object(struct cachefiles_object *object,
+ struct file *file, loff_t new_size)
+{
+ struct cachefiles_cache *cache = object->volume->cache;
+ struct inode *inode = file_inode(file);
+ loff_t i_size, dio_size;
+ int ret;
+
+ dio_size = round_up(new_size, CACHEFILES_DIO_BLOCK_SIZE);
+ i_size = i_size_read(inode);
+
+ trace_cachefiles_trunc(object, inode, i_size, dio_size,
+ cachefiles_trunc_shrink);
+ ret = cachefiles_inject_remove_error();
+ if (ret == 0)
+ ret = vfs_truncate(&file->f_path, dio_size);
+ if (ret < 0) {
+ trace_cachefiles_io_error(object, file_inode(file), ret,
+ cachefiles_trace_trunc_error);
+ cachefiles_io_error_obj(object, "Trunc-to-size failed %d", ret);
+ cachefiles_remove_object_xattr(cache, object, file->f_path.dentry);
+ return false;
+ }
+
+ if (new_size < dio_size) {
+ trace_cachefiles_trunc(object, inode, dio_size, new_size,
+ cachefiles_trunc_dio_adjust);
+ ret = cachefiles_inject_write_error();
+ if (ret == 0)
+ ret = vfs_fallocate(file, FALLOC_FL_ZERO_RANGE,
+ new_size, dio_size - new_size);
+ if (ret < 0) {
+ trace_cachefiles_io_error(object, file_inode(file), ret,
+ cachefiles_trace_fallocate_error);
+ cachefiles_io_error_obj(object, "Trunc-to-dio-size failed %d", ret);
+ cachefiles_remove_object_xattr(cache, object, file->f_path.dentry);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/*
+ * Resize the backing object.
+ */
+static void cachefiles_resize_cookie(struct netfs_cache_resources *cres,
+ loff_t new_size)
+{
+ struct cachefiles_object *object = cachefiles_cres_object(cres);
+ struct cachefiles_cache *cache = object->volume->cache;
+ struct fscache_cookie *cookie = object->cookie;
+ const struct cred *saved_cred;
+ struct file *file = cachefiles_cres_file(cres);
+ loff_t old_size = cookie->object_size;
+
+ _enter("%llu->%llu", old_size, new_size);
+
+ if (new_size < old_size) {
+ cachefiles_begin_secure(cache, &saved_cred);
+ cachefiles_shorten_object(object, file, new_size);
+ cachefiles_end_secure(cache, saved_cred);
+ object->cookie->object_size = new_size;
+ return;
+ }
+
+ /* The file is being expanded. We don't need to do anything
+ * particularly. cookie->initial_size doesn't change and so the point
+ * at which we have to download before doesn't change.
+ */
+ cookie->object_size = new_size;
+}
+
+/*
+ * Commit changes to the object as we drop it.
+ */
+static void cachefiles_commit_object(struct cachefiles_object *object,
+ struct cachefiles_cache *cache)
+{
+ bool update = false;
+
+ if (test_and_clear_bit(FSCACHE_COOKIE_LOCAL_WRITE, &object->cookie->flags))
+ update = true;
+ if (test_and_clear_bit(FSCACHE_COOKIE_NEEDS_UPDATE, &object->cookie->flags))
+ update = true;
+ if (update)
+ cachefiles_set_object_xattr(object);
+
+ if (test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags))
+ cachefiles_commit_tmpfile(cache, object);
+}
+
+/*
+ * Finalise and object and close the VFS structs that we have.
+ */
+static void cachefiles_clean_up_object(struct cachefiles_object *object,
+ struct cachefiles_cache *cache)
+{
+ if (test_bit(FSCACHE_COOKIE_RETIRED, &object->cookie->flags)) {
+ if (!test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags)) {
+ cachefiles_see_object(object, cachefiles_obj_see_clean_delete);
+ _debug("- inval object OBJ%x", object->debug_id);
+ cachefiles_delete_object(object, FSCACHE_OBJECT_WAS_RETIRED);
+ } else {
+ cachefiles_see_object(object, cachefiles_obj_see_clean_drop_tmp);
+ _debug("- inval object OBJ%x tmpfile", object->debug_id);
+ }
+ } else {
+ cachefiles_see_object(object, cachefiles_obj_see_clean_commit);
+ cachefiles_commit_object(object, cache);
+ }
+
+ cachefiles_unmark_inode_in_use(object, object->file);
+ if (object->file) {
+ fput(object->file);
+ object->file = NULL;
+ }
+}
+
+/*
+ * Withdraw caching for a cookie.
+ */
+static void cachefiles_withdraw_cookie(struct fscache_cookie *cookie)
+{
+ struct cachefiles_object *object = cookie->cache_priv;
+ struct cachefiles_cache *cache = object->volume->cache;
+ const struct cred *saved_cred;
+
+ _enter("o=%x", object->debug_id);
+ cachefiles_see_object(object, cachefiles_obj_see_withdraw_cookie);
+
+ if (!list_empty(&object->cache_link)) {
+ spin_lock(&cache->object_list_lock);
+ cachefiles_see_object(object, cachefiles_obj_see_withdrawal);
+ list_del_init(&object->cache_link);
+ spin_unlock(&cache->object_list_lock);
+ }
+
+ cachefiles_ondemand_clean_object(object);
+
+ if (object->file) {
+ cachefiles_begin_secure(cache, &saved_cred);
+ cachefiles_clean_up_object(object, cache);
+ cachefiles_end_secure(cache, saved_cred);
+ }
+
+ cookie->cache_priv = NULL;
+ cachefiles_put_object(object, cachefiles_obj_put_detach);
+}
+
+/*
+ * Invalidate the storage associated with a cookie.
+ */
+static bool cachefiles_invalidate_cookie(struct fscache_cookie *cookie)
+{
+ struct cachefiles_object *object = cookie->cache_priv;
+ struct file *new_file, *old_file;
+ bool old_tmpfile;
+
+ _enter("o=%x,[%llu]", object->debug_id, object->cookie->object_size);
+
+ old_tmpfile = test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags);
+
+ if (!object->file) {
+ fscache_resume_after_invalidation(cookie);
+ _leave(" = t [light]");
+ return true;
+ }
+
+ new_file = cachefiles_create_tmpfile(object);
+ if (IS_ERR(new_file))
+ goto failed;
+
+ /* Substitute the VFS target */
+ _debug("sub");
+ spin_lock(&object->lock);
+
+ old_file = object->file;
+ object->file = new_file;
+ object->content_info = CACHEFILES_CONTENT_NO_DATA;
+ set_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags);
+ set_bit(FSCACHE_COOKIE_NEEDS_UPDATE, &object->cookie->flags);
+
+ spin_unlock(&object->lock);
+ _debug("subbed");
+
+ /* Allow I/O to take place again */
+ fscache_resume_after_invalidation(cookie);
+
+ if (old_file) {
+ if (!old_tmpfile) {
+ struct cachefiles_volume *volume = object->volume;
+ struct dentry *fan = volume->fanout[(u8)cookie->key_hash];
+
+ inode_lock_nested(d_inode(fan), I_MUTEX_PARENT);
+ cachefiles_bury_object(volume->cache, object, fan,
+ old_file->f_path.dentry,
+ FSCACHE_OBJECT_INVALIDATED);
+ }
+ fput(old_file);
+ }
+
+ _leave(" = t");
+ return true;
+
+failed:
+ _leave(" = f");
+ return false;
+}
+
+const struct fscache_cache_ops cachefiles_cache_ops = {
+ .name = "cachefiles",
+ .acquire_volume = cachefiles_acquire_volume,
+ .free_volume = cachefiles_free_volume,
+ .lookup_cookie = cachefiles_lookup_cookie,
+ .withdraw_cookie = cachefiles_withdraw_cookie,
+ .invalidate_cookie = cachefiles_invalidate_cookie,
+ .begin_operation = cachefiles_begin_operation,
+ .resize_cookie = cachefiles_resize_cookie,
+ .prepare_to_write = cachefiles_prepare_to_write,
+};
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
new file mode 100644
index 000000000..2ad58c465
--- /dev/null
+++ b/fs/cachefiles/internal.h
@@ -0,0 +1,477 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* General netfs cache on cache files internal defs
+ *
+ * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) "CacheFiles: " fmt
+
+
+#include <linux/fscache-cache.h>
+#include <linux/cred.h>
+#include <linux/security.h>
+#include <linux/xarray.h>
+#include <linux/cachefiles.h>
+
+#define CACHEFILES_DIO_BLOCK_SIZE 4096
+
+struct cachefiles_cache;
+struct cachefiles_object;
+
+enum cachefiles_content {
+ /* These values are saved on disk */
+ CACHEFILES_CONTENT_NO_DATA = 0, /* No content stored */
+ CACHEFILES_CONTENT_SINGLE = 1, /* Content is monolithic, all is present */
+ CACHEFILES_CONTENT_ALL = 2, /* Content is all present, no map */
+ CACHEFILES_CONTENT_BACKFS_MAP = 3, /* Content is piecemeal, mapped through backing fs */
+ CACHEFILES_CONTENT_DIRTY = 4, /* Content is dirty (only seen on disk) */
+ nr__cachefiles_content
+};
+
+/*
+ * Cached volume representation.
+ */
+struct cachefiles_volume {
+ struct cachefiles_cache *cache;
+ struct list_head cache_link; /* Link in cache->volumes */
+ struct fscache_volume *vcookie; /* The netfs's representation */
+ struct dentry *dentry; /* The volume dentry */
+ struct dentry *fanout[256]; /* Fanout subdirs */
+};
+
+/*
+ * Backing file state.
+ */
+struct cachefiles_object {
+ struct fscache_cookie *cookie; /* Netfs data storage object cookie */
+ struct cachefiles_volume *volume; /* Cache volume that holds this object */
+ struct list_head cache_link; /* Link in cache->*_list */
+ struct file *file; /* The file representing this object */
+ char *d_name; /* Backing file name */
+ int debug_id;
+ spinlock_t lock;
+ refcount_t ref;
+ u8 d_name_len; /* Length of filename */
+ enum cachefiles_content content_info:8; /* Info about content presence */
+ unsigned long flags;
+#define CACHEFILES_OBJECT_USING_TMPFILE 0 /* Have an unlinked tmpfile */
+#ifdef CONFIG_CACHEFILES_ONDEMAND
+ int ondemand_id;
+#endif
+};
+
+#define CACHEFILES_ONDEMAND_ID_CLOSED -1
+
+/*
+ * Cache files cache definition
+ */
+struct cachefiles_cache {
+ struct fscache_cache *cache; /* Cache cookie */
+ struct vfsmount *mnt; /* mountpoint holding the cache */
+ struct dentry *store; /* Directory into which live objects go */
+ struct dentry *graveyard; /* directory into which dead objects go */
+ struct file *cachefilesd; /* manager daemon handle */
+ struct list_head volumes; /* List of volume objects */
+ struct list_head object_list; /* List of active objects */
+ spinlock_t object_list_lock; /* Lock for volumes and object_list */
+ const struct cred *cache_cred; /* security override for accessing cache */
+ struct mutex daemon_mutex; /* command serialisation mutex */
+ wait_queue_head_t daemon_pollwq; /* poll waitqueue for daemon */
+ atomic_t gravecounter; /* graveyard uniquifier */
+ atomic_t f_released; /* number of objects released lately */
+ atomic_long_t b_released; /* number of blocks released lately */
+ atomic_long_t b_writing; /* Number of blocks being written */
+ unsigned frun_percent; /* when to stop culling (% files) */
+ unsigned fcull_percent; /* when to start culling (% files) */
+ unsigned fstop_percent; /* when to stop allocating (% files) */
+ unsigned brun_percent; /* when to stop culling (% blocks) */
+ unsigned bcull_percent; /* when to start culling (% blocks) */
+ unsigned bstop_percent; /* when to stop allocating (% blocks) */
+ unsigned bsize; /* cache's block size */
+ unsigned bshift; /* ilog2(bsize) */
+ uint64_t frun; /* when to stop culling */
+ uint64_t fcull; /* when to start culling */
+ uint64_t fstop; /* when to stop allocating */
+ sector_t brun; /* when to stop culling */
+ sector_t bcull; /* when to start culling */
+ sector_t bstop; /* when to stop allocating */
+ unsigned long flags;
+#define CACHEFILES_READY 0 /* T if cache prepared */
+#define CACHEFILES_DEAD 1 /* T if cache dead */
+#define CACHEFILES_CULLING 2 /* T if cull engaged */
+#define CACHEFILES_STATE_CHANGED 3 /* T if state changed (poll trigger) */
+#define CACHEFILES_ONDEMAND_MODE 4 /* T if in on-demand read mode */
+ char *rootdirname; /* name of cache root directory */
+ char *secctx; /* LSM security context */
+ char *tag; /* cache binding tag */
+ refcount_t unbind_pincount;/* refcount to do daemon unbind */
+ struct xarray reqs; /* xarray of pending on-demand requests */
+ unsigned long req_id_next;
+ struct xarray ondemand_ids; /* xarray for ondemand_id allocation */
+ u32 ondemand_id_next;
+};
+
+static inline bool cachefiles_in_ondemand_mode(struct cachefiles_cache *cache)
+{
+ return IS_ENABLED(CONFIG_CACHEFILES_ONDEMAND) &&
+ test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags);
+}
+
+struct cachefiles_req {
+ struct cachefiles_object *object;
+ struct completion done;
+ int error;
+ struct cachefiles_msg msg;
+};
+
+#define CACHEFILES_REQ_NEW XA_MARK_1
+
+#include <trace/events/cachefiles.h>
+
+static inline
+struct file *cachefiles_cres_file(struct netfs_cache_resources *cres)
+{
+ return cres->cache_priv2;
+}
+
+static inline
+struct cachefiles_object *cachefiles_cres_object(struct netfs_cache_resources *cres)
+{
+ return fscache_cres_cookie(cres)->cache_priv;
+}
+
+/*
+ * note change of state for daemon
+ */
+static inline void cachefiles_state_changed(struct cachefiles_cache *cache)
+{
+ set_bit(CACHEFILES_STATE_CHANGED, &cache->flags);
+ wake_up_all(&cache->daemon_pollwq);
+}
+
+/*
+ * cache.c
+ */
+extern int cachefiles_add_cache(struct cachefiles_cache *cache);
+extern void cachefiles_withdraw_cache(struct cachefiles_cache *cache);
+
+enum cachefiles_has_space_for {
+ cachefiles_has_space_check,
+ cachefiles_has_space_for_write,
+ cachefiles_has_space_for_create,
+};
+extern int cachefiles_has_space(struct cachefiles_cache *cache,
+ unsigned fnr, unsigned bnr,
+ enum cachefiles_has_space_for reason);
+
+/*
+ * daemon.c
+ */
+extern const struct file_operations cachefiles_daemon_fops;
+extern void cachefiles_get_unbind_pincount(struct cachefiles_cache *cache);
+extern void cachefiles_put_unbind_pincount(struct cachefiles_cache *cache);
+
+/*
+ * error_inject.c
+ */
+#ifdef CONFIG_CACHEFILES_ERROR_INJECTION
+extern unsigned int cachefiles_error_injection_state;
+extern int cachefiles_register_error_injection(void);
+extern void cachefiles_unregister_error_injection(void);
+
+#else
+#define cachefiles_error_injection_state 0
+
+static inline int cachefiles_register_error_injection(void)
+{
+ return 0;
+}
+
+static inline void cachefiles_unregister_error_injection(void)
+{
+}
+#endif
+
+
+static inline int cachefiles_inject_read_error(void)
+{
+ return cachefiles_error_injection_state & 2 ? -EIO : 0;
+}
+
+static inline int cachefiles_inject_write_error(void)
+{
+ return cachefiles_error_injection_state & 2 ? -EIO :
+ cachefiles_error_injection_state & 1 ? -ENOSPC :
+ 0;
+}
+
+static inline int cachefiles_inject_remove_error(void)
+{
+ return cachefiles_error_injection_state & 2 ? -EIO : 0;
+}
+
+/*
+ * interface.c
+ */
+extern const struct fscache_cache_ops cachefiles_cache_ops;
+extern void cachefiles_see_object(struct cachefiles_object *object,
+ enum cachefiles_obj_ref_trace why);
+extern struct cachefiles_object *cachefiles_grab_object(struct cachefiles_object *object,
+ enum cachefiles_obj_ref_trace why);
+extern void cachefiles_put_object(struct cachefiles_object *object,
+ enum cachefiles_obj_ref_trace why);
+
+/*
+ * io.c
+ */
+extern bool cachefiles_begin_operation(struct netfs_cache_resources *cres,
+ enum fscache_want_state want_state);
+extern int __cachefiles_prepare_write(struct cachefiles_object *object,
+ struct file *file,
+ loff_t *_start, size_t *_len,
+ bool no_space_allocated_yet);
+extern int __cachefiles_write(struct cachefiles_object *object,
+ struct file *file,
+ loff_t start_pos,
+ struct iov_iter *iter,
+ netfs_io_terminated_t term_func,
+ void *term_func_priv);
+
+/*
+ * key.c
+ */
+extern bool cachefiles_cook_key(struct cachefiles_object *object);
+
+/*
+ * main.c
+ */
+extern struct kmem_cache *cachefiles_object_jar;
+
+/*
+ * namei.c
+ */
+extern void cachefiles_unmark_inode_in_use(struct cachefiles_object *object,
+ struct file *file);
+extern int cachefiles_bury_object(struct cachefiles_cache *cache,
+ struct cachefiles_object *object,
+ struct dentry *dir,
+ struct dentry *rep,
+ enum fscache_why_object_killed why);
+extern int cachefiles_delete_object(struct cachefiles_object *object,
+ enum fscache_why_object_killed why);
+extern bool cachefiles_look_up_object(struct cachefiles_object *object);
+extern struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
+ struct dentry *dir,
+ const char *name,
+ bool *_is_new);
+extern void cachefiles_put_directory(struct dentry *dir);
+
+extern int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
+ char *filename);
+
+extern int cachefiles_check_in_use(struct cachefiles_cache *cache,
+ struct dentry *dir, char *filename);
+extern struct file *cachefiles_create_tmpfile(struct cachefiles_object *object);
+extern bool cachefiles_commit_tmpfile(struct cachefiles_cache *cache,
+ struct cachefiles_object *object);
+
+/*
+ * ondemand.c
+ */
+#ifdef CONFIG_CACHEFILES_ONDEMAND
+extern ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache,
+ char __user *_buffer, size_t buflen);
+
+extern int cachefiles_ondemand_copen(struct cachefiles_cache *cache,
+ char *args);
+
+extern int cachefiles_ondemand_init_object(struct cachefiles_object *object);
+extern void cachefiles_ondemand_clean_object(struct cachefiles_object *object);
+
+extern int cachefiles_ondemand_read(struct cachefiles_object *object,
+ loff_t pos, size_t len);
+
+#else
+static inline ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache,
+ char __user *_buffer, size_t buflen)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int cachefiles_ondemand_init_object(struct cachefiles_object *object)
+{
+ return 0;
+}
+
+static inline void cachefiles_ondemand_clean_object(struct cachefiles_object *object)
+{
+}
+
+static inline int cachefiles_ondemand_read(struct cachefiles_object *object,
+ loff_t pos, size_t len)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
+/*
+ * security.c
+ */
+extern int cachefiles_get_security_ID(struct cachefiles_cache *cache);
+extern int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
+ struct dentry *root,
+ const struct cred **_saved_cred);
+
+static inline void cachefiles_begin_secure(struct cachefiles_cache *cache,
+ const struct cred **_saved_cred)
+{
+ *_saved_cred = override_creds(cache->cache_cred);
+}
+
+static inline void cachefiles_end_secure(struct cachefiles_cache *cache,
+ const struct cred *saved_cred)
+{
+ revert_creds(saved_cred);
+}
+
+/*
+ * volume.c
+ */
+void cachefiles_acquire_volume(struct fscache_volume *volume);
+void cachefiles_free_volume(struct fscache_volume *volume);
+void cachefiles_withdraw_volume(struct cachefiles_volume *volume);
+
+/*
+ * xattr.c
+ */
+extern int cachefiles_set_object_xattr(struct cachefiles_object *object);
+extern int cachefiles_check_auxdata(struct cachefiles_object *object,
+ struct file *file);
+extern int cachefiles_remove_object_xattr(struct cachefiles_cache *cache,
+ struct cachefiles_object *object,
+ struct dentry *dentry);
+extern void cachefiles_prepare_to_write(struct fscache_cookie *cookie);
+extern bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume);
+extern int cachefiles_check_volume_xattr(struct cachefiles_volume *volume);
+
+/*
+ * Error handling
+ */
+#define cachefiles_io_error(___cache, FMT, ...) \
+do { \
+ pr_err("I/O Error: " FMT"\n", ##__VA_ARGS__); \
+ fscache_io_error((___cache)->cache); \
+ set_bit(CACHEFILES_DEAD, &(___cache)->flags); \
+} while (0)
+
+#define cachefiles_io_error_obj(object, FMT, ...) \
+do { \
+ struct cachefiles_cache *___cache; \
+ \
+ ___cache = (object)->volume->cache; \
+ cachefiles_io_error(___cache, FMT " [o=%08x]", ##__VA_ARGS__, \
+ (object)->debug_id); \
+} while (0)
+
+
+/*
+ * Debug tracing
+ */
+extern unsigned cachefiles_debug;
+#define CACHEFILES_DEBUG_KENTER 1
+#define CACHEFILES_DEBUG_KLEAVE 2
+#define CACHEFILES_DEBUG_KDEBUG 4
+
+#define dbgprintk(FMT, ...) \
+ printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)
+
+#define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__)
+#define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
+#define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__)
+
+
+#if defined(__KDEBUG)
+#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__)
+#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__)
+#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__)
+
+#elif defined(CONFIG_CACHEFILES_DEBUG)
+#define _enter(FMT, ...) \
+do { \
+ if (cachefiles_debug & CACHEFILES_DEBUG_KENTER) \
+ kenter(FMT, ##__VA_ARGS__); \
+} while (0)
+
+#define _leave(FMT, ...) \
+do { \
+ if (cachefiles_debug & CACHEFILES_DEBUG_KLEAVE) \
+ kleave(FMT, ##__VA_ARGS__); \
+} while (0)
+
+#define _debug(FMT, ...) \
+do { \
+ if (cachefiles_debug & CACHEFILES_DEBUG_KDEBUG) \
+ kdebug(FMT, ##__VA_ARGS__); \
+} while (0)
+
+#else
+#define _enter(FMT, ...) no_printk("==> %s("FMT")", __func__, ##__VA_ARGS__)
+#define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
+#define _debug(FMT, ...) no_printk(FMT, ##__VA_ARGS__)
+#endif
+
+#if 1 /* defined(__KDEBUGALL) */
+
+#define ASSERT(X) \
+do { \
+ if (unlikely(!(X))) { \
+ pr_err("\n"); \
+ pr_err("Assertion failed\n"); \
+ BUG(); \
+ } \
+} while (0)
+
+#define ASSERTCMP(X, OP, Y) \
+do { \
+ if (unlikely(!((X) OP (Y)))) { \
+ pr_err("\n"); \
+ pr_err("Assertion failed\n"); \
+ pr_err("%lx " #OP " %lx is false\n", \
+ (unsigned long)(X), (unsigned long)(Y)); \
+ BUG(); \
+ } \
+} while (0)
+
+#define ASSERTIF(C, X) \
+do { \
+ if (unlikely((C) && !(X))) { \
+ pr_err("\n"); \
+ pr_err("Assertion failed\n"); \
+ BUG(); \
+ } \
+} while (0)
+
+#define ASSERTIFCMP(C, X, OP, Y) \
+do { \
+ if (unlikely((C) && !((X) OP (Y)))) { \
+ pr_err("\n"); \
+ pr_err("Assertion failed\n"); \
+ pr_err("%lx " #OP " %lx is false\n", \
+ (unsigned long)(X), (unsigned long)(Y)); \
+ BUG(); \
+ } \
+} while (0)
+
+#else
+
+#define ASSERT(X) do {} while (0)
+#define ASSERTCMP(X, OP, Y) do {} while (0)
+#define ASSERTIF(C, X) do {} while (0)
+#define ASSERTIFCMP(C, X, OP, Y) do {} while (0)
+
+#endif
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
new file mode 100644
index 000000000..000a28f46
--- /dev/null
+++ b/fs/cachefiles/io.c
@@ -0,0 +1,651 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* kiocb-using read/write
+ *
+ * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/mount.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/uio.h>
+#include <linux/falloc.h>
+#include <linux/sched/mm.h>
+#include <trace/events/fscache.h>
+#include "internal.h"
+
+struct cachefiles_kiocb {
+ struct kiocb iocb;
+ refcount_t ki_refcnt;
+ loff_t start;
+ union {
+ size_t skipped;
+ size_t len;
+ };
+ struct cachefiles_object *object;
+ netfs_io_terminated_t term_func;
+ void *term_func_priv;
+ bool was_async;
+ unsigned int inval_counter; /* Copy of cookie->inval_counter */
+ u64 b_writing;
+};
+
+static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki)
+{
+ if (refcount_dec_and_test(&ki->ki_refcnt)) {
+ cachefiles_put_object(ki->object, cachefiles_obj_put_ioreq);
+ fput(ki->iocb.ki_filp);
+ kfree(ki);
+ }
+}
+
+/*
+ * Handle completion of a read from the cache.
+ */
+static void cachefiles_read_complete(struct kiocb *iocb, long ret)
+{
+ struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
+ struct inode *inode = file_inode(ki->iocb.ki_filp);
+
+ _enter("%ld", ret);
+
+ if (ret < 0)
+ trace_cachefiles_io_error(ki->object, inode, ret,
+ cachefiles_trace_read_error);
+
+ if (ki->term_func) {
+ if (ret >= 0) {
+ if (ki->object->cookie->inval_counter == ki->inval_counter)
+ ki->skipped += ret;
+ else
+ ret = -ESTALE;
+ }
+
+ ki->term_func(ki->term_func_priv, ret, ki->was_async);
+ }
+
+ cachefiles_put_kiocb(ki);
+}
+
+/*
+ * Initiate a read from the cache.
+ */
+static int cachefiles_read(struct netfs_cache_resources *cres,
+ loff_t start_pos,
+ struct iov_iter *iter,
+ enum netfs_read_from_hole read_hole,
+ netfs_io_terminated_t term_func,
+ void *term_func_priv)
+{
+ struct cachefiles_object *object;
+ struct cachefiles_kiocb *ki;
+ struct file *file;
+ unsigned int old_nofs;
+ ssize_t ret = -ENOBUFS;
+ size_t len = iov_iter_count(iter), skipped = 0;
+
+ if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
+ goto presubmission_error;
+
+ fscache_count_read();
+ object = cachefiles_cres_object(cres);
+ file = cachefiles_cres_file(cres);
+
+ _enter("%pD,%li,%llx,%zx/%llx",
+ file, file_inode(file)->i_ino, start_pos, len,
+ i_size_read(file_inode(file)));
+
+ /* If the caller asked us to seek for data before doing the read, then
+ * we should do that now. If we find a gap, we fill it with zeros.
+ */
+ if (read_hole != NETFS_READ_HOLE_IGNORE) {
+ loff_t off = start_pos, off2;
+
+ off2 = cachefiles_inject_read_error();
+ if (off2 == 0)
+ off2 = vfs_llseek(file, off, SEEK_DATA);
+ if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO && off2 != -ENXIO) {
+ skipped = 0;
+ ret = off2;
+ goto presubmission_error;
+ }
+
+ if (off2 == -ENXIO || off2 >= start_pos + len) {
+ /* The region is beyond the EOF or there's no more data
+ * in the region, so clear the rest of the buffer and
+ * return success.
+ */
+ ret = -ENODATA;
+ if (read_hole == NETFS_READ_HOLE_FAIL)
+ goto presubmission_error;
+
+ iov_iter_zero(len, iter);
+ skipped = len;
+ ret = 0;
+ goto presubmission_error;
+ }
+
+ skipped = off2 - off;
+ iov_iter_zero(skipped, iter);
+ }
+
+ ret = -ENOMEM;
+ ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
+ if (!ki)
+ goto presubmission_error;
+
+ refcount_set(&ki->ki_refcnt, 2);
+ ki->iocb.ki_filp = file;
+ ki->iocb.ki_pos = start_pos + skipped;
+ ki->iocb.ki_flags = IOCB_DIRECT;
+ ki->iocb.ki_ioprio = get_current_ioprio();
+ ki->skipped = skipped;
+ ki->object = object;
+ ki->inval_counter = cres->inval_counter;
+ ki->term_func = term_func;
+ ki->term_func_priv = term_func_priv;
+ ki->was_async = true;
+
+ if (ki->term_func)
+ ki->iocb.ki_complete = cachefiles_read_complete;
+
+ get_file(ki->iocb.ki_filp);
+ cachefiles_grab_object(object, cachefiles_obj_get_ioreq);
+
+ trace_cachefiles_read(object, file_inode(file), ki->iocb.ki_pos, len - skipped);
+ old_nofs = memalloc_nofs_save();
+ ret = cachefiles_inject_read_error();
+ if (ret == 0)
+ ret = vfs_iocb_iter_read(file, &ki->iocb, iter);
+ memalloc_nofs_restore(old_nofs);
+ switch (ret) {
+ case -EIOCBQUEUED:
+ goto in_progress;
+
+ case -ERESTARTSYS:
+ case -ERESTARTNOINTR:
+ case -ERESTARTNOHAND:
+ case -ERESTART_RESTARTBLOCK:
+ /* There's no easy way to restart the syscall since other AIO's
+ * may be already running. Just fail this IO with EINTR.
+ */
+ ret = -EINTR;
+ fallthrough;
+ default:
+ ki->was_async = false;
+ cachefiles_read_complete(&ki->iocb, ret);
+ if (ret > 0)
+ ret = 0;
+ break;
+ }
+
+in_progress:
+ cachefiles_put_kiocb(ki);
+ _leave(" = %zd", ret);
+ return ret;
+
+presubmission_error:
+ if (term_func)
+ term_func(term_func_priv, ret < 0 ? ret : skipped, false);
+ return ret;
+}
+
+/*
+ * Query the occupancy of the cache in a region, returning where the next chunk
+ * of data starts and how long it is.
+ */
+static int cachefiles_query_occupancy(struct netfs_cache_resources *cres,
+ loff_t start, size_t len, size_t granularity,
+ loff_t *_data_start, size_t *_data_len)
+{
+ struct cachefiles_object *object;
+ struct file *file;
+ loff_t off, off2;
+
+ *_data_start = -1;
+ *_data_len = 0;
+
+ if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
+ return -ENOBUFS;
+
+ object = cachefiles_cres_object(cres);
+ file = cachefiles_cres_file(cres);
+ granularity = max_t(size_t, object->volume->cache->bsize, granularity);
+
+ _enter("%pD,%li,%llx,%zx/%llx",
+ file, file_inode(file)->i_ino, start, len,
+ i_size_read(file_inode(file)));
+
+ off = cachefiles_inject_read_error();
+ if (off == 0)
+ off = vfs_llseek(file, start, SEEK_DATA);
+ if (off == -ENXIO)
+ return -ENODATA; /* Beyond EOF */
+ if (off < 0 && off >= (loff_t)-MAX_ERRNO)
+ return -ENOBUFS; /* Error. */
+ if (round_up(off, granularity) >= start + len)
+ return -ENODATA; /* No data in range */
+
+ off2 = cachefiles_inject_read_error();
+ if (off2 == 0)
+ off2 = vfs_llseek(file, off, SEEK_HOLE);
+ if (off2 == -ENXIO)
+ return -ENODATA; /* Beyond EOF */
+ if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO)
+ return -ENOBUFS; /* Error. */
+
+ /* Round away partial blocks */
+ off = round_up(off, granularity);
+ off2 = round_down(off2, granularity);
+ if (off2 <= off)
+ return -ENODATA;
+
+ *_data_start = off;
+ if (off2 > start + len)
+ *_data_len = len;
+ else
+ *_data_len = off2 - off;
+ return 0;
+}
+
+/*
+ * Handle completion of a write to the cache.
+ */
+static void cachefiles_write_complete(struct kiocb *iocb, long ret)
+{
+ struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
+ struct cachefiles_object *object = ki->object;
+ struct inode *inode = file_inode(ki->iocb.ki_filp);
+
+ _enter("%ld", ret);
+
+ /* Tell lockdep we inherited freeze protection from submission thread */
+ __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
+ __sb_end_write(inode->i_sb, SB_FREEZE_WRITE);
+
+ if (ret < 0)
+ trace_cachefiles_io_error(object, inode, ret,
+ cachefiles_trace_write_error);
+
+ atomic_long_sub(ki->b_writing, &object->volume->cache->b_writing);
+ set_bit(FSCACHE_COOKIE_HAVE_DATA, &object->cookie->flags);
+ if (ki->term_func)
+ ki->term_func(ki->term_func_priv, ret, ki->was_async);
+ cachefiles_put_kiocb(ki);
+}
+
+/*
+ * Initiate a write to the cache.
+ */
+int __cachefiles_write(struct cachefiles_object *object,
+ struct file *file,
+ loff_t start_pos,
+ struct iov_iter *iter,
+ netfs_io_terminated_t term_func,
+ void *term_func_priv)
+{
+ struct cachefiles_cache *cache;
+ struct cachefiles_kiocb *ki;
+ struct inode *inode;
+ unsigned int old_nofs;
+ ssize_t ret;
+ size_t len = iov_iter_count(iter);
+
+ fscache_count_write();
+ cache = object->volume->cache;
+
+ _enter("%pD,%li,%llx,%zx/%llx",
+ file, file_inode(file)->i_ino, start_pos, len,
+ i_size_read(file_inode(file)));
+
+ ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
+ if (!ki) {
+ if (term_func)
+ term_func(term_func_priv, -ENOMEM, false);
+ return -ENOMEM;
+ }
+
+ refcount_set(&ki->ki_refcnt, 2);
+ ki->iocb.ki_filp = file;
+ ki->iocb.ki_pos = start_pos;
+ ki->iocb.ki_flags = IOCB_DIRECT | IOCB_WRITE;
+ ki->iocb.ki_ioprio = get_current_ioprio();
+ ki->object = object;
+ ki->start = start_pos;
+ ki->len = len;
+ ki->term_func = term_func;
+ ki->term_func_priv = term_func_priv;
+ ki->was_async = true;
+ ki->b_writing = (len + (1 << cache->bshift) - 1) >> cache->bshift;
+
+ if (ki->term_func)
+ ki->iocb.ki_complete = cachefiles_write_complete;
+ atomic_long_add(ki->b_writing, &cache->b_writing);
+
+ /* Open-code file_start_write here to grab freeze protection, which
+ * will be released by another thread in aio_complete_rw(). Fool
+ * lockdep by telling it the lock got released so that it doesn't
+ * complain about the held lock when we return to userspace.
+ */
+ inode = file_inode(file);
+ __sb_start_write(inode->i_sb, SB_FREEZE_WRITE);
+ __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
+
+ get_file(ki->iocb.ki_filp);
+ cachefiles_grab_object(object, cachefiles_obj_get_ioreq);
+
+ trace_cachefiles_write(object, inode, ki->iocb.ki_pos, len);
+ old_nofs = memalloc_nofs_save();
+ ret = cachefiles_inject_write_error();
+ if (ret == 0)
+ ret = vfs_iocb_iter_write(file, &ki->iocb, iter);
+ memalloc_nofs_restore(old_nofs);
+ switch (ret) {
+ case -EIOCBQUEUED:
+ goto in_progress;
+
+ case -ERESTARTSYS:
+ case -ERESTARTNOINTR:
+ case -ERESTARTNOHAND:
+ case -ERESTART_RESTARTBLOCK:
+ /* There's no easy way to restart the syscall since other AIO's
+ * may be already running. Just fail this IO with EINTR.
+ */
+ ret = -EINTR;
+ fallthrough;
+ default:
+ ki->was_async = false;
+ cachefiles_write_complete(&ki->iocb, ret);
+ if (ret > 0)
+ ret = 0;
+ break;
+ }
+
+in_progress:
+ cachefiles_put_kiocb(ki);
+ _leave(" = %zd", ret);
+ return ret;
+}
+
+static int cachefiles_write(struct netfs_cache_resources *cres,
+ loff_t start_pos,
+ struct iov_iter *iter,
+ netfs_io_terminated_t term_func,
+ void *term_func_priv)
+{
+ if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) {
+ if (term_func)
+ term_func(term_func_priv, -ENOBUFS, false);
+ return -ENOBUFS;
+ }
+
+ return __cachefiles_write(cachefiles_cres_object(cres),
+ cachefiles_cres_file(cres),
+ start_pos, iter,
+ term_func, term_func_priv);
+}
+
+/*
+ * Prepare a read operation, shortening it to a cached/uncached
+ * boundary as appropriate.
+ */
+static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest *subreq,
+ loff_t i_size)
+{
+ enum cachefiles_prepare_read_trace why;
+ struct netfs_io_request *rreq = subreq->rreq;
+ struct netfs_cache_resources *cres = &rreq->cache_resources;
+ struct cachefiles_object *object;
+ struct cachefiles_cache *cache;
+ struct fscache_cookie *cookie = fscache_cres_cookie(cres);
+ const struct cred *saved_cred;
+ struct file *file = cachefiles_cres_file(cres);
+ enum netfs_io_source ret = NETFS_DOWNLOAD_FROM_SERVER;
+ loff_t off, to;
+ ino_t ino = file ? file_inode(file)->i_ino : 0;
+ int rc;
+
+ _enter("%zx @%llx/%llx", subreq->len, subreq->start, i_size);
+
+ if (subreq->start >= i_size) {
+ ret = NETFS_FILL_WITH_ZEROES;
+ why = cachefiles_trace_read_after_eof;
+ goto out_no_object;
+ }
+
+ if (test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags)) {
+ __set_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
+ why = cachefiles_trace_read_no_data;
+ if (!test_bit(NETFS_SREQ_ONDEMAND, &subreq->flags))
+ goto out_no_object;
+ }
+
+ /* The object and the file may be being created in the background. */
+ if (!file) {
+ why = cachefiles_trace_read_no_file;
+ if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
+ goto out_no_object;
+ file = cachefiles_cres_file(cres);
+ if (!file)
+ goto out_no_object;
+ ino = file_inode(file)->i_ino;
+ }
+
+ object = cachefiles_cres_object(cres);
+ cache = object->volume->cache;
+ cachefiles_begin_secure(cache, &saved_cred);
+retry:
+ off = cachefiles_inject_read_error();
+ if (off == 0)
+ off = vfs_llseek(file, subreq->start, SEEK_DATA);
+ if (off < 0 && off >= (loff_t)-MAX_ERRNO) {
+ if (off == (loff_t)-ENXIO) {
+ why = cachefiles_trace_read_seek_nxio;
+ goto download_and_store;
+ }
+ trace_cachefiles_io_error(object, file_inode(file), off,
+ cachefiles_trace_seek_error);
+ why = cachefiles_trace_read_seek_error;
+ goto out;
+ }
+
+ if (off >= subreq->start + subreq->len) {
+ why = cachefiles_trace_read_found_hole;
+ goto download_and_store;
+ }
+
+ if (off > subreq->start) {
+ off = round_up(off, cache->bsize);
+ subreq->len = off - subreq->start;
+ why = cachefiles_trace_read_found_part;
+ goto download_and_store;
+ }
+
+ to = cachefiles_inject_read_error();
+ if (to == 0)
+ to = vfs_llseek(file, subreq->start, SEEK_HOLE);
+ if (to < 0 && to >= (loff_t)-MAX_ERRNO) {
+ trace_cachefiles_io_error(object, file_inode(file), to,
+ cachefiles_trace_seek_error);
+ why = cachefiles_trace_read_seek_error;
+ goto out;
+ }
+
+ if (to < subreq->start + subreq->len) {
+ if (subreq->start + subreq->len >= i_size)
+ to = round_up(to, cache->bsize);
+ else
+ to = round_down(to, cache->bsize);
+ subreq->len = to - subreq->start;
+ }
+
+ why = cachefiles_trace_read_have_data;
+ ret = NETFS_READ_FROM_CACHE;
+ goto out;
+
+download_and_store:
+ __set_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
+ if (test_bit(NETFS_SREQ_ONDEMAND, &subreq->flags)) {
+ rc = cachefiles_ondemand_read(object, subreq->start,
+ subreq->len);
+ if (!rc) {
+ __clear_bit(NETFS_SREQ_ONDEMAND, &subreq->flags);
+ goto retry;
+ }
+ ret = NETFS_INVALID_READ;
+ }
+out:
+ cachefiles_end_secure(cache, saved_cred);
+out_no_object:
+ trace_cachefiles_prep_read(subreq, ret, why, ino);
+ return ret;
+}
+
+/*
+ * Prepare for a write to occur.
+ */
+int __cachefiles_prepare_write(struct cachefiles_object *object,
+ struct file *file,
+ loff_t *_start, size_t *_len,
+ bool no_space_allocated_yet)
+{
+ struct cachefiles_cache *cache = object->volume->cache;
+ loff_t start = *_start, pos;
+ size_t len = *_len, down;
+ int ret;
+
+ /* Round to DIO size */
+ down = start - round_down(start, PAGE_SIZE);
+ *_start = start - down;
+ *_len = round_up(down + len, PAGE_SIZE);
+
+ /* We need to work out whether there's sufficient disk space to perform
+ * the write - but we can skip that check if we have space already
+ * allocated.
+ */
+ if (no_space_allocated_yet)
+ goto check_space;
+
+ pos = cachefiles_inject_read_error();
+ if (pos == 0)
+ pos = vfs_llseek(file, *_start, SEEK_DATA);
+ if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
+ if (pos == -ENXIO)
+ goto check_space; /* Unallocated tail */
+ trace_cachefiles_io_error(object, file_inode(file), pos,
+ cachefiles_trace_seek_error);
+ return pos;
+ }
+ if ((u64)pos >= (u64)*_start + *_len)
+ goto check_space; /* Unallocated region */
+
+ /* We have a block that's at least partially filled - if we're low on
+ * space, we need to see if it's fully allocated. If it's not, we may
+ * want to cull it.
+ */
+ if (cachefiles_has_space(cache, 0, *_len / PAGE_SIZE,
+ cachefiles_has_space_check) == 0)
+ return 0; /* Enough space to simply overwrite the whole block */
+
+ pos = cachefiles_inject_read_error();
+ if (pos == 0)
+ pos = vfs_llseek(file, *_start, SEEK_HOLE);
+ if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
+ trace_cachefiles_io_error(object, file_inode(file), pos,
+ cachefiles_trace_seek_error);
+ return pos;
+ }
+ if ((u64)pos >= (u64)*_start + *_len)
+ return 0; /* Fully allocated */
+
+ /* Partially allocated, but insufficient space: cull. */
+ fscache_count_no_write_space();
+ ret = cachefiles_inject_remove_error();
+ if (ret == 0)
+ ret = vfs_fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ *_start, *_len);
+ if (ret < 0) {
+ trace_cachefiles_io_error(object, file_inode(file), ret,
+ cachefiles_trace_fallocate_error);
+ cachefiles_io_error_obj(object,
+ "CacheFiles: fallocate failed (%d)\n", ret);
+ ret = -EIO;
+ }
+
+ return ret;
+
+check_space:
+ return cachefiles_has_space(cache, 0, *_len / PAGE_SIZE,
+ cachefiles_has_space_for_write);
+}
+
+static int cachefiles_prepare_write(struct netfs_cache_resources *cres,
+ loff_t *_start, size_t *_len, loff_t i_size,
+ bool no_space_allocated_yet)
+{
+ struct cachefiles_object *object = cachefiles_cres_object(cres);
+ struct cachefiles_cache *cache = object->volume->cache;
+ const struct cred *saved_cred;
+ int ret;
+
+ if (!cachefiles_cres_file(cres)) {
+ if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE))
+ return -ENOBUFS;
+ if (!cachefiles_cres_file(cres))
+ return -ENOBUFS;
+ }
+
+ cachefiles_begin_secure(cache, &saved_cred);
+ ret = __cachefiles_prepare_write(object, cachefiles_cres_file(cres),
+ _start, _len,
+ no_space_allocated_yet);
+ cachefiles_end_secure(cache, saved_cred);
+ return ret;
+}
+
+/*
+ * Clean up an operation.
+ */
+static void cachefiles_end_operation(struct netfs_cache_resources *cres)
+{
+ struct file *file = cachefiles_cres_file(cres);
+
+ if (file)
+ fput(file);
+ fscache_end_cookie_access(fscache_cres_cookie(cres), fscache_access_io_end);
+}
+
+static const struct netfs_cache_ops cachefiles_netfs_cache_ops = {
+ .end_operation = cachefiles_end_operation,
+ .read = cachefiles_read,
+ .write = cachefiles_write,
+ .prepare_read = cachefiles_prepare_read,
+ .prepare_write = cachefiles_prepare_write,
+ .query_occupancy = cachefiles_query_occupancy,
+};
+
+/*
+ * Open the cache file when beginning a cache operation.
+ */
+bool cachefiles_begin_operation(struct netfs_cache_resources *cres,
+ enum fscache_want_state want_state)
+{
+ struct cachefiles_object *object = cachefiles_cres_object(cres);
+
+ if (!cachefiles_cres_file(cres)) {
+ cres->ops = &cachefiles_netfs_cache_ops;
+ if (object->file) {
+ spin_lock(&object->lock);
+ if (!cres->cache_priv2 && object->file)
+ cres->cache_priv2 = get_file(object->file);
+ spin_unlock(&object->lock);
+ }
+ }
+
+ if (!cachefiles_cres_file(cres) && want_state != FSCACHE_WANT_PARAMS) {
+ pr_err("failed to get cres->file\n");
+ return false;
+ }
+
+ return true;
+}
diff --git a/fs/cachefiles/key.c b/fs/cachefiles/key.c
new file mode 100644
index 000000000..bf935e25b
--- /dev/null
+++ b/fs/cachefiles/key.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Key to pathname encoder
+ *
+ * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/slab.h>
+#include "internal.h"
+
+static const char cachefiles_charmap[64] =
+ "0123456789" /* 0 - 9 */
+ "abcdefghijklmnopqrstuvwxyz" /* 10 - 35 */
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ" /* 36 - 61 */
+ "_-" /* 62 - 63 */
+ ;
+
+static const char cachefiles_filecharmap[256] = {
+ /* we skip space and tab and control chars */
+ [33 ... 46] = 1, /* '!' -> '.' */
+ /* we skip '/' as it's significant to pathwalk */
+ [48 ... 127] = 1, /* '0' -> '~' */
+};
+
+static inline unsigned int how_many_hex_digits(unsigned int x)
+{
+ return x ? round_up(ilog2(x) + 1, 4) / 4 : 0;
+}
+
+/*
+ * turn the raw key into something cooked
+ * - the key may be up to NAME_MAX in length (including the length word)
+ * - "base64" encode the strange keys, mapping 3 bytes of raw to four of
+ * cooked
+ * - need to cut the cooked key into 252 char lengths (189 raw bytes)
+ */
+bool cachefiles_cook_key(struct cachefiles_object *object)
+{
+ const u8 *key = fscache_get_key(object->cookie), *kend;
+ unsigned char ch;
+ unsigned int acc, i, n, nle, nbe, keylen = object->cookie->key_len;
+ unsigned int b64len, len, print, pad;
+ char *name, sep;
+
+ _enter(",%u,%*phN", keylen, keylen, key);
+
+ BUG_ON(keylen > NAME_MAX - 3);
+
+ print = 1;
+ for (i = 0; i < keylen; i++) {
+ ch = key[i];
+ print &= cachefiles_filecharmap[ch];
+ }
+
+ /* If the path is usable ASCII, then we render it directly */
+ if (print) {
+ len = 1 + keylen;
+ name = kmalloc(len + 1, GFP_KERNEL);
+ if (!name)
+ return false;
+
+ name[0] = 'D'; /* Data object type, string encoding */
+ memcpy(name + 1, key, keylen);
+ goto success;
+ }
+
+ /* See if it makes sense to encode it as "hex,hex,hex" for each 32-bit
+ * chunk. We rely on the key having been padded out to a whole number
+ * of 32-bit words.
+ */
+ n = round_up(keylen, 4);
+ nbe = nle = 0;
+ for (i = 0; i < n; i += 4) {
+ u32 be = be32_to_cpu(*(__be32 *)(key + i));
+ u32 le = le32_to_cpu(*(__le32 *)(key + i));
+
+ nbe += 1 + how_many_hex_digits(be);
+ nle += 1 + how_many_hex_digits(le);
+ }
+
+ b64len = DIV_ROUND_UP(keylen, 3);
+ pad = b64len * 3 - keylen;
+ b64len = 2 + b64len * 4; /* Length if we base64-encode it */
+ _debug("len=%u nbe=%u nle=%u b64=%u", keylen, nbe, nle, b64len);
+ if (nbe < b64len || nle < b64len) {
+ unsigned int nlen = min(nbe, nle) + 1;
+ name = kmalloc(nlen, GFP_KERNEL);
+ if (!name)
+ return false;
+ sep = (nbe <= nle) ? 'S' : 'T'; /* Encoding indicator */
+ len = 0;
+ for (i = 0; i < n; i += 4) {
+ u32 x;
+ if (nbe <= nle)
+ x = be32_to_cpu(*(__be32 *)(key + i));
+ else
+ x = le32_to_cpu(*(__le32 *)(key + i));
+ name[len++] = sep;
+ if (x != 0)
+ len += snprintf(name + len, nlen - len, "%x", x);
+ sep = ',';
+ }
+ goto success;
+ }
+
+ /* We need to base64-encode it */
+ name = kmalloc(b64len + 1, GFP_KERNEL);
+ if (!name)
+ return false;
+
+ name[0] = 'E';
+ name[1] = '0' + pad;
+ len = 2;
+ kend = key + keylen;
+ do {
+ acc = *key++;
+ if (key < kend) {
+ acc |= *key++ << 8;
+ if (key < kend)
+ acc |= *key++ << 16;
+ }
+
+ name[len++] = cachefiles_charmap[acc & 63];
+ acc >>= 6;
+ name[len++] = cachefiles_charmap[acc & 63];
+ acc >>= 6;
+ name[len++] = cachefiles_charmap[acc & 63];
+ acc >>= 6;
+ name[len++] = cachefiles_charmap[acc & 63];
+ } while (key < kend);
+
+success:
+ name[len] = 0;
+ object->d_name = name;
+ object->d_name_len = len;
+ _leave(" = %s", object->d_name);
+ return true;
+}
diff --git a/fs/cachefiles/main.c b/fs/cachefiles/main.c
new file mode 100644
index 000000000..3f369c6f8
--- /dev/null
+++ b/fs/cachefiles/main.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Network filesystem caching backend to use cache files on a premounted
+ * filesystem
+ *
+ * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/completion.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
+#include <linux/statfs.h>
+#include <linux/sysctl.h>
+#include <linux/miscdevice.h>
+#include <linux/netfs.h>
+#include <trace/events/netfs.h>
+#define CREATE_TRACE_POINTS
+#include "internal.h"
+
+unsigned cachefiles_debug;
+module_param_named(debug, cachefiles_debug, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(cachefiles_debug, "CacheFiles debugging mask");
+
+MODULE_DESCRIPTION("Mounted-filesystem based cache");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+
+struct kmem_cache *cachefiles_object_jar;
+
+static struct miscdevice cachefiles_dev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "cachefiles",
+ .fops = &cachefiles_daemon_fops,
+};
+
+/*
+ * initialise the fs caching module
+ */
+static int __init cachefiles_init(void)
+{
+ int ret;
+
+ ret = cachefiles_register_error_injection();
+ if (ret < 0)
+ goto error_einj;
+ ret = misc_register(&cachefiles_dev);
+ if (ret < 0)
+ goto error_dev;
+
+ /* create an object jar */
+ ret = -ENOMEM;
+ cachefiles_object_jar =
+ kmem_cache_create("cachefiles_object_jar",
+ sizeof(struct cachefiles_object),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!cachefiles_object_jar) {
+ pr_notice("Failed to allocate an object jar\n");
+ goto error_object_jar;
+ }
+
+ pr_info("Loaded\n");
+ return 0;
+
+error_object_jar:
+ misc_deregister(&cachefiles_dev);
+error_dev:
+ cachefiles_unregister_error_injection();
+error_einj:
+ pr_err("failed to register: %d\n", ret);
+ return ret;
+}
+
+fs_initcall(cachefiles_init);
+
+/*
+ * clean up on module removal
+ */
+static void __exit cachefiles_exit(void)
+{
+ pr_info("Unloading\n");
+
+ kmem_cache_destroy(cachefiles_object_jar);
+ misc_deregister(&cachefiles_dev);
+ cachefiles_unregister_error_injection();
+}
+
+module_exit(cachefiles_exit);
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
new file mode 100644
index 000000000..50b2ee163
--- /dev/null
+++ b/fs/cachefiles/namei.c
@@ -0,0 +1,862 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* CacheFiles path walking and related routines
+ *
+ * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include "internal.h"
+
+/*
+ * Mark the backing file as being a cache file if it's not already in use. The
+ * mark tells the culling request command that it's not allowed to cull the
+ * file or directory. The caller must hold the inode lock.
+ */
+static bool __cachefiles_mark_inode_in_use(struct cachefiles_object *object,
+ struct inode *inode)
+{
+ bool can_use = false;
+
+ if (!(inode->i_flags & S_KERNEL_FILE)) {
+ inode->i_flags |= S_KERNEL_FILE;
+ trace_cachefiles_mark_active(object, inode);
+ can_use = true;
+ } else {
+ trace_cachefiles_mark_failed(object, inode);
+ }
+
+ return can_use;
+}
+
+static bool cachefiles_mark_inode_in_use(struct cachefiles_object *object,
+ struct inode *inode)
+{
+ bool can_use;
+
+ inode_lock(inode);
+ can_use = __cachefiles_mark_inode_in_use(object, inode);
+ inode_unlock(inode);
+ return can_use;
+}
+
+/*
+ * Unmark a backing inode. The caller must hold the inode lock.
+ */
+static void __cachefiles_unmark_inode_in_use(struct cachefiles_object *object,
+ struct inode *inode)
+{
+ inode->i_flags &= ~S_KERNEL_FILE;
+ trace_cachefiles_mark_inactive(object, inode);
+}
+
+static void cachefiles_do_unmark_inode_in_use(struct cachefiles_object *object,
+ struct inode *inode)
+{
+ inode_lock(inode);
+ __cachefiles_unmark_inode_in_use(object, inode);
+ inode_unlock(inode);
+}
+
+/*
+ * Unmark a backing inode and tell cachefilesd that there's something that can
+ * be culled.
+ */
+void cachefiles_unmark_inode_in_use(struct cachefiles_object *object,
+ struct file *file)
+{
+ struct cachefiles_cache *cache = object->volume->cache;
+ struct inode *inode = file_inode(file);
+
+ cachefiles_do_unmark_inode_in_use(object, inode);
+
+ if (!test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags)) {
+ atomic_long_add(inode->i_blocks, &cache->b_released);
+ if (atomic_inc_return(&cache->f_released))
+ cachefiles_state_changed(cache);
+ }
+}
+
+/*
+ * get a subdirectory
+ */
+struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
+ struct dentry *dir,
+ const char *dirname,
+ bool *_is_new)
+{
+ struct dentry *subdir;
+ struct path path;
+ int ret;
+
+ _enter(",,%s", dirname);
+
+ /* search the current directory for the element name */
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
+
+retry:
+ ret = cachefiles_inject_read_error();
+ if (ret == 0)
+ subdir = lookup_one_len(dirname, dir, strlen(dirname));
+ else
+ subdir = ERR_PTR(ret);
+ trace_cachefiles_lookup(NULL, dir, subdir);
+ if (IS_ERR(subdir)) {
+ trace_cachefiles_vfs_error(NULL, d_backing_inode(dir),
+ PTR_ERR(subdir),
+ cachefiles_trace_lookup_error);
+ if (PTR_ERR(subdir) == -ENOMEM)
+ goto nomem_d_alloc;
+ goto lookup_error;
+ }
+
+ _debug("subdir -> %pd %s",
+ subdir, d_backing_inode(subdir) ? "positive" : "negative");
+
+ /* we need to create the subdir if it doesn't exist yet */
+ if (d_is_negative(subdir)) {
+ ret = cachefiles_has_space(cache, 1, 0,
+ cachefiles_has_space_for_create);
+ if (ret < 0)
+ goto mkdir_error;
+
+ _debug("attempt mkdir");
+
+ path.mnt = cache->mnt;
+ path.dentry = dir;
+ ret = security_path_mkdir(&path, subdir, 0700);
+ if (ret < 0)
+ goto mkdir_error;
+ ret = cachefiles_inject_write_error();
+ if (ret == 0)
+ ret = vfs_mkdir(&init_user_ns, d_inode(dir), subdir, 0700);
+ if (ret < 0) {
+ trace_cachefiles_vfs_error(NULL, d_inode(dir), ret,
+ cachefiles_trace_mkdir_error);
+ goto mkdir_error;
+ }
+ trace_cachefiles_mkdir(dir, subdir);
+
+ if (unlikely(d_unhashed(subdir))) {
+ cachefiles_put_directory(subdir);
+ goto retry;
+ }
+ ASSERT(d_backing_inode(subdir));
+
+ _debug("mkdir -> %pd{ino=%lu}",
+ subdir, d_backing_inode(subdir)->i_ino);
+ if (_is_new)
+ *_is_new = true;
+ }
+
+ /* Tell rmdir() it's not allowed to delete the subdir */
+ inode_lock(d_inode(subdir));
+ inode_unlock(d_inode(dir));
+
+ if (!__cachefiles_mark_inode_in_use(NULL, d_inode(subdir))) {
+ pr_notice("cachefiles: Inode already in use: %pd (B=%lx)\n",
+ subdir, d_inode(subdir)->i_ino);
+ goto mark_error;
+ }
+
+ inode_unlock(d_inode(subdir));
+
+ /* we need to make sure the subdir is a directory */
+ ASSERT(d_backing_inode(subdir));
+
+ if (!d_can_lookup(subdir)) {
+ pr_err("%s is not a directory\n", dirname);
+ ret = -EIO;
+ goto check_error;
+ }
+
+ ret = -EPERM;
+ if (!(d_backing_inode(subdir)->i_opflags & IOP_XATTR) ||
+ !d_backing_inode(subdir)->i_op->lookup ||
+ !d_backing_inode(subdir)->i_op->mkdir ||
+ !d_backing_inode(subdir)->i_op->rename ||
+ !d_backing_inode(subdir)->i_op->rmdir ||
+ !d_backing_inode(subdir)->i_op->unlink)
+ goto check_error;
+
+ _leave(" = [%lu]", d_backing_inode(subdir)->i_ino);
+ return subdir;
+
+check_error:
+ cachefiles_put_directory(subdir);
+ _leave(" = %d [check]", ret);
+ return ERR_PTR(ret);
+
+mark_error:
+ inode_unlock(d_inode(subdir));
+ dput(subdir);
+ return ERR_PTR(-EBUSY);
+
+mkdir_error:
+ inode_unlock(d_inode(dir));
+ dput(subdir);
+ pr_err("mkdir %s failed with error %d\n", dirname, ret);
+ return ERR_PTR(ret);
+
+lookup_error:
+ inode_unlock(d_inode(dir));
+ ret = PTR_ERR(subdir);
+ pr_err("Lookup %s failed with error %d\n", dirname, ret);
+ return ERR_PTR(ret);
+
+nomem_d_alloc:
+ inode_unlock(d_inode(dir));
+ _leave(" = -ENOMEM");
+ return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Put a subdirectory.
+ */
+void cachefiles_put_directory(struct dentry *dir)
+{
+ if (dir) {
+ cachefiles_do_unmark_inode_in_use(NULL, d_inode(dir));
+ dput(dir);
+ }
+}
+
+/*
+ * Remove a regular file from the cache.
+ */
+static int cachefiles_unlink(struct cachefiles_cache *cache,
+ struct cachefiles_object *object,
+ struct dentry *dir, struct dentry *dentry,
+ enum fscache_why_object_killed why)
+{
+ struct path path = {
+ .mnt = cache->mnt,
+ .dentry = dir,
+ };
+ int ret;
+
+ trace_cachefiles_unlink(object, d_inode(dentry)->i_ino, why);
+ ret = security_path_unlink(&path, dentry);
+ if (ret < 0) {
+ cachefiles_io_error(cache, "Unlink security error");
+ return ret;
+ }
+
+ ret = cachefiles_inject_remove_error();
+ if (ret == 0) {
+ ret = vfs_unlink(&init_user_ns, d_backing_inode(dir), dentry, NULL);
+ if (ret == -EIO)
+ cachefiles_io_error(cache, "Unlink failed");
+ }
+ if (ret != 0)
+ trace_cachefiles_vfs_error(object, d_backing_inode(dir), ret,
+ cachefiles_trace_unlink_error);
+ return ret;
+}
+
+/*
+ * Delete an object representation from the cache
+ * - File backed objects are unlinked
+ * - Directory backed objects are stuffed into the graveyard for userspace to
+ * delete
+ */
+int cachefiles_bury_object(struct cachefiles_cache *cache,
+ struct cachefiles_object *object,
+ struct dentry *dir,
+ struct dentry *rep,
+ enum fscache_why_object_killed why)
+{
+ struct dentry *grave, *trap;
+ struct path path, path_to_graveyard;
+ char nbuffer[8 + 8 + 1];
+ int ret;
+
+ _enter(",'%pd','%pd'", dir, rep);
+
+ if (rep->d_parent != dir) {
+ inode_unlock(d_inode(dir));
+ _leave(" = -ESTALE");
+ return -ESTALE;
+ }
+
+ /* non-directories can just be unlinked */
+ if (!d_is_dir(rep)) {
+ dget(rep); /* Stop the dentry being negated if it's only pinned
+ * by a file struct.
+ */
+ ret = cachefiles_unlink(cache, object, dir, rep, why);
+ dput(rep);
+
+ inode_unlock(d_inode(dir));
+ _leave(" = %d", ret);
+ return ret;
+ }
+
+ /* directories have to be moved to the graveyard */
+ _debug("move stale object to graveyard");
+ inode_unlock(d_inode(dir));
+
+try_again:
+ /* first step is to make up a grave dentry in the graveyard */
+ sprintf(nbuffer, "%08x%08x",
+ (uint32_t) ktime_get_real_seconds(),
+ (uint32_t) atomic_inc_return(&cache->gravecounter));
+
+ /* do the multiway lock magic */
+ trap = lock_rename(cache->graveyard, dir);
+
+ /* do some checks before getting the grave dentry */
+ if (rep->d_parent != dir || IS_DEADDIR(d_inode(rep))) {
+ /* the entry was probably culled when we dropped the parent dir
+ * lock */
+ unlock_rename(cache->graveyard, dir);
+ _leave(" = 0 [culled?]");
+ return 0;
+ }
+
+ if (!d_can_lookup(cache->graveyard)) {
+ unlock_rename(cache->graveyard, dir);
+ cachefiles_io_error(cache, "Graveyard no longer a directory");
+ return -EIO;
+ }
+
+ if (trap == rep) {
+ unlock_rename(cache->graveyard, dir);
+ cachefiles_io_error(cache, "May not make directory loop");
+ return -EIO;
+ }
+
+ if (d_mountpoint(rep)) {
+ unlock_rename(cache->graveyard, dir);
+ cachefiles_io_error(cache, "Mountpoint in cache");
+ return -EIO;
+ }
+
+ grave = lookup_one_len(nbuffer, cache->graveyard, strlen(nbuffer));
+ if (IS_ERR(grave)) {
+ unlock_rename(cache->graveyard, dir);
+ trace_cachefiles_vfs_error(object, d_inode(cache->graveyard),
+ PTR_ERR(grave),
+ cachefiles_trace_lookup_error);
+
+ if (PTR_ERR(grave) == -ENOMEM) {
+ _leave(" = -ENOMEM");
+ return -ENOMEM;
+ }
+
+ cachefiles_io_error(cache, "Lookup error %ld", PTR_ERR(grave));
+ return -EIO;
+ }
+
+ if (d_is_positive(grave)) {
+ unlock_rename(cache->graveyard, dir);
+ dput(grave);
+ grave = NULL;
+ cond_resched();
+ goto try_again;
+ }
+
+ if (d_mountpoint(grave)) {
+ unlock_rename(cache->graveyard, dir);
+ dput(grave);
+ cachefiles_io_error(cache, "Mountpoint in graveyard");
+ return -EIO;
+ }
+
+ /* target should not be an ancestor of source */
+ if (trap == grave) {
+ unlock_rename(cache->graveyard, dir);
+ dput(grave);
+ cachefiles_io_error(cache, "May not make directory loop");
+ return -EIO;
+ }
+
+ /* attempt the rename */
+ path.mnt = cache->mnt;
+ path.dentry = dir;
+ path_to_graveyard.mnt = cache->mnt;
+ path_to_graveyard.dentry = cache->graveyard;
+ ret = security_path_rename(&path, rep, &path_to_graveyard, grave, 0);
+ if (ret < 0) {
+ cachefiles_io_error(cache, "Rename security error %d", ret);
+ } else {
+ struct renamedata rd = {
+ .old_mnt_userns = &init_user_ns,
+ .old_dir = d_inode(dir),
+ .old_dentry = rep,
+ .new_mnt_userns = &init_user_ns,
+ .new_dir = d_inode(cache->graveyard),
+ .new_dentry = grave,
+ };
+ trace_cachefiles_rename(object, d_inode(rep)->i_ino, why);
+ ret = cachefiles_inject_read_error();
+ if (ret == 0)
+ ret = vfs_rename(&rd);
+ if (ret != 0)
+ trace_cachefiles_vfs_error(object, d_inode(dir), ret,
+ cachefiles_trace_rename_error);
+ if (ret != 0 && ret != -ENOMEM)
+ cachefiles_io_error(cache,
+ "Rename failed with error %d", ret);
+ }
+
+ __cachefiles_unmark_inode_in_use(object, d_inode(rep));
+ unlock_rename(cache->graveyard, dir);
+ dput(grave);
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * Delete a cache file.
+ */
+int cachefiles_delete_object(struct cachefiles_object *object,
+ enum fscache_why_object_killed why)
+{
+ struct cachefiles_volume *volume = object->volume;
+ struct dentry *dentry = object->file->f_path.dentry;
+ struct dentry *fan = volume->fanout[(u8)object->cookie->key_hash];
+ int ret;
+
+ _enter(",OBJ%x{%pD}", object->debug_id, object->file);
+
+ /* Stop the dentry being negated if it's only pinned by a file struct. */
+ dget(dentry);
+
+ inode_lock_nested(d_backing_inode(fan), I_MUTEX_PARENT);
+ ret = cachefiles_unlink(volume->cache, object, fan, dentry, why);
+ inode_unlock(d_backing_inode(fan));
+ dput(dentry);
+ return ret;
+}
+
+/*
+ * Create a temporary file and leave it unattached and un-xattr'd until the
+ * time comes to discard the object from memory.
+ */
+struct file *cachefiles_create_tmpfile(struct cachefiles_object *object)
+{
+ struct cachefiles_volume *volume = object->volume;
+ struct cachefiles_cache *cache = volume->cache;
+ const struct cred *saved_cred;
+ struct dentry *fan = volume->fanout[(u8)object->cookie->key_hash];
+ struct file *file;
+ const struct path parentpath = { .mnt = cache->mnt, .dentry = fan };
+ uint64_t ni_size;
+ long ret;
+
+
+ cachefiles_begin_secure(cache, &saved_cred);
+
+ ret = cachefiles_inject_write_error();
+ if (ret == 0) {
+ file = vfs_tmpfile_open(&init_user_ns, &parentpath, S_IFREG,
+ O_RDWR | O_LARGEFILE | O_DIRECT,
+ cache->cache_cred);
+ ret = PTR_ERR_OR_ZERO(file);
+ }
+ if (ret) {
+ trace_cachefiles_vfs_error(object, d_inode(fan), ret,
+ cachefiles_trace_tmpfile_error);
+ if (ret == -EIO)
+ cachefiles_io_error_obj(object, "Failed to create tmpfile");
+ goto err;
+ }
+
+ trace_cachefiles_tmpfile(object, file_inode(file));
+
+ /* This is a newly created file with no other possible user */
+ if (!cachefiles_mark_inode_in_use(object, file_inode(file)))
+ WARN_ON(1);
+
+ ret = cachefiles_ondemand_init_object(object);
+ if (ret < 0)
+ goto err_unuse;
+
+ ni_size = object->cookie->object_size;
+ ni_size = round_up(ni_size, CACHEFILES_DIO_BLOCK_SIZE);
+
+ if (ni_size > 0) {
+ trace_cachefiles_trunc(object, file_inode(file), 0, ni_size,
+ cachefiles_trunc_expand_tmpfile);
+ ret = cachefiles_inject_write_error();
+ if (ret == 0)
+ ret = vfs_truncate(&file->f_path, ni_size);
+ if (ret < 0) {
+ trace_cachefiles_vfs_error(
+ object, file_inode(file), ret,
+ cachefiles_trace_trunc_error);
+ goto err_unuse;
+ }
+ }
+
+ ret = -EINVAL;
+ if (unlikely(!file->f_op->read_iter) ||
+ unlikely(!file->f_op->write_iter)) {
+ fput(file);
+ pr_notice("Cache does not support read_iter and write_iter\n");
+ goto err_unuse;
+ }
+out:
+ cachefiles_end_secure(cache, saved_cred);
+ return file;
+
+err_unuse:
+ cachefiles_do_unmark_inode_in_use(object, file_inode(file));
+ fput(file);
+err:
+ file = ERR_PTR(ret);
+ goto out;
+}
+
+/*
+ * Create a new file.
+ */
+static bool cachefiles_create_file(struct cachefiles_object *object)
+{
+ struct file *file;
+ int ret;
+
+ ret = cachefiles_has_space(object->volume->cache, 1, 0,
+ cachefiles_has_space_for_create);
+ if (ret < 0)
+ return false;
+
+ file = cachefiles_create_tmpfile(object);
+ if (IS_ERR(file))
+ return false;
+
+ set_bit(FSCACHE_COOKIE_NEEDS_UPDATE, &object->cookie->flags);
+ set_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags);
+ _debug("create -> %pD{ino=%lu}", file, file_inode(file)->i_ino);
+ object->file = file;
+ return true;
+}
+
+/*
+ * Open an existing file, checking its attributes and replacing it if it is
+ * stale.
+ */
+static bool cachefiles_open_file(struct cachefiles_object *object,
+ struct dentry *dentry)
+{
+ struct cachefiles_cache *cache = object->volume->cache;
+ struct file *file;
+ struct path path;
+ int ret;
+
+ _enter("%pd", dentry);
+
+ if (!cachefiles_mark_inode_in_use(object, d_inode(dentry))) {
+ pr_notice("cachefiles: Inode already in use: %pd (B=%lx)\n",
+ dentry, d_inode(dentry)->i_ino);
+ return false;
+ }
+
+ /* We need to open a file interface onto a data file now as we can't do
+ * it on demand because writeback called from do_exit() sees
+ * current->fs == NULL - which breaks d_path() called from ext4 open.
+ */
+ path.mnt = cache->mnt;
+ path.dentry = dentry;
+ file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
+ d_backing_inode(dentry), cache->cache_cred);
+ if (IS_ERR(file)) {
+ trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
+ PTR_ERR(file),
+ cachefiles_trace_open_error);
+ goto error;
+ }
+
+ if (unlikely(!file->f_op->read_iter) ||
+ unlikely(!file->f_op->write_iter)) {
+ pr_notice("Cache does not support read_iter and write_iter\n");
+ goto error_fput;
+ }
+ _debug("file -> %pd positive", dentry);
+
+ ret = cachefiles_ondemand_init_object(object);
+ if (ret < 0)
+ goto error_fput;
+
+ ret = cachefiles_check_auxdata(object, file);
+ if (ret < 0)
+ goto check_failed;
+
+ clear_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &object->cookie->flags);
+
+ object->file = file;
+
+ /* Always update the atime on an object we've just looked up (this is
+ * used to keep track of culling, and atimes are only updated by read,
+ * write and readdir but not lookup or open).
+ */
+ touch_atime(&file->f_path);
+ dput(dentry);
+ return true;
+
+check_failed:
+ fscache_cookie_lookup_negative(object->cookie);
+ cachefiles_unmark_inode_in_use(object, file);
+ fput(file);
+ dput(dentry);
+ if (ret == -ESTALE)
+ return cachefiles_create_file(object);
+ return false;
+
+error_fput:
+ fput(file);
+error:
+ cachefiles_do_unmark_inode_in_use(object, d_inode(dentry));
+ dput(dentry);
+ return false;
+}
+
+/*
+ * walk from the parent object to the child object through the backing
+ * filesystem, creating directories as we go
+ */
+bool cachefiles_look_up_object(struct cachefiles_object *object)
+{
+ struct cachefiles_volume *volume = object->volume;
+ struct dentry *dentry, *fan = volume->fanout[(u8)object->cookie->key_hash];
+ int ret;
+
+ _enter("OBJ%x,%s,", object->debug_id, object->d_name);
+
+ /* Look up path "cache/vol/fanout/file". */
+ ret = cachefiles_inject_read_error();
+ if (ret == 0)
+ dentry = lookup_positive_unlocked(object->d_name, fan,
+ object->d_name_len);
+ else
+ dentry = ERR_PTR(ret);
+ trace_cachefiles_lookup(object, fan, dentry);
+ if (IS_ERR(dentry)) {
+ if (dentry == ERR_PTR(-ENOENT))
+ goto new_file;
+ if (dentry == ERR_PTR(-EIO))
+ cachefiles_io_error_obj(object, "Lookup failed");
+ return false;
+ }
+
+ if (!d_is_reg(dentry)) {
+ pr_err("%pd is not a file\n", dentry);
+ inode_lock_nested(d_inode(fan), I_MUTEX_PARENT);
+ ret = cachefiles_bury_object(volume->cache, object, fan, dentry,
+ FSCACHE_OBJECT_IS_WEIRD);
+ dput(dentry);
+ if (ret < 0)
+ return false;
+ goto new_file;
+ }
+
+ if (!cachefiles_open_file(object, dentry))
+ return false;
+
+ _leave(" = t [%lu]", file_inode(object->file)->i_ino);
+ return true;
+
+new_file:
+ fscache_cookie_lookup_negative(object->cookie);
+ return cachefiles_create_file(object);
+}
+
+/*
+ * Attempt to link a temporary file into its rightful place in the cache.
+ */
+bool cachefiles_commit_tmpfile(struct cachefiles_cache *cache,
+ struct cachefiles_object *object)
+{
+ struct cachefiles_volume *volume = object->volume;
+ struct dentry *dentry, *fan = volume->fanout[(u8)object->cookie->key_hash];
+ bool success = false;
+ int ret;
+
+ _enter(",%pD", object->file);
+
+ inode_lock_nested(d_inode(fan), I_MUTEX_PARENT);
+ ret = cachefiles_inject_read_error();
+ if (ret == 0)
+ dentry = lookup_one_len(object->d_name, fan, object->d_name_len);
+ else
+ dentry = ERR_PTR(ret);
+ if (IS_ERR(dentry)) {
+ trace_cachefiles_vfs_error(object, d_inode(fan), PTR_ERR(dentry),
+ cachefiles_trace_lookup_error);
+ _debug("lookup fail %ld", PTR_ERR(dentry));
+ goto out_unlock;
+ }
+
+ if (!d_is_negative(dentry)) {
+ if (d_backing_inode(dentry) == file_inode(object->file)) {
+ success = true;
+ goto out_dput;
+ }
+
+ ret = cachefiles_unlink(volume->cache, object, fan, dentry,
+ FSCACHE_OBJECT_IS_STALE);
+ if (ret < 0)
+ goto out_dput;
+
+ dput(dentry);
+ ret = cachefiles_inject_read_error();
+ if (ret == 0)
+ dentry = lookup_one_len(object->d_name, fan, object->d_name_len);
+ else
+ dentry = ERR_PTR(ret);
+ if (IS_ERR(dentry)) {
+ trace_cachefiles_vfs_error(object, d_inode(fan), PTR_ERR(dentry),
+ cachefiles_trace_lookup_error);
+ _debug("lookup fail %ld", PTR_ERR(dentry));
+ goto out_unlock;
+ }
+ }
+
+ ret = cachefiles_inject_read_error();
+ if (ret == 0)
+ ret = vfs_link(object->file->f_path.dentry, &init_user_ns,
+ d_inode(fan), dentry, NULL);
+ if (ret < 0) {
+ trace_cachefiles_vfs_error(object, d_inode(fan), ret,
+ cachefiles_trace_link_error);
+ _debug("link fail %d", ret);
+ } else {
+ trace_cachefiles_link(object, file_inode(object->file));
+ spin_lock(&object->lock);
+ /* TODO: Do we want to switch the file pointer to the new dentry? */
+ clear_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags);
+ spin_unlock(&object->lock);
+ success = true;
+ }
+
+out_dput:
+ dput(dentry);
+out_unlock:
+ inode_unlock(d_inode(fan));
+ _leave(" = %u", success);
+ return success;
+}
+
+/*
+ * Look up an inode to be checked or culled. Return -EBUSY if the inode is
+ * marked in use.
+ */
+static struct dentry *cachefiles_lookup_for_cull(struct cachefiles_cache *cache,
+ struct dentry *dir,
+ char *filename)
+{
+ struct dentry *victim;
+ int ret = -ENOENT;
+
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
+
+ victim = lookup_one_len(filename, dir, strlen(filename));
+ if (IS_ERR(victim))
+ goto lookup_error;
+ if (d_is_negative(victim))
+ goto lookup_put;
+ if (d_inode(victim)->i_flags & S_KERNEL_FILE)
+ goto lookup_busy;
+ return victim;
+
+lookup_busy:
+ ret = -EBUSY;
+lookup_put:
+ inode_unlock(d_inode(dir));
+ dput(victim);
+ return ERR_PTR(ret);
+
+lookup_error:
+ inode_unlock(d_inode(dir));
+ ret = PTR_ERR(victim);
+ if (ret == -ENOENT)
+ return ERR_PTR(-ESTALE); /* Probably got retired by the netfs */
+
+ if (ret == -EIO) {
+ cachefiles_io_error(cache, "Lookup failed");
+ } else if (ret != -ENOMEM) {
+ pr_err("Internal error: %d\n", ret);
+ ret = -EIO;
+ }
+
+ return ERR_PTR(ret);
+}
+
+/*
+ * Cull an object if it's not in use
+ * - called only by cache manager daemon
+ */
+int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
+ char *filename)
+{
+ struct dentry *victim;
+ struct inode *inode;
+ int ret;
+
+ _enter(",%pd/,%s", dir, filename);
+
+ victim = cachefiles_lookup_for_cull(cache, dir, filename);
+ if (IS_ERR(victim))
+ return PTR_ERR(victim);
+
+ /* check to see if someone is using this object */
+ inode = d_inode(victim);
+ inode_lock(inode);
+ if (inode->i_flags & S_KERNEL_FILE) {
+ ret = -EBUSY;
+ } else {
+ /* Stop the cache from picking it back up */
+ inode->i_flags |= S_KERNEL_FILE;
+ ret = 0;
+ }
+ inode_unlock(inode);
+ if (ret < 0)
+ goto error_unlock;
+
+ ret = cachefiles_bury_object(cache, NULL, dir, victim,
+ FSCACHE_OBJECT_WAS_CULLED);
+ if (ret < 0)
+ goto error;
+
+ fscache_count_culled();
+ dput(victim);
+ _leave(" = 0");
+ return 0;
+
+error_unlock:
+ inode_unlock(d_inode(dir));
+error:
+ dput(victim);
+ if (ret == -ENOENT)
+ return -ESTALE; /* Probably got retired by the netfs */
+
+ if (ret != -ENOMEM) {
+ pr_err("Internal error: %d\n", ret);
+ ret = -EIO;
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Find out if an object is in use or not
+ * - called only by cache manager daemon
+ * - returns -EBUSY or 0 to indicate whether an object is in use or not
+ */
+int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir,
+ char *filename)
+{
+ struct dentry *victim;
+ int ret = 0;
+
+ victim = cachefiles_lookup_for_cull(cache, dir, filename);
+ if (IS_ERR(victim))
+ return PTR_ERR(victim);
+
+ inode_unlock(d_inode(dir));
+ dput(victim);
+ return ret;
+}
diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c
new file mode 100644
index 000000000..0254ed39f
--- /dev/null
+++ b/fs/cachefiles/ondemand.c
@@ -0,0 +1,514 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/fdtable.h>
+#include <linux/anon_inodes.h>
+#include <linux/uio.h>
+#include "internal.h"
+
+static int cachefiles_ondemand_fd_release(struct inode *inode,
+ struct file *file)
+{
+ struct cachefiles_object *object = file->private_data;
+ struct cachefiles_cache *cache = object->volume->cache;
+ int object_id = object->ondemand_id;
+ struct cachefiles_req *req;
+ XA_STATE(xas, &cache->reqs, 0);
+
+ xa_lock(&cache->reqs);
+ object->ondemand_id = CACHEFILES_ONDEMAND_ID_CLOSED;
+
+ /*
+ * Flush all pending READ requests since their completion depends on
+ * anon_fd.
+ */
+ xas_for_each(&xas, req, ULONG_MAX) {
+ if (req->msg.object_id == object_id &&
+ req->msg.opcode == CACHEFILES_OP_READ) {
+ req->error = -EIO;
+ complete(&req->done);
+ xas_store(&xas, NULL);
+ }
+ }
+ xa_unlock(&cache->reqs);
+
+ xa_erase(&cache->ondemand_ids, object_id);
+ trace_cachefiles_ondemand_fd_release(object, object_id);
+ cachefiles_put_object(object, cachefiles_obj_put_ondemand_fd);
+ cachefiles_put_unbind_pincount(cache);
+ return 0;
+}
+
+static ssize_t cachefiles_ondemand_fd_write_iter(struct kiocb *kiocb,
+ struct iov_iter *iter)
+{
+ struct cachefiles_object *object = kiocb->ki_filp->private_data;
+ struct cachefiles_cache *cache = object->volume->cache;
+ struct file *file = object->file;
+ size_t len = iter->count;
+ loff_t pos = kiocb->ki_pos;
+ const struct cred *saved_cred;
+ int ret;
+
+ if (!file)
+ return -ENOBUFS;
+
+ cachefiles_begin_secure(cache, &saved_cred);
+ ret = __cachefiles_prepare_write(object, file, &pos, &len, true);
+ cachefiles_end_secure(cache, saved_cred);
+ if (ret < 0)
+ return ret;
+
+ trace_cachefiles_ondemand_fd_write(object, file_inode(file), pos, len);
+ ret = __cachefiles_write(object, file, pos, iter, NULL, NULL);
+ if (!ret)
+ ret = len;
+
+ return ret;
+}
+
+static loff_t cachefiles_ondemand_fd_llseek(struct file *filp, loff_t pos,
+ int whence)
+{
+ struct cachefiles_object *object = filp->private_data;
+ struct file *file = object->file;
+
+ if (!file)
+ return -ENOBUFS;
+
+ return vfs_llseek(file, pos, whence);
+}
+
+static long cachefiles_ondemand_fd_ioctl(struct file *filp, unsigned int ioctl,
+ unsigned long arg)
+{
+ struct cachefiles_object *object = filp->private_data;
+ struct cachefiles_cache *cache = object->volume->cache;
+ struct cachefiles_req *req;
+ unsigned long id;
+
+ if (ioctl != CACHEFILES_IOC_READ_COMPLETE)
+ return -EINVAL;
+
+ if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags))
+ return -EOPNOTSUPP;
+
+ id = arg;
+ req = xa_erase(&cache->reqs, id);
+ if (!req)
+ return -EINVAL;
+
+ trace_cachefiles_ondemand_cread(object, id);
+ complete(&req->done);
+ return 0;
+}
+
+static const struct file_operations cachefiles_ondemand_fd_fops = {
+ .owner = THIS_MODULE,
+ .release = cachefiles_ondemand_fd_release,
+ .write_iter = cachefiles_ondemand_fd_write_iter,
+ .llseek = cachefiles_ondemand_fd_llseek,
+ .unlocked_ioctl = cachefiles_ondemand_fd_ioctl,
+};
+
+/*
+ * OPEN request Completion (copen)
+ * - command: "copen <id>,<cache_size>"
+ * <cache_size> indicates the object size if >=0, error code if negative
+ */
+int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args)
+{
+ struct cachefiles_req *req;
+ struct fscache_cookie *cookie;
+ char *pid, *psize;
+ unsigned long id;
+ long size;
+ int ret;
+
+ if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags))
+ return -EOPNOTSUPP;
+
+ if (!*args) {
+ pr_err("Empty id specified\n");
+ return -EINVAL;
+ }
+
+ pid = args;
+ psize = strchr(args, ',');
+ if (!psize) {
+ pr_err("Cache size is not specified\n");
+ return -EINVAL;
+ }
+
+ *psize = 0;
+ psize++;
+
+ ret = kstrtoul(pid, 0, &id);
+ if (ret)
+ return ret;
+
+ req = xa_erase(&cache->reqs, id);
+ if (!req)
+ return -EINVAL;
+
+ /* fail OPEN request if copen format is invalid */
+ ret = kstrtol(psize, 0, &size);
+ if (ret) {
+ req->error = ret;
+ goto out;
+ }
+
+ /* fail OPEN request if daemon reports an error */
+ if (size < 0) {
+ if (!IS_ERR_VALUE(size)) {
+ req->error = -EINVAL;
+ ret = -EINVAL;
+ } else {
+ req->error = size;
+ ret = 0;
+ }
+ goto out;
+ }
+
+ cookie = req->object->cookie;
+ cookie->object_size = size;
+ if (size)
+ clear_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags);
+ else
+ set_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags);
+ trace_cachefiles_ondemand_copen(req->object, id, size);
+
+out:
+ complete(&req->done);
+ return ret;
+}
+
+static int cachefiles_ondemand_get_fd(struct cachefiles_req *req)
+{
+ struct cachefiles_object *object;
+ struct cachefiles_cache *cache;
+ struct cachefiles_open *load;
+ struct file *file;
+ u32 object_id;
+ int ret, fd;
+
+ object = cachefiles_grab_object(req->object,
+ cachefiles_obj_get_ondemand_fd);
+ cache = object->volume->cache;
+
+ ret = xa_alloc_cyclic(&cache->ondemand_ids, &object_id, NULL,
+ XA_LIMIT(1, INT_MAX),
+ &cache->ondemand_id_next, GFP_KERNEL);
+ if (ret < 0)
+ goto err;
+
+ fd = get_unused_fd_flags(O_WRONLY);
+ if (fd < 0) {
+ ret = fd;
+ goto err_free_id;
+ }
+
+ file = anon_inode_getfile("[cachefiles]", &cachefiles_ondemand_fd_fops,
+ object, O_WRONLY);
+ if (IS_ERR(file)) {
+ ret = PTR_ERR(file);
+ goto err_put_fd;
+ }
+
+ file->f_mode |= FMODE_PWRITE | FMODE_LSEEK;
+ fd_install(fd, file);
+
+ load = (void *)req->msg.data;
+ load->fd = fd;
+ req->msg.object_id = object_id;
+ object->ondemand_id = object_id;
+
+ cachefiles_get_unbind_pincount(cache);
+ trace_cachefiles_ondemand_open(object, &req->msg, load);
+ return 0;
+
+err_put_fd:
+ put_unused_fd(fd);
+err_free_id:
+ xa_erase(&cache->ondemand_ids, object_id);
+err:
+ cachefiles_put_object(object, cachefiles_obj_put_ondemand_fd);
+ return ret;
+}
+
+ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache,
+ char __user *_buffer, size_t buflen)
+{
+ struct cachefiles_req *req;
+ struct cachefiles_msg *msg;
+ unsigned long id = 0;
+ size_t n;
+ int ret = 0;
+ XA_STATE(xas, &cache->reqs, cache->req_id_next);
+
+ /*
+ * Cyclically search for a request that has not ever been processed,
+ * to prevent requests from being processed repeatedly, and make
+ * request distribution fair.
+ */
+ xa_lock(&cache->reqs);
+ req = xas_find_marked(&xas, UINT_MAX, CACHEFILES_REQ_NEW);
+ if (!req && cache->req_id_next > 0) {
+ xas_set(&xas, 0);
+ req = xas_find_marked(&xas, cache->req_id_next - 1, CACHEFILES_REQ_NEW);
+ }
+ if (!req) {
+ xa_unlock(&cache->reqs);
+ return 0;
+ }
+
+ msg = &req->msg;
+ n = msg->len;
+
+ if (n > buflen) {
+ xa_unlock(&cache->reqs);
+ return -EMSGSIZE;
+ }
+
+ xas_clear_mark(&xas, CACHEFILES_REQ_NEW);
+ cache->req_id_next = xas.xa_index + 1;
+ xa_unlock(&cache->reqs);
+
+ id = xas.xa_index;
+ msg->msg_id = id;
+
+ if (msg->opcode == CACHEFILES_OP_OPEN) {
+ ret = cachefiles_ondemand_get_fd(req);
+ if (ret)
+ goto error;
+ }
+
+ if (copy_to_user(_buffer, msg, n) != 0) {
+ ret = -EFAULT;
+ goto err_put_fd;
+ }
+
+ /* CLOSE request has no reply */
+ if (msg->opcode == CACHEFILES_OP_CLOSE) {
+ xa_erase(&cache->reqs, id);
+ complete(&req->done);
+ }
+
+ return n;
+
+err_put_fd:
+ if (msg->opcode == CACHEFILES_OP_OPEN)
+ close_fd(((struct cachefiles_open *)msg->data)->fd);
+error:
+ xa_erase(&cache->reqs, id);
+ req->error = ret;
+ complete(&req->done);
+ return ret;
+}
+
+typedef int (*init_req_fn)(struct cachefiles_req *req, void *private);
+
+static int cachefiles_ondemand_send_req(struct cachefiles_object *object,
+ enum cachefiles_opcode opcode,
+ size_t data_len,
+ init_req_fn init_req,
+ void *private)
+{
+ struct cachefiles_cache *cache = object->volume->cache;
+ struct cachefiles_req *req;
+ XA_STATE(xas, &cache->reqs, 0);
+ int ret;
+
+ if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags))
+ return 0;
+
+ if (test_bit(CACHEFILES_DEAD, &cache->flags))
+ return -EIO;
+
+ req = kzalloc(sizeof(*req) + data_len, GFP_KERNEL);
+ if (!req)
+ return -ENOMEM;
+
+ req->object = object;
+ init_completion(&req->done);
+ req->msg.opcode = opcode;
+ req->msg.len = sizeof(struct cachefiles_msg) + data_len;
+
+ ret = init_req(req, private);
+ if (ret)
+ goto out;
+
+ do {
+ /*
+ * Stop enqueuing the request when daemon is dying. The
+ * following two operations need to be atomic as a whole.
+ * 1) check cache state, and
+ * 2) enqueue request if cache is alive.
+ * Otherwise the request may be enqueued after xarray has been
+ * flushed, leaving the orphan request never being completed.
+ *
+ * CPU 1 CPU 2
+ * ===== =====
+ * test CACHEFILES_DEAD bit
+ * set CACHEFILES_DEAD bit
+ * flush requests in the xarray
+ * enqueue the request
+ */
+ xas_lock(&xas);
+
+ if (test_bit(CACHEFILES_DEAD, &cache->flags)) {
+ xas_unlock(&xas);
+ ret = -EIO;
+ goto out;
+ }
+
+ /* coupled with the barrier in cachefiles_flush_reqs() */
+ smp_mb();
+
+ if (opcode != CACHEFILES_OP_OPEN && object->ondemand_id <= 0) {
+ WARN_ON_ONCE(object->ondemand_id == 0);
+ xas_unlock(&xas);
+ ret = -EIO;
+ goto out;
+ }
+
+ xas.xa_index = 0;
+ xas_find_marked(&xas, UINT_MAX, XA_FREE_MARK);
+ if (xas.xa_node == XAS_RESTART)
+ xas_set_err(&xas, -EBUSY);
+ xas_store(&xas, req);
+ xas_clear_mark(&xas, XA_FREE_MARK);
+ xas_set_mark(&xas, CACHEFILES_REQ_NEW);
+ xas_unlock(&xas);
+ } while (xas_nomem(&xas, GFP_KERNEL));
+
+ ret = xas_error(&xas);
+ if (ret)
+ goto out;
+
+ wake_up_all(&cache->daemon_pollwq);
+ wait_for_completion(&req->done);
+ ret = req->error;
+out:
+ kfree(req);
+ return ret;
+}
+
+static int cachefiles_ondemand_init_open_req(struct cachefiles_req *req,
+ void *private)
+{
+ struct cachefiles_object *object = req->object;
+ struct fscache_cookie *cookie = object->cookie;
+ struct fscache_volume *volume = object->volume->vcookie;
+ struct cachefiles_open *load = (void *)req->msg.data;
+ size_t volume_key_size, cookie_key_size;
+ void *volume_key, *cookie_key;
+
+ /*
+ * Volume key is a NUL-terminated string. key[0] stores strlen() of the
+ * string, followed by the content of the string (excluding '\0').
+ */
+ volume_key_size = volume->key[0] + 1;
+ volume_key = volume->key + 1;
+
+ /* Cookie key is binary data, which is netfs specific. */
+ cookie_key_size = cookie->key_len;
+ cookie_key = fscache_get_key(cookie);
+
+ if (!(object->cookie->advice & FSCACHE_ADV_WANT_CACHE_SIZE)) {
+ pr_err("WANT_CACHE_SIZE is needed for on-demand mode\n");
+ return -EINVAL;
+ }
+
+ load->volume_key_size = volume_key_size;
+ load->cookie_key_size = cookie_key_size;
+ memcpy(load->data, volume_key, volume_key_size);
+ memcpy(load->data + volume_key_size, cookie_key, cookie_key_size);
+
+ return 0;
+}
+
+static int cachefiles_ondemand_init_close_req(struct cachefiles_req *req,
+ void *private)
+{
+ struct cachefiles_object *object = req->object;
+ int object_id = object->ondemand_id;
+
+ /*
+ * It's possible that object id is still 0 if the cookie looking up
+ * phase failed before OPEN request has ever been sent. Also avoid
+ * sending CLOSE request for CACHEFILES_ONDEMAND_ID_CLOSED, which means
+ * anon_fd has already been closed.
+ */
+ if (object_id <= 0)
+ return -ENOENT;
+
+ req->msg.object_id = object_id;
+ trace_cachefiles_ondemand_close(object, &req->msg);
+ return 0;
+}
+
+struct cachefiles_read_ctx {
+ loff_t off;
+ size_t len;
+};
+
+static int cachefiles_ondemand_init_read_req(struct cachefiles_req *req,
+ void *private)
+{
+ struct cachefiles_object *object = req->object;
+ struct cachefiles_read *load = (void *)req->msg.data;
+ struct cachefiles_read_ctx *read_ctx = private;
+ int object_id = object->ondemand_id;
+
+ /* Stop enqueuing requests when daemon has closed anon_fd. */
+ if (object_id <= 0) {
+ WARN_ON_ONCE(object_id == 0);
+ pr_info_once("READ: anonymous fd closed prematurely.\n");
+ return -EIO;
+ }
+
+ req->msg.object_id = object_id;
+ load->off = read_ctx->off;
+ load->len = read_ctx->len;
+ trace_cachefiles_ondemand_read(object, &req->msg, load);
+ return 0;
+}
+
+int cachefiles_ondemand_init_object(struct cachefiles_object *object)
+{
+ struct fscache_cookie *cookie = object->cookie;
+ struct fscache_volume *volume = object->volume->vcookie;
+ size_t volume_key_size, cookie_key_size, data_len;
+
+ /*
+ * CacheFiles will firstly check the cache file under the root cache
+ * directory. If the coherency check failed, it will fallback to
+ * creating a new tmpfile as the cache file. Reuse the previously
+ * allocated object ID if any.
+ */
+ if (object->ondemand_id > 0)
+ return 0;
+
+ volume_key_size = volume->key[0] + 1;
+ cookie_key_size = cookie->key_len;
+ data_len = sizeof(struct cachefiles_open) +
+ volume_key_size + cookie_key_size;
+
+ return cachefiles_ondemand_send_req(object, CACHEFILES_OP_OPEN,
+ data_len, cachefiles_ondemand_init_open_req, NULL);
+}
+
+void cachefiles_ondemand_clean_object(struct cachefiles_object *object)
+{
+ cachefiles_ondemand_send_req(object, CACHEFILES_OP_CLOSE, 0,
+ cachefiles_ondemand_init_close_req, NULL);
+}
+
+int cachefiles_ondemand_read(struct cachefiles_object *object,
+ loff_t pos, size_t len)
+{
+ struct cachefiles_read_ctx read_ctx = {pos, len};
+
+ return cachefiles_ondemand_send_req(object, CACHEFILES_OP_READ,
+ sizeof(struct cachefiles_read),
+ cachefiles_ondemand_init_read_req, &read_ctx);
+}
diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c
new file mode 100644
index 000000000..fe777164f
--- /dev/null
+++ b/fs/cachefiles/security.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* CacheFiles security management
+ *
+ * Copyright (C) 2007, 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/fs.h>
+#include <linux/cred.h>
+#include "internal.h"
+
+/*
+ * determine the security context within which we access the cache from within
+ * the kernel
+ */
+int cachefiles_get_security_ID(struct cachefiles_cache *cache)
+{
+ struct cred *new;
+ int ret;
+
+ _enter("{%s}", cache->secctx);
+
+ new = prepare_kernel_cred(current);
+ if (!new) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ if (cache->secctx) {
+ ret = set_security_override_from_ctx(new, cache->secctx);
+ if (ret < 0) {
+ put_cred(new);
+ pr_err("Security denies permission to nominate security context: error %d\n",
+ ret);
+ goto error;
+ }
+ }
+
+ cache->cache_cred = new;
+ ret = 0;
+error:
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * see if mkdir and create can be performed in the root directory
+ */
+static int cachefiles_check_cache_dir(struct cachefiles_cache *cache,
+ struct dentry *root)
+{
+ int ret;
+
+ ret = security_inode_mkdir(d_backing_inode(root), root, 0);
+ if (ret < 0) {
+ pr_err("Security denies permission to make dirs: error %d",
+ ret);
+ return ret;
+ }
+
+ ret = security_inode_create(d_backing_inode(root), root, 0);
+ if (ret < 0)
+ pr_err("Security denies permission to create files: error %d",
+ ret);
+
+ return ret;
+}
+
+/*
+ * check the security details of the on-disk cache
+ * - must be called with security override in force
+ * - must return with a security override in force - even in the case of an
+ * error
+ */
+int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
+ struct dentry *root,
+ const struct cred **_saved_cred)
+{
+ struct cred *new;
+ int ret;
+
+ _enter("");
+
+ /* duplicate the cache creds for COW (the override is currently in
+ * force, so we can use prepare_creds() to do this) */
+ new = prepare_creds();
+ if (!new)
+ return -ENOMEM;
+
+ cachefiles_end_secure(cache, *_saved_cred);
+
+ /* use the cache root dir's security context as the basis with
+ * which create files */
+ ret = set_create_files_as(new, d_backing_inode(root));
+ if (ret < 0) {
+ abort_creds(new);
+ cachefiles_begin_secure(cache, _saved_cred);
+ _leave(" = %d [cfa]", ret);
+ return ret;
+ }
+
+ put_cred(cache->cache_cred);
+ cache->cache_cred = new;
+
+ cachefiles_begin_secure(cache, _saved_cred);
+ ret = cachefiles_check_cache_dir(cache, root);
+
+ if (ret == -EOPNOTSUPP)
+ ret = 0;
+ _leave(" = %d", ret);
+ return ret;
+}
diff --git a/fs/cachefiles/volume.c b/fs/cachefiles/volume.c
new file mode 100644
index 000000000..89df0ba8b
--- /dev/null
+++ b/fs/cachefiles/volume.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Volume handling.
+ *
+ * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include "internal.h"
+#include <trace/events/fscache.h>
+
+/*
+ * Allocate and set up a volume representation. We make sure all the fanout
+ * directories are created and pinned.
+ */
+void cachefiles_acquire_volume(struct fscache_volume *vcookie)
+{
+ struct cachefiles_volume *volume;
+ struct cachefiles_cache *cache = vcookie->cache->cache_priv;
+ const struct cred *saved_cred;
+ struct dentry *vdentry, *fan;
+ size_t len;
+ char *name;
+ bool is_new = false;
+ int ret, n_accesses, i;
+
+ _enter("");
+
+ volume = kzalloc(sizeof(struct cachefiles_volume), GFP_KERNEL);
+ if (!volume)
+ return;
+ volume->vcookie = vcookie;
+ volume->cache = cache;
+ INIT_LIST_HEAD(&volume->cache_link);
+
+ cachefiles_begin_secure(cache, &saved_cred);
+
+ len = vcookie->key[0];
+ name = kmalloc(len + 3, GFP_NOFS);
+ if (!name)
+ goto error_vol;
+ name[0] = 'I';
+ memcpy(name + 1, vcookie->key + 1, len);
+ name[len + 1] = 0;
+
+retry:
+ vdentry = cachefiles_get_directory(cache, cache->store, name, &is_new);
+ if (IS_ERR(vdentry))
+ goto error_name;
+ volume->dentry = vdentry;
+
+ if (is_new) {
+ if (!cachefiles_set_volume_xattr(volume))
+ goto error_dir;
+ } else {
+ ret = cachefiles_check_volume_xattr(volume);
+ if (ret < 0) {
+ if (ret != -ESTALE)
+ goto error_dir;
+ inode_lock_nested(d_inode(cache->store), I_MUTEX_PARENT);
+ cachefiles_bury_object(cache, NULL, cache->store, vdentry,
+ FSCACHE_VOLUME_IS_WEIRD);
+ cachefiles_put_directory(volume->dentry);
+ cond_resched();
+ goto retry;
+ }
+ }
+
+ for (i = 0; i < 256; i++) {
+ sprintf(name, "@%02x", i);
+ fan = cachefiles_get_directory(cache, vdentry, name, NULL);
+ if (IS_ERR(fan))
+ goto error_fan;
+ volume->fanout[i] = fan;
+ }
+
+ cachefiles_end_secure(cache, saved_cred);
+
+ vcookie->cache_priv = volume;
+ n_accesses = atomic_inc_return(&vcookie->n_accesses); /* Stop wakeups on dec-to-0 */
+ trace_fscache_access_volume(vcookie->debug_id, 0,
+ refcount_read(&vcookie->ref),
+ n_accesses, fscache_access_cache_pin);
+
+ spin_lock(&cache->object_list_lock);
+ list_add(&volume->cache_link, &volume->cache->volumes);
+ spin_unlock(&cache->object_list_lock);
+
+ kfree(name);
+ return;
+
+error_fan:
+ for (i = 0; i < 256; i++)
+ cachefiles_put_directory(volume->fanout[i]);
+error_dir:
+ cachefiles_put_directory(volume->dentry);
+error_name:
+ kfree(name);
+error_vol:
+ kfree(volume);
+ cachefiles_end_secure(cache, saved_cred);
+}
+
+/*
+ * Release a volume representation.
+ */
+static void __cachefiles_free_volume(struct cachefiles_volume *volume)
+{
+ int i;
+
+ _enter("");
+
+ volume->vcookie->cache_priv = NULL;
+
+ for (i = 0; i < 256; i++)
+ cachefiles_put_directory(volume->fanout[i]);
+ cachefiles_put_directory(volume->dentry);
+ kfree(volume);
+}
+
+void cachefiles_free_volume(struct fscache_volume *vcookie)
+{
+ struct cachefiles_volume *volume = vcookie->cache_priv;
+
+ if (volume) {
+ spin_lock(&volume->cache->object_list_lock);
+ list_del_init(&volume->cache_link);
+ spin_unlock(&volume->cache->object_list_lock);
+ __cachefiles_free_volume(volume);
+ }
+}
+
+void cachefiles_withdraw_volume(struct cachefiles_volume *volume)
+{
+ fscache_withdraw_volume(volume->vcookie);
+ cachefiles_set_volume_xattr(volume);
+ __cachefiles_free_volume(volume);
+}
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
new file mode 100644
index 000000000..00b087c14
--- /dev/null
+++ b/fs/cachefiles/xattr.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* CacheFiles extended attribute management
+ *
+ * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/fsnotify.h>
+#include <linux/quotaops.h>
+#include <linux/xattr.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+#define CACHEFILES_COOKIE_TYPE_DATA 1
+
+struct cachefiles_xattr {
+ __be64 object_size; /* Actual size of the object */
+ __be64 zero_point; /* Size after which server has no data not written by us */
+ __u8 type; /* Type of object */
+ __u8 content; /* Content presence (enum cachefiles_content) */
+ __u8 data[]; /* netfs coherency data */
+} __packed;
+
+static const char cachefiles_xattr_cache[] =
+ XATTR_USER_PREFIX "CacheFiles.cache";
+
+struct cachefiles_vol_xattr {
+ __be32 reserved; /* Reserved, should be 0 */
+ __u8 data[]; /* netfs volume coherency data */
+} __packed;
+
+/*
+ * set the state xattr on a cache file
+ */
+int cachefiles_set_object_xattr(struct cachefiles_object *object)
+{
+ struct cachefiles_xattr *buf;
+ struct dentry *dentry;
+ struct file *file = object->file;
+ unsigned int len = object->cookie->aux_len;
+ int ret;
+
+ if (!file)
+ return -ESTALE;
+ dentry = file->f_path.dentry;
+
+ _enter("%x,#%d", object->debug_id, len);
+
+ buf = kmalloc(sizeof(struct cachefiles_xattr) + len, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ buf->object_size = cpu_to_be64(object->cookie->object_size);
+ buf->zero_point = 0;
+ buf->type = CACHEFILES_COOKIE_TYPE_DATA;
+ buf->content = object->content_info;
+ if (test_bit(FSCACHE_COOKIE_LOCAL_WRITE, &object->cookie->flags))
+ buf->content = CACHEFILES_CONTENT_DIRTY;
+ if (len > 0)
+ memcpy(buf->data, fscache_get_aux(object->cookie), len);
+
+ ret = cachefiles_inject_write_error();
+ if (ret == 0)
+ ret = vfs_setxattr(&init_user_ns, dentry, cachefiles_xattr_cache,
+ buf, sizeof(struct cachefiles_xattr) + len, 0);
+ if (ret < 0) {
+ trace_cachefiles_vfs_error(object, file_inode(file), ret,
+ cachefiles_trace_setxattr_error);
+ trace_cachefiles_coherency(object, file_inode(file)->i_ino,
+ buf->content,
+ cachefiles_coherency_set_fail);
+ if (ret != -ENOMEM)
+ cachefiles_io_error_obj(
+ object,
+ "Failed to set xattr with error %d", ret);
+ } else {
+ trace_cachefiles_coherency(object, file_inode(file)->i_ino,
+ buf->content,
+ cachefiles_coherency_set_ok);
+ }
+
+ kfree(buf);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * check the consistency between the backing cache and the FS-Cache cookie
+ */
+int cachefiles_check_auxdata(struct cachefiles_object *object, struct file *file)
+{
+ struct cachefiles_xattr *buf;
+ struct dentry *dentry = file->f_path.dentry;
+ unsigned int len = object->cookie->aux_len, tlen;
+ const void *p = fscache_get_aux(object->cookie);
+ enum cachefiles_coherency_trace why;
+ ssize_t xlen;
+ int ret = -ESTALE;
+
+ tlen = sizeof(struct cachefiles_xattr) + len;
+ buf = kmalloc(tlen, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ xlen = cachefiles_inject_read_error();
+ if (xlen == 0)
+ xlen = vfs_getxattr(&init_user_ns, dentry, cachefiles_xattr_cache, buf, tlen);
+ if (xlen != tlen) {
+ if (xlen < 0)
+ trace_cachefiles_vfs_error(object, file_inode(file), xlen,
+ cachefiles_trace_getxattr_error);
+ if (xlen == -EIO)
+ cachefiles_io_error_obj(
+ object,
+ "Failed to read aux with error %zd", xlen);
+ why = cachefiles_coherency_check_xattr;
+ } else if (buf->type != CACHEFILES_COOKIE_TYPE_DATA) {
+ why = cachefiles_coherency_check_type;
+ } else if (memcmp(buf->data, p, len) != 0) {
+ why = cachefiles_coherency_check_aux;
+ } else if (be64_to_cpu(buf->object_size) != object->cookie->object_size) {
+ why = cachefiles_coherency_check_objsize;
+ } else if (buf->content == CACHEFILES_CONTENT_DIRTY) {
+ // TODO: Begin conflict resolution
+ pr_warn("Dirty object in cache\n");
+ why = cachefiles_coherency_check_dirty;
+ } else {
+ why = cachefiles_coherency_check_ok;
+ ret = 0;
+ }
+
+ trace_cachefiles_coherency(object, file_inode(file)->i_ino,
+ buf->content, why);
+ kfree(buf);
+ return ret;
+}
+
+/*
+ * remove the object's xattr to mark it stale
+ */
+int cachefiles_remove_object_xattr(struct cachefiles_cache *cache,
+ struct cachefiles_object *object,
+ struct dentry *dentry)
+{
+ int ret;
+
+ ret = cachefiles_inject_remove_error();
+ if (ret == 0)
+ ret = vfs_removexattr(&init_user_ns, dentry, cachefiles_xattr_cache);
+ if (ret < 0) {
+ trace_cachefiles_vfs_error(object, d_inode(dentry), ret,
+ cachefiles_trace_remxattr_error);
+ if (ret == -ENOENT || ret == -ENODATA)
+ ret = 0;
+ else if (ret != -ENOMEM)
+ cachefiles_io_error(cache,
+ "Can't remove xattr from %lu"
+ " (error %d)",
+ d_backing_inode(dentry)->i_ino, -ret);
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Stick a marker on the cache object to indicate that it's dirty.
+ */
+void cachefiles_prepare_to_write(struct fscache_cookie *cookie)
+{
+ const struct cred *saved_cred;
+ struct cachefiles_object *object = cookie->cache_priv;
+ struct cachefiles_cache *cache = object->volume->cache;
+
+ _enter("c=%08x", object->cookie->debug_id);
+
+ if (!test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags)) {
+ cachefiles_begin_secure(cache, &saved_cred);
+ cachefiles_set_object_xattr(object);
+ cachefiles_end_secure(cache, saved_cred);
+ }
+}
+
+/*
+ * Set the state xattr on a volume directory.
+ */
+bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume)
+{
+ struct cachefiles_vol_xattr *buf;
+ unsigned int len = volume->vcookie->coherency_len;
+ const void *p = volume->vcookie->coherency;
+ struct dentry *dentry = volume->dentry;
+ int ret;
+
+ _enter("%x,#%d", volume->vcookie->debug_id, len);
+
+ len += sizeof(*buf);
+ buf = kmalloc(len, GFP_KERNEL);
+ if (!buf)
+ return false;
+ buf->reserved = cpu_to_be32(0);
+ memcpy(buf->data, p, volume->vcookie->coherency_len);
+
+ ret = cachefiles_inject_write_error();
+ if (ret == 0)
+ ret = vfs_setxattr(&init_user_ns, dentry, cachefiles_xattr_cache,
+ buf, len, 0);
+ if (ret < 0) {
+ trace_cachefiles_vfs_error(NULL, d_inode(dentry), ret,
+ cachefiles_trace_setxattr_error);
+ trace_cachefiles_vol_coherency(volume, d_inode(dentry)->i_ino,
+ cachefiles_coherency_vol_set_fail);
+ if (ret != -ENOMEM)
+ cachefiles_io_error(
+ volume->cache, "Failed to set xattr with error %d", ret);
+ } else {
+ trace_cachefiles_vol_coherency(volume, d_inode(dentry)->i_ino,
+ cachefiles_coherency_vol_set_ok);
+ }
+
+ kfree(buf);
+ _leave(" = %d", ret);
+ return ret == 0;
+}
+
+/*
+ * Check the consistency between the backing cache and the volume cookie.
+ */
+int cachefiles_check_volume_xattr(struct cachefiles_volume *volume)
+{
+ struct cachefiles_vol_xattr *buf;
+ struct dentry *dentry = volume->dentry;
+ unsigned int len = volume->vcookie->coherency_len;
+ const void *p = volume->vcookie->coherency;
+ enum cachefiles_coherency_trace why;
+ ssize_t xlen;
+ int ret = -ESTALE;
+
+ _enter("");
+
+ len += sizeof(*buf);
+ buf = kmalloc(len, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ xlen = cachefiles_inject_read_error();
+ if (xlen == 0)
+ xlen = vfs_getxattr(&init_user_ns, dentry, cachefiles_xattr_cache, buf, len);
+ if (xlen != len) {
+ if (xlen < 0) {
+ trace_cachefiles_vfs_error(NULL, d_inode(dentry), xlen,
+ cachefiles_trace_getxattr_error);
+ if (xlen == -EIO)
+ cachefiles_io_error(
+ volume->cache,
+ "Failed to read xattr with error %zd", xlen);
+ }
+ why = cachefiles_coherency_vol_check_xattr;
+ } else if (buf->reserved != cpu_to_be32(0)) {
+ why = cachefiles_coherency_vol_check_resv;
+ } else if (memcmp(buf->data, p, len - sizeof(*buf)) != 0) {
+ why = cachefiles_coherency_vol_check_cmp;
+ } else {
+ why = cachefiles_coherency_vol_check_ok;
+ ret = 0;
+ }
+
+ trace_cachefiles_vol_coherency(volume, d_inode(dentry)->i_ino, why);
+ kfree(buf);
+ _leave(" = %d", ret);
+ return ret;
+}