summaryrefslogtreecommitdiffstats
path: root/fs/afs
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:49:45 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:49:45 +0000
commit2c3c1048746a4622d8c89a29670120dc8fab93c4 (patch)
tree848558de17fb3008cdf4d861b01ac7781903ce39 /fs/afs
parentInitial commit. (diff)
downloadlinux-2c3c1048746a4622d8c89a29670120dc8fab93c4.tar.xz
linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.zip
Adding upstream version 6.1.76.upstream/6.1.76upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--fs/afs/Kconfig43
-rw-r--r--fs/afs/Makefile41
-rw-r--r--fs/afs/addr_list.c404
-rw-r--r--fs/afs/afs.h205
-rw-r--r--fs/afs/afs_cm.h29
-rw-r--r--fs/afs/afs_fs.h62
-rw-r--r--fs/afs/afs_vl.h146
-rw-r--r--fs/afs/callback.c228
-rw-r--r--fs/afs/cell.c954
-rw-r--r--fs/afs/cmservice.c672
-rw-r--r--fs/afs/dir.c2053
-rw-r--r--fs/afs/dir_edit.c493
-rw-r--r--fs/afs/dir_silly.c282
-rw-r--r--fs/afs/dynroot.c397
-rw-r--r--fs/afs/file.c598
-rw-r--r--fs/afs/flock.c877
-rw-r--r--fs/afs/fs_operation.c259
-rw-r--r--fs/afs/fs_probe.c478
-rw-r--r--fs/afs/fsclient.c2083
-rw-r--r--fs/afs/inode.c972
-rw-r--r--fs/afs/internal.h1785
-rw-r--r--fs/afs/main.c244
-rw-r--r--fs/afs/misc.c174
-rw-r--r--fs/afs/mntpt.c219
-rw-r--r--fs/afs/proc.c705
-rw-r--r--fs/afs/protocol_afs.h15
-rw-r--r--fs/afs/protocol_uae.h132
-rw-r--r--fs/afs/protocol_yfs.h176
-rw-r--r--fs/afs/rotate.c518
-rw-r--r--fs/afs/rxrpc.c938
-rw-r--r--fs/afs/security.c487
-rw-r--r--fs/afs/server.c725
-rw-r--r--fs/afs/server_list.c129
-rw-r--r--fs/afs/super.c778
-rw-r--r--fs/afs/vl_alias.c383
-rw-r--r--fs/afs/vl_list.c328
-rw-r--r--fs/afs/vl_probe.c292
-rw-r--r--fs/afs/vl_rotate.c358
-rw-r--r--fs/afs/vlclient.c759
-rw-r--r--fs/afs/volume.c452
-rw-r--r--fs/afs/write.c1040
-rw-r--r--fs/afs/xattr.c363
-rw-r--r--fs/afs/xdr_fs.h116
-rw-r--r--fs/afs/yfsclient.c1953
44 files changed, 24345 insertions, 0 deletions
diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig
new file mode 100644
index 000000000..fc8ba9142
--- /dev/null
+++ b/fs/afs/Kconfig
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config AFS_FS
+ tristate "Andrew File System support (AFS)"
+ depends on INET
+ select AF_RXRPC
+ select DNS_RESOLVER
+ select NETFS_SUPPORT
+ help
+ If you say Y here, you will get an experimental Andrew File System
+ driver. It currently only supports unsecured read-only AFS access.
+
+ See <file:Documentation/filesystems/afs.rst> for more information.
+
+ If unsure, say N.
+
+config AFS_DEBUG
+ bool "AFS dynamic debugging"
+ depends on AFS_FS
+ help
+ Say Y here to make runtime controllable debugging messages appear.
+
+ See <file:Documentation/filesystems/afs.rst> for more information.
+
+ If unsure, say N.
+
+config AFS_FSCACHE
+ bool "Provide AFS client caching support"
+ depends on AFS_FS=m && FSCACHE || AFS_FS=y && FSCACHE=y
+ help
+ Say Y here if you want AFS data to be cached locally on disk through
+ the generic filesystem cache manager.
+
+config AFS_DEBUG_CURSOR
+ bool "AFS server cursor debugging"
+ depends on AFS_FS
+ help
+ Say Y here to cause the contents of a server cursor to be dumped to
+ the dmesg log if the server rotation algorithm fails to successfully
+ contact a server.
+
+ See <file:Documentation/filesystems/afs.rst> for more information.
+
+ If unsure, say N.
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
new file mode 100644
index 000000000..e8956b65d
--- /dev/null
+++ b/fs/afs/Makefile
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Red Hat Linux AFS client.
+#
+
+kafs-y := \
+ addr_list.o \
+ callback.o \
+ cell.o \
+ cmservice.o \
+ dir.o \
+ dir_edit.o \
+ dir_silly.o \
+ dynroot.o \
+ file.o \
+ flock.o \
+ fsclient.o \
+ fs_operation.o \
+ fs_probe.o \
+ inode.o \
+ main.o \
+ misc.o \
+ mntpt.o \
+ rotate.o \
+ rxrpc.o \
+ security.o \
+ server.o \
+ server_list.o \
+ super.o \
+ vlclient.o \
+ vl_alias.o \
+ vl_list.o \
+ vl_probe.o \
+ vl_rotate.o \
+ volume.o \
+ write.o \
+ xattr.o \
+ yfsclient.o
+
+kafs-$(CONFIG_PROC_FS) += proc.o
+obj-$(CONFIG_AFS_FS) := kafs.o
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
new file mode 100644
index 000000000..de1ae0bea
--- /dev/null
+++ b/fs/afs/addr_list.c
@@ -0,0 +1,404 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Server address list management
+ *
+ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/slab.h>
+#include <linux/ctype.h>
+#include <linux/dns_resolver.h>
+#include <linux/inet.h>
+#include <keys/rxrpc-type.h>
+#include "internal.h"
+#include "afs_fs.h"
+
+/*
+ * Release an address list.
+ *
+ * Drops one reference on @alist. When the usage count reaches zero the list
+ * is passed to kfree_rcu() to be freed. A NULL @alist is accepted and
+ * ignored.
+ */
+void afs_put_addrlist(struct afs_addr_list *alist)
+{
+ if (alist && refcount_dec_and_test(&alist->usage))
+ kfree_rcu(alist, rcu);
+}
+
+/*
+ * Allocate an address list.
+ *
+ * @nr: number of address slots wanted; clamped to AFS_MAX_ADDRESSES.
+ * @service: value stored in srx_service of every slot.
+ * @port: port number in host byte order; stored via htons() in every slot.
+ *
+ * The list is zero-allocated with GFP_KERNEL, its usage count is set to 1 and
+ * max_addrs is set to the clamped @nr. Every slot is pre-filled as an
+ * AF_RXRPC / SOCK_DGRAM address with an AF_INET6 transport.
+ *
+ * Returns the new list, or NULL if the allocation fails.
+ */
+struct afs_addr_list *afs_alloc_addrlist(unsigned int nr,
+ unsigned short service,
+ unsigned short port)
+{
+ struct afs_addr_list *alist;
+ unsigned int i;
+
+ _enter("%u,%u,%u", nr, service, port);
+
+ if (nr > AFS_MAX_ADDRESSES) /* never allocate more than the list limit */
+ nr = AFS_MAX_ADDRESSES;
+
+ alist = kzalloc(struct_size(alist, addrs, nr), GFP_KERNEL); /* header + nr trailing slots */
+ if (!alist)
+ return NULL;
+
+ refcount_set(&alist->usage, 1);
+ alist->max_addrs = nr;
+
+ for (i = 0; i < nr; i++) { /* preset each slot as an IPv6 rxrpc address */
+ struct sockaddr_rxrpc *srx = &alist->addrs[i];
+ srx->srx_family = AF_RXRPC;
+ srx->srx_service = service;
+ srx->transport_type = SOCK_DGRAM;
+ srx->transport_len = sizeof(srx->transport.sin6);
+ srx->transport.sin6.sin6_family = AF_INET6;
+ srx->transport.sin6.sin6_port = htons(port);
+ }
+
+ return alist;
+}
+
+/*
+ * Parse a text string consisting of delimited addresses.
+ *
+ * @net: AFS network namespace; used here only to release the server list on
+ * the error paths.
+ * @text: the address text (@len bytes; it need not be NUL-terminated).
+ * @len: number of bytes in @text.
+ * @delim: separator between addresses. If ':' is passed but the text contains
+ * a ',' or contains no '.', ',' is used instead (presumably so that the
+ * colons inside IPv6 literals are not taken as separators).
+ * @service: rxrpc service ID stored in each address slot.
+ * @port: port used for an address that has no "+<port>" suffix.
+ *
+ * Each entry is an IPv4 or IPv6 literal, optionally wrapped in "[...]" and
+ * optionally followed by "+<port>" (decimal, at most 65535). Empty entries
+ * (leading, trailing or doubled delimiters) are skipped.
+ *
+ * Returns a one-server VL server list (server name "<dummy>") carrying the
+ * parsed addresses, or ERR_PTR(-EDESTADDRREQ) if @len is 0, ERR_PTR(-EINVAL)
+ * if the text is malformed, or ERR_PTR(-ENOMEM) if an allocation fails.
+ */
+struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *net,
+ const char *text, size_t len,
+ char delim,
+ unsigned short service,
+ unsigned short port)
+{
+ struct afs_vlserver_list *vllist;
+ struct afs_addr_list *alist;
+ const char *p, *end = text + len;
+ const char *problem;
+ unsigned int nr = 0;
+ int ret = -ENOMEM;
+
+ _enter("%*.*s,%c", (int)len, (int)len, text, delim);
+
+ if (!len) {
+ _leave(" = -EDESTADDRREQ [empty]");
+ return ERR_PTR(-EDESTADDRREQ);
+ }
+
+ if (delim == ':' && (memchr(text, ',', len) || !memchr(text, '.', len)))
+ delim = ',';
+
+ /* Count the addresses */
+ p = text;
+ do {
+ if (!*p) {
+ problem = "nul";
+ goto inval;
+ }
+ if (*p == delim) {
+ /* Skip an empty entry. "continue" in a do-while jumps
+ * straight to the loop condition, so p must be advanced
+ * here or the loop would never terminate on a leading or
+ * doubled delimiter. This matches the extraction loop
+ * below.
+ */
+ p++;
+ continue;
+ }
+ nr++;
+ if (*p == '[') {
+ /* Bracketed literal: jump past the closing ']' so that
+ * delimiter characters inside the brackets are ignored.
+ */
+ p++;
+ if (p == end) {
+ problem = "brace1";
+ goto inval;
+ }
+ p = memchr(p, ']', end - p);
+ if (!p) {
+ problem = "brace2";
+ goto inval;
+ }
+ p++;
+ if (p >= end)
+ break;
+ }
+
+ p = memchr(p, delim, end - p);
+ if (!p)
+ break;
+ p++;
+ } while (p < end);
+
+ _debug("%u/%u addresses", nr, AFS_MAX_ADDRESSES);
+
+ vllist = afs_alloc_vlserver_list(1);
+ if (!vllist)
+ return ERR_PTR(-ENOMEM);
+
+ vllist->nr_servers = 1;
+ vllist->servers[0].server = afs_alloc_vlserver("<dummy>", 7, AFS_VL_PORT);
+ if (!vllist->servers[0].server)
+ goto error_vl;
+
+ alist = afs_alloc_addrlist(nr, service, AFS_VL_PORT);
+ if (!alist)
+ goto error;
+
+ /* Extract the addresses */
+ p = text;
+ do {
+ const char *q, *stop;
+ unsigned int xport = port;
+ __be32 x[4];
+ int family;
+
+ if (*p == delim) {
+ p++;
+ continue;
+ }
+
+ /* Find q, the end of the address literal starting at p */
+ if (*p == '[') {
+ p++;
+ q = memchr(p, ']', end - p);
+ } else {
+ for (q = p; q < end; q++)
+ if (*q == '+' || *q == delim)
+ break;
+ }
+
+ /* Try IPv4 first, then IPv6 */
+ if (in4_pton(p, q - p, (u8 *)&x[0], -1, &stop)) {
+ family = AF_INET;
+ } else if (in6_pton(p, q - p, (u8 *)x, -1, &stop)) {
+ family = AF_INET6;
+ } else {
+ problem = "family";
+ goto bad_address;
+ }
+
+ /* The parser must have consumed the whole literal */
+ p = q;
+ if (stop != p) {
+ problem = "nostop";
+ goto bad_address;
+ }
+
+ if (q < end && *q == ']')
+ p++;
+
+ if (p < end) {
+ if (*p == '+') {
+ /* Port number specification "+1234" */
+ xport = 0;
+ p++;
+ if (p >= end || !isdigit(*p)) {
+ problem = "port";
+ goto bad_address;
+ }
+ do {
+ xport *= 10;
+ xport += *p - '0';
+ if (xport > 65535) {
+ problem = "pval";
+ goto bad_address;
+ }
+ p++;
+ } while (p < end && isdigit(*p));
+ } else if (*p == delim) {
+ p++;
+ } else {
+ problem = "weird";
+ goto bad_address;
+ }
+ }
+
+ if (family == AF_INET)
+ afs_merge_fs_addr4(alist, x[0], xport);
+ else
+ afs_merge_fs_addr6(alist, x, xport);
+
+ } while (p < end);
+
+ rcu_assign_pointer(vllist->servers[0].server->addresses, alist);
+ _leave(" = [nr %u]", alist->nr_addrs);
+ return vllist;
+
+inval:
+ /* Reached only before anything has been allocated */
+ _leave(" = -EINVAL [%s %zu %*.*s]",
+ problem, p - text, (int)len, (int)len, text);
+ return ERR_PTR(-EINVAL);
+bad_address:
+ _leave(" = -EINVAL [%s %zu %*.*s]",
+ problem, p - text, (int)len, (int)len, text);
+ ret = -EINVAL;
+error:
+ afs_put_addrlist(alist); /* alist may be NULL here; the put tolerates that */
+error_vl:
+ afs_put_vlserverlist(net, vllist);
+ return ERR_PTR(ret);
+}
+
+/*
+ * Compare old and new address lists to see if there's been any change.
+ * - How to do this in better than O(Nlog(N)) time?
+ * - We don't really want to sort the address list, but would rather take the
+ * list as we got it so as not to undo record rotation by the DNS server.
+ *
+ * Not implemented: the stub below has an empty body and is compiled out by
+ * the "#if 0" guard.
+ */
+#if 0
+static int afs_cmp_addr_list(const struct afs_addr_list *a1,
+ const struct afs_addr_list *a2)
+{
+}
+#endif
+
+/*
+ * Perform a DNS query for VL servers and build up an address list.
+ *
+ * @cell: cell whose name is looked up (query type "afsdb", options "srv=1").
+ * @_expiry: passed to dns_query(); if it reads back as 0 afterwards it is
+ * replaced with the current real time plus 60 seconds.
+ *
+ * A reply longer than one byte whose first byte is 0 is handed to
+ * afs_extract_vlserver_list(); any other reply is parsed as ','-delimited
+ * text by afs_parse_text_addrs(). The reply buffer is freed here in both
+ * cases.
+ *
+ * Returns the VL server list, or an ERR_PTR: the dns_query() error if the
+ * lookup fails, otherwise whatever the parser returned. Parse errors other
+ * than -ENOMEM are also reported with pr_err().
+ */
+struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
+{
+ struct afs_vlserver_list *vllist;
+ char *result = NULL;
+ int ret;
+
+ _enter("%s", cell->name);
+
+ ret = dns_query(cell->net->net, "afsdb", cell->name, cell->name_len,
+ "srv=1", &result, _expiry, true);
+ if (ret < 0) {
+ _leave(" = %d [dns]", ret);
+ return ERR_PTR(ret);
+ }
+
+ if (*_expiry == 0) /* no expiry supplied: fall back to 60s from now */
+ *_expiry = ktime_get_real_seconds() + 60;
+
+ if (ret > 1 && result[0] == 0) /* leading zero byte selects the non-text parser */
+ vllist = afs_extract_vlserver_list(cell, result, ret);
+ else
+ vllist = afs_parse_text_addrs(cell->net, result, ret, ',',
+ VL_SERVICE, AFS_VL_PORT);
+ kfree(result);
+ if (IS_ERR(vllist) && vllist != ERR_PTR(-ENOMEM))
+ pr_err("Failed to parse DNS data %ld\n", PTR_ERR(vllist));
+
+ return vllist;
+}
+
+/*
+ * Merge an IPv4 entry into a fileserver address list.
+ *
+ * @alist: list to update; the IPv4 entries are the first nr_ipv4 slots.
+ * @xdr: IPv4 address as a big-endian 32-bit value.
+ * @port: port number in host byte order.
+ *
+ * The new entry is inserted ahead of the first existing IPv4 entry that
+ * sorts after it by (address, port), or after the last IPv4 entry if there is
+ * none; all later slots are shifted up by one. Nothing is done if the list
+ * already holds max_addrs entries or if the same address and port are
+ * already present.
+ *
+ * NOTE: srx_service is not written here; the slot keeps whatever value it
+ * already held.
+ */
+void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
+{
+ struct sockaddr_rxrpc *srx;
+ u32 addr = ntohl(xdr);
+ int i;
+
+ if (alist->nr_addrs >= alist->max_addrs) /* list full: drop the entry */
+ return;
+
+ for (i = 0; i < alist->nr_ipv4; i++) { /* find the insertion slot */
+ struct sockaddr_in *a = &alist->addrs[i].transport.sin;
+ u32 a_addr = ntohl(a->sin_addr.s_addr);
+ u16 a_port = ntohs(a->sin_port);
+
+ if (addr == a_addr && port == a_port) /* duplicate */
+ return;
+ if (addr == a_addr && port < a_port)
+ break;
+ if (addr < a_addr)
+ break;
+ }
+
+ if (i < alist->nr_addrs) /* open a gap at slot i */
+ memmove(alist->addrs + i + 1,
+ alist->addrs + i,
+ sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
+
+ srx = &alist->addrs[i];
+ srx->srx_family = AF_RXRPC;
+ srx->transport_type = SOCK_DGRAM;
+ srx->transport_len = sizeof(srx->transport.sin);
+ srx->transport.sin.sin_family = AF_INET;
+ srx->transport.sin.sin_port = htons(port);
+ srx->transport.sin.sin_addr.s_addr = xdr;
+ alist->nr_ipv4++;
+ alist->nr_addrs++;
+}
+
+/*
+ * Merge an IPv6 entry into a fileserver address list.
+ *
+ * @alist: list to update; the IPv6 entries are slots nr_ipv4 .. nr_addrs - 1.
+ * @xdr: pointer to the 16-byte IPv6 address.
+ * @port: port number in host byte order.
+ *
+ * The new entry is inserted ahead of the first existing IPv6 entry that
+ * sorts after it by (memcmp() of the address bytes, port), or at the end of
+ * the list if there is none; all later slots are shifted up by one. Nothing
+ * is done if the list already holds max_addrs entries or if the same address
+ * and port are already present.
+ *
+ * NOTE: srx_service is not written here; the slot keeps whatever value it
+ * already held.
+ */
+void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
+{
+ struct sockaddr_rxrpc *srx;
+ int i, diff;
+
+ if (alist->nr_addrs >= alist->max_addrs) /* list full: drop the entry */
+ return;
+
+ for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) { /* find the insertion slot */
+ struct sockaddr_in6 *a = &alist->addrs[i].transport.sin6;
+ u16 a_port = ntohs(a->sin6_port);
+
+ diff = memcmp(xdr, &a->sin6_addr, 16);
+ if (diff == 0 && port == a_port) /* duplicate */
+ return;
+ if (diff == 0 && port < a_port)
+ break;
+ if (diff < 0)
+ break;
+ }
+
+ if (i < alist->nr_addrs) /* open a gap at slot i */
+ memmove(alist->addrs + i + 1,
+ alist->addrs + i,
+ sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
+
+ srx = &alist->addrs[i];
+ srx->srx_family = AF_RXRPC;
+ srx->transport_type = SOCK_DGRAM;
+ srx->transport_len = sizeof(srx->transport.sin6);
+ srx->transport.sin6.sin6_family = AF_INET6;
+ srx->transport.sin6.sin6_port = htons(port);
+ memcpy(&srx->transport.sin6.sin6_addr, xdr, 16);
+ alist->nr_addrs++;
+}
+
+/*
+ * Get an address to try.
+ *
+ * Candidates are the bits set in alist->responded that are set in neither
+ * alist->failed nor ac->tried. If the list's preferred index is a candidate
+ * it is chosen; otherwise the lowest-numbered candidate is chosen. The
+ * chosen index is stored in ac->index and marked in ac->tried, and
+ * ac->responded is reset to false.
+ *
+ * Returns true if an address was selected, false if the cursor has no
+ * address list or no candidate remains. ac->nr_iterations is incremented
+ * whenever the cursor has an address list.
+ */
+bool afs_iterate_addresses(struct afs_addr_cursor *ac)
+{
+ unsigned long set, failed;
+ int index;
+
+ if (!ac->alist)
+ return false;
+
+ set = ac->alist->responded;
+ failed = ac->alist->failed;
+ _enter("%lx-%lx-%lx,%d", set, failed, ac->tried, ac->index);
+
+ ac->nr_iterations++;
+
+ set &= ~(failed | ac->tried); /* drop failed and already-tried addresses */
+
+ if (!set)
+ return false;
+
+ index = READ_ONCE(ac->alist->preferred);
+ if (test_bit(index, &set)) /* preferred address is still a candidate */
+ goto selected;
+
+ index = __ffs(set); /* otherwise take the lowest set bit */
+
+selected:
+ ac->index = index;
+ set_bit(index, &ac->tried);
+ ac->responded = false;
+ return true;
+}
+
+/*
+ * Release an address list cursor.
+ *
+ * If the cursor holds an address list: when the cursor saw a response on an
+ * index other than the list's preferred one, and the preferred index had
+ * also been tried through this cursor, ac->index becomes the new preferred
+ * index. The cursor's reference on the list is then dropped and ac->alist is
+ * cleared.
+ *
+ * Returns ac->error.
+ */
+int afs_end_cursor(struct afs_addr_cursor *ac)
+{
+ struct afs_addr_list *alist;
+
+ alist = ac->alist;
+ if (alist) {
+ if (ac->responded &&
+ ac->index != alist->preferred &&
+ test_bit(ac->alist->preferred, &ac->tried))
+ WRITE_ONCE(alist->preferred, ac->index);
+ afs_put_addrlist(alist);
+ ac->alist = NULL;
+ }
+
+ return ac->error;
+}
diff --git a/fs/afs/afs.h b/fs/afs/afs.h
new file mode 100644
index 000000000..432cb4b23
--- /dev/null
+++ b/fs/afs/afs.h
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* AFS common types
+ *
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#ifndef AFS_H
+#define AFS_H
+
+#include <linux/in.h>
+
+#define AFS_MAXCELLNAME 256 /* Maximum length of a cell name */
+#define AFS_MAXVOLNAME 64 /* Maximum length of a volume name */
+#define AFS_MAXNSERVERS 8 /* Maximum servers in a basic volume record */
+#define AFS_NMAXNSERVERS 13 /* Maximum servers in a N/U-class volume record */
+#define AFS_MAXTYPES 3 /* Maximum number of volume types */
+#define AFSNAMEMAX 256 /* Maximum length of a filename plus NUL */
+#define AFSPATHMAX 1024 /* Maximum length of a pathname plus NUL */
+#define AFSOPAQUEMAX 1024 /* Maximum length of an opaque field */
+
+#define AFS_VL_MAX_LIFESPAN (120 * HZ)
+#define AFS_PROBE_MAX_LIFESPAN (30 * HZ)
+
+typedef u64 afs_volid_t;
+typedef u64 afs_vnodeid_t;
+typedef u64 afs_dataversion_t;
+
+typedef enum { /* volume types; three values, the same count as AFS_MAXTYPES */
+ AFSVL_RWVOL, /* read/write volume */
+ AFSVL_ROVOL, /* read-only volume */
+ AFSVL_BACKVOL, /* backup volume */
+} __attribute__((packed)) afs_voltype_t;
+
+typedef enum { /* file type, as held in afs_file_status.type */
+ AFS_FTYPE_INVALID = 0, /* no valid type */
+ AFS_FTYPE_FILE = 1, /* file */
+ AFS_FTYPE_DIR = 2, /* directory */
+ AFS_FTYPE_SYMLINK = 3, /* symbolic link */
+} afs_file_type_t;
+
+typedef enum { /* kind of file lock being requested */
+ AFS_LOCK_READ = 0, /* read lock request */
+ AFS_LOCK_WRITE = 1, /* write lock request */
+} afs_lock_type_t;
+
+#define AFS_LOCKWAIT (5 * 60) /* time until a lock times out (seconds) */
+
+/*
+ * AFS file identifier
+ *
+ * The file index within the volume is split across two fields: vnode holds
+ * the lower 64 bits and vnode_hi the upper 32 bits.
+ */
+struct afs_fid {
+ afs_volid_t vid; /* volume ID */
+ afs_vnodeid_t vnode; /* Lower 64-bits of file index within volume */
+ u32 vnode_hi; /* Upper 32-bits of file index */
+ u32 unique; /* unique ID number (file index version) */
+};
+
+/*
+ * AFS callback notification
+ *
+ * The callback type enum is followed by the callback record itself and by
+ * the record describing a callback break. The version/type/cb members below
+ * are disabled (kept only as comments), so a callback currently carries just
+ * its expiry time and a break carries just the file identifier.
+ */
+typedef enum {
+ AFSCM_CB_UNTYPED = 0, /* no type set on CB break */
+ AFSCM_CB_EXCLUSIVE = 1, /* CB exclusive to CM [not implemented] */
+ AFSCM_CB_SHARED = 2, /* CB shared by other CM's */
+ AFSCM_CB_DROPPED = 3, /* CB promise cancelled by file server */
+} afs_callback_type_t;
+
+struct afs_callback {
+ time64_t expires_at; /* Time at which expires */
+ //unsigned version; /* Callback version */
+ //afs_callback_type_t type; /* Type of callback */
+};
+
+struct afs_callback_break {
+ struct afs_fid fid; /* File identifier */
+ //struct afs_callback cb; /* Callback details */
+};
+
+#define AFSCBMAX 50 /* maximum callbacks transferred per bulk op */
+
+struct afs_uuid { /* UUID in compact form (4+2+2+1+1+6 bytes of fields); struct afs_uuid__xdr is the XDR form */
+ __be32 time_low; /* low part of timestamp */
+ __be16 time_mid; /* mid part of timestamp */
+ __be16 time_hi_and_version; /* high part of timestamp and version */
+ __s8 clock_seq_hi_and_reserved; /* clock seq hi and variant */
+ __s8 clock_seq_low; /* clock seq low */
+ __s8 node[6]; /* spatially unique node ID (MAC addr) */
+};
+
+/*
+ * AFS volume information
+ *
+ * NOTE(review): type_vids[] has 5 entries although AFS_MAXTYPES is 3, and
+ * servers[] is sized with the literal 8, which equals AFS_MAXNSERVERS.
+ * Confirm whether the macros are meant to be used here.
+ */
+struct afs_volume_info {
+ afs_volid_t vid; /* volume ID */
+ afs_voltype_t type; /* type of this volume */
+ afs_volid_t type_vids[5]; /* volume ID's for possible types for this vol */
+
+ /* list of fileservers serving this volume */
+ size_t nservers; /* number of entries used in servers[] */
+ struct {
+ struct in_addr addr; /* fileserver address */
+ } servers[8];
+};
+
+/*
+ * AFS security ACE access mask
+ */
+typedef u32 afs_access_t;
+#define AFS_ACE_READ 0x00000001U /* - permission to read a file/dir */
+#define AFS_ACE_WRITE 0x00000002U /* - permission to write/chmod a file */
+#define AFS_ACE_INSERT 0x00000004U /* - permission to create dirent in a dir */
+#define AFS_ACE_LOOKUP 0x00000008U /* - permission to lookup a file/dir in a dir */
+#define AFS_ACE_DELETE 0x00000010U /* - permission to delete a dirent from a dir */
+#define AFS_ACE_LOCK 0x00000020U /* - permission to lock a file */
+#define AFS_ACE_ADMINISTER 0x00000040U /* - permission to change ACL */
+#define AFS_ACE_USER_A 0x01000000U /* - 'A' user-defined permission */
+#define AFS_ACE_USER_B 0x02000000U /* - 'B' user-defined permission */
+#define AFS_ACE_USER_C 0x04000000U /* - 'C' user-defined permission */
+#define AFS_ACE_USER_D 0x08000000U /* - 'D' user-defined permission */
+#define AFS_ACE_USER_E 0x10000000U /* - 'E' user-defined permission */
+#define AFS_ACE_USER_F 0x20000000U /* - 'F' user-defined permission */
+#define AFS_ACE_USER_G 0x40000000U /* - 'G' user-defined permission */
+#define AFS_ACE_USER_H 0x80000000U /* - 'H' user-defined permission */
+
+/*
+ * AFS file status information
+ *
+ * Embedded in struct afs_status_cb, whose have_error flag says whether
+ * abort_code below indicates an error.
+ */
+struct afs_file_status {
+ u64 size; /* file size */
+ afs_dataversion_t data_version; /* current data version */
+ struct timespec64 mtime_client; /* Last time client changed data */
+ struct timespec64 mtime_server; /* Last time server changed data */
+ s64 author; /* author ID */
+ s64 owner; /* owner ID */
+ s64 group; /* group ID */
+ afs_access_t caller_access; /* access rights for authenticated caller */
+ afs_access_t anon_access; /* access rights for unauthenticated caller */
+ umode_t mode; /* UNIX mode */
+ afs_file_type_t type; /* file type */
+ u32 nlink; /* link count */
+ s32 lock_count; /* file lock count (0=UNLK -1=WRLCK +ve=#RDLCK) */
+ u32 abort_code; /* Abort if bulk-fetching this failed */
+};
+
+struct afs_status_cb { /* a file status record and a callback record, with flags saying which are valid */
+ struct afs_file_status status;
+ struct afs_callback callback;
+ bool have_status; /* True if status record was retrieved */
+ bool have_cb; /* True if cb record was retrieved */
+ bool have_error; /* True if status.abort_code indicates an error */
+};
+
+/*
+ * AFS file status change request
+ */
+
+#define AFS_SET_MTIME 0x01 /* set the mtime */
+#define AFS_SET_OWNER 0x02 /* set the owner ID */
+#define AFS_SET_GROUP 0x04 /* set the group ID (unsupported?) */
+#define AFS_SET_MODE 0x08 /* set the UNIX mode */
+#define AFS_SET_SEG_SIZE 0x10 /* set the segment size (unsupported) */
+
+/*
+ * AFS volume synchronisation information
+ */
+struct afs_volsync {
+ time64_t creation; /* volume creation time */
+};
+
+/*
+ * AFS volume status record
+ *
+ * The quota and usage fields are counted in blocks, as their comments say.
+ * NOTE(review): presumably these are AFS_BLOCK_SIZE-byte blocks, and
+ * presumably part_blocks_avail and part_max_blocks use the same unit -
+ * confirm.
+ */
+struct afs_volume_status {
+ afs_volid_t vid; /* volume ID */
+ afs_volid_t parent_id; /* parent volume ID */
+ u8 online; /* true if volume currently online and available */
+ u8 in_service; /* true if volume currently in service */
+ u8 blessed; /* same as in_service */
+ u8 needs_salvage; /* true if consistency checking required */
+ u32 type; /* volume type (afs_voltype_t) */
+ u64 min_quota; /* minimum space set aside (blocks) */
+ u64 max_quota; /* maximum space this volume may occupy (blocks) */
+ u64 blocks_in_use; /* space this volume currently occupies (blocks) */
+ u64 part_blocks_avail; /* space available in volume's partition */
+ u64 part_max_blocks; /* size of volume's partition */
+ s64 vol_copy_date; /* presumably the time the volume was last copied - TODO confirm meaning and units */
+ s64 vol_backup_date; /* presumably the time the volume was last backed up - TODO confirm meaning and units */
+};
+
+#define AFS_BLOCK_SIZE 1024
+
+/*
+ * XDR encoding of UUID in AFS.
+ *
+ * Has the same fields as struct afs_uuid, but every field - including each
+ * of the six node bytes - occupies its own big-endian 32-bit word.
+ */
+struct afs_uuid__xdr {
+ __be32 time_low;
+ __be32 time_mid;
+ __be32 time_hi_and_version;
+ __be32 clock_seq_hi_and_reserved;
+ __be32 clock_seq_low;
+ __be32 node[6];
+};
+
+#endif /* AFS_H */
diff --git a/fs/afs/afs_cm.h b/fs/afs/afs_cm.h
new file mode 100644
index 000000000..565cbe0a8
--- /dev/null
+++ b/fs/afs/afs_cm.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* AFS Cache Manager definitions
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#ifndef AFS_CM_H
+#define AFS_CM_H
+
+#define AFS_CM_PORT 7001 /* AFS cache manager port */
+#define CM_SERVICE 1 /* AFS Cache Manager service ID */
+
+enum AFS_CM_Operations {
+ CBCallBack = 204, /* break callback promises */
+ CBInitCallBackState = 205, /* initialise callback state */
+ CBProbe = 206, /* probe client */
+ CBGetLock = 207, /* get contents of CM lock table */
+ CBGetCE = 208, /* get cache file description */
+ CBGetXStatsVersion = 209, /* get version of extended statistics */
+ CBGetXStats = 210, /* get contents of extended statistics data */
+ CBInitCallBackState3 = 213, /* initialise callback state, version 3 */
+ CBProbeUuid = 214, /* check the client hasn't rebooted */
+ CBTellMeAboutYourself = 65538, /* get client capabilities */
+};
+
+#define AFS_CAP_ERROR_TRANSLATION 0x1 /* capability bit; presumably reported via CBTellMeAboutYourself - TODO confirm */
+
+#endif /* AFS_CM_H */
diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h
new file mode 100644
index 000000000..20ab344ba
--- /dev/null
+++ b/fs/afs/afs_fs.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* AFS File Service definitions
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#ifndef AFS_FS_H
+#define AFS_FS_H
+
+#define AFS_FS_PORT 7000 /* AFS file server port */
+#define FS_SERVICE 1 /* AFS File Service ID */
+
+enum AFS_FS_Operations { /* operation IDs of the AFS File Service */
+ FSFETCHDATA = 130, /* AFS Fetch file data */
+ FSFETCHACL = 131, /* AFS Fetch file ACL */
+ FSFETCHSTATUS = 132, /* AFS Fetch file status */
+ FSSTOREDATA = 133, /* AFS Store file data */
+ FSSTOREACL = 134, /* AFS Store file ACL */
+ FSSTORESTATUS = 135, /* AFS Store file status */
+ FSREMOVEFILE = 136, /* AFS Remove a file */
+ FSCREATEFILE = 137, /* AFS Create a file */
+ FSRENAME = 138, /* AFS Rename or move a file or directory */
+ FSSYMLINK = 139, /* AFS Create a symbolic link */
+ FSLINK = 140, /* AFS Create a hard link */
+ FSMAKEDIR = 141, /* AFS Create a directory */
+ FSREMOVEDIR = 142, /* AFS Remove a directory */
+ FSGIVEUPCALLBACKS = 147, /* AFS Discard callback promises */
+ FSGETVOLUMEINFO = 148, /* AFS Get information about a volume */
+ FSGETVOLUMESTATUS = 149, /* AFS Get volume status information */
+ FSGETROOTVOLUME = 151, /* AFS Get root volume name */
+ FSBULKSTATUS = 155, /* AFS Fetch multiple file statuses */
+ FSSETLOCK = 156, /* AFS Request a file lock */
+ FSEXTENDLOCK = 157, /* AFS Extend a file lock */
+ FSRELEASELOCK = 158, /* AFS Release a file lock */
+ FSLOOKUP = 161, /* AFS lookup file in directory */
+ FSINLINEBULKSTATUS = 65536, /* AFS Fetch multiple file statuses with inline errors */
+ FSFETCHDATA64 = 65537, /* AFS Fetch file data (presumably the 64-bit variant of FSFETCHDATA - TODO confirm) */
+ FSSTOREDATA64 = 65538, /* AFS Store file data (presumably the 64-bit variant of FSSTOREDATA - TODO confirm) */
+ FSGIVEUPALLCALLBACKS = 65539, /* AFS Give up all outstanding callbacks on a server */
+ FSGETCAPABILITIES = 65540, /* Probe and get the capabilities of a fileserver */
+};
+
+enum AFS_FS_Errors { /* volume-level error codes; NOTE(review): VRESTARTING is the only negative value - confirm intended */
+ VRESTARTING = -100, /* Server is restarting */
+ VSALVAGE = 101, /* volume needs salvaging */
+ VNOVNODE = 102, /* no such file/dir (vnode) */
+ VNOVOL = 103, /* no such volume or volume unavailable */
+ VVOLEXISTS = 104, /* volume name already exists */
+ VNOSERVICE = 105, /* volume not currently in service */
+ VOFFLINE = 106, /* volume is currently offline (more info available [VVL-spec]) */
+ VONLINE = 107, /* volume is already online */
+ VDISKFULL = 108, /* disk partition is full */
+ VOVERQUOTA = 109, /* volume's maximum quota exceeded */
+ VBUSY = 110, /* volume is temporarily unavailable */
+ VMOVED = 111, /* volume moved to new server - ask this FS where */
+ VIO = 112, /* I/O error in volume */
+ VSALVAGING = 113, /* Volume is being salvaged */
+ VRESTRICTED = 120, /* Volume is restricted from using */
+};
+
+#endif /* AFS_FS_H */
diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h
new file mode 100644
index 000000000..9c65ffb8a
--- /dev/null
+++ b/fs/afs/afs_vl.h
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* AFS Volume Location Service client interface
+ *
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#ifndef AFS_VL_H
+#define AFS_VL_H
+
+#include "afs.h"
+
+#define AFS_VL_PORT 7003 /* volume location service port */
+#define VL_SERVICE 52 /* RxRPC service ID for the Volume Location service */
+#define YFS_VL_SERVICE 2503 /* Service ID for AuriStor upgraded VL service */
+
+enum AFSVL_Operations { /* operation IDs of the Volume Location service; the YVL* entries are the YFS ones */
+ VLGETENTRYBYID = 503, /* AFS Get VLDB entry by ID */
+ VLGETENTRYBYNAME = 504, /* AFS Get VLDB entry by name */
+ VLPROBE = 514, /* AFS probe VL service */
+ VLGETENTRYBYIDU = 526, /* AFS Get VLDB entry by ID (UUID-variant) */
+ VLGETENTRYBYNAMEU = 527, /* AFS Get VLDB entry by name (UUID-variant) */
+ VLGETADDRSU = 533, /* AFS Get addrs for fileserver */
+ YVLGETENDPOINTS = 64002, /* YFS Get endpoints for file/volume server */
+ YVLGETCELLNAME = 64014, /* YFS Get actual cell name */
+ VLGETCAPABILITIES = 65537, /* AFS Get server capabilities */
+};
+
+enum AFSVL_Errors { /* Volume Location service error codes; consecutive values 363520 .. 363547 */
+ AFSVL_IDEXIST = 363520, /* Volume Id entry exists in vl database */
+ AFSVL_IO = 363521, /* I/O related error */
+ AFSVL_NAMEEXIST = 363522, /* Volume name entry exists in vl database */
+ AFSVL_CREATEFAIL = 363523, /* Internal creation failure */
+ AFSVL_NOENT = 363524, /* No such entry */
+ AFSVL_EMPTY = 363525, /* Vl database is empty */
+ AFSVL_ENTDELETED = 363526, /* Entry is deleted (soft delete) */
+ AFSVL_BADNAME = 363527, /* Volume name is illegal */
+ AFSVL_BADINDEX = 363528, /* Index is out of range */
+ AFSVL_BADVOLTYPE = 363529, /* Bad volume type */
+ AFSVL_BADSERVER = 363530, /* Illegal server number (out of range) */
+ AFSVL_BADPARTITION = 363531, /* Bad partition number */
+ AFSVL_REPSFULL = 363532, /* Run out of space for Replication sites */
+ AFSVL_NOREPSERVER = 363533, /* No such Replication server site exists */
+ AFSVL_DUPREPSERVER = 363534, /* Replication site already exists */
+ AFSVL_RWNOTFOUND = 363535, /* Parent R/W entry not found */
+ AFSVL_BADREFCOUNT = 363536, /* Illegal Reference Count number */
+ AFSVL_SIZEEXCEEDED = 363537, /* Vl size for attributes exceeded */
+ AFSVL_BADENTRY = 363538, /* Bad incoming vl entry */
+ AFSVL_BADVOLIDBUMP = 363539, /* Illegal max volid increment */
+ AFSVL_IDALREADYHASHED = 363540, /* RO/BACK id already hashed */
+ AFSVL_ENTRYLOCKED = 363541, /* Vl entry is already locked */
+ AFSVL_BADVOLOPER = 363542, /* Bad volume operation code */
+ AFSVL_BADRELLOCKTYPE = 363543, /* Bad release lock type */
+ AFSVL_RERELEASE = 363544, /* Status report: last release was aborted */
+ AFSVL_BADSERVERFLAG = 363545, /* Invalid replication site server flag */
+ AFSVL_PERM = 363546, /* No permission access */
+ AFSVL_NOMEM = 363547, /* malloc/realloc failed to alloc enough memory */
+};
+
+enum {
+ YFS_SERVER_INDEX = 0,
+ YFS_SERVER_UUID = 1,
+ YFS_SERVER_ENDPOINT = 2,
+};
+
+enum {
+ YFS_ENDPOINT_IPV4 = 0,
+ YFS_ENDPOINT_IPV6 = 1,
+};
+
+#define YFS_MAXENDPOINTS 16
+
+/*
+ * maps to "struct vldbentry" in vvl-spec.pdf
+ *
+ * NOTE(review): the literal array sizes 65, 3 and 8 used below equal
+ * AFS_VLDB_MAXNAMELEN, AFS_MAXTYPES and AFS_MAXNSERVERS respectively;
+ * confirm whether the macros are meant to be used instead.
+ */
+struct afs_vldbentry {
+ char name[65]; /* name of volume (with NUL char) */
+ afs_voltype_t type; /* volume type */
+ unsigned num_servers; /* num servers that hold instances of this vol */
+ unsigned clone_id; /* cloning ID */
+
+ unsigned flags; /* AFS_VLF_* bits */
+#define AFS_VLF_RWEXISTS 0x1000 /* R/W volume exists */
+#define AFS_VLF_ROEXISTS 0x2000 /* R/O volume exists */
+#define AFS_VLF_BACKEXISTS 0x4000 /* backup volume exists */
+
+ afs_volid_t volume_ids[3]; /* volume IDs */
+
+ struct {
+ struct in_addr addr; /* server address */
+ unsigned partition; /* partition ID on this server */
+ unsigned flags; /* server specific flags */
+#define AFS_VLSF_NEWREPSITE 0x0001 /* Ignore all 'non-new' servers */
+#define AFS_VLSF_ROVOL 0x0002 /* this server holds a R/O instance of the volume */
+#define AFS_VLSF_RWVOL 0x0004 /* this server holds a R/W instance of the volume */
+#define AFS_VLSF_BACKVOL 0x0008 /* this server holds a backup instance of the volume */
+#define AFS_VLSF_UUID 0x0010 /* This server is referred to by its UUID */
+#define AFS_VLSF_DONTUSE 0x0020 /* This server ref should be ignored */
+ } servers[8];
+};
+
+#define AFS_VLDB_MAXNAMELEN 65
+
+
+struct afs_ListAddrByAttributes__xdr { /* XDR form: every field is a big-endian 32-bit word */
+ __be32 Mask; /* AFS_VLADDR_* bits selecting which of the fields below to match on */
+#define AFS_VLADDR_IPADDR 0x1 /* Match by ->ipaddr */
+#define AFS_VLADDR_INDEX 0x2 /* Match by ->index */
+#define AFS_VLADDR_UUID 0x4 /* Match by ->uuid */
+ __be32 ipaddr;
+ __be32 index;
+ __be32 spare;
+ struct afs_uuid__xdr uuid;
+};
+
+struct afs_uvldbentry__xdr { /* XDR form of a VLDB entry whose servers are identified by UUID; presumably the reply to the *U lookups - TODO confirm */
+ __be32 name[AFS_VLDB_MAXNAMELEN]; /* one 32-bit word per element; presumably one name character each - TODO confirm */
+ __be32 nServers;
+ struct afs_uuid__xdr serverNumber[AFS_NMAXNSERVERS];
+ __be32 serverUnique[AFS_NMAXNSERVERS];
+ __be32 serverPartition[AFS_NMAXNSERVERS];
+ __be32 serverFlags[AFS_NMAXNSERVERS];
+ __be32 volumeId[AFS_MAXTYPES];
+ __be32 cloneId;
+ __be32 flags;
+ __be32 spares1;
+ __be32 spares2;
+ __be32 spares3;
+ __be32 spares4;
+ __be32 spares5;
+ __be32 spares6;
+ __be32 spares7;
+ __be32 spares8;
+ __be32 spares9;
+};
+
+struct afs_address_list { /* refcounted, variable-length array of rxrpc addresses */
+ refcount_t usage;
+ unsigned int version;
+ unsigned int nr_addrs; /* presumably the number of entries in addrs[] - TODO confirm */
+ struct sockaddr_rxrpc addrs[];
+};
+
+extern void afs_put_address_list(struct afs_address_list *alist); /* NOTE(review): no definition is visible in this part of the patch (addr_list.c defines afs_put_addrlist() for struct afs_addr_list) - confirm this is still used */
+
+#endif /* AFS_VL_H */
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
new file mode 100644
index 000000000..a484fa642
--- /dev/null
+++ b/fs/afs/callback.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2002, 2007 Red Hat, Inc. All rights reserved.
+ *
+ * This software may be freely redistributed under the terms of the
+ * GNU General Public License.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Authors: David Woodhouse <dwmw2@infradead.org>
+ * David Howells <dhowells@redhat.com>
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/circ_buf.h>
+#include <linux/sched.h>
+#include "internal.h"
+
+/*
+ * Work item that deals with invalidation of an mmap'd file.  All the PTEs
+ * that refer to pages in the file's pagecache are zapped so that the next
+ * access has to go back through ->fault() or ->page_mkwrite(), at which point
+ * the invalidation can be handled more fully.
+ */
+void afs_invalidate_mmap_work(struct work_struct *work)
+{
+	struct address_space *mapping;
+	struct afs_vnode *vnode;
+
+	vnode = container_of(work, struct afs_vnode, cb_work);
+	mapping = vnode->netfs.inode.i_mapping;
+
+	/* Start at page 0; a count of 0 asks for everything to end of file. */
+	unmap_mapping_pages(mapping, 0, 0, false);
+}
+
+/*
+ * Work item run for a server's initcb_work.  Every vnode on the cell's list
+ * of open mmaps whose callback server is this server loses its callback
+ * promise and has its own cb_work queued.
+ */
+void afs_server_init_callback_work(struct work_struct *work)
+{
+	struct afs_server *server;
+	struct afs_vnode *vnode;
+	struct afs_cell *cell;
+
+	server = container_of(work, struct afs_server, initcb_work);
+	cell = server->cell;
+
+	down_read(&cell->fs_open_mmaps_lock);
+	list_for_each_entry(vnode, &cell->fs_open_mmaps, cb_mmap_link) {
+		if (vnode->cb_server != server)
+			continue;
+		clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+		queue_work(system_unbound_wq, &vnode->cb_work);
+	}
+	up_read(&cell->fs_open_mmaps_lock);
+}
+
+/*
+ * Allow the fileserver to request callback state (re-)initialisation.
+ * Unfortunately, UUIDs are not guaranteed unique.
+ *
+ * The given server record and every record chained from it through
+ * ->uuid_next are processed: the server's break counter and the cell's
+ * fs_s_break counter are bumped and, if the cell has any open mmaps, the
+ * server's initcb_work is queued. @server must not be NULL as the first
+ * record is dereferenced unconditionally.
+ */
+void afs_init_callback_state(struct afs_server *server)
+{
+ rcu_read_lock();
+ do {
+ server->cb_s_break++;
+ atomic_inc(&server->cell->fs_s_break);
+ if (!list_empty(&server->cell->fs_open_mmaps))
+ queue_work(system_unbound_wq, &server->initcb_work);
+
+ /* Step to the next record in the ->uuid_next chain, if any. */
+ } while ((server = rcu_dereference(server->uuid_next)));
+ rcu_read_unlock();
+}
+
+/*
+ * Actually break a callback.
+ *
+ * The NEW_CONTENT flag is always cleared.  If a callback promise was
+ * outstanding it is dropped, the vnode's break counters are updated, cached
+ * permits are discarded, a waiting lock is poked and, for a regular file with
+ * mmaps that isn't being deleted, PTE invalidation is queued.  The tracepoint
+ * records whether a promise was actually broken.
+ */
+void __afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reason)
+{
+	_enter("");
+
+	clear_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
+
+	/* Nothing was promised, so there's nothing to break. */
+	if (!test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
+		trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, false);
+		return;
+	}
+
+	vnode->cb_break++;
+	vnode->cb_v_break = vnode->volume->cb_v_break;
+	afs_clear_permits(vnode);
+
+	if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB)
+		afs_lock_may_be_available(vnode);
+
+	if (reason != afs_cb_break_for_deleted &&
+	    vnode->status.type == AFS_FTYPE_FILE &&
+	    atomic_read(&vnode->cb_nr_mmap))
+		queue_work(system_unbound_wq, &vnode->cb_work);
+
+	trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, true);
+}
+
+/*
+ * Break the callback on a vnode, taking vnode->cb_lock for writing around
+ * __afs_break_callback().
+ */
+void afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reason)
+{
+ write_seqlock(&vnode->cb_lock);
+ __afs_break_callback(vnode, reason);
+ write_sequnlock(&vnode->cb_lock);
+}
+
+/*
+ * Look up a volume by volume ID under RCU conditions.
+ *
+ * The first pass walks cell->volumes locklessly and is validated against the
+ * sequence count in cell->volume_lock.  If a writer interfered, the walk is
+ * repeated with the lock held so that the lookup can't be made to retry
+ * indefinitely.
+ *
+ * Returns the matching volume or NULL.  No reference is taken on the volume.
+ */
+static struct afs_volume *afs_lookup_volume_rcu(struct afs_cell *cell,
+						afs_volid_t vid)
+{
+	struct afs_volume *volume;
+	struct rb_node *p;
+	int seq = 1;
+
+	do {
+		/* Unfortunately, rbtree walking doesn't give reliable results
+		 * under just the RCU read lock, so we have to check for
+		 * changes.
+		 *
+		 * read_seqbegin_or_lock() only takes the lock if it is handed
+		 * an odd value, so step the counter each time round: 2 on the
+		 * first (lockless) pass, odd on the retry.
+		 */
+		seq++;
+		read_seqbegin_or_lock(&cell->volume_lock, &seq);
+
+		/* Don't let a result from an invalidated pass leak through. */
+		volume = NULL;
+
+		p = rcu_dereference_raw(cell->volumes.rb_node);
+		while (p) {
+			volume = rb_entry(p, struct afs_volume, cell_node);
+
+			if (volume->vid < vid)
+				p = rcu_dereference_raw(p->rb_left);
+			else if (volume->vid > vid)
+				p = rcu_dereference_raw(p->rb_right);
+			else
+				break;
+			volume = NULL;
+		}
+
+	} while (need_seqretry(&cell->volume_lock, seq));
+
+	done_seqretry(&cell->volume_lock, seq);
+	return volume;
+}
+
+/*
+ * allow the fileserver to explicitly break one callback
+ * - happens when
+ * - the backing file is changed
+ * - a lock is released
+ *
+ * A FID with both vnode and unique set to 0 breaks the whole volume by
+ * bumping volume->cb_v_break; otherwise the matching in-memory inode, if
+ * there is one, has its callback broken. volume->sb is accessed with
+ * rcu_dereference() and the inode is found with find_inode_rcu(), so the
+ * caller is expected to hold the RCU read lock.
+ */
+static void afs_break_one_callback(struct afs_volume *volume,
+ struct afs_fid *fid)
+{
+ struct super_block *sb;
+ struct afs_vnode *vnode;
+ struct inode *inode;
+
+ if (fid->vnode == 0 && fid->unique == 0) {
+ /* The callback break applies to an entire volume. */
+ write_lock(&volume->cb_v_break_lock);
+ volume->cb_v_break++;
+ trace_afs_cb_break(fid, volume->cb_v_break,
+ afs_cb_break_for_volume_callback, false);
+ write_unlock(&volume->cb_v_break_lock);
+ return;
+ }
+
+ /* See if we can find a matching inode - even an I_NEW inode needs to
+ * be marked as it can have its callback broken before we finish
+ * setting up the local inode.
+ */
+ sb = rcu_dereference(volume->sb);
+ if (!sb)
+ return;
+
+ inode = find_inode_rcu(sb, fid->vnode, afs_ilookup5_test_by_fid, fid);
+ if (inode) {
+ vnode = AFS_FS_I(inode);
+ afs_break_callback(vnode, afs_cb_break_for_callback);
+ } else {
+ /* No inode in memory for this FID; just note the miss. */
+ trace_afs_cb_miss(fid, afs_cb_break_for_callback);
+ }
+}
+
+/*
+ * Process one volume's worth of callback breaks from an array.
+ *
+ * The volume ID of the first entry is looked up once, then every entry in
+ * the array with that volume ID is handled and deducted from *_count.
+ * Entries for other volumes are compacted to the front of the array, so on
+ * return the first *_count entries are the ones still to be processed.
+ * Entries for a volume that can't be found are consumed without action.
+ */
+static void afs_break_some_callbacks(struct afs_server *server,
+ struct afs_callback_break *cbb,
+ size_t *_count)
+{
+ struct afs_callback_break *residue = cbb;
+ struct afs_volume *volume;
+ afs_volid_t vid = cbb->fid.vid;
+ size_t i;
+
+ volume = afs_lookup_volume_rcu(server->cell, vid);
+
+ /* TODO: Find all matching volumes if we couldn't match the server and
+ * break them anyway.
+ */
+
+ for (i = *_count; i > 0; cbb++, i--) {
+ if (cbb->fid.vid == vid) {
+ _debug("- Fid { vl=%08llx n=%llu u=%u }",
+ cbb->fid.vid,
+ cbb->fid.vnode,
+ cbb->fid.unique);
+ --*_count;
+ if (volume)
+ afs_break_one_callback(volume, &cbb->fid);
+ } else {
+ /* Keep for a later pass; residue never overtakes cbb. */
+ *residue++ = *cbb;
+ }
+ }
+}
+
+/*
+ * Allow the fileserver to break callback promises.
+ *
+ * The array of @count breaks is worked through under the RCU read lock, one
+ * volume's worth at a time, until nothing is left.  Note that the array is
+ * rearranged in the process.
+ */
+void afs_break_callbacks(struct afs_server *server, size_t count,
+			 struct afs_callback_break *callbacks)
+{
+	_enter("%p,%zu,", server, count);
+
+	ASSERT(server != NULL);
+
+	rcu_read_lock();
+	while (count > 0)
+		afs_break_some_callbacks(server, callbacks, &count);
+	rcu_read_unlock();
+}
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
new file mode 100644
index 000000000..926cb1188
--- /dev/null
+++ b/fs/afs/cell.c
@@ -0,0 +1,954 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AFS cell and server record management
+ *
+ * Copyright (C) 2002, 2017 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/slab.h>
+#include <linux/key.h>
+#include <linux/ctype.h>
+#include <linux/dns_resolver.h>
+#include <linux/sched.h>
+#include <linux/inet.h>
+#include <linux/namei.h>
+#include <keys/rxrpc-type.h>
+#include "internal.h"
+
+static unsigned __read_mostly afs_cell_gc_delay = 10;
+static unsigned __read_mostly afs_cell_min_ttl = 10 * 60;
+static unsigned __read_mostly afs_cell_max_ttl = 24 * 60 * 60;
+static atomic_t cell_debug_id;
+
+static void afs_queue_cell_manager(struct afs_net *);
+static void afs_manage_cell_work(struct work_struct *);
+
+/*
+ * Drop one count from net->cells_outstanding and, if that was the last one,
+ * wake anyone waiting on the counter (afs_cell_purge() waits for it to reach
+ * zero).
+ */
+static void afs_dec_cells_outstanding(struct afs_net *net)
+{
+ if (atomic_dec_and_test(&net->cells_outstanding))
+ wake_up_var(&net->cells_outstanding);
+}
+
+/*
+ * Set the cell timer to fire after a given delay, assuming it's not already
+ * set for an earlier time.
+ *
+ * @delay is in seconds. If the namespace is no longer live, the timer is
+ * bypassed and the cell manager is queued directly.
+ */
+static void afs_set_cell_timer(struct afs_net *net, time64_t delay)
+{
+ if (net->live) {
+ /* Take a count for the timer to carry; if timer_reduce()
+ * returns non-zero (the timer was already pending), the count
+ * isn't needed and is dropped again.
+ */
+ atomic_inc(&net->cells_outstanding);
+ if (timer_reduce(&net->cells_timer, jiffies + delay * HZ))
+ afs_dec_cells_outstanding(net);
+ } else {
+ afs_queue_cell_manager(net);
+ }
+}
+
+/*
+ * Look up and get an activation reference on a cell record. The caller must
+ * hold net->cells_lock at least read-locked.
+ *
+ * If @name is NULL, the workstation cell (net->ws_cell) is used. Otherwise
+ * the tree of cells is searched case-insensitively for a name of exactly
+ * @namesz characters.
+ *
+ * Returns the cell with its active count incremented, or an error pointer:
+ * -EINVAL if a name was given with zero length, -ENAMETOOLONG if @namesz
+ * exceeds AFS_MAXCELLNAME, -EDESTADDRREQ if no name was given and there is
+ * no workstation cell, and -ENOENT if no cell of that name was found.
+ */
+static struct afs_cell *afs_find_cell_locked(struct afs_net *net,
+ const char *name, unsigned int namesz,
+ enum afs_cell_trace reason)
+{
+ struct afs_cell *cell = NULL;
+ struct rb_node *p;
+ int n;
+
+ _enter("%*.*s", namesz, namesz, name);
+
+ if (name && namesz == 0)
+ return ERR_PTR(-EINVAL);
+ if (namesz > AFS_MAXCELLNAME)
+ return ERR_PTR(-ENAMETOOLONG);
+
+ if (!name) {
+ cell = net->ws_cell;
+ if (!cell)
+ return ERR_PTR(-EDESTADDRREQ);
+ goto found;
+ }
+
+ p = net->cells.rb_node;
+ while (p) {
+ cell = rb_entry(p, struct afs_cell, net_node);
+
+ /* Compare over the shorter of the two lengths, then let the
+ * length difference decide between names sharing a prefix.
+ */
+ n = strncasecmp(cell->name, name,
+ min_t(size_t, cell->name_len, namesz));
+ if (n == 0)
+ n = cell->name_len - namesz;
+ if (n < 0)
+ p = p->rb_left;
+ else if (n > 0)
+ p = p->rb_right;
+ else
+ goto found;
+ }
+
+ return ERR_PTR(-ENOENT);
+
+found:
+ return afs_use_cell(cell, reason);
+}
+
+/*
+ * Look up and get an activation reference on a cell record.
+ *
+ * This wraps afs_find_cell_locked() with net->cells_lock held for reading and
+ * returns whatever that returns (a cell or an error pointer).
+ */
+struct afs_cell *afs_find_cell(struct afs_net *net,
+ const char *name, unsigned int namesz,
+ enum afs_cell_trace reason)
+{
+ struct afs_cell *cell;
+
+ down_read(&net->cells_lock);
+ cell = afs_find_cell_locked(net, name, namesz, reason);
+ up_read(&net->cells_lock);
+ return cell;
+}
+
+/*
+ * Set up a cell record and fill in its name and VL server address list.
+ *
+ * The name is validated and stored lower-cased. If @addresses is given it is
+ * parsed into the VL server list and the DNS expiry is set to never;
+ * otherwise an empty list is installed with the expiry set to now. The
+ * anonymous key is not allocated here; afs_activate_cell() does that.
+ *
+ * The record is returned with a refcount of 1 and an active count of 0, and
+ * net->cells_outstanding is incremented on its behalf. On failure an error
+ * pointer is returned: -EINVAL for an empty or unacceptable name,
+ * -ENAMETOOLONG, -ENOMEM, or whatever afs_parse_text_addrs() returned.
+ */
+static struct afs_cell *afs_alloc_cell(struct afs_net *net,
+ const char *name, unsigned int namelen,
+ const char *addresses)
+{
+ struct afs_vlserver_list *vllist;
+ struct afs_cell *cell;
+ int i, ret;
+
+ ASSERT(name);
+ if (namelen == 0)
+ return ERR_PTR(-EINVAL);
+ if (namelen > AFS_MAXCELLNAME) {
+ _leave(" = -ENAMETOOLONG");
+ return ERR_PTR(-ENAMETOOLONG);
+ }
+
+ /* Prohibit cell names that contain unprintable chars, '/' and '@' or
+ * that begin with a dot. This also precludes "@cell".
+ */
+ if (name[0] == '.')
+ return ERR_PTR(-EINVAL);
+ for (i = 0; i < namelen; i++) {
+ char ch = name[i];
+ if (!isprint(ch) || ch == '/' || ch == '@')
+ return ERR_PTR(-EINVAL);
+ }
+
+ _enter("%*.*s,%s", namelen, namelen, name, addresses);
+
+ cell = kzalloc(sizeof(struct afs_cell), GFP_KERNEL);
+ if (!cell) {
+ _leave(" = -ENOMEM");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ cell->name = kmalloc(namelen + 1, GFP_KERNEL);
+ if (!cell->name) {
+ kfree(cell);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /* Store the name lower-cased and NUL-terminated. */
+ cell->net = net;
+ cell->name_len = namelen;
+ for (i = 0; i < namelen; i++)
+ cell->name[i] = tolower(name[i]);
+ cell->name[i] = 0;
+
+ refcount_set(&cell->ref, 1);
+ atomic_set(&cell->active, 0);
+ INIT_WORK(&cell->manager, afs_manage_cell_work);
+ cell->volumes = RB_ROOT;
+ INIT_HLIST_HEAD(&cell->proc_volumes);
+ seqlock_init(&cell->volume_lock);
+ cell->fs_servers = RB_ROOT;
+ seqlock_init(&cell->fs_lock);
+ INIT_LIST_HEAD(&cell->fs_open_mmaps);
+ init_rwsem(&cell->fs_open_mmaps_lock);
+ rwlock_init(&cell->vl_servers_lock);
+ cell->flags = (1 << AFS_CELL_FL_CHECK_ALIAS);
+
+ /* Provide a VL server list, filling it in if we were given a list of
+ * addresses to use.
+ */
+ if (addresses) {
+ vllist = afs_parse_text_addrs(net,
+ addresses, strlen(addresses), ':',
+ VL_SERVICE, AFS_VL_PORT);
+ if (IS_ERR(vllist)) {
+ ret = PTR_ERR(vllist);
+ goto parse_failed;
+ }
+
+ vllist->source = DNS_RECORD_FROM_CONFIG;
+ vllist->status = DNS_LOOKUP_NOT_DONE;
+ cell->dns_expiry = TIME64_MAX;
+ } else {
+ ret = -ENOMEM;
+ vllist = afs_alloc_vlserver_list(0);
+ if (!vllist)
+ goto error;
+ vllist->source = DNS_RECORD_UNAVAILABLE;
+ vllist->status = DNS_LOOKUP_NOT_DONE;
+ cell->dns_expiry = ktime_get_real_seconds();
+ }
+
+ rcu_assign_pointer(cell->vl_servers, vllist);
+
+ cell->dns_source = vllist->source;
+ cell->dns_status = vllist->status;
+ smp_store_release(&cell->dns_lookup_count, 1); /* vs source/status */
+ atomic_inc(&net->cells_outstanding);
+ cell->debug_id = atomic_inc_return(&cell_debug_id);
+ trace_afs_cell(cell->debug_id, 1, 0, afs_cell_trace_alloc);
+
+ _leave(" = %p", cell);
+ return cell;
+
+parse_failed:
+ if (ret == -EINVAL)
+ printk(KERN_ERR "kAFS: bad VL server IP address\n");
+error:
+ kfree(cell->name);
+ kfree(cell);
+ _leave(" = %d", ret);
+ return ERR_PTR(ret);
+}
+
+/*
+ * afs_lookup_cell - Look up or create a cell record.
+ * @net: The network namespace
+ * @name: The name of the cell.
+ * @namesz: The strlen of the cell name.
+ * @vllist: A colon/comma separated list of numeric IP addresses or NULL.
+ * @excl: T if an error should be given if the cell name already exists.
+ *
+ * Look up a cell record by name and query the DNS for VL server addresses if
+ * needed. Note that the actual DNS query is punted off to the manager thread
+ * so that this function can return immediately if interrupted whilst allowing
+ * cell records to be shared even if not yet fully constructed.
+ *
+ * Return: The cell, with an activation count held for the caller, once it has
+ * reached the AFS_CELL_ACTIVE state. Otherwise an error pointer: -EEXIST if
+ * @excl was set and the cell already exists, an error from afs_alloc_cell(),
+ * or cell->error if the cell was removed whilst being waited for.
+ */
+struct afs_cell *afs_lookup_cell(struct afs_net *net,
+ const char *name, unsigned int namesz,
+ const char *vllist, bool excl)
+{
+ struct afs_cell *cell, *candidate, *cursor;
+ struct rb_node *parent, **pp;
+ enum afs_cell_state state;
+ int ret, n;
+
+ _enter("%s,%s", name, vllist);
+
+ if (!excl) {
+ cell = afs_find_cell(net, name, namesz, afs_cell_trace_use_lookup);
+ if (!IS_ERR(cell))
+ goto wait_for_cell;
+ }
+
+ /* Assume we're probably going to create a cell and preallocate and
+ * mostly set up a candidate record. We can then use this to stash the
+ * name, the net namespace and VL server addresses.
+ *
+ * We also want to do this before we hold any locks as it may involve
+ * upcalling to userspace to make DNS queries.
+ */
+ candidate = afs_alloc_cell(net, name, namesz, vllist);
+ if (IS_ERR(candidate)) {
+ _leave(" = %ld", PTR_ERR(candidate));
+ return candidate;
+ }
+
+ /* Find the insertion point and check to see if someone else added a
+ * cell whilst we were allocating.
+ */
+ down_write(&net->cells_lock);
+
+ pp = &net->cells.rb_node;
+ parent = NULL;
+ while (*pp) {
+ parent = *pp;
+ cursor = rb_entry(parent, struct afs_cell, net_node);
+
+ n = strncasecmp(cursor->name, name,
+ min_t(size_t, cursor->name_len, namesz));
+ if (n == 0)
+ n = cursor->name_len - namesz;
+ if (n < 0)
+ pp = &(*pp)->rb_left;
+ else if (n > 0)
+ pp = &(*pp)->rb_right;
+ else
+ goto cell_already_exists;
+ }
+
+ /* Insert the candidate. The active count is set to 2: one for the
+ * cell's presence in the tree (afs_manage_cell() removes a cell only
+ * when it can take the count from 1 to 0) and one for the caller.
+ */
+ cell = candidate;
+ candidate = NULL;
+ atomic_set(&cell->active, 2);
+ trace_afs_cell(cell->debug_id, refcount_read(&cell->ref), 2, afs_cell_trace_insert);
+ rb_link_node_rcu(&cell->net_node, parent, pp);
+ rb_insert_color(&cell->net_node, &net->cells);
+ up_write(&net->cells_lock);
+
+ afs_queue_cell(cell, afs_cell_trace_get_queue_new);
+
+wait_for_cell:
+ trace_afs_cell(cell->debug_id, refcount_read(&cell->ref), atomic_read(&cell->active),
+ afs_cell_trace_wait);
+ _debug("wait_for_cell");
+ wait_var_event(&cell->state,
+ ({
+ state = smp_load_acquire(&cell->state); /* vs error */
+ state == AFS_CELL_ACTIVE || state == AFS_CELL_REMOVED;
+ }));
+
+ /* Check the state obtained from the wait check. */
+ if (state == AFS_CELL_REMOVED) {
+ /* NOTE(review): if cell->error is 0 here, ERR_PTR(0) is NULL,
+ * which IS_ERR() in callers would not catch - confirm that a
+ * removed cell always has an error set.
+ */
+ ret = cell->error;
+ goto error;
+ }
+
+ _leave(" = %p [cell]", cell);
+ return cell;
+
+cell_already_exists:
+ _debug("cell exists");
+ cell = cursor;
+ if (excl) {
+ ret = -EEXIST;
+ } else {
+ afs_use_cell(cursor, afs_cell_trace_use_lookup);
+ ret = 0;
+ }
+ up_write(&net->cells_lock);
+ if (candidate)
+ afs_put_cell(candidate, afs_cell_trace_put_candidate);
+ if (ret == 0)
+ goto wait_for_cell;
+ goto error_noput;
+error:
+ afs_unuse_cell(net, cell, afs_cell_trace_unuse_lookup);
+error_noput:
+ _leave(" = %d [error]", ret);
+ return ERR_PTR(ret);
+}
+
+/*
+ * set the root cell information
+ * - can be called with a module parameter string
+ * - can be called from a write to /proc/fs/afs/rootcell
+ *
+ * @rootcell is either "<cellname>" or "<cellname>:<VL server addresses>";
+ * everything after the first colon is handed to afs_lookup_cell() as the
+ * address list. The cell is flagged AFS_CELL_FL_NO_GC (taking an extra
+ * activation count the first time the flag is set) and installed as
+ * net->ws_cell, and the activation count on the previous workstation cell,
+ * if any, is dropped.
+ *
+ * Returns 0 on success (including when @rootcell is NULL) or the error from
+ * afs_lookup_cell().
+ */
+int afs_cell_init(struct afs_net *net, const char *rootcell)
+{
+ struct afs_cell *old_root, *new_root;
+ const char *cp, *vllist;
+ size_t len;
+
+ _enter("");
+
+ if (!rootcell) {
+ /* module is loaded with no parameters, or built statically.
+ * - in the future we might initialize cell DB here.
+ */
+ _leave(" = 0 [no root]");
+ return 0;
+ }
+
+ cp = strchr(rootcell, ':');
+ if (!cp) {
+ _debug("kAFS: no VL server IP addresses specified");
+ vllist = NULL;
+ len = strlen(rootcell);
+ } else {
+ vllist = cp + 1;
+ len = cp - rootcell;
+ }
+
+ /* allocate a cell record for the root cell */
+ new_root = afs_lookup_cell(net, rootcell, len, vllist, false);
+ if (IS_ERR(new_root)) {
+ _leave(" = %ld", PTR_ERR(new_root));
+ return PTR_ERR(new_root);
+ }
+
+ if (!test_and_set_bit(AFS_CELL_FL_NO_GC, &new_root->flags))
+ afs_use_cell(new_root, afs_cell_trace_use_pin);
+
+ /* install the new cell */
+ down_write(&net->cells_lock);
+ afs_see_cell(new_root, afs_cell_trace_see_ws);
+ old_root = net->ws_cell;
+ net->ws_cell = new_root;
+ up_write(&net->cells_lock);
+
+ /* afs_unuse_cell() tolerates a NULL old_root. */
+ afs_unuse_cell(net, old_root, afs_cell_trace_unuse_ws);
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * Update a cell's VL server address list from the DNS.
+ *
+ * If the query fails with anything other than -ENOMEM, an empty server list
+ * carrying a status derived from the error is used in place of a result, so
+ * the expiry time and status still get updated. The cell's list is replaced
+ * only if the new list has servers or the old one has none.
+ *
+ * cell->dns_lookup_count is always bumped and its waiters woken on the way
+ * out. Returns 0 or the negative error from the query.
+ */
+static int afs_update_cell(struct afs_cell *cell)
+{
+ struct afs_vlserver_list *vllist, *old = NULL, *p;
+ unsigned int min_ttl = READ_ONCE(afs_cell_min_ttl);
+ unsigned int max_ttl = READ_ONCE(afs_cell_max_ttl);
+ time64_t now, expiry = 0;
+ int ret = 0;
+
+ _enter("%s", cell->name);
+
+ vllist = afs_dns_query(cell, &expiry);
+ if (IS_ERR(vllist)) {
+ ret = PTR_ERR(vllist);
+
+ _debug("%s: fail %d", cell->name, ret);
+ if (ret == -ENOMEM)
+ goto out_wake;
+
+ vllist = afs_alloc_vlserver_list(0);
+ if (!vllist) {
+ /* NOTE(review): ret is a PTR_ERR() value here and so
+ * is already negative; this test looks unreachable.
+ */
+ if (ret >= 0)
+ ret = -ENOMEM;
+ goto out_wake;
+ }
+
+ switch (ret) {
+ case -ENODATA:
+ case -EDESTADDRREQ:
+ vllist->status = DNS_LOOKUP_GOT_NOT_FOUND;
+ break;
+ case -EAGAIN:
+ case -ECONNREFUSED:
+ vllist->status = DNS_LOOKUP_GOT_TEMP_FAILURE;
+ break;
+ default:
+ vllist->status = DNS_LOOKUP_GOT_LOCAL_FAILURE;
+ break;
+ }
+ }
+
+ _debug("%s: got list %d %d", cell->name, vllist->source, vllist->status);
+ cell->dns_status = vllist->status;
+
+ /* Clamp the expiry time to [now + min_ttl, now + max_ttl]. */
+ now = ktime_get_real_seconds();
+ if (min_ttl > max_ttl)
+ max_ttl = min_ttl;
+ if (expiry < now + min_ttl)
+ expiry = now + min_ttl;
+ else if (expiry > now + max_ttl)
+ expiry = now + max_ttl;
+
+ _debug("%s: status %d", cell->name, vllist->status);
+ if (vllist->source == DNS_RECORD_UNAVAILABLE) {
+ switch (vllist->status) {
+ case DNS_LOOKUP_GOT_NOT_FOUND:
+ /* The DNS said that the cell does not exist or there
+ * weren't any addresses to be had.
+ */
+ cell->dns_expiry = expiry;
+ break;
+
+ case DNS_LOOKUP_BAD:
+ case DNS_LOOKUP_GOT_LOCAL_FAILURE:
+ case DNS_LOOKUP_GOT_TEMP_FAILURE:
+ case DNS_LOOKUP_GOT_NS_FAILURE:
+ default:
+ /* Other failures: look again in 10 seconds. */
+ cell->dns_expiry = now + 10;
+ break;
+ }
+ } else {
+ cell->dns_expiry = expiry;
+ }
+
+ /* Replace the VL server list if the new record has servers or the old
+ * record doesn't.
+ */
+ write_lock(&cell->vl_servers_lock);
+ p = rcu_dereference_protected(cell->vl_servers, true);
+ if (vllist->nr_servers > 0 || p->nr_servers == 0) {
+ rcu_assign_pointer(cell->vl_servers, vllist);
+ cell->dns_source = vllist->source;
+ old = p;
+ }
+ write_unlock(&cell->vl_servers_lock);
+ afs_put_vlserverlist(cell->net, old);
+
+out_wake:
+ smp_store_release(&cell->dns_lookup_count,
+ cell->dns_lookup_count + 1); /* vs source/status */
+ wake_up_var(&cell->dns_lookup_count);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Destroy a cell record
+ *
+ * This is the RCU callback queued by afs_put_cell() when the last ref is
+ * dropped. It releases what the cell holds (VL server list, alias target,
+ * anonymous key, name), frees the record and drops the cell's count on
+ * net->cells_outstanding.
+ */
+static void afs_cell_destroy(struct rcu_head *rcu)
+{
+ struct afs_cell *cell = container_of(rcu, struct afs_cell, rcu);
+ struct afs_net *net = cell->net;
+ int r;
+
+ _enter("%p{%s}", cell, cell->name);
+
+ r = refcount_read(&cell->ref);
+ ASSERTCMP(r, ==, 0);
+ trace_afs_cell(cell->debug_id, r, atomic_read(&cell->active), afs_cell_trace_free);
+
+ afs_put_vlserverlist(net, rcu_access_pointer(cell->vl_servers));
+ afs_unuse_cell(net, cell->alias_of, afs_cell_trace_unuse_alias);
+ key_put(cell->anonymous_key);
+ kfree(cell->name);
+ kfree(cell);
+
+ afs_dec_cells_outstanding(net);
+ _leave(" [destroyed]");
+}
+
+/*
+ * Queue the cell manager.
+ *
+ * A count is taken on net->cells_outstanding for the work item to carry; if
+ * the work item was already queued, that count is dropped again.
+ */
+static void afs_queue_cell_manager(struct afs_net *net)
+{
+ int outstanding = atomic_inc_return(&net->cells_outstanding);
+
+ _enter("%d", outstanding);
+
+ if (!queue_work(afs_wq, &net->cells_manager))
+ afs_dec_cells_outstanding(net);
+}
+
+/*
+ * Cell management timer. We have an increment on cells_outstanding that we
+ * need to pass along to the work item.
+ *
+ * If the work item was already queued, it can't take the increment, so the
+ * increment is dropped here instead.
+ */
+void afs_cells_timer(struct timer_list *timer)
+{
+ struct afs_net *net = container_of(timer, struct afs_net, cells_timer);
+
+ _enter("");
+ if (!queue_work(afs_wq, &net->cells_manager))
+ afs_dec_cells_outstanding(net);
+}
+
+/*
+ * Get a reference on a cell record.
+ *
+ * This pins the record in memory (cell->ref); it does not touch the
+ * activation count. Returns @cell for convenience.
+ */
+struct afs_cell *afs_get_cell(struct afs_cell *cell, enum afs_cell_trace reason)
+{
+ int r;
+
+ /* r receives the refcount as it was before the increment. */
+ __refcount_inc(&cell->ref, &r);
+ trace_afs_cell(cell->debug_id, r + 1, atomic_read(&cell->active), reason);
+ return cell;
+}
+
+/*
+ * Drop a reference on a cell record.  A NULL cell is ignored.  When the last
+ * reference goes, the record is handed to RCU for destruction; its active
+ * count is expected to have reached zero by then.
+ */
+void afs_put_cell(struct afs_cell *cell, enum afs_cell_trace reason)
+{
+	unsigned int debug_id, active;
+	bool last;
+	int r;
+
+	if (!cell)
+		return;
+
+	/* Sampled before the ref is dropped so that the tracepoint below
+	 * needn't dereference the cell.
+	 */
+	debug_id = cell->debug_id;
+	active = atomic_read(&cell->active);
+
+	last = __refcount_dec_and_test(&cell->ref, &r);
+	trace_afs_cell(debug_id, r - 1, active, reason);
+	if (!last)
+		return;
+
+	active = atomic_read(&cell->active);
+	WARN(active != 0, "Cell active count %u > 0\n", active);
+	call_rcu(&cell->rcu, afs_cell_destroy);
+}
+
+/*
+ * Note a cell becoming more active.
+ *
+ * Increments the activation count (cell->active); the refcount is only read
+ * for tracing and to warn if it is already zero. Returns @cell.
+ */
+struct afs_cell *afs_use_cell(struct afs_cell *cell, enum afs_cell_trace reason)
+{
+ int r, a;
+
+ r = refcount_read(&cell->ref);
+ WARN_ON(r == 0);
+ a = atomic_inc_return(&cell->active);
+ trace_afs_cell(cell->debug_id, r, a, reason);
+ return cell;
+}
+
+/*
+ * Record a cell becoming less active. When the active counter reaches 1, it
+ * is scheduled for destruction, but may get reactivated.
+ *
+ * A NULL cell is ignored. The time of last use is recorded and, if the count
+ * drops to 1, the cell timer is set: after afs_cell_gc_delay seconds if the
+ * cell has VL servers, immediately otherwise.
+ */
+void afs_unuse_cell(struct afs_net *net, struct afs_cell *cell, enum afs_cell_trace reason)
+{
+ unsigned int debug_id;
+ time64_t now, expire_delay;
+ int r, a;
+
+ if (!cell)
+ return;
+
+ _enter("%s", cell->name);
+
+ now = ktime_get_real_seconds();
+ cell->last_inactive = now;
+ expire_delay = 0;
+ if (cell->vl_servers->nr_servers)
+ expire_delay = afs_cell_gc_delay;
+
+ /* Everything needed from the cell is read before the count is
+ * dropped; the cell isn't dereferenced after that.
+ */
+ debug_id = cell->debug_id;
+ r = refcount_read(&cell->ref);
+ a = atomic_dec_return(&cell->active);
+ trace_afs_cell(debug_id, r, a, reason);
+ WARN_ON(a == 0);
+ if (a == 1)
+ /* 'cell' may now be garbage collected. */
+ afs_set_cell_timer(net, expire_delay);
+}
+
+/*
+ * Note that a cell has been seen.
+ *
+ * This only emits a tracepoint with the current ref and active counts;
+ * neither count is changed.
+ */
+void afs_see_cell(struct afs_cell *cell, enum afs_cell_trace reason)
+{
+ int r, a;
+
+ r = refcount_read(&cell->ref);
+ a = atomic_read(&cell->active);
+ trace_afs_cell(cell->debug_id, r, a, reason);
+}
+
+/*
+ * Queue a cell for management, giving the workqueue a ref to hold.
+ *
+ * If the cell's manager was already queued, the extra ref is dropped again.
+ * Otherwise afs_manage_cell_work() drops it when it has run.
+ */
+void afs_queue_cell(struct afs_cell *cell, enum afs_cell_trace reason)
+{
+ afs_get_cell(cell, reason);
+ if (!queue_work(afs_wq, &cell->manager))
+ afs_put_cell(cell, afs_cell_trace_put_queue_fail);
+}
+
+/*
+ * Allocate a key to use as a placeholder for anonymous user security.
+ *
+ * The key is named "afs@<cellname>" with the cell name lower-cased and is
+ * stored in cell->anonymous_key. Returns 0 on success or the error from
+ * rxrpc_get_null_key().
+ */
+static int afs_alloc_anon_key(struct afs_cell *cell)
+{
+ struct key *key;
+ char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp;
+
+ /* Create a key to represent an anonymous user. */
+ memcpy(keyname, "afs@", 4);
+ dp = keyname + 4;
+ cp = cell->name;
+ /* Copy up to and including the terminating NUL. */
+ do {
+ *dp++ = tolower(*cp);
+ } while (*cp++);
+
+ key = rxrpc_get_null_key(keyname);
+ if (IS_ERR(key))
+ return PTR_ERR(key);
+
+ cell->anonymous_key = key;
+
+ _debug("anon key %p{%x}",
+ cell->anonymous_key, key_serial(cell->anonymous_key));
+ return 0;
+}
+
+/*
+ * Activate a cell.
+ *
+ * Allocates the anonymous key if the cell doesn't have one yet, calls
+ * afs_proc_cell_setup() and then, under net->proc_cells_lock, links the cell
+ * into net->proc_cells in name order and calls afs_dynroot_mkdir().
+ *
+ * Returns 0 on success or a negative error if the key or proc setup failed,
+ * in which case the cell has not been linked into net->proc_cells.
+ */
+static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell)
+{
+ struct hlist_node **p;
+ struct afs_cell *pcell;
+ int ret;
+
+ if (!cell->anonymous_key) {
+ ret = afs_alloc_anon_key(cell);
+ if (ret < 0)
+ return ret;
+ }
+
+ ret = afs_proc_cell_setup(cell);
+ if (ret < 0)
+ return ret;
+
+ mutex_lock(&net->proc_cells_lock);
+ /* Find the first entry that sorts after this cell's name. */
+ for (p = &net->proc_cells.first; *p; p = &(*p)->next) {
+ pcell = hlist_entry(*p, struct afs_cell, proc_link);
+ if (strcmp(cell->name, pcell->name) < 0)
+ break;
+ }
+
+ /* Open-coded insertion in front of *p; the node is filled in before
+ * being published with rcu_assign_pointer().
+ * NOTE(review): &cell->proc_link.next is stored where &cell->proc_link
+ * would be expected - presumably equivalent only because ->next is
+ * the first member of struct hlist_node; confirm.
+ */
+ cell->proc_link.pprev = p;
+ cell->proc_link.next = *p;
+ rcu_assign_pointer(*p, &cell->proc_link.next);
+ if (cell->proc_link.next)
+ cell->proc_link.next->pprev = &cell->proc_link.next;
+
+ afs_dynroot_mkdir(net, cell);
+ mutex_unlock(&net->proc_cells_lock);
+ return 0;
+}
+
+/*
+ * Deactivate a cell.
+ *
+ * Undoes afs_activate_cell(): afs_proc_cell_remove() is called and then,
+ * under net->proc_cells_lock, the cell is unlinked from net->proc_cells and
+ * afs_dynroot_rmdir() is called.
+ *
+ * This is also called when activation failed part-way, in which case the
+ * cell may not be on net->proc_cells at all.
+ */
+static void afs_deactivate_cell(struct afs_net *net, struct afs_cell *cell)
+{
+	_enter("%s", cell->name);
+
+	afs_proc_cell_remove(cell);
+
+	mutex_lock(&net->proc_cells_lock);
+	/* The cell is not on the list if activation failed before linking it
+	 * in, so an unconditional hlist_del_rcu() would write through a NULL
+	 * (never linked) or poisoned (previously unlinked) ->pprev.  The
+	 * _init variant skips unhashed nodes and leaves the node unhashed
+	 * afterwards, whilst still leaving ->next intact for RCU readers.
+	 */
+	hlist_del_init_rcu(&cell->proc_link);
+	afs_dynroot_rmdir(net, cell);
+	mutex_unlock(&net->proc_cells_lock);
+
+	_leave("");
+}
+
+/*
+ * Manage a cell record, initialising and destroying it, maintaining its DNS
+ * records.
+ *
+ * This is a state machine driven off cell->state. Each state change is
+ * published with smp_store_release() and followed by a wake-up on
+ * &cell->state for waiters such as afs_lookup_cell(). The forward path is
+ * UNSET -> ACTIVATING -> ACTIVE and the teardown path is
+ * ACTIVE -> DEACTIVATING -> INACTIVE -> REMOVED, with FAILED entered if
+ * activation fails.
+ */
+static void afs_manage_cell(struct afs_cell *cell)
+{
+ struct afs_net *net = cell->net;
+ int ret, active;
+
+ _enter("%s", cell->name);
+
+again:
+ _debug("state %u", cell->state);
+ switch (cell->state) {
+ case AFS_CELL_INACTIVE:
+ case AFS_CELL_FAILED:
+ /* Remove the cell from the tree only if the active count can
+ * be taken from 1 to 0, i.e. nobody else is using it.
+ */
+ down_write(&net->cells_lock);
+ active = 1;
+ if (atomic_try_cmpxchg_relaxed(&cell->active, &active, 0)) {
+ rb_erase(&cell->net_node, &net->cells);
+ trace_afs_cell(cell->debug_id, refcount_read(&cell->ref), 0,
+ afs_cell_trace_unuse_delete);
+ smp_store_release(&cell->state, AFS_CELL_REMOVED);
+ }
+ up_write(&net->cells_lock);
+ if (cell->state == AFS_CELL_REMOVED) {
+ wake_up_var(&cell->state);
+ goto final_destruction;
+ }
+ if (cell->state == AFS_CELL_FAILED)
+ goto done;
+ /* Still in use: go round again and reactivate. */
+ smp_store_release(&cell->state, AFS_CELL_UNSET);
+ wake_up_var(&cell->state);
+ goto again;
+
+ case AFS_CELL_UNSET:
+ smp_store_release(&cell->state, AFS_CELL_ACTIVATING);
+ wake_up_var(&cell->state);
+ goto again;
+
+ case AFS_CELL_ACTIVATING:
+ ret = afs_activate_cell(net, cell);
+ if (ret < 0)
+ goto activation_failed;
+
+ smp_store_release(&cell->state, AFS_CELL_ACTIVE);
+ wake_up_var(&cell->state);
+ goto again;
+
+ case AFS_CELL_ACTIVE:
+ /* An active count above 1 means the cell has users. */
+ if (atomic_read(&cell->active) > 1) {
+ if (test_and_clear_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags)) {
+ ret = afs_update_cell(cell);
+ if (ret < 0)
+ cell->error = ret;
+ }
+ goto done;
+ }
+ smp_store_release(&cell->state, AFS_CELL_DEACTIVATING);
+ wake_up_var(&cell->state);
+ goto again;
+
+ case AFS_CELL_DEACTIVATING:
+ /* Someone started using the cell again in the meantime. */
+ if (atomic_read(&cell->active) > 1)
+ goto reverse_deactivation;
+ afs_deactivate_cell(net, cell);
+ smp_store_release(&cell->state, AFS_CELL_INACTIVE);
+ wake_up_var(&cell->state);
+ goto again;
+
+ case AFS_CELL_REMOVED:
+ goto done;
+
+ default:
+ break;
+ }
+ _debug("bad state %u", cell->state);
+ BUG(); /* Unhandled state */
+
+activation_failed:
+ /* cell->error is set before the FAILED state is published. */
+ cell->error = ret;
+ afs_deactivate_cell(net, cell);
+
+ smp_store_release(&cell->state, AFS_CELL_FAILED); /* vs error */
+ wake_up_var(&cell->state);
+ goto again;
+
+reverse_deactivation:
+ smp_store_release(&cell->state, AFS_CELL_ACTIVE);
+ wake_up_var(&cell->state);
+ _leave(" [deact->act]");
+ return;
+
+done:
+ _leave(" [done %u]", cell->state);
+ return;
+
+final_destruction:
+ /* The root volume is pinning the cell */
+ afs_put_volume(cell->net, cell->root_volume, afs_volume_trace_put_cell_root);
+ cell->root_volume = NULL;
+ afs_put_cell(cell, afs_cell_trace_put_destroy);
+}
+
+/*
+ * Work item wrapper around afs_manage_cell(). The ref that afs_queue_cell()
+ * took for the work item is dropped once the manager has run.
+ */
+static void afs_manage_cell_work(struct work_struct *work)
+{
+ struct afs_cell *cell = container_of(work, struct afs_cell, manager);
+
+ afs_manage_cell(cell);
+ afs_put_cell(cell, afs_cell_trace_put_queue_work);
+}
+
+/*
+ * Manage the records of cells known to a network namespace. This includes
+ * updating the DNS records and garbage collecting unused cells that were
+ * automatically added.
+ *
+ * Note that constructed cell records may only be removed from net->cells by
+ * this work item, so it is safe for this work item to stash a cursor pointing
+ * into the tree and then return to caller (provided it skips cells that are
+ * still under construction).
+ *
+ * Note also that we were given an increment on net->cells_outstanding by
+ * whoever queued us that we need to deal with before returning.
+ *
+ * If the namespace is no longer live, this runs in purge mode: pinned cells
+ * are unpinned and every unused cell has its manager queued regardless of
+ * its expiry time; the timer is not rearmed.
+ */
+void afs_manage_cells(struct work_struct *work)
+{
+ struct afs_net *net = container_of(work, struct afs_net, cells_manager);
+ struct rb_node *cursor;
+ time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
+ bool purging = !net->live;
+
+ _enter("");
+
+ /* Trawl the cell database looking for cells that have expired from
+ * lack of use and cells whose DNS results have expired and dispatch
+ * their managers.
+ */
+ down_read(&net->cells_lock);
+
+ for (cursor = rb_first(&net->cells); cursor; cursor = rb_next(cursor)) {
+ struct afs_cell *cell =
+ rb_entry(cursor, struct afs_cell, net_node);
+ unsigned active;
+ bool sched_cell = false;
+
+ active = atomic_read(&cell->active);
+ trace_afs_cell(cell->debug_id, refcount_read(&cell->ref),
+ active, afs_cell_trace_manage);
+
+ ASSERTCMP(active, >=, 1);
+
+ if (purging) {
+ /* Drop the pin taken when NO_GC was set. */
+ if (test_and_clear_bit(AFS_CELL_FL_NO_GC, &cell->flags)) {
+ active = atomic_dec_return(&cell->active);
+ trace_afs_cell(cell->debug_id, refcount_read(&cell->ref),
+ active, afs_cell_trace_unuse_pin);
+ }
+ }
+
+ /* An active count of 1 means the cell is unused: work out
+ * when it is due for garbage collection.
+ */
+ if (active == 1) {
+ struct afs_vlserver_list *vllist;
+ time64_t expire_at = cell->last_inactive;
+
+ read_lock(&cell->vl_servers_lock);
+ vllist = rcu_dereference_protected(
+ cell->vl_servers,
+ lockdep_is_held(&cell->vl_servers_lock));
+ if (vllist->nr_servers > 0)
+ expire_at += afs_cell_gc_delay;
+ read_unlock(&cell->vl_servers_lock);
+ if (purging || expire_at <= now)
+ sched_cell = true;
+ else if (expire_at < next_manage)
+ next_manage = expire_at;
+ }
+
+ if (!purging) {
+ if (test_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags))
+ sched_cell = true;
+ }
+
+ if (sched_cell)
+ afs_queue_cell(cell, afs_cell_trace_get_queue_manage);
+ }
+
+ up_read(&net->cells_lock);
+
+ /* Update the timer on the way out. We have to pass an increment on
+ * cells_outstanding in the namespace that we are in to the timer or
+ * the work scheduler.
+ */
+ if (!purging && next_manage < TIME64_MAX) {
+ now = ktime_get_real_seconds();
+
+ if (next_manage - now <= 0) {
+ /* Already due: requeue ourselves rather than using
+ * the timer, taking a count only if actually queued.
+ */
+ if (queue_work(afs_wq, &net->cells_manager))
+ atomic_inc(&net->cells_outstanding);
+ } else {
+ afs_set_cell_timer(net, next_manage - now);
+ }
+ }
+
+ /* Drop the count that was handed to us by whoever queued us. */
+ afs_dec_cells_outstanding(net);
+ _leave(" [%d]", atomic_read(&net->cells_outstanding));
+}
+
+/*
+ * Purge in-memory cell database.
+ *
+ * The workstation cell is detached and unused, a pending cell timer is
+ * cancelled (dropping the count it carried), the cell manager is kicked and
+ * then this waits for net->cells_outstanding to reach zero.
+ */
+void afs_cell_purge(struct afs_net *net)
+{
+ struct afs_cell *ws;
+
+ _enter("");
+
+ down_write(&net->cells_lock);
+ ws = net->ws_cell;
+ net->ws_cell = NULL;
+ up_write(&net->cells_lock);
+ afs_unuse_cell(net, ws, afs_cell_trace_unuse_ws);
+
+ _debug("del timer");
+ if (del_timer_sync(&net->cells_timer))
+ atomic_dec(&net->cells_outstanding);
+
+ _debug("kick mgr");
+ afs_queue_cell_manager(net);
+
+ _debug("wait");
+ wait_var_event(&net->cells_outstanding,
+ !atomic_read(&net->cells_outstanding));
+ _leave("");
+}
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
new file mode 100644
index 000000000..7dcd59693
--- /dev/null
+++ b/fs/afs/cmservice.c
@@ -0,0 +1,672 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AFS Cache Manager Service
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/ip.h>
+#include "internal.h"
+#include "afs_cm.h"
+#include "protocol_yfs.h"
+
+static int afs_deliver_cb_init_call_back_state(struct afs_call *);
+static int afs_deliver_cb_init_call_back_state3(struct afs_call *);
+static int afs_deliver_cb_probe(struct afs_call *);
+static int afs_deliver_cb_callback(struct afs_call *);
+static int afs_deliver_cb_probe_uuid(struct afs_call *);
+static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *);
+static void afs_cm_destructor(struct afs_call *);
+static void SRXAFSCB_CallBack(struct work_struct *);
+static void SRXAFSCB_InitCallBackState(struct work_struct *);
+static void SRXAFSCB_Probe(struct work_struct *);
+static void SRXAFSCB_ProbeUuid(struct work_struct *);
+static void SRXAFSCB_TellMeAboutYourself(struct work_struct *);
+
+static int afs_deliver_yfs_cb_callback(struct afs_call *);
+
+/*
+ * CB.CallBack operation type
+ *
+ * Each afs_call_type in this file names an operation and supplies the routine
+ * that unmarshals its request (.deliver), the call destructor (.destructor)
+ * and the work function that acts on the request and replies (.work).
+ */
+static const struct afs_call_type afs_SRXCBCallBack = {
+ .name = "CB.CallBack",
+ .deliver = afs_deliver_cb_callback,
+ .destructor = afs_cm_destructor,
+ .work = SRXAFSCB_CallBack,
+};
+
+/*
+ * CB.InitCallBackState operation type
+ */
+static const struct afs_call_type afs_SRXCBInitCallBackState = {
+ .name = "CB.InitCallBackState",
+ .deliver = afs_deliver_cb_init_call_back_state,
+ .destructor = afs_cm_destructor,
+ .work = SRXAFSCB_InitCallBackState,
+};
+
+/*
+ * CB.InitCallBackState3 operation type
+ *
+ * Shares its work function with CB.InitCallBackState; only the delivery
+ * routine differs.
+ */
+static const struct afs_call_type afs_SRXCBInitCallBackState3 = {
+ .name = "CB.InitCallBackState3",
+ .deliver = afs_deliver_cb_init_call_back_state3,
+ .destructor = afs_cm_destructor,
+ .work = SRXAFSCB_InitCallBackState,
+};
+
+/*
+ * CB.Probe operation type
+ */
+static const struct afs_call_type afs_SRXCBProbe = {
+ .name = "CB.Probe",
+ .deliver = afs_deliver_cb_probe,
+ .destructor = afs_cm_destructor,
+ .work = SRXAFSCB_Probe,
+};
+
+/*
+ * CB.ProbeUuid operation type
+ */
+static const struct afs_call_type afs_SRXCBProbeUuid = {
+ .name = "CB.ProbeUuid",
+ .deliver = afs_deliver_cb_probe_uuid,
+ .destructor = afs_cm_destructor,
+ .work = SRXAFSCB_ProbeUuid,
+};
+
+/*
+ * CB.TellMeAboutYourself operation type
+ */
+static const struct afs_call_type afs_SRXCBTellMeAboutYourself = {
+ .name = "CB.TellMeAboutYourself",
+ .deliver = afs_deliver_cb_tell_me_about_yourself,
+ .destructor = afs_cm_destructor,
+ .work = SRXAFSCB_TellMeAboutYourself,
+};
+
+/*
+ * YFS CB.CallBack operation type
+ *
+ * Shares its work function with CB.CallBack; only the delivery routine
+ * differs.
+ */
+static const struct afs_call_type afs_SRXYFSCB_CallBack = {
+ .name = "YFSCB.CallBack",
+ .deliver = afs_deliver_yfs_cb_callback,
+ .destructor = afs_cm_destructor,
+ .work = SRXAFSCB_CallBack,
+};
+
+/*
+ * Select the handler table for an incoming cache manager call.
+ *
+ * For a recognised operation ID, call->type is pointed at the matching
+ * afs_call_type and true is returned.  False is returned, with call->type
+ * left untouched, for an unknown operation ID or for YFSCBCallBack arriving
+ * on a service other than YFS_CM_SERVICE.
+ */
+bool afs_cm_incoming_call(struct afs_call *call)
+{
+	const struct afs_call_type *type;
+
+	_enter("{%u, CB.OP %u}", call->service_id, call->operation_ID);
+
+	switch (call->operation_ID) {
+	case CBCallBack:
+		type = &afs_SRXCBCallBack;
+		break;
+	case CBInitCallBackState:
+		type = &afs_SRXCBInitCallBackState;
+		break;
+	case CBInitCallBackState3:
+		type = &afs_SRXCBInitCallBackState3;
+		break;
+	case CBProbe:
+		type = &afs_SRXCBProbe;
+		break;
+	case CBProbeUuid:
+		type = &afs_SRXCBProbeUuid;
+		break;
+	case CBTellMeAboutYourself:
+		type = &afs_SRXCBTellMeAboutYourself;
+		break;
+	case YFSCBCallBack:
+		/* The YFS variant is only accepted on the YFS service. */
+		if (call->service_id != YFS_CM_SERVICE)
+			return false;
+		type = &afs_SRXYFSCB_CallBack;
+		break;
+	default:
+		return false;
+	}
+
+	call->type = type;
+	return true;
+}
+
+/*
+ * Look up the server record matching the peer address of an incoming call.
+ *
+ * If a record is found it is stored in call->server; otherwise the miss is
+ * traced and call->server is left as it was.  Always returns 0, so a missing
+ * server record is not reported as an error.
+ */
+static int afs_find_cm_server_by_peer(struct afs_call *call)
+{
+	struct sockaddr_rxrpc peer;
+	struct afs_server *found;
+
+	rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &peer);
+
+	found = afs_find_server(call->net, &peer);
+	if (found)
+		call->server = found;
+	else
+		trace_afs_cm_no_server(call, &peer);
+
+	return 0;
+}
+
+/*
+ * Look up the server record matching the UUID carried by an incoming call.
+ *
+ * The lookup key is taken from call->request; the uuid argument is not
+ * referenced by this function.  If a record is found it is stored in
+ * call->server; otherwise the miss is traced.  Always returns 0.
+ */
+static int afs_find_cm_server_by_uuid(struct afs_call *call,
+				      struct afs_uuid *uuid)
+{
+	struct afs_server *found;
+
+	/* The lookup itself is done under the RCU read lock. */
+	rcu_read_lock();
+	found = afs_find_server_by_uuid(call->net, call->request);
+	rcu_read_unlock();
+
+	if (found)
+		call->server = found;
+	else
+		trace_afs_cm_no_server_u(call, call->request);
+
+	return 0;
+}
+
+/*
+ * Clean up a cache manager call.
+ *
+ * Frees the raw request buffer and clears the pointer so that it cannot be
+ * freed a second time.  call->request is not touched here.
+ */
+static void afs_cm_destructor(struct afs_call *call)
+{
+ kfree(call->buffer);
+ call->buffer = NULL;
+}
+
+/*
+ * Abort a service call from within an action function.
+ *
+ * Aborts the underlying rxrpc call with the given abort code, error and
+ * reason string, then marks the afs_call complete with the same error (and 0
+ * as the final argument to afs_set_call_complete()).
+ */
+static void afs_abort_service_call(struct afs_call *call, u32 abort_code, int error,
+ const char *why)
+{
+ rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
+ abort_code, error, why);
+ afs_set_call_complete(call, error, 0);
+}
+
+/*
+ * The server supplied a list of callbacks that it wanted to break.
+ *
+ * Work function for CB.CallBack and YFSCB.CallBack.  If a server record was
+ * found during delivery, the call->count entries in call->request are broken
+ * against it; in all cases an empty reply is then sent and the call is put.
+ */
+static void SRXAFSCB_CallBack(struct work_struct *work)
+{
+ struct afs_call *call = container_of(work, struct afs_call, work);
+
+ _enter("");
+
+ /* We need to break the callbacks before sending the reply as the
+ * server holds up change visibility till it receives our reply so as
+ * to maintain cache coherency.
+ */
+ if (call->server) {
+ trace_afs_server(call->server->debug_id,
+ refcount_read(&call->server->ref),
+ atomic_read(&call->server->active),
+ afs_server_trace_callback);
+ afs_break_callbacks(call->server, call->count, call->request);
+ }
+
+ afs_send_empty_reply(call);
+ afs_put_call(call);
+ _leave("");
+}
+
+/*
+ * deliver request data to a CB.CallBack call
+ *
+ * The request is unmarshalled by a state machine keyed on call->unmarshall:
+ *  0: aim extraction at call->tmp
+ *  1: read the FID count (rejected if above AFSCBMAX) and allocate
+ *     call->buffer to hold count * 3 32-bit words
+ *  2: read the FID array and convert it into an array of
+ *     struct afs_callback_break in call->request
+ *  3: read the callback count, which must be zero or equal to the FID count
+ *  4: discard count2 * 3 words of callback data
+ *  5: nothing more to extract
+ *
+ * Once extraction is complete the call must be in the AFS_CALL_SV_REPLYING
+ * state, after which the server record is looked up by peer address.
+ * Returns 0 or a negative error code.
+ */
+static int afs_deliver_cb_callback(struct afs_call *call)
+{
+ struct afs_callback_break *cb;
+ __be32 *bp;
+ int ret, loop;
+
+ _enter("{%u}", call->unmarshall);
+
+ switch (call->unmarshall) {
+ case 0:
+ afs_extract_to_tmp(call);
+ call->unmarshall++;
+
+ /* extract the FID array and its count in two steps */
+ fallthrough;
+ case 1:
+ _debug("extract FID count");
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ call->count = ntohl(call->tmp);
+ _debug("FID count: %u", call->count);
+ if (call->count > AFSCBMAX)
+ return afs_protocol_error(call, afs_eproto_cb_fid_count);
+
+ call->buffer = kmalloc(array3_size(call->count, 3, 4),
+ GFP_KERNEL);
+ if (!call->buffer)
+ return -ENOMEM;
+ afs_extract_to_buf(call, call->count * 3 * 4);
+ call->unmarshall++;
+
+ fallthrough;
+ case 2:
+ _debug("extract FID array");
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ _debug("unmarshall FID array");
+ call->request = kcalloc(call->count,
+ sizeof(struct afs_callback_break),
+ GFP_KERNEL);
+ if (!call->request)
+ return -ENOMEM;
+
+ cb = call->request;
+ bp = call->buffer;
+ for (loop = call->count; loop > 0; loop--, cb++) { /* three words per FID */
+ cb->fid.vid = ntohl(*bp++);
+ cb->fid.vnode = ntohl(*bp++);
+ cb->fid.unique = ntohl(*bp++);
+ }
+
+ afs_extract_to_tmp(call);
+ call->unmarshall++;
+
+ /* extract the callback array and its count in two steps */
+ fallthrough;
+ case 3:
+ _debug("extract CB count");
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ call->count2 = ntohl(call->tmp);
+ _debug("CB count: %u", call->count2);
+ if (call->count2 != call->count && call->count2 != 0)
+ return afs_protocol_error(call, afs_eproto_cb_count);
+ call->iter = &call->def_iter;
+ iov_iter_discard(&call->def_iter, ITER_DEST, call->count2 * 3 * 4);
+ call->unmarshall++;
+
+ fallthrough;
+ case 4:
+ _debug("extract discard %zu/%u",
+ iov_iter_count(call->iter), call->count2 * 3 * 4);
+
+ ret = afs_extract_data(call, false);
+ if (ret < 0)
+ return ret;
+
+ call->unmarshall++;
+ fallthrough;
+
+ case 5:
+ break;
+ }
+
+ if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
+ return afs_io_error(call, afs_io_error_cm_reply);
+
+ /* we'll need the file server record as that tells us which set of
+ * vnodes to operate upon */
+ return afs_find_cm_server_by_peer(call);
+}
+
+/*
+ * allow the fileserver to request callback state (re-)initialisation
+ *
+ * Work function shared by CB.InitCallBackState and CB.InitCallBackState3.
+ * The callback state is reinitialised only if a server record was found
+ * during delivery; an empty reply is sent and the call is put regardless.
+ */
+static void SRXAFSCB_InitCallBackState(struct work_struct *work)
+{
+ struct afs_call *call = container_of(work, struct afs_call, work);
+
+ _enter("{%p}", call->server);
+
+ if (call->server)
+ afs_init_callback_state(call->server);
+ afs_send_empty_reply(call);
+ afs_put_call(call);
+ _leave("");
+}
+
+/*
+ * deliver request data to a CB.InitCallBackState call
+ *
+ * Sets up a zero-length discard and runs the extraction, then looks up the
+ * server record by peer address.  Unlike the other delivery routines in this
+ * file, no AFS_CALL_SV_REPLYING state check is made before the lookup.
+ * Returns 0 or a negative error code from the extraction.
+ */
+static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
+{
+ int ret;
+
+ _enter("");
+
+ afs_extract_discard(call, 0);
+ ret = afs_extract_data(call, false);
+ if (ret < 0)
+ return ret;
+
+ /* we'll need the file server record as that tells us which set of
+ * vnodes to operate upon */
+ return afs_find_cm_server_by_peer(call);
+}
+
+/*
+ * deliver request data to a CB.InitCallBackState3 call
+ *
+ * The request carries a UUID encoded as 11 32-bit words.  Unmarshalling is a
+ * state machine keyed on call->unmarshall:
+ *  0: allocate call->buffer for the 11 words and aim extraction at it
+ *  1: read the words and convert them into a struct afs_uuid stored in
+ *     call->request
+ *  2: nothing more to extract
+ *
+ * If the extraction reports -EAGAIN, 0 is returned so that delivery can be
+ * resumed when more data arrives.  Once the UUID is complete, the call must
+ * be in the AFS_CALL_SV_REPLYING state, after which the server record is
+ * looked up by UUID.  Returns 0 or a negative error code.
+ */
+static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
+{
+	struct afs_uuid *r;
+	unsigned loop;
+	__be32 *b;
+	int ret;
+
+	/* Log function entry once only, including the unmarshalling stage. */
+	_enter("{%u}", call->unmarshall);
+
+	switch (call->unmarshall) {
+	case 0:
+		call->buffer = kmalloc_array(11, sizeof(__be32), GFP_KERNEL);
+		if (!call->buffer)
+			return -ENOMEM;
+		afs_extract_to_buf(call, 11 * sizeof(__be32));
+		call->unmarshall++;
+
+		fallthrough;
+	case 1:
+		_debug("extract UUID");
+		ret = afs_extract_data(call, false);
+		switch (ret) {
+		case 0:		break;
+		case -EAGAIN:	return 0;
+		default:	return ret;
+		}
+
+		_debug("unmarshall UUID");
+		call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL);
+		if (!call->request)
+			return -ENOMEM;
+
+		b = call->buffer;
+		r = call->request;
+		/* Word 0 is copied without byte-swapping; words 1 and 2 are
+		 * narrowed to 16 bits in network order; the rest are
+		 * converted to host order one word per field.
+		 */
+		r->time_low			= b[0];
+		r->time_mid			= htons(ntohl(b[1]));
+		r->time_hi_and_version		= htons(ntohl(b[2]));
+		r->clock_seq_hi_and_reserved	= ntohl(b[3]);
+		r->clock_seq_low		= ntohl(b[4]);
+
+		for (loop = 0; loop < 6; loop++)
+			r->node[loop] = ntohl(b[loop + 5]);
+
+		call->unmarshall++;
+		fallthrough;
+
+	case 2:
+		break;
+	}
+
+	if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
+		return afs_io_error(call, afs_io_error_cm_reply);
+
+	/* we'll need the file server record as that tells us which set of
+	 * vnodes to operate upon */
+	return afs_find_cm_server_by_uuid(call, call->request);
+}
+
+/*
+ * allow the fileserver to see if the cache manager is still alive
+ *
+ * Work function for CB.Probe: sends an empty reply and puts the call.
+ */
+static void SRXAFSCB_Probe(struct work_struct *work)
+{
+ struct afs_call *call = container_of(work, struct afs_call, work);
+
+ _enter("");
+ afs_send_empty_reply(call);
+ afs_put_call(call);
+ _leave("");
+}
+
+/*
+ * deliver request data to a CB.Probe call
+ *
+ * Sets up a zero-length discard and runs the extraction, checks that the call
+ * has reached the AFS_CALL_SV_REPLYING state and then looks up the server
+ * record by peer address.  Returns 0 or a negative error code.
+ */
+static int afs_deliver_cb_probe(struct afs_call *call)
+{
+ int ret;
+
+ _enter("");
+
+ afs_extract_discard(call, 0);
+ ret = afs_extract_data(call, false);
+ if (ret < 0)
+ return ret;
+
+ if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
+ return afs_io_error(call, afs_io_error_cm_reply);
+ return afs_find_cm_server_by_peer(call);
+}
+
+/*
+ * Allow the fileserver to quickly find out if the cache manager has been
+ * rebooted.
+ *
+ * Work function for CB.ProbeUuid.  The UUID unmarshalled into call->request
+ * is compared bytewise with this namespace's UUID: a match gets an empty
+ * reply, a mismatch aborts the call with abort code 1, error 1 and reason
+ * "K-1".  The call is put in both cases.
+ */
+static void SRXAFSCB_ProbeUuid(struct work_struct *work)
+{
+ struct afs_call *call = container_of(work, struct afs_call, work);
+ struct afs_uuid *r = call->request;
+
+ _enter("");
+
+ if (memcmp(r, &call->net->uuid, sizeof(call->net->uuid)) == 0)
+ afs_send_empty_reply(call);
+ else
+ afs_abort_service_call(call, 1, 1, "K-1");
+
+ afs_put_call(call);
+ _leave("");
+}
+
+/*
+ * deliver request data to a CB.ProbeUuid call
+ *
+ * The request carries a UUID encoded as 11 32-bit words.  Unmarshalling is a
+ * state machine keyed on call->unmarshall:
+ *  0: allocate call->buffer for the 11 words and aim extraction at it
+ *  1: read the words and convert them into a struct afs_uuid stored in
+ *     call->request
+ *  2: nothing more to extract
+ *
+ * If the extraction reports -EAGAIN, 0 is returned.  Once the UUID is
+ * complete the call must be in the AFS_CALL_SV_REPLYING state; the server
+ * record is then looked up by peer address (not by the UUID just read).
+ * Returns 0 or a negative error code.
+ */
+static int afs_deliver_cb_probe_uuid(struct afs_call *call)
+{
+ struct afs_uuid *r;
+ unsigned loop;
+ __be32 *b;
+ int ret;
+
+ _enter("{%u}", call->unmarshall);
+
+ switch (call->unmarshall) {
+ case 0:
+ call->buffer = kmalloc_array(11, sizeof(__be32), GFP_KERNEL);
+ if (!call->buffer)
+ return -ENOMEM;
+ afs_extract_to_buf(call, 11 * sizeof(__be32));
+ call->unmarshall++;
+
+ fallthrough;
+ case 1:
+ _debug("extract UUID");
+ ret = afs_extract_data(call, false);
+ switch (ret) {
+ case 0: break;
+ case -EAGAIN: return 0;
+ default: return ret;
+ }
+
+ _debug("unmarshall UUID");
+ call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL);
+ if (!call->request)
+ return -ENOMEM;
+
+ b = call->buffer;
+ r = call->request;
+ r->time_low = b[0]; /* copied without byte-swapping */
+ r->time_mid = htons(ntohl(b[1]));
+ r->time_hi_and_version = htons(ntohl(b[2]));
+ r->clock_seq_hi_and_reserved = ntohl(b[3]);
+ r->clock_seq_low = ntohl(b[4]);
+
+ for (loop = 0; loop < 6; loop++)
+ r->node[loop] = ntohl(b[loop + 5]);
+
+ call->unmarshall++;
+ fallthrough;
+
+ case 2:
+ break;
+ }
+
+ if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
+ return afs_io_error(call, afs_io_error_cm_reply);
+ return afs_find_cm_server_by_peer(call);
+}
+
+/*
+ * allow the fileserver to ask about the cache manager's capabilities
+ *
+ * Work function for CB.TellMeAboutYourself.  Builds a zero-filled reply on
+ * the stack, fills in this namespace's UUID as 11 32-bit words and a single
+ * capability word (AFS_CAP_ERROR_TRANSLATION), sends it and puts the call.
+ * The interface count, addresses, netmasks and MTUs are left as zero.
+ */
+static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work)
+{
+ struct afs_call *call = container_of(work, struct afs_call, work);
+ int loop;
+
+ struct {
+ struct /* InterfaceAddr */ {
+ __be32 nifs;
+ __be32 uuid[11];
+ __be32 ifaddr[32];
+ __be32 netmask[32];
+ __be32 mtu[32];
+ } ia;
+ struct /* Capabilities */ {
+ __be32 capcount;
+ __be32 caps[1];
+ } cap;
+ } reply;
+
+ _enter("");
+
+ memset(&reply, 0, sizeof(reply));
+
+ reply.ia.uuid[0] = call->net->uuid.time_low; /* copied without byte-swapping */
+ reply.ia.uuid[1] = htonl(ntohs(call->net->uuid.time_mid));
+ reply.ia.uuid[2] = htonl(ntohs(call->net->uuid.time_hi_and_version));
+ reply.ia.uuid[3] = htonl((s8) call->net->uuid.clock_seq_hi_and_reserved);
+ reply.ia.uuid[4] = htonl((s8) call->net->uuid.clock_seq_low);
+ for (loop = 0; loop < 6; loop++)
+ reply.ia.uuid[loop + 5] = htonl((s8) call->net->uuid.node[loop]);
+
+ reply.cap.capcount = htonl(1);
+ reply.cap.caps[0] = htonl(AFS_CAP_ERROR_TRANSLATION);
+ afs_send_simple_reply(call, &reply, sizeof(reply));
+ afs_put_call(call);
+ _leave("");
+}
+
+/*
+ * deliver request data to a CB.TellMeAboutYourself call
+ *
+ * Sets up a zero-length discard and runs the extraction, checks that the call
+ * has reached the AFS_CALL_SV_REPLYING state and then looks up the server
+ * record by peer address.  Returns 0 or a negative error code.
+ */
+static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
+{
+ int ret;
+
+ _enter("");
+
+ afs_extract_discard(call, 0);
+ ret = afs_extract_data(call, false);
+ if (ret < 0)
+ return ret;
+
+ if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
+ return afs_io_error(call, afs_io_error_cm_reply);
+ return afs_find_cm_server_by_peer(call);
+}
+
+/*
+ * deliver request data to a YFS CB.CallBack call
+ *
+ * The request is unmarshalled by a state machine keyed on call->unmarshall:
+ *  0: aim extraction at call->tmp
+ *  1: read the FID count (rejected if above YFSCBMAX) and allocate
+ *     call->buffer for that many struct yfs_xdr_YFSFid
+ *  2: read the FID array and convert it into an array of
+ *     struct afs_callback_break in call->request
+ *  3: nothing more to extract
+ *
+ * Once extraction is complete the call must be in the AFS_CALL_SV_REPLYING
+ * state, after which the server record is looked up by peer address.
+ * Returns 0 or a negative error code.
+ */
+static int afs_deliver_yfs_cb_callback(struct afs_call *call)
+{
+ struct afs_callback_break *cb;
+ struct yfs_xdr_YFSFid *bp;
+ size_t size;
+ int ret, loop;
+
+ _enter("{%u}", call->unmarshall);
+
+ switch (call->unmarshall) {
+ case 0:
+ afs_extract_to_tmp(call);
+ call->unmarshall++;
+
+ /* extract the FID array and its count in two steps */
+ fallthrough;
+ case 1:
+ _debug("extract FID count");
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ call->count = ntohl(call->tmp);
+ _debug("FID count: %u", call->count);
+ if (call->count > YFSCBMAX)
+ return afs_protocol_error(call, afs_eproto_cb_fid_count);
+
+ size = array_size(call->count, sizeof(struct yfs_xdr_YFSFid));
+ call->buffer = kmalloc(size, GFP_KERNEL);
+ if (!call->buffer)
+ return -ENOMEM;
+ afs_extract_to_buf(call, size);
+ call->unmarshall++;
+
+ fallthrough;
+ case 2:
+ _debug("extract FID array");
+ ret = afs_extract_data(call, false);
+ if (ret < 0)
+ return ret;
+
+ _debug("unmarshall FID array");
+ call->request = kcalloc(call->count,
+ sizeof(struct afs_callback_break),
+ GFP_KERNEL);
+ if (!call->request)
+ return -ENOMEM;
+
+ cb = call->request;
+ bp = call->buffer;
+ for (loop = call->count; loop > 0; loop--, cb++) { /* one YFSFid per entry */
+ cb->fid.vid = xdr_to_u64(bp->volume);
+ cb->fid.vnode = xdr_to_u64(bp->vnode.lo);
+ cb->fid.vnode_hi = ntohl(bp->vnode.hi);
+ cb->fid.unique = ntohl(bp->vnode.unique);
+ bp++;
+ }
+
+ afs_extract_to_tmp(call);
+ call->unmarshall++;
+ fallthrough;
+
+ case 3:
+ break;
+ }
+
+ if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
+ return afs_io_error(call, afs_io_error_cm_reply);
+
+ /* We'll need the file server record as that tells us which set of
+ * vnodes to operate upon.
+ */
+ return afs_find_cm_server_by_peer(call);
+}
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
new file mode 100644
index 000000000..cf811b77e
--- /dev/null
+++ b/fs/afs/dir.c
@@ -0,0 +1,2053 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* dir.c: AFS filesystem directory handling
+ *
+ * Copyright (C) 2002, 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+#include <linux/ctype.h>
+#include <linux/sched.h>
+#include <linux/task_io_accounting_ops.h>
+#include "internal.h"
+#include "afs_fs.h"
+#include "xdr_fs.h"
+
+static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
+ unsigned int flags);
+static int afs_dir_open(struct inode *inode, struct file *file);
+static int afs_readdir(struct file *file, struct dir_context *ctx);
+static int afs_d_revalidate(struct dentry *dentry, unsigned int flags);
+static int afs_d_delete(const struct dentry *dentry);
+static void afs_d_iput(struct dentry *dentry, struct inode *inode);
+static bool afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int nlen,
+ loff_t fpos, u64 ino, unsigned dtype);
+static bool afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen,
+ loff_t fpos, u64 ino, unsigned dtype);
+static int afs_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl);
+static int afs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode);
+static int afs_rmdir(struct inode *dir, struct dentry *dentry);
+static int afs_unlink(struct inode *dir, struct dentry *dentry);
+static int afs_link(struct dentry *from, struct inode *dir,
+ struct dentry *dentry);
+static int afs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, const char *content);
+static int afs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ struct dentry *old_dentry, struct inode *new_dir,
+ struct dentry *new_dentry, unsigned int flags);
+static bool afs_dir_release_folio(struct folio *folio, gfp_t gfp_flags);
+static void afs_dir_invalidate_folio(struct folio *folio, size_t offset,
+ size_t length);
+
+static bool afs_dir_dirty_folio(struct address_space *mapping,
+ struct folio *folio)
+{
+ BUG(); /* This should never happen: the dirty_folio hook of afs_dir_aops
+ * is not expected to be invoked, so reaching it is fatal. */
+}
+
+const struct file_operations afs_dir_file_operations = { /* file operations for AFS directories */
+ .open = afs_dir_open,
+ .release = afs_release,
+ .iterate_shared = afs_readdir,
+ .lock = afs_lock,
+ .llseek = generic_file_llseek,
+};
+
+const struct inode_operations afs_dir_inode_operations = { /* inode operations for AFS directories */
+ .create = afs_create,
+ .lookup = afs_lookup,
+ .link = afs_link,
+ .unlink = afs_unlink,
+ .symlink = afs_symlink,
+ .mkdir = afs_mkdir,
+ .rmdir = afs_rmdir,
+ .rename = afs_rename,
+ .permission = afs_permission,
+ .getattr = afs_getattr,
+ .setattr = afs_setattr,
+};
+
+const struct address_space_operations afs_dir_aops = { /* pagecache operations for directory contents */
+ .dirty_folio = afs_dir_dirty_folio, /* BUG()s if ever called */
+ .release_folio = afs_dir_release_folio,
+ .invalidate_folio = afs_dir_invalidate_folio,
+};
+
+const struct dentry_operations afs_fs_dentry_operations = { /* dentry operations */
+ .d_revalidate = afs_d_revalidate,
+ .d_delete = afs_d_delete,
+ .d_release = afs_d_release,
+ .d_automount = afs_d_automount,
+ .d_iput = afs_d_iput,
+};
+
+struct afs_lookup_one_cookie { /* search state for a single-name directory lookup */
+ struct dir_context ctx; /* embedded; the actor recovers the cookie with container_of() */
+ struct qstr name; /* name being searched for */
+ bool found; /* set by afs_lookup_one_filldir() on a match */
+ struct afs_fid fid; /* vnode and uniquifier are filled in on a match */
+};
+
+struct afs_lookup_cookie { /* NOTE(review): presumably the afs_lookup_filldir() state; its users are outside this excerpt */
+ struct dir_context ctx;
+ struct qstr name;
+ bool found;
+ bool one_only;
+ unsigned short nr_fids;
+ struct afs_fid fids[50];
+};
+
+/*
+ * Drop the refs that we're holding on the folios we were reading into. We've
+ * got refs on the first nr_pages pages.
+ *
+ * Walks the mapping's xarray from index 0 to nr_pages - 1 under the RCU read
+ * lock and puts each folio found.  Does nothing if nr_pages is zero; in that
+ * case 'last' has wrapped around, but it is not used before the early return.
+ */
+static void afs_dir_read_cleanup(struct afs_read *req)
+{
+ struct address_space *mapping = req->vnode->netfs.inode.i_mapping;
+ struct folio *folio;
+ pgoff_t last = req->nr_pages - 1;
+
+ XA_STATE(xas, &mapping->i_pages, 0);
+
+ if (unlikely(!req->nr_pages))
+ return;
+
+ rcu_read_lock();
+ xas_for_each(&xas, folio, last) {
+ if (xas_retry(&xas, folio))
+ continue;
+ BUG_ON(xa_is_value(folio));
+ ASSERTCMP(folio_file_mapping(folio), ==, mapping);
+
+ folio_put(folio);
+ }
+
+ rcu_read_unlock();
+}
+
+/*
+ * check that a directory folio is valid
+ *
+ * Every directory block in the folio that lies below i_size must start with
+ * AFS_DIR_MAGIC.  As a side effect, the last byte of each checked block is
+ * overwritten with NUL.  Returns true if the folio passes (also bumping the
+ * n_read_dir statistic) or lies wholly beyond i_size; returns false after
+ * logging and tracing the first bad block.
+ */
+static bool afs_dir_check_folio(struct afs_vnode *dvnode, struct folio *folio,
+ loff_t i_size)
+{
+ union afs_xdr_dir_block *block;
+ size_t offset, size;
+ loff_t pos;
+
+ /* Determine how many magic numbers there should be in this folio, but
+ * we must take care because the directory may change size under us.
+ */
+ pos = folio_pos(folio);
+ if (i_size <= pos)
+ goto checked;
+
+ size = min_t(loff_t, folio_size(folio), i_size - pos);
+ for (offset = 0; offset < size; offset += sizeof(*block)) {
+ block = kmap_local_folio(folio, offset);
+ if (block->hdr.magic != AFS_DIR_MAGIC) {
+ printk("kAFS: %s(%lx): [%llx] bad magic %zx/%zx is %04hx\n",
+ __func__, dvnode->netfs.inode.i_ino,
+ pos, offset, size, ntohs(block->hdr.magic));
+ trace_afs_dir_check_failed(dvnode, pos + offset, i_size);
+ kunmap_local(block);
+ trace_afs_file_error(dvnode, -EIO, afs_file_error_dir_bad_magic);
+ goto error;
+ }
+
+ /* Make sure each block is NUL terminated so we can reasonably
+ * use string functions on it. The filenames in the folio
+ * *should* be NUL-terminated anyway.
+ */
+ ((u8 *)block)[AFS_DIR_BLOCK_SIZE - 1] = 0;
+
+ kunmap_local(block);
+ }
+checked:
+ afs_stat_v(dvnode, n_read_dir);
+ return true;
+
+error:
+ return false;
+}
+
+/*
+ * Dump the contents of a directory.
+ *
+ * Logs the read request's parameters, then the first 32 bytes of every
+ * directory block in the folios at indices 0 to nr_pages - 1, as hex at
+ * warning level.
+ *
+ * This function neither takes the RCU read lock for the xarray walk nor
+ * checks for nr_pages being zero; the caller afs_dir_check() does both before
+ * calling it.
+ */
+static void afs_dir_dump(struct afs_vnode *dvnode, struct afs_read *req)
+{
+ union afs_xdr_dir_block *block;
+ struct address_space *mapping = dvnode->netfs.inode.i_mapping;
+ struct folio *folio;
+ pgoff_t last = req->nr_pages - 1;
+ size_t offset, size;
+
+ XA_STATE(xas, &mapping->i_pages, 0);
+
+ pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx\n",
+ dvnode->fid.vid, dvnode->fid.vnode,
+ req->file_size, req->len, req->actual_len);
+ pr_warn("DIR %llx %x %zx %zx\n",
+ req->pos, req->nr_pages,
+ req->iter->iov_offset, iov_iter_count(req->iter));
+
+ xas_for_each(&xas, folio, last) {
+ if (xas_retry(&xas, folio))
+ continue;
+
+ BUG_ON(folio_file_mapping(folio) != mapping);
+
+ size = min_t(loff_t, folio_size(folio), req->actual_len - folio_pos(folio));
+ for (offset = 0; offset < size; offset += sizeof(*block)) {
+ block = kmap_local_folio(folio, offset);
+ pr_warn("[%02lx] %32phN\n", folio_index(folio) + offset, block);
+ kunmap_local(block);
+ }
+ }
+}
+
+/*
+ * Check all the blocks in a directory. All the folios are held pinned.
+ *
+ * Walks the folios at indices 0 to nr_pages - 1 under the RCU read lock and
+ * validates each against req->actual_len.  On the first failure the whole
+ * directory is dumped to the log and -EIO is returned; otherwise 0 is
+ * returned, including when nr_pages is zero.
+ */
+static int afs_dir_check(struct afs_vnode *dvnode, struct afs_read *req)
+{
+ struct address_space *mapping = dvnode->netfs.inode.i_mapping;
+ struct folio *folio;
+ pgoff_t last = req->nr_pages - 1;
+ int ret = 0;
+
+ XA_STATE(xas, &mapping->i_pages, 0);
+
+ if (unlikely(!req->nr_pages))
+ return 0;
+
+ rcu_read_lock();
+ xas_for_each(&xas, folio, last) {
+ if (xas_retry(&xas, folio))
+ continue;
+
+ BUG_ON(folio_file_mapping(folio) != mapping);
+
+ if (!afs_dir_check_folio(dvnode, folio, req->actual_len)) {
+ afs_dir_dump(dvnode, req);
+ ret = -EIO;
+ break;
+ }
+ }
+
+ rcu_read_unlock();
+ return ret;
+}
+
+/*
+ * open an AFS directory file
+ *
+ * Asserts at build time that the on-disk directory block and entry layouts
+ * are 2048 and 32 bytes respectively.  Returns -ENOENT if the vnode has been
+ * flagged as deleted; otherwise hands over to afs_open().
+ */
+static int afs_dir_open(struct inode *inode, struct file *file)
+{
+ _enter("{%lu}", inode->i_ino);
+
+ BUILD_BUG_ON(sizeof(union afs_xdr_dir_block) != 2048);
+ BUILD_BUG_ON(sizeof(union afs_xdr_dirent) != 32);
+
+ if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(inode)->flags))
+ return -ENOENT;
+
+ return afs_open(inode, file);
+}
+
+/*
+ * Read the directory into the pagecache in one go, scrubbing the previous
+ * contents. The list of folios is returned, pinning them so that they don't
+ * get reclaimed during the iteration.
+ *
+ * On success the read request is returned with dvnode->validate_lock held for
+ * reading; the caller must release the lock and put the request.  On failure
+ * an ERR_PTR is returned, the lock is not held and the request has been put.
+ *
+ * The directory size must lie between 2048 bytes and 2048 * 1024 bytes
+ * inclusive; smaller is reported through afs_bad() and larger gives -EFBIG.
+ * If the fetch shows that the file is bigger than the buffer, the sizing and
+ * pinning steps are repeated from the 'expand' label with the larger size.
+ */
+static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
+ __acquires(&dvnode->validate_lock)
+{
+ struct address_space *mapping = dvnode->netfs.inode.i_mapping;
+ struct afs_read *req;
+ loff_t i_size;
+ int nr_pages, i;
+ int ret;
+ loff_t remote_size = 0;
+
+ _enter("");
+
+ req = kzalloc(sizeof(*req), GFP_KERNEL);
+ if (!req)
+ return ERR_PTR(-ENOMEM);
+
+ refcount_set(&req->usage, 1);
+ req->vnode = dvnode;
+ req->key = key_get(key);
+ req->cleanup = afs_dir_read_cleanup;
+
+expand:
+ i_size = i_size_read(&dvnode->netfs.inode);
+ if (i_size < remote_size) /* use the size reported by the previous fetch if larger */
+ i_size = remote_size;
+ if (i_size < 2048) {
+ ret = afs_bad(dvnode, afs_file_error_dir_small);
+ goto error;
+ }
+ if (i_size > 2048 * 1024) {
+ trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
+ ret = -EFBIG;
+ goto error;
+ }
+
+ _enter("%llu", i_size);
+
+ nr_pages = (i_size + PAGE_SIZE - 1) / PAGE_SIZE;
+
+ req->actual_len = i_size; /* May change */
+ req->len = nr_pages * PAGE_SIZE; /* We can ask for more than there is */
+ req->data_version = dvnode->status.data_version; /* May change */
+ iov_iter_xarray(&req->def_iter, ITER_DEST, &dvnode->netfs.inode.i_mapping->i_pages,
+ 0, i_size);
+ req->iter = &req->def_iter;
+
+ /* Fill in any gaps that we might find where the memory reclaimer has
+ * been at work and pin all the folios. If there are any gaps, we will
+ * need to reread the entire directory contents.
+ */
+ i = req->nr_pages; /* resume after the folios pinned on an earlier pass */
+ while (i < nr_pages) {
+ struct folio *folio;
+
+ folio = filemap_get_folio(mapping, i);
+ if (!folio) {
+ if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+ afs_stat_v(dvnode, n_inval);
+
+ ret = -ENOMEM;
+ folio = __filemap_get_folio(mapping,
+ i, FGP_LOCK | FGP_CREAT,
+ mapping->gfp_mask);
+ if (!folio)
+ goto error;
+ folio_attach_private(folio, (void *)1);
+ folio_unlock(folio);
+ }
+
+ req->nr_pages += folio_nr_pages(folio);
+ i += folio_nr_pages(folio);
+ }
+
+ /* If we're going to reload, we need to lock all the pages to prevent
+ * races.
+ */
+ ret = -ERESTARTSYS; /* also the result if the write lock below is interrupted */
+ if (down_read_killable(&dvnode->validate_lock) < 0)
+ goto error;
+
+ if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+ goto success;
+
+ up_read(&dvnode->validate_lock);
+ if (down_write_killable(&dvnode->validate_lock) < 0)
+ goto error;
+
+ if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) { /* recheck now the write lock is held */
+ trace_afs_reload_dir(dvnode);
+ ret = afs_fetch_data(dvnode, req);
+ if (ret < 0)
+ goto error_unlock;
+
+ task_io_account_read(PAGE_SIZE * req->nr_pages);
+
+ if (req->len < req->file_size) {
+ /* The content has grown, so we need to expand the
+ * buffer.
+ */
+ up_write(&dvnode->validate_lock);
+ remote_size = req->file_size;
+ goto expand;
+ }
+
+ /* Validate the data we just read. */
+ ret = afs_dir_check(dvnode, req);
+ if (ret < 0)
+ goto error_unlock;
+
+ // TODO: Trim excess pages
+
+ set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags);
+ }
+
+ downgrade_write(&dvnode->validate_lock); /* return holding the lock for read */
+success:
+ return req;
+
+error_unlock:
+ up_write(&dvnode->validate_lock);
+error:
+ afs_put_read(req);
+ _leave(" = %d", ret);
+ return ERR_PTR(ret);
+}
+
+/*
+ * deal with one block in an AFS directory
+ *
+ * Walks the slots of one directory block, starting after the reserved header
+ * slots, and passes each in-use entry at or after ctx->pos to the context's
+ * actor through dir_emit().  ctx->pos is advanced past every slot that has
+ * been dealt with.
+ *
+ * @dvnode: directory vnode, used for error reporting
+ * @ctx:    iteration context supplying the position and the actor
+ * @block:  the mapped directory block
+ * @blkoff: byte offset of the block within the directory
+ *
+ * Returns 1 if the whole block was processed and iteration should continue,
+ * 0 if the actor asked to stop, or the result of afs_bad() if the block is
+ * malformed (over-long name, entry running past the end of the block, or a
+ * name-extension slot not marked in the allocation bitmap).
+ */
+static int afs_dir_iterate_block(struct afs_vnode *dvnode,
+				 struct dir_context *ctx,
+				 union afs_xdr_dir_block *block,
+				 unsigned blkoff)
+{
+	union afs_xdr_dirent *dire;
+	unsigned offset, next, curr, nr_slots;
+	unsigned int tmp;
+	size_t nlen;
+
+	_enter("%llx,%x", ctx->pos, blkoff);
+
+	/* Slot index within this block at which the caller wants to resume */
+	curr = (ctx->pos - blkoff) / sizeof(union afs_xdr_dirent);
+
+	/* walk through the block, an entry at a time */
+	for (offset = (blkoff == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS);
+	     offset < AFS_DIR_SLOTS_PER_BLOCK;
+	     offset = next
+	     ) {
+		/* skip entries marked unused in the bitmap */
+		if (!(block->hdr.bitmap[offset / 8] &
+		      (1 << (offset % 8)))) {
+			_debug("ENT[%zu.%u]: unused",
+			       blkoff / sizeof(union afs_xdr_dir_block), offset);
+			next = offset + 1;
+			if (offset >= curr)
+				ctx->pos = blkoff +
+					next * sizeof(union afs_xdr_dirent);
+			continue;
+		}
+
+		/* got a valid entry */
+		dire = &block->dirents[offset];
+		nlen = strnlen(dire->u.name,
+			       sizeof(*block) -
+			       offset * sizeof(union afs_xdr_dirent));
+		if (nlen > AFSNAMEMAX - 1) {
+			_debug("ENT[%zu.%u]: name too long (len %zu)",
+			       blkoff / sizeof(union afs_xdr_dir_block),
+			       offset, nlen);
+			return afs_bad(dvnode, afs_file_error_dir_name_too_long);
+		}
+
+		_debug("ENT[%zu.%u]: %s %zu \"%s\"",
+		       blkoff / sizeof(union afs_xdr_dir_block), offset,
+		       (offset < curr ? "skip" : "fill"),
+		       nlen, dire->u.name);
+
+		/* A long name spills into the slots following the entry */
+		nr_slots = afs_dir_calc_slots(nlen);
+		next = offset + nr_slots;
+		if (next > AFS_DIR_SLOTS_PER_BLOCK) {
+			_debug("ENT[%zu.%u]:"
+			       " %u extends beyond end dir block"
+			       " (len %zu)",
+			       blkoff / sizeof(union afs_xdr_dir_block),
+			       offset, next, nlen);
+			return afs_bad(dvnode, afs_file_error_dir_over_end);
+		}
+
+		/* Check that the name-extension dirents are all allocated */
+		for (tmp = 1; tmp < nr_slots; tmp++) {
+			unsigned int ix = offset + tmp;
+
+			if (!(block->hdr.bitmap[ix / 8] & (1 << (ix % 8)))) {
+				_debug("ENT[%zu.%u]:"
+				       " unmarked extension (%u/%u)",
+				       blkoff / sizeof(union afs_xdr_dir_block),
+				       offset, tmp, nr_slots);
+				return afs_bad(dvnode, afs_file_error_dir_unmarked_ext);
+			}
+		}
+
+		/* skip if starts before the current position */
+		if (offset < curr) {
+			if (next > curr)
+				ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent);
+			continue;
+		}
+
+		/* Don't expose silly rename entries to userspace. */
+		if (nlen > 6 &&
+		    dire->u.name[0] == '.' &&
+		    ctx->actor != afs_lookup_filldir &&
+		    ctx->actor != afs_lookup_one_filldir &&
+		    memcmp(dire->u.name, ".__afs", 6) == 0)
+			continue;
+
+		/* found the next entry; the internal lookup actors are given
+		 * the FID uniquifier in place of a directory entry type
+		 */
+		if (!dir_emit(ctx, dire->u.name, nlen,
+			      ntohl(dire->u.vnode),
+			      (ctx->actor == afs_lookup_filldir ||
+			       ctx->actor == afs_lookup_one_filldir)?
+			      ntohl(dire->u.unique) : DT_UNKNOWN)) {
+			_leave(" = 0 [full]");
+			return 0;
+		}
+
+		ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent);
+	}
+
+	_leave(" = 1 [more]");
+	return 1;
+}
+
+/*
+ * iterate through the data blob that lists the contents of an AFS directory
+ *
+ * Reads and pins the directory through afs_read_dir(), stores the data
+ * version of that read in *_dir_version, rounds ctx->pos up to a dirent
+ * boundary and then feeds each directory block from that position onwards to
+ * afs_dir_iterate_block().  The validate_lock taken by afs_read_dir() is
+ * released and the read request put before returning.
+ *
+ * Returns -ESTALE if the directory has been flagged as deleted, the error
+ * from afs_read_dir() if the read fails, 0 if the end of the directory was
+ * reached or the actor stopped the walk, or an afs_bad() result if a block is
+ * malformed or a folio is missing.
+ *
+ * NOTE(review): __filemap_get_folio() is documented as returning the folio
+ * with its refcount raised, but there is no matching folio_put() in this
+ * function - confirm the reference is dropped elsewhere.
+ */
+static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
+ struct key *key, afs_dataversion_t *_dir_version)
+{
+ struct afs_vnode *dvnode = AFS_FS_I(dir);
+ union afs_xdr_dir_block *dblock;
+ struct afs_read *req;
+ struct folio *folio;
+ unsigned offset, size;
+ int ret;
+
+ _enter("{%lu},%u,,", dir->i_ino, (unsigned)ctx->pos);
+
+ if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dir)->flags)) {
+ _leave(" = -ESTALE");
+ return -ESTALE;
+ }
+
+ req = afs_read_dir(dvnode, key);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+ *_dir_version = req->data_version;
+
+ /* round the file position up to the next entry boundary */
+ ctx->pos += sizeof(union afs_xdr_dirent) - 1;
+ ctx->pos &= ~(sizeof(union afs_xdr_dirent) - 1);
+
+ /* walk through the blocks in sequence */
+ ret = 0;
+ while (ctx->pos < req->actual_len) {
+ /* Fetch the appropriate folio from the directory and re-add it
+ * to the LRU. We have all the pages pinned with an extra ref.
+ */
+ folio = __filemap_get_folio(dir->i_mapping, ctx->pos / PAGE_SIZE,
+ FGP_ACCESSED, 0);
+ if (!folio) {
+ ret = afs_bad(dvnode, afs_file_error_dir_missing_page);
+ break;
+ }
+
+ offset = round_down(ctx->pos, sizeof(*dblock)) - folio_file_pos(folio);
+ size = min_t(loff_t, folio_size(folio),
+ req->actual_len - folio_file_pos(folio));
+
+ do {
+ dblock = kmap_local_folio(folio, offset);
+ ret = afs_dir_iterate_block(dvnode, ctx, dblock,
+ folio_file_pos(folio) + offset);
+ kunmap_local(dblock);
+ if (ret != 1) /* 0 = actor stopped the walk, <0 = error */
+ goto out;
+
+ } while (offset += sizeof(*dblock), offset < size);
+
+ ret = 0;
+ }
+
+out:
+ up_read(&dvnode->validate_lock); /* taken by afs_read_dir() */
+ afs_put_read(req);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Read an AFS directory on behalf of an open file.  The walk is done with the
+ * key attached to the file; the directory data version that the walk reports
+ * is not needed here and is thrown away.
+ */
+static int afs_readdir(struct file *file, struct dir_context *ctx)
+{
+	struct inode *dir = file_inode(file);
+	struct key *key = afs_file_key(file);
+	afs_dataversion_t unused_version;
+
+	return afs_dir_iterate(dir, ctx, key, &unused_version);
+}
+
+/*
+ * Directory actor that looks for exactly one name.
+ * - if afs_dir_iterate_block() spots this function, it'll pass the FID
+ *   uniquifier through dtype
+ * - on a match the vnode ID and uniquifier are stored in the cookie and the
+ *   walk is stopped by returning false; otherwise true asks for more entries
+ */
+static bool afs_lookup_one_filldir(struct dir_context *ctx, const char *name,
+				   int nlen, loff_t fpos, u64 ino, unsigned dtype)
+{
+	struct afs_lookup_one_cookie *cookie;
+	bool matched;
+
+	cookie = container_of(ctx, struct afs_lookup_one_cookie, ctx);
+
+	_enter("{%s,%u},%s,%u,,%llu,%u",
+	       cookie->name.name, cookie->name.len, name, nlen,
+	       (unsigned long long) ino, dtype);
+
+	/* insanity checks first */
+	BUILD_BUG_ON(sizeof(union afs_xdr_dir_block) != 2048);
+	BUILD_BUG_ON(sizeof(union afs_xdr_dirent) != 32);
+
+	matched = cookie->name.len == nlen &&
+		  memcmp(cookie->name.name, name, nlen) == 0;
+	if (matched) {
+		cookie->fid.vnode = ino;
+		cookie->fid.unique = dtype;
+		cookie->found = 1;
+
+		_leave(" = false [found]");
+		return false;
+	}
+
+	_leave(" = true [keep looking]");
+	return true;
+}
+
+/*
+ * Look up a single name in a directory by walking its contents.
+ * - on success *fid receives the FID that the dentry name maps to and 0 is
+ *   returned
+ * - returns -ENOENT if the name is not present, or the negative error from
+ *   the directory walk
+ */
+static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry,
+			     struct afs_fid *fid, struct key *key,
+			     afs_dataversion_t *_dir_version)
+{
+	struct afs_super_info *sbi = dir->i_sb->s_fs_info;
+	struct afs_lookup_one_cookie cookie = {
+		.ctx.actor	= afs_lookup_one_filldir,
+		.name		= dentry->d_name,
+		.fid.vid	= sbi->volume->vid
+	};
+	int err;
+
+	_enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry);
+
+	/* search the directory */
+	err = afs_dir_iterate(dir, &cookie.ctx, key, _dir_version);
+	if (err < 0) {
+		_leave(" = %d [iter]", err);
+		return err;
+	}
+
+	if (cookie.found) {
+		*fid = cookie.fid;
+		_leave(" = 0 { vn=%llu u=%u }", fid->vnode, fid->unique);
+		return 0;
+	}
+
+	_leave(" = -ENOENT [not found]");
+	return -ENOENT;
+}
+
+/*
+ * search the directory for a name
+ * - if afs_dir_iterate_block() spots this function, it'll pass the FID
+ *   uniquifier through dtype
+ * - the matching entry is stored in fids[1]; entries seen after the match
+ *   are appended from index nr_fids upwards for as long as fids[] has room
+ * - returns false to stop the directory walk (match found in one_only mode,
+ *   or fids[] is full) and true to be given further entries
+ */
+static bool afs_lookup_filldir(struct dir_context *ctx, const char *name,
+			       int nlen, loff_t fpos, u64 ino, unsigned dtype)
+{
+	struct afs_lookup_cookie *cookie =
+		container_of(ctx, struct afs_lookup_cookie, ctx);
+
+	_enter("{%s,%u},%s,%u,,%llu,%u",
+	       cookie->name.name, cookie->name.len, name, nlen,
+	       (unsigned long long) ino, dtype);
+
+	/* insanity checks first */
+	BUILD_BUG_ON(sizeof(union afs_xdr_dir_block) != 2048);
+	BUILD_BUG_ON(sizeof(union afs_xdr_dirent) != 32);
+
+	if (cookie->found) {
+		/* Bound the append by the array itself rather than by a
+		 * repeated literal, as afs_do_lookup() does when it
+		 * initialises fids[].
+		 */
+		if (cookie->nr_fids < ARRAY_SIZE(cookie->fids)) {
+			cookie->fids[cookie->nr_fids].vnode = ino;
+			cookie->fids[cookie->nr_fids].unique = dtype;
+			cookie->nr_fids++;
+		}
+	} else if (cookie->name.len == nlen &&
+		   memcmp(cookie->name.name, name, nlen) == 0) {
+		cookie->fids[1].vnode = ino;
+		cookie->fids[1].unique = dtype;
+		cookie->found = 1;
+		if (cookie->one_only)
+			return false;
+	}
+
+	return cookie->nr_fids < ARRAY_SIZE(cookie->fids);
+}
+
+/*
+ * Deal with the result of a successful lookup operation. Turn all the files
+ * into inodes and save the first one - which is the one we actually want.
+ *
+ * Slots 0 and 1 are op->file[0] and op->file[1]; slots 2 and up come from
+ * op->more_files[].  An abort code recorded against slot 0 is converted into
+ * the operation's error.  Slots with neither a status nor an error are
+ * skipped.
+ */
+static void afs_do_lookup_success(struct afs_operation *op)
+{
+ struct afs_vnode_param *vp;
+ struct afs_vnode *vnode;
+ struct inode *inode;
+ u32 abort_code;
+ int i;
+
+ _enter("");
+
+ for (i = 0; i < op->nr_files; i++) {
+ switch (i) {
+ case 0:
+ vp = &op->file[0];
+ abort_code = vp->scb.status.abort_code;
+ if (abort_code != 0) {
+ op->ac.abort_code = abort_code;
+ op->error = afs_abort_to_error(abort_code);
+ }
+ break;
+
+ case 1:
+ vp = &op->file[1];
+ break;
+
+ default:
+ vp = &op->more_files[i - 2];
+ break;
+ }
+
+ if (!vp->scb.have_status && !vp->scb.have_error)
+ continue;
+
+ _debug("do [%u]", i);
+ if (vp->vnode) { /* slot already has a vnode attached */
+ if (!test_bit(AFS_VNODE_UNSET, &vp->vnode->flags))
+ afs_vnode_commit_status(op, vp);
+ } else if (vp->scb.status.abort_code == 0) { /* no vnode yet and no abort: get an inode */
+ inode = afs_iget(op, vp);
+ if (!IS_ERR(inode)) {
+ vnode = AFS_FS_I(inode);
+ afs_cache_permit(vnode, op->key,
+ 0 /* Assume vnode->cb_break is 0 */ +
+ op->cb_v_break,
+ &vp->scb);
+ vp->vnode = vnode;
+ vp->put_vnode = true;
+ }
+ } else { /* this entry was aborted individually; just trace it */
+ _debug("- abort %d %llx:%llx.%x",
+ vp->scb.status.abort_code,
+ vp->fid.vid, vp->fid.vnode, vp->fid.unique);
+ }
+ }
+
+ _leave("");
+}
+
+/* Operation table that afs_do_lookup() uses for its bulk status attempt. */
+static const struct afs_operation_ops afs_inline_bulk_status_operation = {
+ .issue_afs_rpc = afs_fs_inline_bulk_status,
+ .issue_yfs_rpc = yfs_fs_inline_bulk_status,
+ .success = afs_do_lookup_success,
+};
+
+/*
+ * Operation table that afs_do_lookup() falls back to when the bulk attempt
+ * was skipped or left op->error at -ENOTSUPP: a single status fetch.
+ */
+static const struct afs_operation_ops afs_lookup_fetch_status_operation = {
+ .issue_afs_rpc = afs_fs_fetch_status,
+ .issue_yfs_rpc = yfs_fs_fetch_status,
+ .success = afs_do_lookup_success,
+ .aborted = afs_check_for_remote_deletion,
+};
+
+/*
+ * Work out whether the server we expect to talk to is known not to support
+ * FS.InlineBulkStatus.
+ *
+ * Returns true unless the volume is flagged as possibly lacking support and
+ * the server matching dvnode->cb_server in the volume's server list carries
+ * the no-ibulk flag.
+ */
+static bool afs_server_supports_ibulk(struct afs_vnode *dvnode)
+{
+	struct afs_volume *volume = dvnode->volume;
+	struct afs_server_list *slist;
+	bool supported = true;
+	int ix;
+
+	if (!test_bit(AFS_VOLUME_MAYBE_NO_IBULK, &volume->flags))
+		return true;
+
+	rcu_read_lock();
+	slist = rcu_dereference(volume->servers);
+
+	for (ix = 0; ix < slist->nr_servers; ix++) {
+		struct afs_server *server = slist->servers[ix].server;
+
+		if (server != dvnode->cb_server)
+			continue;
+
+		/* Only the first matching server is consulted. */
+		supported = !test_bit(AFS_SERVER_FL_NO_IBULK, &server->flags);
+		break;
+	}
+
+	rcu_read_unlock();
+	return supported;
+}
+
+/*
+ * Do a lookup in a directory. We make use of bulk lookup to query a slew of
+ * files in one go and create inodes for them. The inode of the file we were
+ * asked for is returned.
+ *
+ * Returns the inode if one was found or created, otherwise ERR_PTR() of the
+ * failure (-ENOENT if the name is not in the directory).  dentry->d_fsdata is
+ * updated with a directory data version along the way.
+ */
+static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
+ struct key *key)
+{
+ struct afs_lookup_cookie *cookie;
+ struct afs_vnode_param *vp;
+ struct afs_operation *op;
+ struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
+ struct inode *inode = NULL, *ti;
+ afs_dataversion_t data_version = READ_ONCE(dvnode->status.data_version);
+ long ret;
+ int i;
+
+ _enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry);
+
+ cookie = kzalloc(sizeof(struct afs_lookup_cookie), GFP_KERNEL);
+ if (!cookie)
+ return ERR_PTR(-ENOMEM);
+
+ for (i = 0; i < ARRAY_SIZE(cookie->fids); i++)
+ cookie->fids[i].vid = dvnode->fid.vid;
+ cookie->ctx.actor = afs_lookup_filldir;
+ cookie->name = dentry->d_name;
+ cookie->nr_fids = 2; /* slot 0 is saved for the directory and slot 1
+ * for the fid we actually want */
+
+ if (!afs_server_supports_ibulk(dvnode))
+ cookie->one_only = true;
+
+ /* search the directory */
+ ret = afs_dir_iterate(dir, &cookie->ctx, key, &data_version);
+ if (ret < 0)
+ goto out;
+
+ dentry->d_fsdata = (void *)(unsigned long)data_version;
+
+ ret = -ENOENT;
+ if (!cookie->found)
+ goto out;
+
+ /* Check to see if we already have an inode for the primary fid. */
+ inode = ilookup5(dir->i_sb, cookie->fids[1].vnode,
+ afs_ilookup5_test_by_fid, &cookie->fids[1]);
+ if (inode)
+ goto out; /* We do */
+
+ /* Okay, we didn't find it. We need to query the server - and whilst
+ * we're doing that, we're going to attempt to look up a bunch of other
+ * vnodes also.
+ */
+ op = afs_alloc_operation(NULL, dvnode->volume);
+ if (IS_ERR(op)) {
+ ret = PTR_ERR(op);
+ goto out;
+ }
+
+ afs_op_set_vnode(op, 0, dvnode);
+ afs_op_set_fid(op, 1, &cookie->fids[1]);
+
+ op->nr_files = cookie->nr_fids;
+ _debug("nr_files %u", op->nr_files);
+
+ /* Need space for examining all the selected files */
+ op->error = -ENOMEM;
+ if (op->nr_files > 2) {
+ op->more_files = kvcalloc(op->nr_files - 2,
+ sizeof(struct afs_vnode_param),
+ GFP_KERNEL);
+ if (!op->more_files)
+ goto out_op;
+
+ for (i = 2; i < op->nr_files; i++) {
+ vp = &op->more_files[i - 2];
+ vp->fid = cookie->fids[i];
+
+ /* Find any inodes that already exist and get their
+ * callback counters.
+ */
+ ti = ilookup5_nowait(dir->i_sb, vp->fid.vnode,
+ afs_ilookup5_test_by_fid, &vp->fid);
+ if (!IS_ERR_OR_NULL(ti)) {
+ vnode = AFS_FS_I(ti);
+ vp->dv_before = vnode->status.data_version;
+ vp->cb_break_before = afs_calc_vnode_cb_break(vnode);
+ vp->vnode = vnode;
+ vp->put_vnode = true;
+ vp->speculative = true; /* vnode not locked */
+ }
+ }
+ }
+
+ /* Try FS.InlineBulkStatus first. Abort codes for the individual
+ * lookups contained therein are stored in the reply without aborting
+ * the whole operation.
+ */
+ op->error = -ENOTSUPP; /* stays set if the bulk attempt is skipped, selecting the fallback below */
+ if (!cookie->one_only) {
+ op->ops = &afs_inline_bulk_status_operation;
+ afs_begin_vnode_operation(op);
+ afs_wait_for_operation(op);
+ }
+
+ if (op->error == -ENOTSUPP) {
+ /* We could try FS.BulkStatus next, but this aborts the entire
+ * op if any of the lookups fails - so, for the moment, revert
+ * to FS.FetchStatus for op->file[1].
+ */
+ op->fetch_status.which = 1;
+ op->ops = &afs_lookup_fetch_status_operation;
+ afs_begin_vnode_operation(op);
+ afs_wait_for_operation(op);
+ }
+ inode = ERR_PTR(op->error);
+
+out_op:
+ if (op->error == 0) {
+ /* Take over the vnode from slot 1 so that it is not released
+ * with the operation.
+ */
+ inode = &op->file[1].vnode->netfs.inode;
+ op->file[1].vnode = NULL;
+ }
+
+ if (op->file[0].scb.have_status)
+ dentry->d_fsdata = (void *)(unsigned long)op->file[0].scb.status.data_version;
+ else
+ dentry->d_fsdata = (void *)(unsigned long)op->file[0].dv_before;
+ ret = afs_put_operation(op);
+out:
+ kfree(cookie);
+ _leave("");
+ return inode ?: ERR_PTR(ret); /* a NULL inode means report ret instead */
+}
+
+/*
+ * Look up an entry in a directory with @sys substitution.
+ *
+ * The trailing four characters of the dentry name are replaced by each
+ * configured sysname in turn and the result is looked up in the parent.
+ * Returns the first positive dentry found, an ERR_PTR() on failure, or NULL
+ * if no substitute produced a positive dentry.  The reference on @key is
+ * dropped on every path.
+ */
+static struct dentry *afs_lookup_atsys(struct inode *dir, struct dentry *dentry,
+ struct key *key)
+{
+ struct afs_sysnames *subs;
+ struct afs_net *net = afs_i2net(dir);
+ struct dentry *ret;
+ char *buf, *p, *name;
+ int len, i;
+
+ _enter("");
+
+ ret = ERR_PTR(-ENOMEM);
+ p = buf = kmalloc(AFSNAMEMAX, GFP_KERNEL);
+ if (!buf)
+ goto out_p;
+ if (dentry->d_name.len > 4) { /* copy the part of the name that precedes the 4-char suffix */
+ memcpy(p, dentry->d_name.name, dentry->d_name.len - 4);
+ p += dentry->d_name.len - 4;
+ }
+
+ /* There is an ordered list of substitutes that we have to try. */
+ read_lock(&net->sysnames_lock);
+ subs = net->sysnames;
+ refcount_inc(&subs->usage);
+ read_unlock(&net->sysnames_lock);
+
+ for (i = 0; i < subs->nr; i++) {
+ name = subs->subs[i];
+ len = dentry->d_name.len - 4 + strlen(name);
+ if (len >= AFSNAMEMAX) { /* checked before strcpy() so buf cannot overflow */
+ ret = ERR_PTR(-ENAMETOOLONG);
+ goto out_s;
+ }
+
+ strcpy(p, name);
+ ret = lookup_one_len(buf, dentry->d_parent, len);
+ if (IS_ERR(ret) || d_is_positive(ret))
+ goto out_s;
+ dput(ret);
+ }
+
+ /* We don't want to d_add() the @sys dentry here as we don't want the
+ * cached dentry to hide changes to the sysnames list.
+ */
+ ret = NULL;
+out_s:
+ afs_put_sysnames(subs);
+ kfree(buf);
+out_p:
+ key_put(key);
+ return ret;
+}
+
+/*
+ * look up an entry in a directory
+ *
+ * Rejects over-long names and deleted directories, validates the directory
+ * with a freshly requested key, then either hands names ending in "@sys" to
+ * afs_lookup_atsys() or does the lookup via afs_do_lookup() and splices the
+ * resulting inode into the dcache.  Returns whatever d_splice_alias() or
+ * afs_lookup_atsys() returned, or an ERR_PTR() on early failure.
+ */
+static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
+ unsigned int flags)
+{
+ struct afs_vnode *dvnode = AFS_FS_I(dir);
+ struct afs_fid fid = {};
+ struct inode *inode;
+ struct dentry *d;
+ struct key *key;
+ int ret;
+
+ _enter("{%llx:%llu},%p{%pd},",
+ dvnode->fid.vid, dvnode->fid.vnode, dentry, dentry);
+
+ ASSERTCMP(d_inode(dentry), ==, NULL);
+
+ if (dentry->d_name.len >= AFSNAMEMAX) {
+ _leave(" = -ENAMETOOLONG");
+ return ERR_PTR(-ENAMETOOLONG);
+ }
+
+ if (test_bit(AFS_VNODE_DELETED, &dvnode->flags)) {
+ _leave(" = -ESTALE");
+ return ERR_PTR(-ESTALE);
+ }
+
+ key = afs_request_key(dvnode->volume->cell);
+ if (IS_ERR(key)) {
+ _leave(" = %ld [key]", PTR_ERR(key));
+ return ERR_CAST(key);
+ }
+
+ ret = afs_validate(dvnode, key);
+ if (ret < 0) {
+ key_put(key);
+ _leave(" = %d [val]", ret);
+ return ERR_PTR(ret);
+ }
+
+ if (dentry->d_name.len >= 4 &&
+ dentry->d_name.name[dentry->d_name.len - 4] == '@' &&
+ dentry->d_name.name[dentry->d_name.len - 3] == 's' &&
+ dentry->d_name.name[dentry->d_name.len - 2] == 'y' &&
+ dentry->d_name.name[dentry->d_name.len - 1] == 's')
+ return afs_lookup_atsys(dir, dentry, key); /* callee drops the key */
+
+ afs_stat_v(dvnode, n_lookup);
+ inode = afs_do_lookup(dir, dentry, key);
+ key_put(key);
+ if (inode == ERR_PTR(-ENOENT))
+ inode = afs_try_auto_mntpt(dentry, dir);
+
+ if (!IS_ERR_OR_NULL(inode))
+ fid = AFS_FS_I(inode)->fid; /* otherwise the zeroed fid is traced */
+
+ _debug("splice %p", dentry->d_inode);
+ d = d_splice_alias(inode, dentry);
+ if (!IS_ERR_OR_NULL(d)) {
+ /* A different dentry was returned: carry the version over. */
+ d->d_fsdata = dentry->d_fsdata;
+ trace_afs_lookup(dvnode, &d->d_name, &fid);
+ } else {
+ trace_afs_lookup(dvnode, &dentry->d_name, &fid);
+ }
+ _leave("");
+ return d;
+}
+
+/*
+ * Check the validity of a dentry under RCU conditions.
+ *
+ * Returns 1 if the dentry can still be trusted, or -ECHILD whenever that
+ * cannot be established here (no parent inode, parent deleted, parent fails
+ * afs_check_validity(), or the dentry's version predates the directory's
+ * invalid_before mark).
+ */
+static int afs_d_revalidate_rcu(struct dentry *dentry)
+{
+ struct afs_vnode *dvnode;
+ struct dentry *parent;
+ struct inode *dir;
+ long dir_version, de_version;
+
+ _enter("%p", dentry);
+
+ /* Check the parent directory is still valid first. */
+ parent = READ_ONCE(dentry->d_parent);
+ dir = d_inode_rcu(parent);
+ if (!dir)
+ return -ECHILD;
+ dvnode = AFS_FS_I(dir);
+ if (test_bit(AFS_VNODE_DELETED, &dvnode->flags))
+ return -ECHILD;
+
+ if (!afs_check_validity(dvnode))
+ return -ECHILD;
+
+ /* We only need to invalidate a dentry if the server's copy changed
+ * behind our back. If we made the change, it's no problem. Note that
+ * on a 32-bit system, we only have 32 bits in the dentry to store the
+ * version.
+ */
+ dir_version = (long)READ_ONCE(dvnode->status.data_version);
+ de_version = (long)READ_ONCE(dentry->d_fsdata);
+ if (de_version != dir_version) {
+ dir_version = (long)READ_ONCE(dvnode->invalid_before);
+ if (de_version - dir_version < 0) /* signed difference, so the comparison is relative rather than absolute */
+ return -ECHILD;
+ }
+
+ return 1; /* Still valid */
+}
+
+/*
+ * check that a dentry lookup hit has found a valid entry
+ * - NOTE! the hit can be a negative hit too, so we can't assume we have an
+ * inode
+ * - returns 1 if the dentry is still valid and 0 if it should be dropped;
+ * with LOOKUP_RCU set the decision is delegated to afs_d_revalidate_rcu()
+ */
+static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ struct afs_vnode *vnode, *dir;
+ struct afs_fid fid;
+ struct dentry *parent;
+ struct inode *inode;
+ struct key *key;
+ afs_dataversion_t dir_version, invalid_before;
+ long de_version;
+ int ret;
+
+ if (flags & LOOKUP_RCU)
+ return afs_d_revalidate_rcu(dentry);
+
+ if (d_really_is_positive(dentry)) {
+ vnode = AFS_FS_I(d_inode(dentry));
+ _enter("{v={%llx:%llu} n=%pd fl=%lx},",
+ vnode->fid.vid, vnode->fid.vnode, dentry,
+ vnode->flags);
+ } else {
+ _enter("{neg n=%pd}", dentry);
+ }
+
+ key = afs_request_key(AFS_FS_S(dentry->d_sb)->volume->cell);
+ if (IS_ERR(key))
+ key = NULL; /* carry on without a key rather than failing */
+
+ /* Hold the parent dentry so we can peer at it */
+ parent = dget_parent(dentry);
+ dir = AFS_FS_I(d_inode(parent));
+
+ /* validate the parent directory */
+ afs_validate(dir, key); /* result deliberately not checked here */
+
+ if (test_bit(AFS_VNODE_DELETED, &dir->flags)) {
+ _debug("%pd: parent dir deleted", dentry);
+ goto not_found;
+ }
+
+ /* We only need to invalidate a dentry if the server's copy changed
+ * behind our back. If we made the change, it's no problem. Note that
+ * on a 32-bit system, we only have 32 bits in the dentry to store the
+ * version.
+ */
+ dir_version = dir->status.data_version;
+ de_version = (long)dentry->d_fsdata;
+ if (de_version == (long)dir_version)
+ goto out_valid_noupdate;
+
+ invalid_before = dir->invalid_before;
+ if (de_version - (long)invalid_before >= 0)
+ goto out_valid;
+
+ _debug("dir modified");
+ afs_stat_v(dir, n_reval);
+
+ /* search the directory for this vnode */
+ ret = afs_do_lookup_one(&dir->netfs.inode, dentry, &fid, key, &dir_version);
+ switch (ret) {
+ case 0:
+ /* the filename maps to something */
+ if (d_really_is_negative(dentry))
+ goto not_found;
+ inode = d_inode(dentry);
+ if (is_bad_inode(inode)) {
+ printk("kAFS: afs_d_revalidate: %pd2 has bad inode\n",
+ dentry);
+ goto not_found;
+ }
+
+ vnode = AFS_FS_I(inode);
+
+ /* if the vnode ID has changed, then the dirent points to a
+ * different file */
+ if (fid.vnode != vnode->fid.vnode) {
+ _debug("%pd: dirent changed [%llu != %llu]",
+ dentry, fid.vnode,
+ vnode->fid.vnode);
+ goto not_found;
+ }
+
+ /* if the vnode ID uniquifier has changed, then the file has
+ * been deleted and replaced, and the original vnode ID has
+ * been reused */
+ if (fid.unique != vnode->fid.unique) {
+ _debug("%pd: file deleted (uq %u -> %u I:%u)",
+ dentry, fid.unique,
+ vnode->fid.unique,
+ vnode->netfs.inode.i_generation);
+ goto not_found;
+ }
+ goto out_valid;
+
+ case -ENOENT:
+ /* the filename is unknown */
+ _debug("%pd: dirent not found", dentry);
+ if (d_really_is_positive(dentry))
+ goto not_found;
+ goto out_valid; /* a negative dentry for an absent name is still correct */
+
+ default:
+ _debug("failed to iterate dir %pd: %d",
+ parent, ret);
+ goto not_found;
+ }
+
+out_valid:
+ dentry->d_fsdata = (void *)(unsigned long)dir_version; /* remember the version we validated against */
+out_valid_noupdate:
+ dput(parent);
+ key_put(key);
+ _leave(" = 1 [valid]");
+ return 1;
+
+not_found:
+ _debug("dropping dentry %pd2", dentry);
+ dput(parent);
+ key_put(key);
+
+ _leave(" = 0 [bad]");
+ return 0;
+}
+
+/*
+ * allow the VFS to enquire as to whether a dentry should be unhashed (mustn't
+ * sleep)
+ * - called from dput() when d_count is going to 0.
+ * - return 1 to request dentry be unhashed, 0 otherwise
+ * - a dentry is zapped if it is flagged DCACHE_NFSFS_RENAMED, or if it is
+ *   positive and its vnode is flagged deleted or as a pseudo-directory
+ */
+static int afs_d_delete(const struct dentry *dentry)
+{
+	bool zap = false;
+
+	_enter("%pd", dentry);
+
+	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
+		zap = true;
+	} else if (d_really_is_positive(dentry)) {
+		struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
+
+		zap = test_bit(AFS_VNODE_DELETED, &vnode->flags) ||
+		      test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags);
+	}
+
+	if (zap) {
+		_leave(" = 1 [zap]");
+		return 1;
+	}
+
+	_leave(" = 0 [keep]");
+	return 0;
+}
+
+/*
+ * Release a dentry's inode.  A dentry flagged DCACHE_NFSFS_RENAMED is passed
+ * to afs_silly_iput() first so that the sillyrename can be cleaned up.
+ */
+static void afs_d_iput(struct dentry *dentry, struct inode *inode)
+{
+	const bool sillyrenamed = dentry->d_flags & DCACHE_NFSFS_RENAMED;
+
+	if (sillyrenamed)
+		afs_silly_iput(dentry, inode);
+
+	iput(inode);
+}
+
+/*
+ * handle dentry release
+ * - nothing to clean up here; the body only emits an entry trace
+ */
+void afs_d_release(struct dentry *dentry)
+{
+ _enter("%pd", dentry);
+}
+
+/*
+ * Abort handler: if the operation was aborted with VNOVNODE, flag the vnode
+ * in slot 0 as deleted and break its callback.  Other abort codes are left
+ * alone.
+ */
+void afs_check_for_remote_deletion(struct afs_operation *op)
+{
+	struct afs_vnode *vnode = op->file[0].vnode;
+
+	if (op->ac.abort_code != VNOVNODE)
+		return;
+
+	set_bit(AFS_VNODE_DELETED, &vnode->flags);
+	afs_break_callback(vnode, afs_cb_break_for_deleted);
+}
+
+/*
+ * Create a new inode for create/mkdir/symlink
+ *
+ * The inode is built from slot 1 of the operation and attached to op->dentry.
+ * If the inode cannot be obtained, the error is recorded in op->error and the
+ * dentry is left uninstantiated.
+ */
+static void afs_vnode_new_inode(struct afs_operation *op)
+{
+ struct afs_vnode_param *vp = &op->file[1];
+ struct afs_vnode *vnode;
+ struct inode *inode;
+
+ _enter("");
+
+ ASSERTCMP(op->error, ==, 0);
+
+ inode = afs_iget(op, vp);
+ if (IS_ERR(inode)) {
+ /* ENOMEM or EINTR at a really inconvenient time - just abandon
+ * the newly created object on the server.
+ */
+ op->error = PTR_ERR(inode);
+ return;
+ }
+
+ vnode = AFS_FS_I(inode);
+ set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
+ if (!op->error)
+ afs_cache_permit(vnode, op->key, vnode->cb_break, &vp->scb);
+ d_instantiate(op->dentry, inode);
+}
+
+/*
+ * Success handler shared by create, mkdir and symlink: take the ctime from
+ * the directory's returned status, commit that status, refresh the dentry's
+ * version and then build the inode for the new object.
+ */
+static void afs_create_success(struct afs_operation *op)
+{
+	struct afs_vnode_param *dvp = &op->file[0];
+
+	_enter("op=%08x", op->debug_id);
+
+	op->ctime = dvp->scb.status.mtime_client;
+	afs_vnode_commit_status(op, dvp);
+	afs_update_dentry_version(op, dvp, op->dentry);
+	afs_vnode_new_inode(op);
+}
+
+/*
+ * Add the new entry to the locally held directory contents, but only if that
+ * copy is valid and its data version is exactly the pre-operation version
+ * plus the expected delta.
+ */
+static void afs_create_edit_dir(struct afs_operation *op)
+{
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_vnode_param *vp = &op->file[1];
+	struct afs_vnode *dvnode = dvp->vnode;
+	bool dir_in_step;
+
+	_enter("op=%08x", op->debug_id);
+
+	down_write(&dvnode->validate_lock);
+	dir_in_step = test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+		      dvnode->status.data_version == dvp->dv_before + dvp->dv_delta;
+	if (dir_in_step)
+		afs_edit_dir_add(dvnode, &op->dentry->d_name, &vp->fid,
+				 op->create.reason);
+	up_write(&dvnode->validate_lock);
+}
+
+/*
+ * Final cleanup for create-type operations: a failed operation has its
+ * dentry dropped from the dcache.
+ */
+static void afs_create_put(struct afs_operation *op)
+{
+	_enter("op=%08x", op->debug_id);
+
+	if (!op->error)
+		return;
+
+	d_drop(op->dentry);
+}
+
+/* Operation table installed by afs_mkdir(). */
+static const struct afs_operation_ops afs_mkdir_operation = {
+ .issue_afs_rpc = afs_fs_make_dir,
+ .issue_yfs_rpc = yfs_fs_make_dir,
+ .success = afs_create_success,
+ .aborted = afs_check_for_remote_deletion,
+ .edit_dir = afs_create_edit_dir,
+ .put = afs_create_put,
+};
+
+/*
+ * Create a directory on an AFS filesystem.
+ *
+ * Sets up an operation on the parent directory (one expected data version
+ * bump) and runs it synchronously.  If the operation cannot be allocated,
+ * the dentry is dropped and the allocation error returned.
+ */
+static int afs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+		     struct dentry *dentry, umode_t mode)
+{
+	struct afs_vnode *dvnode = AFS_FS_I(dir);
+	struct afs_vnode_param *dvp;
+	struct afs_operation *op;
+
+	_enter("{%llx:%llu},{%pd},%ho",
+	       dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
+
+	op = afs_alloc_operation(NULL, dvnode->volume);
+	if (IS_ERR(op)) {
+		d_drop(dentry);
+		return PTR_ERR(op);
+	}
+
+	afs_op_set_vnode(op, 0, dvnode);
+
+	dvp = &op->file[0];
+	dvp->dv_delta = 1;
+	dvp->modification = true;
+	dvp->update_ctime = true;
+
+	op->dentry	  = dentry;
+	op->create.mode	  = S_IFDIR | mode;
+	op->create.reason = afs_edit_dir_for_mkdir;
+	op->mtime	  = current_time(dir);
+	op->ops		  = &afs_mkdir_operation;
+
+	return afs_do_sync_operation(op);
+}
+
+/*
+ * Mark the vnode behind a removed subdirectory as gone: zero its link count,
+ * flag it deleted and clear its callback-promised and dir-valid flags.
+ * Negative dentries are left alone.
+ */
+static void afs_dir_remove_subdir(struct dentry *dentry)
+{
+	struct afs_vnode *vnode;
+
+	if (d_really_is_negative(dentry))
+		return;
+
+	vnode = AFS_FS_I(d_inode(dentry));
+
+	clear_nlink(&vnode->netfs.inode);
+	set_bit(AFS_VNODE_DELETED, &vnode->flags);
+	clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+	clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+}
+
+/*
+ * Success handler for rmdir: take the ctime from the parent directory's
+ * returned status, commit that status and refresh the dentry's version.
+ */
+static void afs_rmdir_success(struct afs_operation *op)
+{
+	struct afs_vnode_param *dvp = &op->file[0];
+
+	_enter("op=%08x", op->debug_id);
+
+	op->ctime = dvp->scb.status.mtime_client;
+	afs_vnode_commit_status(op, dvp);
+	afs_update_dentry_version(op, dvp, op->dentry);
+}
+
+/*
+ * Local follow-up to a successful rmdir: mark the victim's vnode as removed,
+ * then delete its entry from the locally held directory contents if that
+ * copy is valid and at exactly the expected data version.
+ */
+static void afs_rmdir_edit_dir(struct afs_operation *op)
+{
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_vnode *dvnode = dvp->vnode;
+	bool dir_in_step;
+
+	_enter("op=%08x", op->debug_id);
+	afs_dir_remove_subdir(op->dentry);
+
+	down_write(&dvnode->validate_lock);
+	dir_in_step = test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+		      dvnode->status.data_version == dvp->dv_before + dvp->dv_delta;
+	if (dir_in_step)
+		afs_edit_dir_remove(dvnode, &op->dentry->d_name,
+				    afs_edit_dir_for_rmdir);
+	up_write(&dvnode->validate_lock);
+}
+
+/*
+ * Final cleanup for rmdir: release the victim's rmdir_lock if afs_rmdir()
+ * attached the victim to slot 1 (which it only does after taking the lock).
+ */
+static void afs_rmdir_put(struct afs_operation *op)
+{
+	struct afs_vnode *victim = op->file[1].vnode;
+
+	_enter("op=%08x", op->debug_id);
+
+	if (!victim)
+		return;
+
+	up_write(&victim->rmdir_lock);
+}
+
+/* Operation table installed by afs_rmdir(). */
+static const struct afs_operation_ops afs_rmdir_operation = {
+ .issue_afs_rpc = afs_fs_remove_dir,
+ .issue_yfs_rpc = yfs_fs_remove_dir,
+ .success = afs_rmdir_success,
+ .aborted = afs_check_for_remote_deletion,
+ .edit_dir = afs_rmdir_edit_dir,
+ .put = afs_rmdir_put,
+};
+
+/*
+ * remove a directory from an AFS filesystem
+ *
+ * If the victim dentry is positive, its vnode is validated and its
+ * rmdir_lock is taken for writing before the operation is run; the lock is
+ * released by afs_rmdir_put() once the vnode is attached to slot 1.
+ *
+ * Returns the result of the synchronous operation, or the error from
+ * validation or from the interrupted lock attempt.
+ */
+static int afs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	struct afs_operation *op;
+	struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL;
+	int ret;
+
+	_enter("{%llx:%llu},{%pd}",
+	       dvnode->fid.vid, dvnode->fid.vnode, dentry);
+
+	op = afs_alloc_operation(NULL, dvnode->volume);
+	if (IS_ERR(op))
+		return PTR_ERR(op);
+
+	afs_op_set_vnode(op, 0, dvnode);
+	op->file[0].dv_delta = 1;
+	op->file[0].modification = true;
+	op->file[0].update_ctime = true;
+
+	op->dentry	= dentry;
+	op->ops		= &afs_rmdir_operation;
+
+	/* Try to make sure we have a callback promise on the victim. */
+	if (d_really_is_positive(dentry)) {
+		vnode = AFS_FS_I(d_inode(dentry));
+		ret = afs_validate(vnode, op->key);
+		if (ret < 0)
+			goto error;
+	}
+
+	if (vnode) {
+		ret = down_write_killable(&vnode->rmdir_lock);
+		if (ret < 0)
+			goto error;
+		op->file[1].vnode = vnode;
+	}
+
+	return afs_do_sync_operation(op);
+
+error:
+	/* Record the failure in the operation, as afs_unlink() does, so that
+	 * it is not discarded when the operation is put.
+	 */
+	op->error = ret;
+	return afs_put_operation(op);
+}
+
+/*
+ * Remove a link to a file or symlink from a directory.
+ *
+ * If the file was not deleted due to excess hard links, the fileserver will
+ * break the callback promise on the file - if it had one - before it returns
+ * to us, and if it was deleted, it won't
+ *
+ * However, if we didn't have a callback promise outstanding, or it was
+ * outstanding on a different server, then it won't break it either...
+ *
+ * Nothing is done if the operation failed, if slot 1 reports both a status
+ * and an error, or if the dentry is positive.
+ */
+static void afs_dir_remove_link(struct afs_operation *op)
+{
+ struct afs_vnode *dvnode = op->file[0].vnode;
+ struct afs_vnode *vnode = op->file[1].vnode;
+ struct dentry *dentry = op->dentry;
+ int ret;
+
+ if (op->error != 0 ||
+ (op->file[1].scb.have_status && op->file[1].scb.have_error))
+ return;
+ if (d_really_is_positive(dentry))
+ return;
+
+ if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
+ /* Already done */
+ } else if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
+ /* Adjust the link count locally; at zero links the vnode is
+ * flagged deleted and its callback broken.
+ */
+ write_seqlock(&vnode->cb_lock);
+ drop_nlink(&vnode->netfs.inode);
+ if (vnode->netfs.inode.i_nlink == 0) {
+ set_bit(AFS_VNODE_DELETED, &vnode->flags);
+ __afs_break_callback(vnode, afs_cb_break_for_unlink);
+ }
+ write_sequnlock(&vnode->cb_lock);
+ } else {
+ /* Otherwise break the callback and revalidate the vnode. */
+ afs_break_callback(vnode, afs_cb_break_for_unlink);
+
+ if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+ _debug("AFS_VNODE_DELETED");
+
+ ret = afs_validate(vnode, op->key);
+ if (ret != -ESTALE) /* -ESTALE is not treated as a failure of the unlink */
+ op->error = ret;
+ }
+
+ _debug("nlink %d [val %d]", vnode->netfs.inode.i_nlink, op->error);
+}
+
+/*
+ * Success handler for unlink: check the directory for a conflicting change,
+ * commit the returned status of both the directory and the victim, refresh
+ * the dentry's version and then fix up the victim's link state.
+ */
+static void afs_unlink_success(struct afs_operation *op)
+{
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_vnode_param *vp = &op->file[1];
+
+	_enter("op=%08x", op->debug_id);
+
+	op->ctime = dvp->scb.status.mtime_client;
+	afs_check_dir_conflict(op, dvp);
+	afs_vnode_commit_status(op, dvp);
+	afs_vnode_commit_status(op, vp);
+	afs_update_dentry_version(op, dvp, op->dentry);
+	afs_dir_remove_link(op);
+}
+
+/*
+ * Remove the unlinked entry from the locally held directory contents, but
+ * only if that copy is valid and at exactly the expected data version.
+ */
+static void afs_unlink_edit_dir(struct afs_operation *op)
+{
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_vnode *dvnode = dvp->vnode;
+	bool dir_in_step;
+
+	_enter("op=%08x", op->debug_id);
+
+	down_write(&dvnode->validate_lock);
+	dir_in_step = test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+		      dvnode->status.data_version == dvp->dv_before + dvp->dv_delta;
+	if (dir_in_step)
+		afs_edit_dir_remove(dvnode, &op->dentry->d_name,
+				    afs_edit_dir_for_unlink);
+	up_write(&dvnode->validate_lock);
+}
+
+/*
+ * Final cleanup for unlink: if afs_unlink() unhashed the dentry and the
+ * operation then failed with something other than -ENOENT, put the dentry
+ * back in the hash.
+ */
+static void afs_unlink_put(struct afs_operation *op)
+{
+	_enter("op=%08x", op->debug_id);
+
+	if (!op->unlink.need_rehash)
+		return;
+
+	if (op->error < 0 && op->error != -ENOENT)
+		d_rehash(op->dentry);
+}
+
+/* Operation table installed by afs_unlink(). */
+static const struct afs_operation_ops afs_unlink_operation = {
+ .issue_afs_rpc = afs_fs_remove_file,
+ .issue_yfs_rpc = yfs_fs_remove_file,
+ .success = afs_unlink_success,
+ .aborted = afs_check_for_remote_deletion,
+ .edit_dir = afs_unlink_edit_dir,
+ .put = afs_unlink_put,
+};
+
+/*
+ * Remove a file or symlink from an AFS filesystem.
+ *
+ * If the dentry has other users (d_count > 1) the file is sillyrenamed
+ * instead of being removed.  Otherwise the dentry is unhashed for the
+ * duration of the operation and the removal is issued; if a directory
+ * conflict is flagged afterwards, the victim's status is fetched as well.
+ * Every path after allocation ends in afs_put_operation(), whose result is
+ * returned.
+ */
+static int afs_unlink(struct inode *dir, struct dentry *dentry)
+{
+ struct afs_operation *op;
+ struct afs_vnode *dvnode = AFS_FS_I(dir);
+ struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
+ int ret;
+
+ _enter("{%llx:%llu},{%pd}",
+ dvnode->fid.vid, dvnode->fid.vnode, dentry);
+
+ if (dentry->d_name.len >= AFSNAMEMAX)
+ return -ENAMETOOLONG;
+
+ op = afs_alloc_operation(NULL, dvnode->volume);
+ if (IS_ERR(op))
+ return PTR_ERR(op);
+
+ afs_op_set_vnode(op, 0, dvnode);
+ op->file[0].dv_delta = 1;
+ op->file[0].modification = true;
+ op->file[0].update_ctime = true;
+
+ /* Try to make sure we have a callback promise on the victim. */
+ ret = afs_validate(vnode, op->key);
+ if (ret < 0) {
+ op->error = ret; /* recorded so that the put below reports it */
+ goto error;
+ }
+
+ spin_lock(&dentry->d_lock);
+ if (d_count(dentry) > 1) {
+ spin_unlock(&dentry->d_lock);
+ /* Start asynchronous writeout of the inode */
+ write_inode_now(d_inode(dentry), 0);
+ op->error = afs_sillyrename(dvnode, vnode, dentry, op->key);
+ goto error;
+ }
+ if (!d_unhashed(dentry)) {
+ /* Prevent a race with RCU lookup. */
+ __d_drop(dentry);
+ op->unlink.need_rehash = true; /* afs_unlink_put() rehashes on failure */
+ }
+ spin_unlock(&dentry->d_lock);
+
+ op->file[1].vnode = vnode;
+ op->file[1].update_ctime = true;
+ op->file[1].op_unlinked = true;
+ op->dentry = dentry;
+ op->ops = &afs_unlink_operation;
+ afs_begin_vnode_operation(op);
+ afs_wait_for_operation(op);
+
+ /* If there was a conflict with a third party, check the status of the
+ * unlinked vnode.
+ */
+ if (op->error == 0 && (op->flags & AFS_OPERATION_DIR_CONFLICT)) {
+ op->file[1].update_ctime = false;
+ op->fetch_status.which = 1;
+ op->ops = &afs_fetch_status_operation;
+ afs_begin_vnode_operation(op);
+ afs_wait_for_operation(op);
+ }
+
+ return afs_put_operation(op);
+
+error:
+ return afs_put_operation(op);
+}
+
+/* Operation table installed by afs_create(). */
+static const struct afs_operation_ops afs_create_operation = {
+ .issue_afs_rpc = afs_fs_create_file,
+ .issue_yfs_rpc = yfs_fs_create_file,
+ .success = afs_create_success,
+ .aborted = afs_check_for_remote_deletion,
+ .edit_dir = afs_create_edit_dir,
+ .put = afs_create_put,
+};
+
+/*
+ * Create a regular file on an AFS filesystem.
+ *
+ * Returns the result of the synchronous create operation.  If the name is
+ * too long (-ENAMETOOLONG) or the operation cannot be allocated, the dentry
+ * is dropped and the error returned.
+ */
+static int afs_create(struct user_namespace *mnt_userns, struct inode *dir,
+		      struct dentry *dentry, umode_t mode, bool excl)
+{
+	struct afs_vnode *dvnode = AFS_FS_I(dir);
+	struct afs_vnode_param *dvp;
+	struct afs_operation *op;
+	int ret;
+
+	_enter("{%llx:%llu},{%pd},%ho",
+	       dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
+
+	if (dentry->d_name.len >= AFSNAMEMAX) {
+		ret = -ENAMETOOLONG;
+		goto drop;
+	}
+
+	op = afs_alloc_operation(NULL, dvnode->volume);
+	if (IS_ERR(op)) {
+		ret = PTR_ERR(op);
+		goto drop;
+	}
+
+	afs_op_set_vnode(op, 0, dvnode);
+
+	dvp = &op->file[0];
+	dvp->dv_delta = 1;
+	dvp->modification = true;
+	dvp->update_ctime = true;
+
+	op->dentry	  = dentry;
+	op->create.mode	  = S_IFREG | mode;
+	op->create.reason = afs_edit_dir_for_create;
+	op->mtime	  = current_time(dir);
+	op->ops		  = &afs_create_operation;
+
+	return afs_do_sync_operation(op);
+
+drop:
+	d_drop(dentry);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * Success handler for link: commit the returned status of the directory and
+ * of the linked vnode, refresh the version on the new dentry (and on the
+ * source dentry too when both share a parent), then attach the vnode's inode
+ * to the new dentry with an extra reference.
+ */
+static void afs_link_success(struct afs_operation *op)
+{
+ struct afs_vnode_param *dvp = &op->file[0];
+ struct afs_vnode_param *vp = &op->file[1];
+
+ _enter("op=%08x", op->debug_id);
+ op->ctime = dvp->scb.status.mtime_client;
+ afs_vnode_commit_status(op, dvp);
+ afs_vnode_commit_status(op, vp);
+ afs_update_dentry_version(op, dvp, op->dentry);
+ if (op->dentry_2->d_parent == op->dentry->d_parent)
+ afs_update_dentry_version(op, dvp, op->dentry_2);
+ ihold(&vp->vnode->netfs.inode); /* reference for the dentry instantiated below */
+ d_instantiate(op->dentry, &vp->vnode->netfs.inode);
+}
+
+/*
+ * Final cleanup for link: a failed operation has its new dentry dropped from
+ * the dcache.
+ */
+static void afs_link_put(struct afs_operation *op)
+{
+	_enter("op=%08x", op->debug_id);
+
+	if (!op->error)
+		return;
+
+	d_drop(op->dentry);
+}
+
+/* Operation table installed by afs_link(). */
+static const struct afs_operation_ops afs_link_operation = {
+ .issue_afs_rpc = afs_fs_link,
+ .issue_yfs_rpc = yfs_fs_link,
+ .success = afs_link_success,
+ .aborted = afs_check_for_remote_deletion,
+ .edit_dir = afs_create_edit_dir,
+ .put = afs_link_put,
+};
+
+/*
+ * create a hard link between files in an AFS filesystem
+ *
+ * Returns the result of the synchronous link operation.  If the name is too
+ * long, the operation cannot be allocated or the source vnode fails
+ * validation, the new dentry is dropped and the error returned.
+ */
+static int afs_link(struct dentry *from, struct inode *dir,
+ struct dentry *dentry)
+{
+ struct afs_operation *op;
+ struct afs_vnode *dvnode = AFS_FS_I(dir);
+ struct afs_vnode *vnode = AFS_FS_I(d_inode(from));
+ int ret = -ENAMETOOLONG;
+
+ _enter("{%llx:%llu},{%llx:%llu},{%pd}",
+ vnode->fid.vid, vnode->fid.vnode,
+ dvnode->fid.vid, dvnode->fid.vnode,
+ dentry);
+
+ if (dentry->d_name.len >= AFSNAMEMAX)
+ goto error;
+
+ op = afs_alloc_operation(NULL, dvnode->volume);
+ if (IS_ERR(op)) {
+ ret = PTR_ERR(op);
+ goto error;
+ }
+
+ ret = afs_validate(vnode, op->key);
+ if (ret < 0)
+ goto error_op;
+
+ afs_op_set_vnode(op, 0, dvnode);
+ afs_op_set_vnode(op, 1, vnode);
+ op->file[0].dv_delta = 1;
+ op->file[0].modification = true;
+ op->file[0].update_ctime = true;
+ op->file[1].update_ctime = true;
+
+ op->dentry = dentry;
+ op->dentry_2 = from;
+ op->ops = &afs_link_operation;
+ op->create.reason = afs_edit_dir_for_link;
+ return afs_do_sync_operation(op);
+
+error_op:
+ afs_put_operation(op); /* its return value is unused; ret already holds the error */
+error:
+ d_drop(dentry);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/* Operation table installed by afs_symlink(). */
+static const struct afs_operation_ops afs_symlink_operation = {
+ .issue_afs_rpc = afs_fs_symlink,
+ .issue_yfs_rpc = yfs_fs_symlink,
+ .success = afs_create_success,
+ .aborted = afs_check_for_remote_deletion,
+ .edit_dir = afs_create_edit_dir,
+ .put = afs_create_put,
+};
+
+/*
+ * create a symlink in an AFS filesystem
+ *
+ * Returns the result of the synchronous symlink operation.  If the name is
+ * too long (-ENAMETOOLONG), the target is too long (-EINVAL) or the
+ * operation cannot be allocated, the dentry is dropped and the error
+ * returned.
+ */
+static int afs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, const char *content)
+{
+ struct afs_operation *op;
+ struct afs_vnode *dvnode = AFS_FS_I(dir);
+ int ret;
+
+ _enter("{%llx:%llu},{%pd},%s",
+ dvnode->fid.vid, dvnode->fid.vnode, dentry,
+ content);
+
+ ret = -ENAMETOOLONG;
+ if (dentry->d_name.len >= AFSNAMEMAX)
+ goto error;
+
+ ret = -EINVAL;
+ if (strlen(content) >= AFSPATHMAX)
+ goto error;
+
+ op = afs_alloc_operation(NULL, dvnode->volume);
+ if (IS_ERR(op)) {
+ ret = PTR_ERR(op);
+ goto error;
+ }
+
+ afs_op_set_vnode(op, 0, dvnode);
+ op->file[0].dv_delta = 1;
+ /* NOTE(review): unlike afs_mkdir() and afs_create(), this does not set
+ * op->file[0].modification or op->file[0].update_ctime - confirm
+ * whether that is intentional.
+ */
+
+ op->dentry = dentry;
+ op->ops = &afs_symlink_operation;
+ op->create.reason = afs_edit_dir_for_symlink;
+ op->create.symlink = content;
+ op->mtime = current_time(dir);
+ return afs_do_sync_operation(op);
+
+error:
+ d_drop(dentry);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Success handler for a rename (the .success hook of afs_rename_operation).
+ *
+ * Commits the returned status of the source directory (file[0]) and, when
+ * the destination directory (file[1]) is a different vnode, of that too.
+ * op->ctime is loaded from the relevant directory's mtime_client before each
+ * commit.
+ */
+static void afs_rename_success(struct afs_operation *op)
+{
+ _enter("op=%08x", op->debug_id);
+
+ op->ctime = op->file[0].scb.status.mtime_client;
+ afs_check_dir_conflict(op, &op->file[1]);
+ afs_vnode_commit_status(op, &op->file[0]);
+ if (op->file[1].vnode != op->file[0].vnode) {
+ op->ctime = op->file[1].scb.status.mtime_client;
+ afs_vnode_commit_status(op, &op->file[1]);
+ }
+}
+
+/*
+ * Directory-edit handler for a rename (the .edit_dir hook of
+ * afs_rename_operation).
+ *
+ * Removes the old name from the source directory's cached contents and adds
+ * the new name to the destination directory's. Each edit is only made if
+ * that directory is still flagged AFS_VNODE_DIR_VALID and its data version
+ * equals dv_before + dv_delta. It then adjusts the link count of any inode
+ * that was sitting at the new name, updates the dentry versions and moves
+ * the old dentry onto the new one.
+ */
+static void afs_rename_edit_dir(struct afs_operation *op)
+{
+ struct afs_vnode_param *orig_dvp = &op->file[0];
+ struct afs_vnode_param *new_dvp = &op->file[1];
+ struct afs_vnode *orig_dvnode = orig_dvp->vnode;
+ struct afs_vnode *new_dvnode = new_dvp->vnode;
+ struct afs_vnode *vnode = AFS_FS_I(d_inode(op->dentry));
+ struct dentry *old_dentry = op->dentry;
+ struct dentry *new_dentry = op->dentry_2;
+ struct inode *new_inode;
+
+ _enter("op=%08x", op->debug_id);
+
+ /* Rehash the target dentry if afs_rename() unhashed it beforehand. */
+ if (op->rename.rehash) {
+ d_rehash(op->rename.rehash);
+ op->rename.rehash = NULL;
+ }
+
+ down_write(&orig_dvnode->validate_lock);
+ if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags) &&
+ orig_dvnode->status.data_version == orig_dvp->dv_before + orig_dvp->dv_delta)
+ afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name,
+ afs_edit_dir_for_rename_0);
+
+ /* Moving between directories: swap the source directory's lock for
+ * the destination's. Within one directory the lock is simply kept.
+ */
+ if (new_dvnode != orig_dvnode) {
+ up_write(&orig_dvnode->validate_lock);
+ down_write(&new_dvnode->validate_lock);
+ }
+
+ if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags) &&
+ new_dvnode->status.data_version == new_dvp->dv_before + new_dvp->dv_delta) {
+ /* An existing entry at the new name has to go before the
+ * replacement is added.
+ */
+ if (!op->rename.new_negative)
+ afs_edit_dir_remove(new_dvnode, &new_dentry->d_name,
+ afs_edit_dir_for_rename_1);
+
+ afs_edit_dir_add(new_dvnode, &new_dentry->d_name,
+ &vnode->fid, afs_edit_dir_for_rename_2);
+ }
+
+ /* Any inode attached to the new dentry loses a link: a directory's
+ * count is cleared outright, anything else is decremented if nonzero.
+ */
+ new_inode = d_inode(new_dentry);
+ if (new_inode) {
+ spin_lock(&new_inode->i_lock);
+ if (S_ISDIR(new_inode->i_mode))
+ clear_nlink(new_inode);
+ else if (new_inode->i_nlink > 0)
+ drop_nlink(new_inode);
+ spin_unlock(&new_inode->i_lock);
+ }
+
+ /* Now we can update d_fsdata on the dentries to reflect their
+ * new parent's data_version.
+ *
+ * Note that if we ever implement RENAME_EXCHANGE, we'll have
+ * to update both dentries with opposing dir versions.
+ */
+ afs_update_dentry_version(op, new_dvp, op->dentry);
+ afs_update_dentry_version(op, new_dvp, op->dentry_2);
+
+ d_move(old_dentry, new_dentry);
+
+ /* new_dvnode is orig_dvnode when the rename stayed in one directory. */
+ up_write(&new_dvnode->validate_lock);
+}
+
+/*
+ * Cleanup handler for a rename (the .put hook of afs_rename_operation).
+ *
+ * Rehashes the target dentry if it is still recorded as unhashed, drops the
+ * reference on the temporary dentry and, if the operation ended in error,
+ * rehashes the source dentry that afs_rename() dropped.
+ */
+static void afs_rename_put(struct afs_operation *op)
+{
+ _enter("op=%08x", op->debug_id);
+ if (op->rename.rehash)
+ d_rehash(op->rename.rehash);
+ dput(op->rename.tmp);
+ if (op->error)
+ d_rehash(op->dentry);
+}
+
+/* Operation table installed by afs_rename(). */
+static const struct afs_operation_ops afs_rename_operation = {
+ .issue_afs_rpc = afs_fs_rename,
+ .issue_yfs_rpc = yfs_fs_rename,
+ .success = afs_rename_success,
+ .edit_dir = afs_rename_edit_dir,
+ .put = afs_rename_put,
+};
+
+/*
+ * Rename a file in an AFS filesystem and/or move it between directories.
+ *
+ * No rename flags are supported and a silly-renamed dentry may not be moved;
+ * both cases return -EINVAL. If the target name is a positive non-directory
+ * with extra references, the target is silly-renamed out of the way first and
+ * a freshly allocated dentry of the same name becomes the rename target.
+ *
+ * Returns the result of afs_do_sync_operation(), or of afs_put_operation()
+ * if setup fails after the operation has been allocated.
+ */
+static int afs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ struct dentry *old_dentry, struct inode *new_dir,
+ struct dentry *new_dentry, unsigned int flags)
+{
+ struct afs_operation *op;
+ struct afs_vnode *orig_dvnode, *new_dvnode, *vnode;
+ int ret;
+
+ if (flags)
+ return -EINVAL;
+
+ /* Don't allow silly-rename files to be moved around. */
+ if (old_dentry->d_flags & DCACHE_NFSFS_RENAMED)
+ return -EINVAL;
+
+ vnode = AFS_FS_I(d_inode(old_dentry));
+ orig_dvnode = AFS_FS_I(old_dir);
+ new_dvnode = AFS_FS_I(new_dir);
+
+ _enter("{%llx:%llu},{%llx:%llu},{%llx:%llu},{%pd}",
+ orig_dvnode->fid.vid, orig_dvnode->fid.vnode,
+ vnode->fid.vid, vnode->fid.vnode,
+ new_dvnode->fid.vid, new_dvnode->fid.vnode,
+ new_dentry);
+
+ op = afs_alloc_operation(NULL, orig_dvnode->volume);
+ if (IS_ERR(op))
+ return PTR_ERR(op);
+
+ ret = afs_validate(vnode, op->key);
+ op->error = ret;
+ if (ret < 0)
+ goto error;
+
+ /* Slot 0 is the source directory and slot 1 the destination; both are
+ * expected to gain one data version.
+ */
+ afs_op_set_vnode(op, 0, orig_dvnode);
+ afs_op_set_vnode(op, 1, new_dvnode); /* May be same as orig_dvnode */
+ op->file[0].dv_delta = 1;
+ op->file[1].dv_delta = 1;
+ op->file[0].modification = true;
+ op->file[1].modification = true;
+ op->file[0].update_ctime = true;
+ op->file[1].update_ctime = true;
+
+ op->dentry = old_dentry;
+ op->dentry_2 = new_dentry;
+ op->rename.new_negative = d_is_negative(new_dentry);
+ op->ops = &afs_rename_operation;
+
+ /* For non-directories, check whether the target is busy and if so,
+ * make a copy of the dentry and then do a silly-rename. If the
+ * silly-rename succeeds, the copied dentry is hashed and becomes the
+ * new target.
+ */
+ if (d_is_positive(new_dentry) && !d_is_dir(new_dentry)) {
+ /* To prevent any new references to the target during the
+ * rename, we unhash the dentry in advance.
+ */
+ if (!d_unhashed(new_dentry)) {
+ d_drop(new_dentry);
+ op->rename.rehash = new_dentry;
+ }
+
+ if (d_count(new_dentry) > 2) {
+ /* copy the target dentry's name */
+ op->rename.tmp = d_alloc(new_dentry->d_parent,
+ &new_dentry->d_name);
+ if (!op->rename.tmp) {
+ op->error = -ENOMEM;
+ goto error;
+ }
+
+ ret = afs_sillyrename(new_dvnode,
+ AFS_FS_I(d_inode(new_dentry)),
+ new_dentry, op->key);
+ if (ret) {
+ op->error = ret;
+ goto error;
+ }
+
+ /* The original target has been moved aside, so the
+ * rename now lands on the fresh, negative copy.
+ */
+ op->dentry_2 = op->rename.tmp;
+ op->rename.rehash = NULL;
+ op->rename.new_negative = true;
+ }
+ }
+
+ /* This bit is potentially nasty as there's a potential race with
+ * afs_d_revalidate{,_rcu}(). We have to change d_fsdata on the dentry
+ * to reflect its new parent's new data_version after the op, but
+ * d_revalidate may see old_dentry between the op having taken place
+ * and the version being updated.
+ *
+ * So drop the old_dentry for now to make other threads go through
+ * lookup instead - which we hold a lock against.
+ */
+ d_drop(old_dentry);
+
+ return afs_do_sync_operation(op);
+
+error:
+ return afs_put_operation(op);
+}
+
+/*
+ * Release a directory folio and clean up its private state.
+ *
+ * The folio's private data is detached and, if the directory was flagged
+ * AFS_VNODE_DIR_VALID, the flag is cleared (and the n_relpg statistic bumped)
+ * so that the directory contents get reloaded.
+ *
+ * This implementation always returns true, i.e. the folio can be released.
+ */
+static bool afs_dir_release_folio(struct folio *folio, gfp_t gfp_flags)
+{
+ struct afs_vnode *dvnode = AFS_FS_I(folio_inode(folio));
+
+ _enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, folio_index(folio));
+
+ folio_detach_private(folio);
+
+ /* The directory will need reloading. */
+ if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+ afs_stat_v(dvnode, n_relpg);
+ return true;
+}
+
+/*
+ * Invalidate part or all of a directory folio.
+ *
+ * The folio must be locked by the caller (enforced with BUG_ON). Any
+ * invalidation, partial or full, clears AFS_VNODE_DIR_VALID so that the
+ * directory is reloaded; the folio's private data is only detached when the
+ * range covers the whole folio.
+ */
+static void afs_dir_invalidate_folio(struct folio *folio, size_t offset,
+ size_t length)
+{
+ struct afs_vnode *dvnode = AFS_FS_I(folio_inode(folio));
+
+ _enter("{%lu},%zu,%zu", folio->index, offset, length);
+
+ BUG_ON(!folio_test_locked(folio));
+
+ /* The directory will need reloading. */
+ if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+ afs_stat_v(dvnode, n_inval);
+
+ /* we clean up only if the entire folio is being invalidated */
+ if (offset == 0 && length == folio_size(folio))
+ folio_detach_private(folio);
+}
diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c
new file mode 100644
index 000000000..0ab7752d1
--- /dev/null
+++ b/fs/afs/dir_edit.c
@@ -0,0 +1,493 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AFS filesystem directory editing
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/pagemap.h>
+#include <linux/iversion.h>
+#include "internal.h"
+#include "xdr_fs.h"
+
+/*
+ * Find a number of contiguous clear bits in a directory block bitmask.
+ *
+ * There are 64 slots, which means we can load the entire bitmap into a
+ * variable. The first bit doesn't count as it corresponds to the block header
+ * slot. nr_slots is between 1 and 9.
+ *
+ * Returns the index of the first slot of a free run of nr_slots slots, or -1
+ * if the block contains no such run.
+ */
+static int afs_find_contig_bits(union afs_xdr_dir_block *block, unsigned int nr_slots)
+{
+ u64 bitmap;
+ u32 mask;
+ int bit, n;
+
+ /* Assemble the eight bitmap bytes, least significant first. */
+ bitmap = (u64)block->hdr.bitmap[0] << 0 * 8;
+ bitmap |= (u64)block->hdr.bitmap[1] << 1 * 8;
+ bitmap |= (u64)block->hdr.bitmap[2] << 2 * 8;
+ bitmap |= (u64)block->hdr.bitmap[3] << 3 * 8;
+ bitmap |= (u64)block->hdr.bitmap[4] << 4 * 8;
+ bitmap |= (u64)block->hdr.bitmap[5] << 5 * 8;
+ bitmap |= (u64)block->hdr.bitmap[6] << 6 * 8;
+ bitmap |= (u64)block->hdr.bitmap[7] << 7 * 8;
+ bitmap >>= 1; /* The first entry is metadata */
+ bit = 1;
+ mask = (1 << nr_slots) - 1;
+
+ do {
+ /* Skip allocated slots to reach the next free one. Every
+ * shift feeds zeros in at the top, so the 64-bit value always
+ * contains a clear bit at this point.
+ */
+ if (sizeof(unsigned long) == 8)
+ n = ffz(bitmap);
+ else
+ /* ffz() is undefined on an all-ones word, so the low
+ * half is only searched if it has a clear bit; the
+ * high half is used only when the low half is full.
+ */
+ n = ((u32)bitmap) != ~(u32)0 ?
+ ffz((u32)bitmap) :
+ ffz((u32)(bitmap >> 32)) + 32;
+ bitmap >>= n;
+ bit += n;
+
+ if ((bitmap & mask) == 0) {
+ /* A run that would extend past slot 63 is no good. */
+ if (bit > 64 - nr_slots)
+ return -1;
+ return bit;
+ }
+
+ /* The run was too short: skip the free slots up to the next
+ * allocated one. A set bit exists within the mask's width.
+ */
+ n = __ffs(bitmap);
+ bitmap >>= n;
+ bit += n;
+ } while (bitmap);
+
+ return -1;
+}
+
+/*
+ * Mark nr_slots consecutive slots, starting at slot number bit, as allocated
+ * in the directory block's bitmap.
+ */
+static void afs_set_contig_bits(union afs_xdr_dir_block *block,
+ int bit, unsigned int nr_slots)
+{
+ u64 run = (1 << nr_slots) - 1;
+ unsigned int byte;
+
+ run <<= bit;
+
+ /* Fold the 64-bit mask into the bitmap one byte at a time, least
+ * significant byte first.
+ */
+ for (byte = 0; byte < 8; byte++)
+ block->hdr.bitmap[byte] |= (u8)(run >> byte * 8);
+}
+
+/*
+ * Mark nr_slots consecutive slots, starting at slot number bit, as free in
+ * the directory block's bitmap.
+ */
+static void afs_clear_contig_bits(union afs_xdr_dir_block *block,
+ int bit, unsigned int nr_slots)
+{
+ u64 run = (1 << nr_slots) - 1;
+ unsigned int byte;
+
+ run <<= bit;
+
+ /* Knock the 64-bit mask out of the bitmap one byte at a time, least
+ * significant byte first.
+ */
+ for (byte = 0; byte < 8; byte++)
+ block->hdr.bitmap[byte] &= ~(u8)(run >> byte * 8);
+}
+
+/*
+ * Get the directory folio at the given index, creating it if need be.
+ *
+ * The folio is returned locked (FGP_LOCK) and has private data attached if it
+ * had none. If no folio can be obtained, the directory is marked as no
+ * longer valid and NULL is returned.
+ */
+static struct folio *afs_dir_get_folio(struct afs_vnode *vnode, pgoff_t index)
+{
+ struct address_space *mapping = vnode->netfs.inode.i_mapping;
+ struct folio *folio = __filemap_get_folio(mapping, index,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
+ mapping->gfp_mask);
+
+ if (!folio) {
+ clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ return NULL;
+ }
+
+ if (!folio_test_private(folio))
+ folio_attach_private(folio, (void *)1);
+ return folio;
+}
+
+/*
+ * Scan a directory block looking for a dirent of the right name.
+ *
+ * @blocknum selects how many leading slots are reserved and skipped
+ * (AFS_DIR_RESV_BLOCKS0 for block 0, AFS_DIR_RESV_BLOCKS otherwise).
+ *
+ * Returns the slot number of the matching dirent, or -1 if the block does not
+ * contain the name.
+ */
+static int afs_dir_scan_block(union afs_xdr_dir_block *block, struct qstr *name,
+ unsigned int blocknum)
+{
+ union afs_xdr_dirent *de;
+ u64 bitmap;
+ int d, len, n;
+
+ _enter("");
+
+ /* Assemble the eight bitmap bytes, least significant first. */
+ bitmap = (u64)block->hdr.bitmap[0] << 0 * 8;
+ bitmap |= (u64)block->hdr.bitmap[1] << 1 * 8;
+ bitmap |= (u64)block->hdr.bitmap[2] << 2 * 8;
+ bitmap |= (u64)block->hdr.bitmap[3] << 3 * 8;
+ bitmap |= (u64)block->hdr.bitmap[4] << 4 * 8;
+ bitmap |= (u64)block->hdr.bitmap[5] << 5 * 8;
+ bitmap |= (u64)block->hdr.bitmap[6] << 6 * 8;
+ bitmap |= (u64)block->hdr.bitmap[7] << 7 * 8;
+
+ for (d = (blocknum == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS);
+ d < AFS_DIR_SLOTS_PER_BLOCK;
+ d++) {
+ /* Skip slots that are not allocated or not marked valid. */
+ if (!((bitmap >> d) & 1))
+ continue;
+ de = &block->dirents[d];
+ if (de->u.valid != 1)
+ continue;
+
+ /* The block was NUL-terminated by afs_dir_check_page(). */
+ len = strlen(de->u.name);
+ if (len == name->len &&
+ memcmp(de->u.name, name->name, name->len) == 0)
+ return d;
+
+ /* Not a match: work out how many slots this entry spans and
+ * step over its extension slots (the loop's d++ accounts for
+ * the first). NOTE(review): 12 + len + 1 + 4 is presumably
+ * header + name + NUL + padding - confirm against xdr_fs.h.
+ */
+ n = round_up(12 + len + 1 + 4, AFS_DIR_DIRENT_SIZE);
+ n /= AFS_DIR_DIRENT_SIZE;
+ d += n - 1;
+ }
+
+ return -1;
+}
+
+/*
+ * Initialise a new directory block. Note that block 0 is special and contains
+ * some extra metadata.
+ *
+ * @meta is the mapping of block 0, which holds the per-block allocation
+ * counters; @block is the block to initialise (the same mapping as @meta when
+ * @block_num is 0).
+ */
+static void afs_edit_init_block(union afs_xdr_dir_block *meta,
+ union afs_xdr_dir_block *block, int block_num)
+{
+ memset(block, 0, sizeof(*block));
+ block->hdr.npages = htons(1);
+ block->hdr.magic = AFS_DIR_MAGIC;
+ block->hdr.bitmap[0] = 1;
+
+ if (block_num == 0) {
+ /* Block 0 reserves more leading slots than other blocks, so
+ * more bitmap bits start out set and its counter is smaller.
+ */
+ block->hdr.bitmap[0] = 0xff;
+ block->hdr.bitmap[1] = 0x1f;
+ memset(block->meta.alloc_ctrs,
+ AFS_DIR_SLOTS_PER_BLOCK,
+ sizeof(block->meta.alloc_ctrs));
+ meta->meta.alloc_ctrs[0] =
+ AFS_DIR_SLOTS_PER_BLOCK - AFS_DIR_RESV_BLOCKS0;
+ } else if (block_num < AFS_DIR_BLOCKS_WITH_CTR) {
+ /* Must not run for block 0, or it would overwrite the counter
+ * set above with the value for an ordinary block.
+ */
+ meta->meta.alloc_ctrs[block_num] =
+ AFS_DIR_SLOTS_PER_BLOCK - AFS_DIR_RESV_BLOCKS;
+ }
+}
+
+/*
+ * Edit a directory's file data to add a new directory entry. Doing this after
+ * create, mkdir, symlink, link or rename if the data version number is
+ * incremented by exactly one avoids the need to re-download the entire
+ * directory contents.
+ *
+ * @name is the entry's name, @new_fid the file ID it refers to and @why is
+ * passed to the tracepoints only.
+ *
+ * Nothing is returned: if the edit cannot be made, AFS_VNODE_DIR_VALID is
+ * cleared on the vnode instead.
+ *
+ * The caller must hold the inode locked.
+ */
+void afs_edit_dir_add(struct afs_vnode *vnode,
+ struct qstr *name, struct afs_fid *new_fid,
+ enum afs_edit_dir_reason why)
+{
+ union afs_xdr_dir_block *meta, *block;
+ union afs_xdr_dirent *de;
+ struct folio *folio0, *folio;
+ unsigned int need_slots, nr_blocks, b;
+ pgoff_t index;
+ loff_t i_size;
+ int slot;
+
+ _enter(",,{%d,%s},", name->len, name->name);
+
+ /* Refuse to edit a directory that is too big or whose size is not a
+ * whole number of blocks.
+ */
+ i_size = i_size_read(&vnode->netfs.inode);
+ if (i_size > AFS_DIR_BLOCK_SIZE * AFS_DIR_MAX_BLOCKS ||
+ (i_size & (AFS_DIR_BLOCK_SIZE - 1))) {
+ clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ return;
+ }
+
+ folio0 = afs_dir_get_folio(vnode, 0);
+ if (!folio0) {
+ _leave(" [fgp]");
+ return;
+ }
+
+ /* Work out how many slots we're going to need. */
+ need_slots = afs_dir_calc_slots(name->len);
+
+ /* Block 0 carries the allocation counters for the directory. */
+ meta = kmap_local_folio(folio0, 0);
+ if (i_size == 0)
+ goto new_directory;
+ nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
+
+ /* Find a block that has sufficient slots available. Each folio
+ * contains two or more directory blocks.
+ *
+ * The loop runs one past the last existing block so that a new block
+ * can be appended if none of the existing ones has room.
+ */
+ for (b = 0; b < nr_blocks + 1; b++) {
+ /* If the directory extended into a new folio, then we need to
+ * tack a new folio on the end.
+ */
+ index = b / AFS_DIR_BLOCKS_PER_PAGE;
+ if (nr_blocks >= AFS_DIR_MAX_BLOCKS)
+ goto error;
+ if (index >= folio_nr_pages(folio0)) {
+ folio = afs_dir_get_folio(vnode, index);
+ if (!folio)
+ goto error;
+ } else {
+ folio = folio0;
+ }
+
+ block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_file_pos(folio));
+
+ /* Abandon the edit if we got a callback break. */
+ if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+ goto invalidated;
+
+ _debug("block %u: %2u %3u %u",
+ b,
+ (b < AFS_DIR_BLOCKS_WITH_CTR) ? meta->meta.alloc_ctrs[b] : 99,
+ ntohs(block->hdr.npages),
+ ntohs(block->hdr.magic));
+
+ /* Initialise the block if necessary. */
+ if (b == nr_blocks) {
+ _debug("init %u", b);
+ afs_edit_init_block(meta, block, b);
+ afs_set_i_size(vnode, (b + 1) * AFS_DIR_BLOCK_SIZE);
+ }
+
+ /* Only lower dir blocks have a counter in the header. */
+ if (b >= AFS_DIR_BLOCKS_WITH_CTR ||
+ meta->meta.alloc_ctrs[b] >= need_slots) {
+ /* We need to try and find one or more consecutive
+ * slots to hold the entry.
+ */
+ slot = afs_find_contig_bits(block, need_slots);
+ if (slot >= 0) {
+ _debug("slot %u", slot);
+ goto found_space;
+ }
+ }
+
+ /* No room here: release this block before trying the next. */
+ kunmap_local(block);
+ if (folio != folio0) {
+ folio_unlock(folio);
+ folio_put(folio);
+ }
+ }
+
+ /* There are no spare slots of sufficient size, yet the operation
+ * succeeded. Download the directory again.
+ */
+ trace_afs_edit_dir(vnode, why, afs_edit_dir_create_nospc, 0, 0, 0, 0, name->name);
+ clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ goto out_unmap;
+
+new_directory:
+ /* The directory file is empty: initialise block 0, which is both the
+ * meta block and the block that takes the entry, and use the first
+ * slot after the reserved ones.
+ */
+ afs_edit_init_block(meta, meta, 0);
+ i_size = AFS_DIR_BLOCK_SIZE;
+ afs_set_i_size(vnode, i_size);
+ slot = AFS_DIR_RESV_BLOCKS0;
+ folio = folio0;
+ block = kmap_local_folio(folio, 0);
+ nr_blocks = 1;
+ b = 0;
+
+found_space:
+ /* Set the dirent slot. */
+ trace_afs_edit_dir(vnode, why, afs_edit_dir_create, b, slot,
+ new_fid->vnode, new_fid->unique, name->name);
+ de = &block->dirents[slot];
+ de->u.valid = 1;
+ de->u.unused[0] = 0;
+ de->u.hash_next = 0; // TODO: Really need to maintain this
+ de->u.vnode = htonl(new_fid->vnode);
+ de->u.unique = htonl(new_fid->unique);
+ /* Copy the name and force NUL termination. */
+ memcpy(de->u.name, name->name, name->len + 1);
+ de->u.name[name->len] = 0;
+
+ /* Adjust the bitmap. */
+ afs_set_contig_bits(block, slot, need_slots);
+ kunmap_local(block);
+ if (folio != folio0) {
+ folio_unlock(folio);
+ folio_put(folio);
+ }
+
+ /* Adjust the allocation counter. */
+ if (b < AFS_DIR_BLOCKS_WITH_CTR)
+ meta->meta.alloc_ctrs[b] -= need_slots;
+
+ inode_inc_iversion_raw(&vnode->netfs.inode);
+ afs_stat_v(vnode, n_dir_cr);
+ _debug("Insert %s in %u[%u]", name->name, b, slot);
+
+out_unmap:
+ kunmap_local(meta);
+ folio_unlock(folio0);
+ folio_put(folio0);
+ _leave("");
+ return;
+
+invalidated:
+ /* A block is still mapped here and has to be released. */
+ trace_afs_edit_dir(vnode, why, afs_edit_dir_create_inval, 0, 0, 0, 0, name->name);
+ clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ kunmap_local(block);
+ if (folio != folio0) {
+ folio_unlock(folio);
+ folio_put(folio);
+ }
+ goto out_unmap;
+
+error:
+ /* Reached before any block is mapped; only block 0 needs releasing. */
+ trace_afs_edit_dir(vnode, why, afs_edit_dir_create_error, 0, 0, 0, 0, name->name);
+ clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ goto out_unmap;
+}
+
+/*
+ * Edit a directory's file data to remove a directory entry. Doing this
+ * after unlink, rmdir or rename if the data version number is incremented by
+ * exactly one avoids the need to re-download the entire directory contents.
+ *
+ * @name is the entry to remove and @why is passed to the tracepoints only.
+ *
+ * Nothing is returned: if the edit cannot be made, AFS_VNODE_DIR_VALID is
+ * cleared on the vnode instead.
+ *
+ * The caller must hold the inode locked.
+ */
+void afs_edit_dir_remove(struct afs_vnode *vnode,
+ struct qstr *name, enum afs_edit_dir_reason why)
+{
+ union afs_xdr_dir_block *meta, *block;
+ union afs_xdr_dirent *de;
+ struct folio *folio0, *folio;
+ unsigned int need_slots, nr_blocks, b;
+ pgoff_t index;
+ loff_t i_size;
+ int slot;
+
+ _enter(",,{%d,%s},", name->len, name->name);
+
+ /* Refuse to edit a directory that is empty, too big or whose size is
+ * not a whole number of blocks.
+ */
+ i_size = i_size_read(&vnode->netfs.inode);
+ if (i_size < AFS_DIR_BLOCK_SIZE ||
+ i_size > AFS_DIR_BLOCK_SIZE * AFS_DIR_MAX_BLOCKS ||
+ (i_size & (AFS_DIR_BLOCK_SIZE - 1))) {
+ clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ return;
+ }
+ nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
+
+ folio0 = afs_dir_get_folio(vnode, 0);
+ if (!folio0) {
+ _leave(" [fgp]");
+ return;
+ }
+
+ /* Work out how many slots we're going to discard. */
+ need_slots = afs_dir_calc_slots(name->len);
+
+ /* Block 0 carries the allocation counters for the directory. */
+ meta = kmap_local_folio(folio0, 0);
+
+ /* Find the block that holds the dirent. Each folio contains two or
+ * more directory blocks.
+ */
+ for (b = 0; b < nr_blocks; b++) {
+ index = b / AFS_DIR_BLOCKS_PER_PAGE;
+ if (index >= folio_nr_pages(folio0)) {
+ folio = afs_dir_get_folio(vnode, index);
+ if (!folio)
+ goto error;
+ } else {
+ folio = folio0;
+ }
+
+ block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_file_pos(folio));
+
+ /* Abandon the edit if we got a callback break. */
+ if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+ goto invalidated;
+
+ /* Only lower dir blocks have a counter in the header, so
+ * blocks from AFS_DIR_BLOCKS_WITH_CTR upwards must always be
+ * scanned. Where there is a counter, the scan is skipped if
+ * too few slots are in use for the entry to be there.
+ */
+ if (b >= AFS_DIR_BLOCKS_WITH_CTR ||
+ meta->meta.alloc_ctrs[b] <= AFS_DIR_SLOTS_PER_BLOCK - 1 - need_slots) {
+ slot = afs_dir_scan_block(block, name, b);
+ if (slot >= 0)
+ goto found_dirent;
+ }
+
+ /* Not in this block: release it before trying the next. */
+ kunmap_local(block);
+ if (folio != folio0) {
+ folio_unlock(folio);
+ folio_put(folio);
+ }
+ }
+
+ /* Didn't find the dirent to clobber. Download the directory again. */
+ trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_noent,
+ 0, 0, 0, 0, name->name);
+ clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ goto out_unmap;
+
+found_dirent:
+ de = &block->dirents[slot];
+
+ trace_afs_edit_dir(vnode, why, afs_edit_dir_delete, b, slot,
+ ntohl(de->u.vnode), ntohl(de->u.unique),
+ name->name);
+
+ /* Wipe every slot the entry occupied, not just the first. */
+ memset(de, 0, sizeof(*de) * need_slots);
+
+ /* Adjust the bitmap. */
+ afs_clear_contig_bits(block, slot, need_slots);
+ kunmap_local(block);
+ if (folio != folio0) {
+ folio_unlock(folio);
+ folio_put(folio);
+ }
+
+ /* Adjust the allocation counter. */
+ if (b < AFS_DIR_BLOCKS_WITH_CTR)
+ meta->meta.alloc_ctrs[b] += need_slots;
+
+ inode_set_iversion_raw(&vnode->netfs.inode, vnode->status.data_version);
+ afs_stat_v(vnode, n_dir_rm);
+ _debug("Remove %s from %u[%u]", name->name, b, slot);
+
+out_unmap:
+ kunmap_local(meta);
+ folio_unlock(folio0);
+ folio_put(folio0);
+ _leave("");
+ return;
+
+invalidated:
+ /* A block is still mapped here and has to be released. */
+ trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_inval,
+ 0, 0, 0, 0, name->name);
+ clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ kunmap_local(block);
+ if (folio != folio0) {
+ folio_unlock(folio);
+ folio_put(folio);
+ }
+ goto out_unmap;
+
+error:
+ /* Reached before any block is mapped; only block 0 needs releasing. */
+ trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_error,
+ 0, 0, 0, 0, name->name);
+ clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ goto out_unmap;
+}
diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c
new file mode 100644
index 000000000..bb5807e87
--- /dev/null
+++ b/fs/afs/dir_silly.c
@@ -0,0 +1,282 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AFS silly rename handling
+ *
+ * Copyright (C) 2019 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * - Derived from NFS's sillyrename.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/fsnotify.h>
+#include "internal.h"
+
+/*
+ * Success handler for a silly rename (the .success hook of
+ * afs_silly_rename_operation): check the directory for a conflicting change
+ * and commit its returned status.
+ */
+static void afs_silly_rename_success(struct afs_operation *op)
+{
+ _enter("op=%08x", op->debug_id);
+
+ afs_check_dir_conflict(op, &op->file[0]);
+ afs_vnode_commit_status(op, &op->file[0]);
+}
+
+/*
+ * Directory-edit handler for a silly rename (the .edit_dir hook of
+ * afs_silly_rename_operation).
+ *
+ * Flags the old dentry as silly-renamed, records the key that was used on the
+ * directory vnode and, if the cached directory contents are still usable,
+ * replaces the old name with the new one in them.
+ */
+static void afs_silly_rename_edit_dir(struct afs_operation *op)
+{
+ struct afs_vnode_param *dvp = &op->file[0];
+ struct afs_vnode *dvnode = dvp->vnode;
+ struct afs_vnode *vnode = AFS_FS_I(d_inode(op->dentry));
+ struct dentry *old = op->dentry;
+ struct dentry *new = op->dentry_2;
+
+ spin_lock(&old->d_lock);
+ old->d_flags |= DCACHE_NFSFS_RENAMED;
+ spin_unlock(&old->d_lock);
+ /* Save the key on the directory; afs_silly_iput() later hands
+ * dvnode->silly_key to the unlink step.
+ */
+ if (dvnode->silly_key != op->key) {
+ key_put(dvnode->silly_key);
+ dvnode->silly_key = key_get(op->key);
+ }
+
+ down_write(&dvnode->validate_lock);
+ if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+ dvnode->status.data_version == dvp->dv_before + dvp->dv_delta) {
+ afs_edit_dir_remove(dvnode, &old->d_name,
+ afs_edit_dir_for_silly_0);
+ afs_edit_dir_add(dvnode, &new->d_name,
+ &vnode->fid, afs_edit_dir_for_silly_1);
+ }
+ up_write(&dvnode->validate_lock);
+}
+
+/* Operation table installed by afs_do_silly_rename(). */
+static const struct afs_operation_ops afs_silly_rename_operation = {
+ .issue_afs_rpc = afs_fs_rename,
+ .issue_yfs_rpc = yfs_fs_rename,
+ .success = afs_silly_rename_success,
+ .edit_dir = afs_silly_rename_edit_dir,
+};
+
+/*
+ * Actually perform the silly rename step.
+ *
+ * Renames @old to @new within directory @dvnode using @key. @vnode is the
+ * file being renamed and is only used for tracing here.
+ *
+ * Returns a negative error if the operation cannot be allocated, otherwise
+ * the result of afs_do_sync_operation().
+ */
+static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode,
+ struct dentry *old, struct dentry *new,
+ struct key *key)
+{
+ struct afs_operation *op;
+
+ _enter("%pd,%pd", old, new);
+
+ op = afs_alloc_operation(key, dvnode->volume);
+ if (IS_ERR(op))
+ return PTR_ERR(op);
+
+ /* Source and destination directory are the same vnode. */
+ afs_op_set_vnode(op, 0, dvnode);
+ afs_op_set_vnode(op, 1, dvnode);
+ op->file[0].dv_delta = 1;
+ op->file[1].dv_delta = 1;
+ op->file[0].modification = true;
+ op->file[1].modification = true;
+ op->file[0].update_ctime = true;
+ op->file[1].update_ctime = true;
+
+ op->dentry = old;
+ op->dentry_2 = new;
+ op->ops = &afs_silly_rename_operation;
+
+ trace_afs_silly_rename(vnode, false);
+ return afs_do_sync_operation(op);
+}
+
+/*
+ * Perform silly-rename of a dentry.
+ *
+ * AFS is stateless and the server doesn't know when the client is holding a
+ * file open. To prevent application problems when a file is unlinked while
+ * it's still open, the client performs a "silly-rename". That is, it renames
+ * the file to a hidden file in the same directory, and only performs the
+ * unlink once the last reference to it is put.
+ *
+ * The final cleanup is done during dentry_iput.
+ *
+ * Returns 0 on success, -EBUSY if the dentry has already been silly-renamed
+ * or the lookup of a candidate name fails, or else the error from the rename
+ * operation.
+ */
+int afs_sillyrename(struct afs_vnode *dvnode, struct afs_vnode *vnode,
+ struct dentry *dentry, struct key *key)
+{
+ static unsigned int sillycounter;
+ struct dentry *sdentry = NULL;
+ unsigned char silly[16];
+ int ret = -EBUSY;
+
+ _enter("");
+
+ /* We don't allow a dentry to be silly-renamed twice. */
+ if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
+ return -EBUSY;
+
+ /* Keep generating candidate names until one turns out to be unused. */
+ sdentry = NULL;
+ do {
+ int slen;
+
+ /* Drop the candidate from the previous pass (NULL first time). */
+ dput(sdentry);
+ sillycounter++;
+
+ /* Create a silly name. Note that the ".__afs" prefix is
+ * understood by the salvager and must not be changed.
+ */
+ slen = scnprintf(silly, sizeof(silly), ".__afs%04X", sillycounter);
+ sdentry = lookup_one_len(silly, dentry->d_parent, slen);
+
+ /* N.B. Better to return EBUSY here ... it could be dangerous
+ * to delete the file while it's in use.
+ */
+ if (IS_ERR(sdentry))
+ goto out;
+ } while (!d_is_negative(sdentry));
+
+ /* Hold the inode across the rename; released by the iput() below. */
+ ihold(&vnode->netfs.inode);
+
+ ret = afs_do_silly_rename(dvnode, vnode, dentry, sdentry, key);
+ switch (ret) {
+ case 0:
+ /* The rename succeeded. */
+ set_bit(AFS_VNODE_SILLY_DELETED, &vnode->flags);
+ d_move(dentry, sdentry);
+ break;
+ case -ERESTARTSYS:
+ /* The result of the rename is unknown. Play it safe by forcing
+ * a new lookup.
+ */
+ d_drop(dentry);
+ d_drop(sdentry);
+ }
+
+ iput(&vnode->netfs.inode);
+ dput(sdentry);
+out:
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Success handler for a silly unlink (the .success hook of
+ * afs_silly_unlink_operation): check the directory for a conflicting change,
+ * commit the status of both the directory and the removed file, then update
+ * the dentry's version against the directory.
+ */
+static void afs_silly_unlink_success(struct afs_operation *op)
+{
+ _enter("op=%08x", op->debug_id);
+ afs_check_dir_conflict(op, &op->file[0]);
+ afs_vnode_commit_status(op, &op->file[0]);
+ afs_vnode_commit_status(op, &op->file[1]);
+ afs_update_dentry_version(op, &op->file[0], op->dentry);
+}
+
+/*
+ * Directory-edit handler for a silly unlink (the .edit_dir hook of
+ * afs_silly_unlink_operation): remove the name from the cached directory
+ * contents, provided they are still valid and the data version equals
+ * dv_before + dv_delta.
+ */
+static void afs_silly_unlink_edit_dir(struct afs_operation *op)
+{
+ struct afs_vnode_param *dvp = &op->file[0];
+ struct afs_vnode *dvnode = dvp->vnode;
+
+ _enter("op=%08x", op->debug_id);
+ down_write(&dvnode->validate_lock);
+ if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+ dvnode->status.data_version == dvp->dv_before + dvp->dv_delta)
+ afs_edit_dir_remove(dvnode, &op->dentry->d_name,
+ afs_edit_dir_for_unlink);
+ up_write(&dvnode->validate_lock);
+}
+
+/* Operation table installed by afs_do_silly_unlink(). */
+static const struct afs_operation_ops afs_silly_unlink_operation = {
+ .issue_afs_rpc = afs_fs_remove_file,
+ .issue_yfs_rpc = yfs_fs_remove_file,
+ .success = afs_silly_unlink_success,
+ .aborted = afs_check_for_remote_deletion,
+ .edit_dir = afs_silly_unlink_edit_dir,
+};
+
+/*
+ * Tell the server to remove a sillyrename file.
+ *
+ * @dvnode is the directory holding the silly-renamed name @dentry, @vnode is
+ * the file being removed and @key is the key to perform the removal with
+ * (afs_silly_iput() passes the key saved on the directory when the silly
+ * rename was done).
+ *
+ * Returns a negative error if the operation cannot be allocated, otherwise
+ * the result of afs_put_operation().
+ */
+static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode,
+ struct dentry *dentry, struct key *key)
+{
+ struct afs_operation *op;
+
+ _enter("");
+
+ /* Use the caller-supplied key rather than ignoring it, matching what
+ * afs_do_silly_rename() does with its key argument.
+ */
+ op = afs_alloc_operation(key, dvnode->volume);
+ if (IS_ERR(op))
+ return PTR_ERR(op);
+
+ /* Slot 0 is the directory being modified; slot 1 is the file that is
+ * being unlinked.
+ */
+ afs_op_set_vnode(op, 0, dvnode);
+ afs_op_set_vnode(op, 1, vnode);
+ op->file[0].dv_delta = 1;
+ op->file[0].modification = true;
+ op->file[0].update_ctime = true;
+ op->file[1].op_unlinked = true;
+ op->file[1].update_ctime = true;
+
+ op->dentry = dentry;
+ op->ops = &afs_silly_unlink_operation;
+
+ trace_afs_silly_rename(vnode, true);
+ afs_begin_vnode_operation(op);
+ afs_wait_for_operation(op);
+
+ /* If there was a conflict with a third party, check the status of the
+ * unlinked vnode.
+ */
+ if (op->error == 0 && (op->flags & AFS_OPERATION_DIR_CONFLICT)) {
+ op->file[1].update_ctime = false;
+ op->fetch_status.which = 1;
+ op->ops = &afs_fetch_status_operation;
+ afs_begin_vnode_operation(op);
+ afs_wait_for_operation(op);
+ }
+
+ return afs_put_operation(op);
+}
+
+/*
+ * Remove sillyrename file on iput.
+ *
+ * Returns 0 if no placeholder dentry could be allocated. If a lookup of the
+ * same name got in first, returns 1 if the silly-rename flag was transferred
+ * to that dentry and 0 if not. Otherwise the file is unlinked on the server
+ * and 1 is returned; the result of the unlink itself is not checked.
+ */
+int afs_silly_iput(struct dentry *dentry, struct inode *inode)
+{
+ struct afs_vnode *dvnode = AFS_FS_I(d_inode(dentry->d_parent));
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+ struct dentry *alias;
+ int ret;
+
+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+
+ _enter("%p{%pd},%llx", dentry, dentry, vnode->fid.vnode);
+
+ down_read(&dvnode->rmdir_lock);
+
+ /* Allocate an in-lookup dentry of the same name in the same parent. */
+ alias = d_alloc_parallel(dentry->d_parent, &dentry->d_name, &wq);
+ if (IS_ERR(alias)) {
+ up_read(&dvnode->rmdir_lock);
+ return 0;
+ }
+
+ if (!d_in_lookup(alias)) {
+ /* We raced with lookup... See if we need to transfer the
+ * sillyrename information to the aliased dentry.
+ */
+ ret = 0;
+ spin_lock(&alias->d_lock);
+ if (d_really_is_positive(alias) &&
+ !(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
+ alias->d_flags |= DCACHE_NFSFS_RENAMED;
+ ret = 1;
+ }
+ spin_unlock(&alias->d_lock);
+ up_read(&dvnode->rmdir_lock);
+ dput(alias);
+ return ret;
+ }
+
+ /* Stop lock-release from complaining. */
+ spin_lock(&vnode->lock);
+ vnode->lock_state = AFS_VNODE_LOCK_DELETED;
+ trace_afs_flock_ev(vnode, NULL, afs_flock_silly_delete, 0);
+ spin_unlock(&vnode->lock);
+
+ /* Remove the file using the key saved on the directory. */
+ afs_do_silly_unlink(dvnode, vnode, dentry, dvnode->silly_key);
+ up_read(&dvnode->rmdir_lock);
+ d_lookup_done(alias);
+ dput(alias);
+ return 1;
+}
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
new file mode 100644
index 000000000..9937993cf
--- /dev/null
+++ b/fs/afs/dynroot.c
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AFS dynamic root handling
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/dns_resolver.h>
+#include "internal.h"
+
+/* Counter from which vnode IDs for non-root pseudo directories are drawn. */
+static atomic_t afs_autocell_ino;
+
+/*
+ * iget5() comparator for inode created by autocell operations
+ *
+ * These pseudo inodes don't match anything: both arguments are ignored and
+ * the function always returns 0.
+ */
+static int afs_iget5_pseudo_test(struct inode *inode, void *opaque)
+{
+ return 0;
+}
+
+/*
+ * iget5() "set" callback for pseudo inodes.
+ *
+ * @opaque points at the afs_fid chosen by the caller.  The fid is copied into
+ * the vnode, the vnode is attached to the superblock's volume, and the inode
+ * number and generation are seeded from the fid.  Always returns 0.
+ */
+static int afs_iget5_pseudo_set(struct inode *inode, void *opaque)
+{
+	struct afs_fid *new_fid = opaque;
+	struct afs_vnode *vn = AFS_FS_I(inode);
+
+	vn->volume = AFS_FS_S(inode->i_sb)->volume;
+	vn->fid = *new_fid;
+
+	inode->i_ino = new_fid->vnode;
+	inode->i_generation = new_fid->unique;
+	return 0;
+}
+
+/*
+ * Create an inode for a dynamic root directory or an autocell dynamic
+ * automount dir.
+ *
+ * @sb:   superblock on which to create the inode
+ * @root: true for the dynamic root itself, false for an automount dir
+ *
+ * The root always uses vnode 1 / unique 1; any other dir draws its vnode
+ * number from afs_autocell_ino and uses unique 0.  Returns the new inode
+ * (already unlocked) or ERR_PTR(-ENOMEM) if iget5_locked() returns NULL.
+ */
+struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root)
+{
+ struct afs_super_info *as = AFS_FS_S(sb);
+ struct afs_vnode *vnode;
+ struct inode *inode;
+ struct afs_fid fid = {};
+
+ _enter("");
+
+ /* fid.vid stays zero when the superblock has no volume attached. */
+ if (as->volume)
+ fid.vid = as->volume->vid;
+ if (root) {
+ fid.vnode = 1;
+ fid.unique = 1;
+ } else {
+ fid.vnode = atomic_inc_return(&afs_autocell_ino);
+ fid.unique = 0;
+ }
+
+ inode = iget5_locked(sb, fid.vnode,
+ afs_iget5_pseudo_test, afs_iget5_pseudo_set, &fid);
+ if (!inode) {
+ _leave(" = -ENOMEM");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ _debug("GOT INODE %p { ino=%lu, vl=%llx, vn=%llx, u=%x }",
+ inode, inode->i_ino, fid.vid, fid.vnode, fid.unique);
+
+ vnode = AFS_FS_I(inode);
+
+ /* there shouldn't be an existing inode */
+ BUG_ON(!(inode->i_state & I_NEW));
+
+ netfs_inode_init(&vnode->netfs, NULL);
+ inode->i_size = 0;
+ inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
+ if (root) {
+ inode->i_op = &afs_dynroot_inode_operations;
+ inode->i_fop = &simple_dir_operations;
+ } else {
+ inode->i_op = &afs_autocell_inode_operations;
+ }
+ set_nlink(inode, 2);
+ inode->i_uid = GLOBAL_ROOT_UID;
+ inode->i_gid = GLOBAL_ROOT_GID;
+ inode->i_ctime = inode->i_atime = inode->i_mtime = current_time(inode);
+ inode->i_blocks = 0;
+ /* Overrides the generation seeded from fid.unique by the set callback. */
+ inode->i_generation = 0;
+
+ set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags);
+ /* Non-root pseudo dirs are flagged as mountpoints with S_AUTOMOUNT set. */
+ if (!root) {
+ set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
+ inode->i_flags |= S_AUTOMOUNT;
+ }
+
+ inode->i_flags |= S_NOATIME;
+ unlock_new_inode(inode);
+ _leave(" = %p", inode);
+ return inode;
+}
+
+/*
+ * Probe to see if a cell may exist.  This prevents positive dentries from
+ * being created unnecessarily.
+ *
+ * The name is taken from @dentry; a single leading dot (R/W mount prefix) is
+ * stripped first and a bare "." is rejected with -EINVAL.
+ *
+ * Returns 0 if the cell is already known, a positive value (the length of the
+ * DNS reply) if the DNS lookup produced a usable answer, -ENOENT if the
+ * lookup produced no data or a v1 server list with a non-good status, or
+ * another negative error from dns_query().
+ */
+static int afs_probe_cell_name(struct dentry *dentry)
+{
+	struct afs_cell *cell;
+	struct afs_net *net = afs_d2net(dentry);
+	const char *name = dentry->d_name.name;
+	size_t len = dentry->d_name.len;
+	char *result = NULL;
+	int ret;
+
+	/* Names prefixed with a dot are R/W mounts. */
+	if (name[0] == '.') {
+		if (len == 1)
+			return -EINVAL;
+		name++;
+		len--;
+	}
+
+	cell = afs_find_cell(net, name, len, afs_cell_trace_use_probe);
+	if (!IS_ERR(cell)) {
+		afs_unuse_cell(net, cell, afs_cell_trace_unuse_probe);
+		return 0;
+	}
+
+	ret = dns_query(net->net, "afsdb", name, len, "srv=1",
+			&result, NULL, false);
+	if (ret == -ENODATA || ret == -ENOKEY || ret == 0)
+		ret = -ENOENT;
+	if (ret > 0 && ret >= sizeof(struct dns_server_list_v1_header)) {
+		struct dns_server_list_v1_header *v1 = (void *)result;
+
+		/* Record the failure in ret rather than returning directly so
+		 * that the reply buffer is still freed below.
+		 */
+		if (v1->hdr.zero == 0 &&
+		    v1->hdr.content == DNS_PAYLOAD_IS_SERVER_LIST &&
+		    v1->hdr.version == 1 &&
+		    (v1->status != DNS_LOOKUP_GOOD &&
+		     v1->status != DNS_LOOKUP_GOOD_WITH_BAD))
+			ret = -ENOENT;
+	}
+
+	kfree(result);
+	return ret;
+}
+
+/*
+ * Try to create an automount pseudo directory for @dentry if the parent
+ * directory @dir has the autocell flag set.
+ *
+ * Returns the new inode on success, NULL if the parent is not an autocell
+ * directory or the cell probe reports -ENOENT, or an ERR_PTR() for any other
+ * failure.
+ */
+struct inode *afs_try_auto_mntpt(struct dentry *dentry, struct inode *dir)
+{
+	struct afs_vnode *dvnode = AFS_FS_I(dir);
+	struct inode *pseudo;
+	int err;
+
+	_enter("%p{%pd}, {%llx:%llu}",
+	       dentry, dentry, dvnode->fid.vid, dvnode->fid.vnode);
+
+	if (!test_bit(AFS_VNODE_AUTOCELL, &dvnode->flags)) {
+		err = -ENOENT;
+		goto fail;
+	}
+
+	err = afs_probe_cell_name(dentry);
+	if (err < 0)
+		goto fail;
+
+	pseudo = afs_iget_pseudo_dir(dir->i_sb, false);
+	if (!IS_ERR(pseudo)) {
+		_leave("= %p", pseudo);
+		return pseudo;
+	}
+	err = PTR_ERR(pseudo);
+
+fail:
+	_leave("= %d", err);
+	if (err == -ENOENT)
+		return NULL;
+	return ERR_PTR(err);
+}
+
+/*
+ * Look up @cell in a dynroot directory.  This is a substitution for the
+ * local cell name for the net namespace.
+ *
+ * Returns the dentry found by looking up the workstation cell's name in the
+ * parent of @dentry, ERR_PTR(-ENOENT) if no workstation cell is set, or
+ * ERR_PTR(-ENOMEM) if the name buffer cannot be allocated.
+ */
+static struct dentry *afs_lookup_atcell(struct dentry *dentry)
+{
+	struct afs_net *net = afs_d2net(dentry);
+	struct afs_cell *ws_cell;
+	struct dentry *found;
+	char *cellname;
+	int cellname_len;
+
+	if (!net->ws_cell)
+		return ERR_PTR(-ENOENT);
+
+	cellname = kmalloc(AFS_MAXCELLNAME + 1, GFP_KERNEL);
+	if (!cellname)
+		return ERR_PTR(-ENOMEM);
+
+	/* Copy the name (including its terminator) while holding cells_lock;
+	 * ws_cell is re-read here as it may have changed since the check above.
+	 */
+	down_read(&net->cells_lock);
+	ws_cell = net->ws_cell;
+	if (ws_cell) {
+		cellname_len = ws_cell->name_len;
+		memcpy(cellname, ws_cell->name, cellname_len + 1);
+	}
+	up_read(&net->cells_lock);
+
+	/* We don't want to d_add() the @cell dentry here as we don't want the
+	 * cached dentry to hide changes to the local cell name.
+	 */
+	if (ws_cell)
+		found = lookup_one_len(cellname, dentry->d_parent, cellname_len);
+	else
+		found = ERR_PTR(-ENOENT);
+
+	kfree(cellname);
+	return found;
+}
+
+/*
+ * Look up an entry in a dynroot directory.
+ *
+ * Lookups with LOOKUP_CREATE are refused with -EOPNOTSUPP and names of
+ * AFSNAMEMAX bytes or more with -ENAMETOOLONG.  The literal name "@cell" is
+ * redirected to afs_lookup_atcell(); anything else is offered to the
+ * automount-point code and the resulting inode spliced onto @dentry.
+ */
+static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry,
+					 unsigned int flags)
+{
+	const struct qstr *qname = &dentry->d_name;
+	struct inode *inode;
+
+	_enter("%pd", dentry);
+
+	ASSERTCMP(d_inode(dentry), ==, NULL);
+
+	if (flags & LOOKUP_CREATE)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	if (qname->len >= AFSNAMEMAX) {
+		_leave(" = -ENAMETOOLONG");
+		return ERR_PTR(-ENAMETOOLONG);
+	}
+
+	if (qname->len == 5 && memcmp(qname->name, "@cell", 5) == 0)
+		return afs_lookup_atcell(dentry);
+
+	inode = afs_try_auto_mntpt(dentry, dir);
+	return d_splice_alias(inode, dentry);
+}
+
+/* Inode operations for the dynamic root directory: only lookup is provided. */
+const struct inode_operations afs_dynroot_inode_operations = {
+ .lookup = afs_dynroot_lookup,
+};
+
+/*
+ * Dirs in the dynamic root don't need revalidation.
+ *
+ * Both arguments are ignored; the function always returns 1.
+ */
+static int afs_dynroot_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ return 1;
+}
+
+/*
+ * Dentry operations for the dynamic root: revalidation always succeeds and
+ * always_delete_dentry is used for ->d_delete; release and automount use the
+ * common AFS handlers.
+ */
+const struct dentry_operations afs_dynroot_dentry_operations = {
+ .d_revalidate = afs_dynroot_d_revalidate,
+ .d_delete = always_delete_dentry,
+ .d_release = afs_d_release,
+ .d_automount = afs_d_automount,
+};
+
+/*
+ * Create a manually added cell mount directory.
+ * - The caller must hold net->proc_cells_lock
+ *
+ * Does nothing and returns 0 if there is no dynroot superblock or it has no
+ * active references.  Otherwise the cell's name is looked up in the root
+ * (letting ->lookup create the entry) and the dentry is pinned by keeping the
+ * lookup's reference, marked by a non-NULL d_fsdata.  Returns 0 or the error
+ * from the lookup.
+ */
+int afs_dynroot_mkdir(struct afs_net *net, struct afs_cell *cell)
+{
+	struct super_block *sb = net->dynroot_sb;
+	struct dentry *root, *pinned;
+	int ret;
+
+	if (!sb || atomic_read(&sb->s_active) == 0)
+		return 0;
+
+	/* Let the ->lookup op do the creation */
+	root = sb->s_root;
+	inode_lock(root->d_inode);
+
+	pinned = lookup_one_len(cell->name, root, cell->name_len);
+	if (IS_ERR(pinned)) {
+		ret = PTR_ERR(pinned);
+	} else {
+		/* Note that we're retaining an extra ref on the dentry */
+		pinned->d_fsdata = (void *)1UL;
+		ret = 0;
+	}
+
+	inode_unlock(root->d_inode);
+	return ret;
+}
+
+/*
+ * Remove a manually added cell mount directory.
+ * - The caller must hold net->proc_cells_lock
+ *
+ * Does nothing if there is no dynroot superblock or it has no active
+ * references.  If a dentry for the cell name exists, the pin taken by
+ * afs_dynroot_mkdir() (flagged by d_fsdata) is dropped, followed by the
+ * reference obtained by the lookup here.
+ */
+void afs_dynroot_rmdir(struct afs_net *net, struct afs_cell *cell)
+{
+	struct super_block *sb = net->dynroot_sb;
+	struct dentry *root, *victim;
+
+	if (!sb || atomic_read(&sb->s_active) == 0)
+		return;
+
+	root = sb->s_root;
+	inode_lock(root->d_inode);
+
+	/* Don't want to trigger a lookup call, which will re-add the cell */
+	victim = try_lookup_one_len(cell->name, root, cell->name_len);
+	if (!IS_ERR_OR_NULL(victim)) {
+		_debug("rmdir %pd %u", victim, d_count(victim));
+
+		if (victim->d_fsdata) {
+			_debug("unpin %u", d_count(victim));
+			victim->d_fsdata = NULL;
+			dput(victim);
+		}
+		dput(victim);
+	} else {
+		_debug("lookup %ld", PTR_ERR(victim));
+	}
+
+	inode_unlock(root->d_inode);
+	_leave("");
+}
+
+/*
+ * Populate a newly created dynamic root with cell names.
+ *
+ * Records @sb as the namespace's dynroot superblock and creates a pinned
+ * directory for every cell on net->proc_cells, all under proc_cells_lock.
+ * On the first failure the superblock pointer is cleared again and the error
+ * is returned; otherwise 0 is returned.
+ */
+int afs_dynroot_populate(struct super_block *sb)
+{
+	struct afs_net *net = afs_sb2net(sb);
+	struct afs_cell *cell;
+	int ret = 0;
+
+	mutex_lock(&net->proc_cells_lock);
+
+	net->dynroot_sb = sb;
+	hlist_for_each_entry(cell, &net->proc_cells, proc_link) {
+		ret = afs_dynroot_mkdir(net, cell);
+		if (ret < 0)
+			break;
+	}
+
+	if (ret < 0)
+		net->dynroot_sb = NULL;
+	else
+		ret = 0;
+
+	mutex_unlock(&net->proc_cells_lock);
+	return ret;
+}
+
+/*
+ * When a dynamic root is in the process of being destroyed, depopulate it of
+ * pinned directories.
+ *
+ * The namespace's dynroot superblock pointer is cleared first (if it still
+ * refers to @sb); then, if the superblock has a root dentry, the pin on every
+ * child flagged via d_fsdata is dropped.
+ */
+void afs_dynroot_depopulate(struct super_block *sb)
+{
+	struct afs_net *net = afs_sb2net(sb);
+	struct dentry *root = sb->s_root;
+	struct dentry *child, *next;
+
+	/* Prevent more subdirs from being created */
+	mutex_lock(&net->proc_cells_lock);
+	if (net->dynroot_sb == sb)
+		net->dynroot_sb = NULL;
+	mutex_unlock(&net->proc_cells_lock);
+
+	if (!root)
+		return;
+
+	inode_lock(root->d_inode);
+
+	/* Remove all the pins for dirs created for manually added cells */
+	list_for_each_entry_safe(child, next, &root->d_subdirs, d_child) {
+		if (!child->d_fsdata)
+			continue;
+		child->d_fsdata = NULL;
+		dput(child);
+	}
+
+	inode_unlock(root->d_inode);
+}
diff --git a/fs/afs/file.c b/fs/afs/file.c
new file mode 100644
index 000000000..2eeab57df
--- /dev/null
+++ b/fs/afs/file.c
@@ -0,0 +1,598 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AFS filesystem file handling
+ *
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/writeback.h>
+#include <linux/gfp.h>
+#include <linux/task_io_accounting_ops.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/netfs.h>
+#include "internal.h"
+
+static int afs_file_mmap(struct file *file, struct vm_area_struct *vma);
+static int afs_symlink_read_folio(struct file *file, struct folio *folio);
+static void afs_invalidate_folio(struct folio *folio, size_t offset,
+ size_t length);
+static bool afs_release_folio(struct folio *folio, gfp_t gfp_flags);
+
+static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter);
+static void afs_vm_open(struct vm_area_struct *area);
+static void afs_vm_close(struct vm_area_struct *area);
+static vm_fault_t afs_vm_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff);
+
+/*
+ * File operations: open/release, read/write, mmap, fsync and locking use AFS
+ * handlers; llseek and the splice operations use generic helpers.
+ */
+const struct file_operations afs_file_operations = {
+ .open = afs_open,
+ .release = afs_release,
+ .llseek = generic_file_llseek,
+ .read_iter = afs_file_read_iter,
+ .write_iter = afs_file_write,
+ .mmap = afs_file_mmap,
+ .splice_read = generic_file_splice_read,
+ .splice_write = iter_file_splice_write,
+ .fsync = afs_fsync,
+ .lock = afs_lock,
+ .flock = afs_flock,
+};
+
+/* Inode operations: attribute get/set and permission checking. */
+const struct inode_operations afs_file_inode_operations = {
+ .getattr = afs_getattr,
+ .setattr = afs_setattr,
+ .permission = afs_permission,
+};
+
+/*
+ * Address-space operations for file data: reads go through the netfs
+ * helpers, the write side through the AFS write handlers.
+ */
+const struct address_space_operations afs_file_aops = {
+ .read_folio = netfs_read_folio,
+ .readahead = netfs_readahead,
+ .dirty_folio = afs_dirty_folio,
+ .launder_folio = afs_launder_folio,
+ .release_folio = afs_release_folio,
+ .invalidate_folio = afs_invalidate_folio,
+ .write_begin = afs_write_begin,
+ .write_end = afs_write_end,
+ .writepage = afs_writepage,
+ .writepages = afs_writepages,
+};
+
+/*
+ * Address-space operations for symlinks: only read, release and invalidate
+ * hooks are provided.
+ */
+const struct address_space_operations afs_symlink_aops = {
+ .read_folio = afs_symlink_read_folio,
+ .release_folio = afs_release_folio,
+ .invalidate_folio = afs_invalidate_folio,
+};
+
+/*
+ * VM operations installed by afs_file_mmap(): open/close, map_pages and
+ * page_mkwrite use AFS handlers; ->fault uses filemap_fault.
+ */
+static const struct vm_operations_struct afs_vm_ops = {
+ .open = afs_vm_open,
+ .close = afs_vm_close,
+ .fault = filemap_fault,
+ .map_pages = afs_vm_map_pages,
+ .page_mkwrite = afs_page_mkwrite,
+};
+
+/*
+ * Discard a pin on a writeback key.
+ *
+ * A NULL @wbk is tolerated.  When the last reference goes away the key it
+ * carries is released and the record freed.
+ */
+void afs_put_wb_key(struct afs_wb_key *wbk)
+{
+	if (!wbk)
+		return;
+	if (!refcount_dec_and_test(&wbk->usage))
+		return;
+
+	key_put(wbk->key);
+	kfree(wbk);
+}
+
+/*
+ * Cache key for writeback.
+ *
+ * Attaches to @af a writeback-key record for af->key on @vnode.  If the
+ * vnode's wb_keys list already holds a record for that key, its usage count
+ * is bumped and it is shared; otherwise a freshly allocated record (usage 2,
+ * holding its own reference on the key) is appended to the list.
+ *
+ * Returns 0 on success or -ENOMEM if the record cannot be allocated.
+ */
+int afs_cache_wb_key(struct afs_vnode *vnode, struct afs_file *af)
+{
+	struct afs_wb_key *fresh, *cur, *existing = NULL;
+
+	/* Allocate up front since we can't sleep under wb_lock. */
+	fresh = kzalloc(sizeof(*fresh), GFP_KERNEL);
+	if (!fresh)
+		return -ENOMEM;
+	refcount_set(&fresh->usage, 2);
+	fresh->key = af->key;
+
+	spin_lock(&vnode->wb_lock);
+	list_for_each_entry(cur, &vnode->wb_keys, vnode_link) {
+		if (cur->key == fresh->key) {
+			existing = cur;
+			break;
+		}
+	}
+
+	if (existing) {
+		refcount_inc(&existing->usage);
+		spin_unlock(&vnode->wb_lock);
+		af->wb = existing;
+		kfree(fresh);
+		return 0;
+	}
+
+	key_get(fresh->key);
+	list_add_tail(&fresh->vnode_link, &vnode->wb_keys);
+	spin_unlock(&vnode->wb_lock);
+	af->wb = fresh;
+	return 0;
+}
+
+/*
+ * open an AFS file or directory and attach a key to it
+ *
+ * Requests a key for the vnode's cell, allocates the per-open afs_file
+ * record, validates the vnode and, for files opened for writing, caches a
+ * writeback key.  On success the record is stored in file->private_data and
+ * 0 is returned; on failure the record and the key are released and a
+ * negative error code is returned.
+ */
+int afs_open(struct inode *inode, struct file *file)
+{
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+ struct afs_file *af;
+ struct key *key;
+ int ret;
+
+ _enter("{%llx:%llu},", vnode->fid.vid, vnode->fid.vnode);
+
+ key = afs_request_key(vnode->volume->cell);
+ if (IS_ERR(key)) {
+ ret = PTR_ERR(key);
+ goto error;
+ }
+
+ af = kzalloc(sizeof(*af), GFP_KERNEL);
+ if (!af) {
+ ret = -ENOMEM;
+ goto error_key;
+ }
+ /* The afs_file takes over the key reference obtained above. */
+ af->key = key;
+
+ ret = afs_validate(vnode, key);
+ if (ret < 0)
+ goto error_af;
+
+ if (file->f_mode & FMODE_WRITE) {
+ ret = afs_cache_wb_key(vnode, af);
+ if (ret < 0)
+ goto error_af;
+ }
+
+ if (file->f_flags & O_TRUNC)
+ set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
+
+ /* Second argument tells fscache whether this user will write. */
+ fscache_use_cookie(afs_vnode_cache(vnode), file->f_mode & FMODE_WRITE);
+
+ file->private_data = af;
+ _leave(" = 0");
+ return 0;
+
+error_af:
+ kfree(af);
+error_key:
+ key_put(key);
+error:
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * release an AFS file or directory and discard its key
+ *
+ * For files opened for writing, the data is synced first and the result of
+ * that sync is what this function returns (0 otherwise).  The writeback key
+ * pin, the cache cookie use, the key and the afs_file record are then
+ * released regardless of the sync result.
+ */
+int afs_release(struct inode *inode, struct file *file)
+{
+ struct afs_vnode_cache_aux aux;
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+ struct afs_file *af = file->private_data;
+ loff_t i_size;
+ int ret = 0;
+
+ _enter("{%llx:%llu},", vnode->fid.vid, vnode->fid.vnode);
+
+ if ((file->f_mode & FMODE_WRITE))
+ ret = vfs_fsync(file, 0);
+
+ file->private_data = NULL;
+ if (af->wb)
+ afs_put_wb_key(af->wb);
+
+ /* Writers hand the current aux data and file size back to the cache;
+ * readers just drop their use of the cookie.
+ */
+ if ((file->f_mode & FMODE_WRITE)) {
+ i_size = i_size_read(&vnode->netfs.inode);
+ afs_set_cache_aux(vnode, &aux);
+ fscache_unuse_cookie(afs_vnode_cache(vnode), &aux, &i_size);
+ } else {
+ fscache_unuse_cookie(afs_vnode_cache(vnode), NULL, NULL);
+ }
+
+ key_put(af->key);
+ kfree(af);
+ afs_prune_wb_keys(vnode);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Allocate a new read record.
+ *
+ * The record is zeroed and starts with a usage count of 1.  Returns NULL if
+ * the allocation fails.
+ */
+struct afs_read *afs_alloc_read(gfp_t gfp)
+{
+	struct afs_read *rd = kzalloc(sizeof(*rd), gfp);
+
+	if (!rd)
+		return NULL;
+
+	refcount_set(&rd->usage, 1);
+	return rd;
+}
+
+/*
+ * Dispose of a ref to a read record.
+ *
+ * On the final put the optional cleanup hook is run, the key is released and
+ * the record is freed.
+ */
+void afs_put_read(struct afs_read *req)
+{
+	if (!refcount_dec_and_test(&req->usage))
+		return;
+
+	if (req->cleanup)
+		req->cleanup(req);
+	key_put(req->key);
+	kfree(req);
+}
+
+/*
+ * Report the outcome of a fetch-data operation to whoever is waiting on it.
+ *
+ * The operation's error (translated from the abort code if it was
+ * -ECONNABORTED) is recorded in the read record.  If the read belongs to a
+ * netfs subrequest, that subrequest is terminated with either the error or
+ * the number of bytes actually fetched; otherwise the record's optional
+ * done() hook is called.
+ */
+static void afs_fetch_data_notify(struct afs_operation *op)
+{
+	struct afs_read *req = op->fetch.req;
+	struct netfs_io_subrequest *subreq = req->subreq;
+	int error = op->error;
+
+	if (error == -ECONNABORTED)
+		error = afs_abort_to_error(op->ac.abort_code);
+	req->error = error;
+
+	if (!subreq) {
+		if (req->done)
+			req->done(req);
+		return;
+	}
+
+	__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+	netfs_subreq_terminated(subreq, error ? error : req->actual_len, false);
+	req->subreq = NULL;
+}
+
+/*
+ * Success handler for fetch-data: commit the returned status for file[0],
+ * bump the fetch statistics and notify the waiter.
+ */
+static void afs_fetch_data_success(struct afs_operation *op)
+{
+ struct afs_vnode *vnode = op->file[0].vnode;
+
+ _enter("op=%08x", op->debug_id);
+ afs_vnode_commit_status(op, &op->file[0]);
+ afs_stat_v(vnode, n_fetches);
+ atomic_long_add(op->fetch.req->actual_len, &op->net->n_fetch_bytes);
+ afs_fetch_data_notify(op);
+}
+
+/*
+ * Put handler for fetch-data: copy the operation's final error into the read
+ * record and drop the reference the operation held on it.
+ */
+static void afs_fetch_data_put(struct afs_operation *op)
+{
+ op->fetch.req->error = op->error;
+ afs_put_read(op->fetch.req);
+}
+
+/*
+ * Operation table for fetching file data.  Note that the failure path goes
+ * straight to afs_fetch_data_notify(), i.e. without committing status.
+ */
+static const struct afs_operation_ops afs_fetch_data_operation = {
+ .issue_afs_rpc = afs_fs_fetch_data,
+ .issue_yfs_rpc = yfs_fs_fetch_data,
+ .success = afs_fetch_data_success,
+ .aborted = afs_check_for_remote_deletion,
+ .failed = afs_fetch_data_notify,
+ .put = afs_fetch_data_put,
+};
+
+/*
+ * Fetch file data from the volume.
+ *
+ * Runs a synchronous fetch-data operation for @req against @vnode using
+ * req->key.  The operation takes its own reference on @req, which is dropped
+ * by the operation's put handler; the caller keeps its reference.
+ *
+ * If the operation cannot be allocated and @req belongs to a netfs
+ * subrequest, the subrequest is terminated with the error before returning.
+ * Returns the result of the operation or the allocation error.
+ */
+int afs_fetch_data(struct afs_vnode *vnode, struct afs_read *req)
+{
+ struct afs_operation *op;
+
+ _enter("%s{%llx:%llu.%u},%x,,,",
+ vnode->volume->name,
+ vnode->fid.vid,
+ vnode->fid.vnode,
+ vnode->fid.unique,
+ key_serial(req->key));
+
+ op = afs_alloc_operation(req->key, vnode->volume);
+ if (IS_ERR(op)) {
+ if (req->subreq)
+ netfs_subreq_terminated(req->subreq, PTR_ERR(op), false);
+ return PTR_ERR(op);
+ }
+
+ afs_op_set_vnode(op, 0, vnode);
+
+ op->fetch.req = afs_get_read(req);
+ op->ops = &afs_fetch_data_operation;
+ return afs_do_sync_operation(op);
+}
+
+/*
+ * netfs ->issue_read hook: fetch the not-yet-transferred part of @subreq
+ * from the server directly into the inode's page cache.
+ *
+ * If the read record cannot be allocated the subrequest is terminated with
+ * -ENOMEM.  The return value of afs_fetch_data() is not used here.
+ */
+static void afs_issue_read(struct netfs_io_subrequest *subreq)
+{
+ struct afs_vnode *vnode = AFS_FS_I(subreq->rreq->inode);
+ struct afs_read *fsreq;
+
+ fsreq = afs_alloc_read(GFP_NOFS);
+ if (!fsreq)
+ return netfs_subreq_terminated(subreq, -ENOMEM, false);
+
+ fsreq->subreq = subreq;
+ /* Resume after whatever part of the subrequest is already done. */
+ fsreq->pos = subreq->start + subreq->transferred;
+ fsreq->len = subreq->len - subreq->transferred;
+ /* netfs_priv holds the key stashed by afs_init_request(). */
+ fsreq->key = key_get(subreq->rreq->netfs_priv);
+ fsreq->vnode = vnode;
+ fsreq->iter = &fsreq->def_iter;
+
+ iov_iter_xarray(&fsreq->def_iter, ITER_DEST,
+ &fsreq->vnode->netfs.inode.i_mapping->i_pages,
+ fsreq->pos, fsreq->len);
+
+ afs_fetch_data(fsreq->vnode, fsreq);
+ /* Drop the reference from afs_alloc_read(). */
+ afs_put_read(fsreq);
+}
+
+/*
+ * ->read_folio for symlinks: fetch the folio's worth of data from the server
+ * straight into the page cache.
+ *
+ * No key is set on the read record, so afs_fetch_data() is handed a NULL
+ * key.  The folio is marked uptodate only if the fetch succeeds and is
+ * always unlocked before returning.  Returns 0, -ENOMEM if the read record
+ * cannot be allocated, or the error from the fetch.
+ */
+static int afs_symlink_read_folio(struct file *file, struct folio *folio)
+{
+	struct afs_vnode *vnode = AFS_FS_I(folio->mapping->host);
+	struct afs_read *fsreq;
+	int ret;
+
+	fsreq = afs_alloc_read(GFP_NOFS);
+	if (!fsreq)
+		return -ENOMEM;
+
+	fsreq->pos	= folio_pos(folio);
+	fsreq->len	= folio_size(folio);
+	fsreq->vnode	= vnode;
+	fsreq->iter	= &fsreq->def_iter;
+	iov_iter_xarray(&fsreq->def_iter, ITER_DEST, &folio->mapping->i_pages,
+			fsreq->pos, fsreq->len);
+
+	ret = afs_fetch_data(fsreq->vnode, fsreq);
+	if (ret == 0)
+		folio_mark_uptodate(folio);
+	folio_unlock(folio);
+
+	/* Drop the reference from afs_alloc_read(); the operation only ever
+	 * drops the one it took itself, so without this the record leaks.
+	 */
+	afs_put_read(fsreq);
+	return ret;
+}
+
+/*
+ * netfs ->init_request hook: stash a reference to the file's key in the
+ * request's private pointer.  Always returns 0.
+ */
+static int afs_init_request(struct netfs_io_request *rreq, struct file *file)
+{
+ rreq->netfs_priv = key_get(afs_file_key(file));
+ return 0;
+}
+
+/*
+ * netfs ->begin_cache_operation hook: start a cache read on the vnode's
+ * cookie when fscache support is built in; otherwise return -ENOBUFS.
+ */
+static int afs_begin_cache_operation(struct netfs_io_request *rreq)
+{
+#ifdef CONFIG_AFS_FSCACHE
+ struct afs_vnode *vnode = AFS_FS_I(rreq->inode);
+
+ return fscache_begin_read_operation(&rreq->cache_resources,
+ afs_vnode_cache(vnode));
+#else
+ return -ENOBUFS;
+#endif
+}
+
+/*
+ * netfs ->check_write_begin hook: refuse the write with -ESTALE if the vnode
+ * has been flagged as deleted, otherwise return 0.  Only @file is examined.
+ */
+static int afs_check_write_begin(struct file *file, loff_t pos, unsigned len,
+				 struct folio **foliop, void **_fsdata)
+{
+	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
+
+	if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+		return -ESTALE;
+	return 0;
+}
+
+/* netfs ->free_request hook: drop the key reference held in netfs_priv. */
+static void afs_free_request(struct netfs_io_request *rreq)
+{
+ key_put(rreq->netfs_priv);
+}
+
+/* Hooks through which the netfs library calls back into AFS. */
+const struct netfs_request_ops afs_req_ops = {
+ .init_request = afs_init_request,
+ .free_request = afs_free_request,
+ .begin_cache_operation = afs_begin_cache_operation,
+ .check_write_begin = afs_check_write_begin,
+ .issue_read = afs_issue_read,
+};
+
+/*
+ * Pass the writeback control and the vnode's cache cookie to
+ * fscache_unpin_writeback().  Always returns 0.
+ */
+int afs_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+ fscache_unpin_writeback(wbc, afs_vnode_cache(AFS_FS_I(inode)));
+ return 0;
+}
+
+/*
+ * Adjust the dirty region of the page on truncation or full invalidation,
+ * getting rid of the markers altogether if the region is entirely invalidated.
+ *
+ * @offset/@length describe the byte range of @folio being invalidated.  The
+ * dirty range [f, t) is decoded from the folio's private data and then:
+ *  - a whole-folio invalidation detaches the private data;
+ *  - a folio dirtied through mmap is left untouched;
+ *  - a range that misses the dirty region, or that lies strictly inside it,
+ *    changes nothing;
+ *  - a range that covers the whole dirty region clears the dirty state and
+ *    detaches the private data;
+ *  - otherwise the overlapping end of the dirty region is trimmed off.
+ */
+static void afs_invalidate_dirty(struct folio *folio, size_t offset,
+ size_t length)
+{
+ struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
+ unsigned long priv;
+ unsigned int f, t, end = offset + length;
+
+ priv = (unsigned long)folio_get_private(folio);
+
+ /* we clean up only if the entire page is being invalidated */
+ if (offset == 0 && length == folio_size(folio))
+ goto full_invalidate;
+
+ /* If the page was dirtied by page_mkwrite(), the PTE stays writable
+ * and we don't get another notification to tell us to expand it
+ * again.
+ */
+ if (afs_is_folio_dirty_mmapped(priv))
+ return;
+
+ /* We may need to shorten the dirty region */
+ f = afs_folio_dirty_from(folio, priv);
+ t = afs_folio_dirty_to(folio, priv);
+
+ if (t <= offset || f >= end)
+ return; /* Doesn't overlap */
+
+ if (f < offset && t > end)
+ return; /* Splits the dirty region - just absorb it */
+
+ if (f >= offset && t <= end)
+ goto undirty;
+
+ /* Partial overlap: cut the invalidated end off the dirty region. */
+ if (f < offset)
+ t = offset;
+ else
+ f = end;
+ if (f == t)
+ goto undirty;
+
+ priv = afs_folio_dirty(folio, f, t);
+ folio_change_private(folio, (void *)priv);
+ trace_afs_folio_dirty(vnode, tracepoint_string("trunc"), folio);
+ return;
+
+undirty:
+ trace_afs_folio_dirty(vnode, tracepoint_string("undirty"), folio);
+ folio_clear_dirty_for_io(folio);
+ /* Deliberately falls through to drop the private data as well. */
+full_invalidate:
+ trace_afs_folio_dirty(vnode, tracepoint_string("inval"), folio);
+ folio_detach_private(folio);
+}
+
+/*
+ * invalidate part or all of a page
+ * - if the folio carries private dirty-region data, it is adjusted (or
+ *   removed when the whole folio is invalidated) by afs_invalidate_dirty()
+ * - then wait for any fscache activity on the folio to finish
+ *
+ * The folio must be locked by the caller.
+ */
+static void afs_invalidate_folio(struct folio *folio, size_t offset,
+ size_t length)
+{
+ _enter("{%lu},%zu,%zu", folio->index, offset, length);
+
+ BUG_ON(!folio_test_locked(folio));
+
+ if (folio_get_private(folio))
+ afs_invalidate_dirty(folio, offset, length);
+
+ folio_wait_fscache(folio);
+ _leave("");
+}
+
+/*
+ * release a page and clean up its private state if it's not busy
+ * - return true if the page can now be released, false if not
+ *
+ * With fscache support built in, a folio still marked for fscache is refused
+ * when called from kswapd or without __GFP_FS in @gfp; otherwise the cache
+ * activity is waited for.  Any private data is then detached.
+ */
+static bool afs_release_folio(struct folio *folio, gfp_t gfp)
+{
+ struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
+
+ _enter("{{%llx:%llu}[%lu],%lx},%x",
+ vnode->fid.vid, vnode->fid.vnode, folio_index(folio), folio->flags,
+ gfp);
+
+ /* deny if folio is being written to the cache and the caller hasn't
+ * elected to wait */
+#ifdef CONFIG_AFS_FSCACHE
+ if (folio_test_fscache(folio)) {
+ if (current_is_kswapd() || !(gfp & __GFP_FS))
+ return false;
+ folio_wait_fscache(folio);
+ }
+ fscache_note_page_release(afs_vnode_cache(vnode));
+#endif
+
+ if (folio_test_private(folio)) {
+ trace_afs_folio_dirty(vnode, tracepoint_string("rel"), folio);
+ folio_detach_private(folio);
+ }
+
+ /* Indicate that the folio can be released */
+ _leave(" = T");
+ return true;
+}
+
+/*
+ * Count a new mapping of @vnode.  When the count goes from 0 to 1, the vnode
+ * is added to its cell's fs_open_mmaps list (unless already on it) under the
+ * cell's fs_open_mmaps_lock.
+ */
+static void afs_add_open_mmap(struct afs_vnode *vnode)
+{
+ if (atomic_inc_return(&vnode->cb_nr_mmap) == 1) {
+ down_write(&vnode->volume->cell->fs_open_mmaps_lock);
+
+ if (list_empty(&vnode->cb_mmap_link))
+ list_add_tail(&vnode->cb_mmap_link,
+ &vnode->volume->cell->fs_open_mmaps);
+
+ up_write(&vnode->volume->cell->fs_open_mmaps_lock);
+ }
+}
+
+/*
+ * Drop a mapping count on @vnode.  When the count reaches zero the vnode is
+ * taken off its cell's fs_open_mmaps list and any pending cb_work is flushed.
+ */
+static void afs_drop_open_mmap(struct afs_vnode *vnode)
+{
+ if (!atomic_dec_and_test(&vnode->cb_nr_mmap))
+ return;
+
+ down_write(&vnode->volume->cell->fs_open_mmaps_lock);
+
+ /* Re-check under the lock: the count is read again here, so a mapping
+ * added since the decrement keeps the vnode on the list.
+ */
+ if (atomic_read(&vnode->cb_nr_mmap) == 0)
+ list_del_init(&vnode->cb_mmap_link);
+
+ up_write(&vnode->volume->cell->fs_open_mmaps_lock);
+ flush_work(&vnode->cb_work);
+}
+
+/*
+ * Handle setting up a memory mapping on an AFS file.
+ *
+ * The mapping is counted against the vnode before generic_file_mmap() is
+ * called.  On success the AFS vm_ops are installed; on failure the count is
+ * dropped again and the error returned.
+ */
+static int afs_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
+	int ret;
+
+	afs_add_open_mmap(vnode);
+
+	ret = generic_file_mmap(file, vma);
+	if (ret != 0) {
+		afs_drop_open_mmap(vnode);
+		return ret;
+	}
+
+	vma->vm_ops = &afs_vm_ops;
+	return 0;
+}
+
+/* VMA ->open hook: count another mapping of the file's vnode. */
+static void afs_vm_open(struct vm_area_struct *vma)
+{
+ afs_add_open_mmap(AFS_FS_I(file_inode(vma->vm_file)));
+}
+
+/* VMA ->close hook: drop one mapping count on the file's vnode. */
+static void afs_vm_close(struct vm_area_struct *vma)
+{
+ afs_drop_open_mmap(AFS_FS_I(file_inode(vma->vm_file)));
+}
+
+/*
+ * VMA ->map_pages hook: validate the vnode with the file's key before
+ * letting filemap_map_pages() map pages.
+ *
+ * Validation errors are translated to fault codes: -ENOMEM to VM_FAULT_OOM,
+ * -EINTR/-ERESTARTSYS to VM_FAULT_RETRY and anything else (including
+ * -ESTALE) to VM_FAULT_SIGBUS.
+ */
+static vm_fault_t afs_vm_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff)
+{
+ struct afs_vnode *vnode = AFS_FS_I(file_inode(vmf->vma->vm_file));
+ struct afs_file *af = vmf->vma->vm_file->private_data;
+
+ switch (afs_validate(vnode, af->key)) {
+ case 0:
+ return filemap_map_pages(vmf, start_pgoff, end_pgoff);
+ case -ENOMEM:
+ return VM_FAULT_OOM;
+ case -EINTR:
+ case -ERESTARTSYS:
+ return VM_FAULT_RETRY;
+ case -ESTALE:
+ default:
+ return VM_FAULT_SIGBUS;
+ }
+}
+
+/*
+ * ->read_iter: validate the vnode with the file's key, then hand the read to
+ * generic_file_read_iter().  A validation failure is returned as-is.
+ */
+static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+	struct file *filp = iocb->ki_filp;
+	struct afs_file *af = filp->private_data;
+	int err;
+
+	err = afs_validate(AFS_FS_I(file_inode(filp)), af->key);
+	if (err < 0)
+		return err;
+
+	return generic_file_read_iter(iocb, iter);
+}
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
new file mode 100644
index 000000000..bbcc5afd1
--- /dev/null
+++ b/fs/afs/flock.c
@@ -0,0 +1,877 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AFS file locking support
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include "internal.h"
+
+/*
+ * Values stored in a file_lock's fl_u.afs.state.  Negative error codes are
+ * also stored there when a waiter is rejected.
+ * NOTE(review): AFS_LOCK_PENDING is presumably the initial state of a queued
+ * waiter - confirm against the code that queues locks.
+ */
+#define AFS_LOCK_GRANTED 0
+#define AFS_LOCK_PENDING 1
+#define AFS_LOCK_YOUR_TRY 2
+
+struct workqueue_struct *afs_lock_manager;
+
+static void afs_next_locker(struct afs_vnode *vnode, int error);
+static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl);
+static void afs_fl_release_private(struct file_lock *fl);
+
+/* file_lock callbacks for copying a lock and releasing its private state. */
+static const struct file_lock_operations afs_lock_ops = {
+ .fl_copy_lock = afs_fl_copy_lock,
+ .fl_release_private = afs_fl_release_private,
+};
+
+/* Change the vnode's lock state, logging the old and new values. */
+static inline void afs_set_lock_state(struct afs_vnode *vnode, enum afs_lock_state state)
+{
+ _debug("STATE %u -> %u", vnode->lock_state, state);
+ vnode->lock_state = state;
+}
+
+static atomic_t afs_file_lock_debug_id;
+
+/*
+ * if the callback is broken on this vnode, then the lock may now be available
+ *
+ * If the vnode is waiting for a callback, hand the lock attempt to the next
+ * waiter.  Takes vnode->lock internally.
+ */
+void afs_lock_may_be_available(struct afs_vnode *vnode)
+{
+ _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
+
+ spin_lock(&vnode->lock);
+ if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB)
+ afs_next_locker(vnode, 0);
+ trace_afs_flock_ev(vnode, NULL, afs_flock_callback_break, 0);
+ spin_unlock(&vnode->lock);
+}
+
+/*
+ * the lock will time out in 5 minutes unless we extend it, so schedule
+ * extension in a bit less than that time
+ *
+ * The extension is queued for AFS_LOCKWAIT * 1000 / 2 milliseconds after
+ * vnode->locked_at; if that moment has already passed, the work is queued
+ * with no delay.
+ */
+static void afs_schedule_lock_extension(struct afs_vnode *vnode)
+{
+ ktime_t expires_at, now, duration;
+ u64 duration_j;
+
+ expires_at = ktime_add_ms(vnode->locked_at, AFS_LOCKWAIT * 1000 / 2);
+ now = ktime_get_real();
+ duration = ktime_sub(expires_at, now);
+ if (duration <= 0)
+ duration_j = 0;
+ else
+ duration_j = nsecs_to_jiffies(ktime_to_ns(duration));
+
+ queue_delayed_work(afs_lock_manager, &vnode->lock_work, duration_j);
+}
+
+/*
+ * In the case of successful completion of a lock operation, record the time
+ * the reply appeared and start the lock extension timer.
+ *
+ * The time recorded is the call's issue_time.  Nothing is done if the call
+ * ended in error.
+ */
+void afs_lock_op_done(struct afs_call *call)
+{
+ struct afs_operation *op = call->op;
+ struct afs_vnode *vnode = op->file[0].vnode;
+
+ if (call->error == 0) {
+ spin_lock(&vnode->lock);
+ trace_afs_flock_ev(vnode, NULL, afs_flock_timestamp, 0);
+ vnode->locked_at = call->issue_time;
+ afs_schedule_lock_extension(vnode);
+ spin_unlock(&vnode->lock);
+ }
+}
+
+/*
+ * grant one or more locks (readlocks are allowed to jump the queue if the
+ * first lock in the queue is itself a readlock)
+ * - the caller must hold the vnode lock
+ *
+ * When the vnode holds a read lock, pending write locks are skipped and stay
+ * queued; when it holds a write lock, every pending lock is moved to the
+ * granted list.  Each granted waiter is marked AFS_LOCK_GRANTED and woken.
+ */
+static void afs_grant_locks(struct afs_vnode *vnode)
+{
+ struct file_lock *p, *_p;
+ bool exclusive = (vnode->lock_type == AFS_LOCK_WRITE);
+
+ list_for_each_entry_safe(p, _p, &vnode->pending_locks, fl_u.afs.link) {
+ if (!exclusive && p->fl_type == F_WRLCK)
+ continue;
+
+ list_move_tail(&p->fl_u.afs.link, &vnode->granted_locks);
+ p->fl_u.afs.state = AFS_LOCK_GRANTED;
+ trace_afs_flock_op(vnode, p, afs_flock_op_grant);
+ wake_up(&p->fl_wait);
+ }
+}
+
+/*
+ * If an error is specified, reject every pending lock that matches the
+ * authentication and type of the lock we failed to get. If there are any
+ * remaining lockers, try to wake up one of them to have a go.
+ *
+ * The vnode's lock_key is cleared and released.  If a waiter was selected
+ * the vnode moves to AFS_VNODE_LOCK_SETTING and that waiter is woken with
+ * AFS_LOCK_YOUR_TRY; otherwise the vnode moves to AFS_VNODE_LOCK_NONE.
+ */
+static void afs_next_locker(struct afs_vnode *vnode, int error)
+{
+ struct file_lock *p, *_p, *next = NULL;
+ struct key *key = vnode->lock_key;
+ unsigned int fl_type = F_RDLCK;
+
+ _enter("");
+
+ if (vnode->lock_type == AFS_LOCK_WRITE)
+ fl_type = F_WRLCK;
+
+ list_for_each_entry_safe(p, _p, &vnode->pending_locks, fl_u.afs.link) {
+ if (error &&
+ p->fl_type == fl_type &&
+ afs_file_key(p->fl_file) == key) {
+ list_del_init(&p->fl_u.afs.link);
+ p->fl_u.afs.state = error;
+ wake_up(&p->fl_wait);
+ }
+
+ /* NOTE(review): a waiter rejected just above is not skipped
+ * here, so it can still be chosen as "next" and have its error
+ * state overwritten with AFS_LOCK_YOUR_TRY below - confirm
+ * whether a "continue" is intended after the rejection.
+ */
+ /* Select the next locker to hand off to. */
+ if (next &&
+ (next->fl_type == F_WRLCK || p->fl_type == F_RDLCK))
+ continue;
+ next = p;
+ }
+
+ vnode->lock_key = NULL;
+ key_put(key);
+
+ if (next) {
+ afs_set_lock_state(vnode, AFS_VNODE_LOCK_SETTING);
+ next->fl_u.afs.state = AFS_LOCK_YOUR_TRY;
+ trace_afs_flock_op(vnode, next, afs_flock_op_wake);
+ wake_up(&next->fl_wait);
+ } else {
+ afs_set_lock_state(vnode, AFS_VNODE_LOCK_NONE);
+ trace_afs_flock_ev(vnode, NULL, afs_flock_no_lockers, 0);
+ }
+
+ _leave("");
+}
+
+/*
+ * Kill off all waiters in the pending lock queue due to the vnode being
+ * deleted.
+ *
+ * The vnode is moved to AFS_VNODE_LOCK_DELETED, every pending waiter is
+ * dequeued and woken with -ENOENT, and the vnode's lock key is released.
+ */
+static void afs_kill_lockers_enoent(struct afs_vnode *vnode)
+{
+	struct file_lock *waiter;
+
+	afs_set_lock_state(vnode, AFS_VNODE_LOCK_DELETED);
+
+	for (;;) {
+		if (list_empty(&vnode->pending_locks))
+			break;
+
+		waiter = list_first_entry(&vnode->pending_locks,
+					  struct file_lock, fl_u.afs.link);
+		list_del_init(&waiter->fl_u.afs.link);
+		waiter->fl_u.afs.state = -ENOENT;
+		wake_up(&waiter->fl_wait);
+	}
+
+	key_put(vnode->lock_key);
+	vnode->lock_key = NULL;
+}
+
+/* Success handler shared by the lock operations: commit file[0]'s status. */
+static void afs_lock_success(struct afs_operation *op)
+{
+ _enter("op=%08x", op->debug_id);
+ afs_vnode_commit_status(op, &op->file[0]);
+}
+
+/*
+ * Operation table for acquiring a server lock.  Unlike the extend and release
+ * tables, this one also checks for remote deletion on abort.
+ */
+static const struct afs_operation_ops afs_set_lock_operation = {
+ .issue_afs_rpc = afs_fs_set_lock,
+ .issue_yfs_rpc = yfs_fs_set_lock,
+ .success = afs_lock_success,
+ .aborted = afs_check_for_remote_deletion,
+};
+
+/*
+ * Get a lock on a file
+ *
+ * Runs a synchronous set-lock operation of the given @type on @vnode using
+ * @key.  Returns the result of the operation, or the error from allocating
+ * it.
+ */
+static int afs_set_lock(struct afs_vnode *vnode, struct key *key,
+ afs_lock_type_t type)
+{
+ struct afs_operation *op;
+
+ _enter("%s{%llx:%llu.%u},%x,%u",
+ vnode->volume->name,
+ vnode->fid.vid,
+ vnode->fid.vnode,
+ vnode->fid.unique,
+ key_serial(key), type);
+
+ op = afs_alloc_operation(key, vnode->volume);
+ if (IS_ERR(op))
+ return PTR_ERR(op);
+
+ afs_op_set_vnode(op, 0, vnode);
+
+ op->lock.type = type;
+ op->ops = &afs_set_lock_operation;
+ return afs_do_sync_operation(op);
+}
+
+/* Operation table for extending a held server lock. */
+static const struct afs_operation_ops afs_extend_lock_operation = {
+ .issue_afs_rpc = afs_fs_extend_lock,
+ .issue_yfs_rpc = yfs_fs_extend_lock,
+ .success = afs_lock_success,
+};
+
+/*
+ * Extend a lock on a file
+ *
+ * Runs a synchronous extend-lock operation on @vnode using @key, with
+ * AFS_OPERATION_UNINTR set on the operation.  Returns the result of the
+ * operation, or the error from allocating it.
+ */
+static int afs_extend_lock(struct afs_vnode *vnode, struct key *key)
+{
+ struct afs_operation *op;
+
+ _enter("%s{%llx:%llu.%u},%x",
+ vnode->volume->name,
+ vnode->fid.vid,
+ vnode->fid.vnode,
+ vnode->fid.unique,
+ key_serial(key));
+
+ op = afs_alloc_operation(key, vnode->volume);
+ if (IS_ERR(op))
+ return PTR_ERR(op);
+
+ afs_op_set_vnode(op, 0, vnode);
+
+ op->flags |= AFS_OPERATION_UNINTR;
+ op->ops = &afs_extend_lock_operation;
+ return afs_do_sync_operation(op);
+}
+
+/* Operation table for releasing a held server lock. */
+static const struct afs_operation_ops afs_release_lock_operation = {
+ .issue_afs_rpc = afs_fs_release_lock,
+ .issue_yfs_rpc = yfs_fs_release_lock,
+ .success = afs_lock_success,
+};
+
+/*
+ * Release a lock on a file
+ *
+ * Runs a synchronous release-lock operation on @vnode using @key, with
+ * AFS_OPERATION_UNINTR set on the operation.  Returns the result of the
+ * operation, or the error from allocating it.
+ */
+static int afs_release_lock(struct afs_vnode *vnode, struct key *key)
+{
+ struct afs_operation *op;
+
+ _enter("%s{%llx:%llu.%u},%x",
+ vnode->volume->name,
+ vnode->fid.vid,
+ vnode->fid.vnode,
+ vnode->fid.unique,
+ key_serial(key));
+
+ op = afs_alloc_operation(key, vnode->volume);
+ if (IS_ERR(op))
+ return PTR_ERR(op);
+
+ afs_op_set_vnode(op, 0, vnode);
+
+ op->flags |= AFS_OPERATION_UNINTR;
+ op->ops = &afs_release_lock_operation;
+ return afs_do_sync_operation(op);
+}
+
+/*
+ * Delayed-work handler for a vnode's server lock. It dispatches on
+ * vnode->lock_state and does the work for a lock, including:
+ * - releasing a server lock that has been marked NEED_UNLOCK
+ * - extending a lock that's close to timing out (GRANTED)
+ * - probing for a lock we're waiting on but didn't get immediately
+ * (WAITING_FOR_CB)
+ * - handing a DELETED vnode's lockers to afs_kill_lockers_enoent()
+ *
+ * vnode->lock is held whilst the state is examined and is dropped around each
+ * RPC; it is never held on return.
+ */
+void afs_lock_work(struct work_struct *work)
+{
+ struct afs_vnode *vnode =
+ container_of(work, struct afs_vnode, lock_work.work);
+ struct key *key;
+ int ret;
+
+ _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
+
+ spin_lock(&vnode->lock);
+
+again:
+ /* Re-entered with vnode->lock held if the state changed under us. */
+ _debug("wstate %u for %p", vnode->lock_state, vnode);
+ switch (vnode->lock_state) {
+ case AFS_VNODE_LOCK_NEED_UNLOCK:
+ afs_set_lock_state(vnode, AFS_VNODE_LOCK_UNLOCKING);
+ trace_afs_flock_ev(vnode, NULL, afs_flock_work_unlocking, 0);
+ /* Drop the spinlock across the RPC. */
+ spin_unlock(&vnode->lock);
+
+ /* attempt to release the server lock; if it fails, we just
+ * wait 5 minutes and it'll expire anyway */
+ ret = afs_release_lock(vnode, vnode->lock_key);
+ /* NOTE(review): lock_state is read here without vnode->lock held. */
+ if (ret < 0 && vnode->lock_state != AFS_VNODE_LOCK_DELETED) {
+ trace_afs_flock_ev(vnode, NULL, afs_flock_release_fail,
+ ret);
+ printk(KERN_WARNING "AFS:"
+ " Failed to release lock on {%llx:%llx} error %d\n",
+ vnode->fid.vid, vnode->fid.vnode, ret);
+ }
+
+ spin_lock(&vnode->lock);
+ /* ENOENT is handled separately from every other outcome. */
+ if (ret == -ENOENT)
+ afs_kill_lockers_enoent(vnode);
+ else
+ afs_next_locker(vnode, 0);
+ spin_unlock(&vnode->lock);
+ return;
+
+ /* If we've already got a lock, then it must be time to extend that
+ * lock as AFS locks time out after 5 minutes.
+ */
+ case AFS_VNODE_LOCK_GRANTED:
+ _debug("extend");
+
+ ASSERT(!list_empty(&vnode->granted_locks));
+
+ /* Take our own ref on the key before dropping the spinlock. */
+ key = key_get(vnode->lock_key);
+ afs_set_lock_state(vnode, AFS_VNODE_LOCK_EXTENDING);
+ trace_afs_flock_ev(vnode, NULL, afs_flock_work_extending, 0);
+ spin_unlock(&vnode->lock);
+
+ ret = afs_extend_lock(vnode, key); /* RPC */
+ key_put(key);
+
+ if (ret < 0) {
+ trace_afs_flock_ev(vnode, NULL, afs_flock_extend_fail,
+ ret);
+ pr_warn("AFS: Failed to extend lock on {%llx:%llx} error %d\n",
+ vnode->fid.vid, vnode->fid.vnode, ret);
+ }
+
+ spin_lock(&vnode->lock);
+
+ if (ret == -ENOENT) {
+ afs_kill_lockers_enoent(vnode);
+ spin_unlock(&vnode->lock);
+ return;
+ }
+
+ /* The state was changed whilst the spinlock was dropped for the
+ * RPC, so dispatch again on the new state.
+ */
+ if (vnode->lock_state != AFS_VNODE_LOCK_EXTENDING)
+ goto again;
+ afs_set_lock_state(vnode, AFS_VNODE_LOCK_GRANTED);
+
+ /* Any non-zero result requeues this work item after 10 seconds. */
+ if (ret != 0)
+ queue_delayed_work(afs_lock_manager, &vnode->lock_work,
+ HZ * 10);
+ spin_unlock(&vnode->lock);
+ _leave(" [ext]");
+ return;
+
+ /* If we're waiting for a callback to indicate lock release, we can't
+ * actually rely on this, so need to recheck at regular intervals. The
+ * problem is that the server might not notify us if the lock just
+ * expires (say because a client died) rather than being explicitly
+ * released.
+ */
+ case AFS_VNODE_LOCK_WAITING_FOR_CB:
+ _debug("retry");
+ afs_next_locker(vnode, 0);
+ spin_unlock(&vnode->lock);
+ return;
+
+ case AFS_VNODE_LOCK_DELETED:
+ afs_kill_lockers_enoent(vnode);
+ spin_unlock(&vnode->lock);
+ return;
+
+ default:
+ /* Looks like a lock request was withdrawn. */
+ spin_unlock(&vnode->lock);
+ _leave(" [no]");
+ return;
+ }
+}
+
+/*
+ * Hand the job of unlocking a vnode on the server to the lock manager work
+ * item, lest a pending signal in the calling thread interrupt AF_RXRPC.
+ *
+ * Nothing is done unless there are no granted locks left and the vnode is in
+ * the GRANTED or EXTENDING state.
+ * - the caller must hold the vnode lock
+ */
+static void afs_defer_unlock(struct afs_vnode *vnode)
+{
+	_enter("%u", vnode->lock_state);
+
+	/* Someone still holds a granted lock, so keep the server lock. */
+	if (!list_empty(&vnode->granted_locks))
+		return;
+
+	if (vnode->lock_state != AFS_VNODE_LOCK_GRANTED &&
+	    vnode->lock_state != AFS_VNODE_LOCK_EXTENDING)
+		return;
+
+	/* Replace any pending run of the work item with an immediate one
+	 * that will see the NEED_UNLOCK state.
+	 */
+	cancel_delayed_work(&vnode->lock_work);
+
+	afs_set_lock_state(vnode, AFS_VNODE_LOCK_NEED_UNLOCK);
+	trace_afs_flock_ev(vnode, NULL, afs_flock_defer_unlock, 0);
+	queue_delayed_work(afs_lock_manager, &vnode->lock_work, 0);
+}
+
+/*
+ * Revalidate the vnode and check that the caller's permits allow a lock of
+ * the requested type to be taken.
+ *
+ * Returns 0 if the lock may be attempted, -EACCES if the permits are
+ * insufficient, or the error from validation or the permit lookup.
+ */
+static int afs_do_setlk_check(struct afs_vnode *vnode, struct key *key,
+			      enum afs_flock_mode mode, afs_lock_type_t type)
+{
+	afs_access_t access, needed;
+	int ret;
+
+	/* Make sure we've got a callback on this file and that our view of
+	 * the data version is up to date.
+	 */
+	ret = afs_validate(vnode, key);
+	if (ret < 0)
+		return ret;
+
+	/* Look up what this key is permitted to do to the file. */
+	ret = afs_check_permit(vnode, key, &access);
+	if (ret < 0)
+		return ret;
+
+	/* At a rough estimation, WRITE or INSERT is enough for either kind of
+	 * lock, and LOCK is additionally sufficient for a read lock.
+	 *
+	 * We can't rely on the server to check this for us since, if we want
+	 * to share a read lock that we already have, we won't go to the
+	 * server.
+	 */
+	needed = AFS_ACE_INSERT | AFS_ACE_WRITE;
+	if (type == AFS_LOCK_READ)
+		needed |= AFS_ACE_LOCK;
+
+	if (!(access & needed))
+		return -EACCES;
+
+	return 0;
+}
+
+/*
+ * Request a lock on a file.
+ *
+ * Depending on the flock mode and on whether the lock covers the whole file,
+ * this either obtains (or shares) a whole-file lock on the server and then
+ * registers the lock with the VFS, or skips the server and only registers the
+ * lock with the VFS.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int afs_do_setlk(struct file *file, struct file_lock *fl)
+{
+ struct inode *inode = locks_inode(file);
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+ enum afs_flock_mode mode = AFS_FS_S(inode->i_sb)->flock_mode;
+ afs_lock_type_t type;
+ struct key *key = afs_file_key(file);
+ bool partial, no_server_lock = false;
+ int ret;
+
+ /* An unset mode behaves as the OpenAFS-compatible mode. */
+ if (mode == afs_flock_mode_unset)
+ mode = afs_flock_mode_openafs;
+
+ _enter("{%llx:%llu},%llu-%llu,%u,%u",
+ vnode->fid.vid, vnode->fid.vnode,
+ fl->fl_start, fl->fl_end, fl->fl_type, mode);
+
+ fl->fl_ops = &afs_lock_ops;
+ INIT_LIST_HEAD(&fl->fl_u.afs.link);
+ fl->fl_u.afs.state = AFS_LOCK_PENDING;
+
+ /* A lock is partial if it doesn't span offset 0 to OFFSET_MAX. */
+ partial = (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX);
+ type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
+ /* In write mode, a partial lock always takes a write lock. */
+ if (mode == afs_flock_mode_write && partial)
+ type = AFS_LOCK_WRITE;
+
+ ret = afs_do_setlk_check(vnode, key, mode, type);
+ if (ret < 0)
+ return ret;
+
+ trace_afs_flock_op(vnode, fl, afs_flock_op_set_lock);
+
+ /* AFS3 protocol only supports full-file locks and doesn't provide any
+ * method of upgrade/downgrade, so we need to emulate for partial-file
+ * locks.
+ *
+ * The OpenAFS client only gets a server lock for a full-file lock and
+ * keeps partial-file locks local. Allow this behaviour to be emulated
+ * (as the default).
+ */
+ if (mode == afs_flock_mode_local ||
+ (partial && mode == afs_flock_mode_openafs)) {
+ no_server_lock = true;
+ goto skip_server_lock;
+ }
+
+ /* Queue this request at the tail of the vnode's pending locks. */
+ spin_lock(&vnode->lock);
+ list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
+
+ ret = -ENOENT;
+ if (vnode->lock_state == AFS_VNODE_LOCK_DELETED)
+ goto error_unlock;
+
+ /* If we've already got a lock on the server then try to move to having
+ * the VFS grant the requested lock. Note that this means that other
+ * clients may get starved out.
+ */
+ _debug("try %u", vnode->lock_state);
+ if (vnode->lock_state == AFS_VNODE_LOCK_GRANTED) {
+ if (type == AFS_LOCK_READ) {
+ _debug("instant readlock");
+ list_move_tail(&fl->fl_u.afs.link, &vnode->granted_locks);
+ fl->fl_u.afs.state = AFS_LOCK_GRANTED;
+ goto vnode_is_locked_u;
+ }
+
+ if (vnode->lock_type == AFS_LOCK_WRITE) {
+ _debug("instant writelock");
+ list_move_tail(&fl->fl_u.afs.link, &vnode->granted_locks);
+ fl->fl_u.afs.state = AFS_LOCK_GRANTED;
+ goto vnode_is_locked_u;
+ }
+ }
+
+ /* For a non-blocking request with no lock activity on this vnode, use
+ * the lock count in vnode->status to fail without asking the server.
+ */
+ if (vnode->lock_state == AFS_VNODE_LOCK_NONE &&
+ !(fl->fl_flags & FL_SLEEP)) {
+ ret = -EAGAIN;
+ if (type == AFS_LOCK_READ) {
+ if (vnode->status.lock_count == -1)
+ goto lock_is_contended; /* Write locked */
+ } else {
+ if (vnode->status.lock_count != 0)
+ goto lock_is_contended; /* Locked */
+ }
+ }
+
+ if (vnode->lock_state != AFS_VNODE_LOCK_NONE)
+ goto need_to_wait;
+
+try_to_lock:
+ /* We don't have a lock on this vnode and we aren't currently waiting
+ * for one either, so ask the server for a lock.
+ *
+ * Note that we need to be careful if we get interrupted by a signal
+ * after dispatching the request as we may still get the lock, even
+ * though we don't wait for the reply (it's not too bad a problem - the
+ * lock will expire in 5 mins anyway).
+ */
+ trace_afs_flock_ev(vnode, fl, afs_flock_try_to_lock, 0);
+ vnode->lock_key = key_get(key);
+ vnode->lock_type = type;
+ afs_set_lock_state(vnode, AFS_VNODE_LOCK_SETTING);
+ /* Drop the spinlock across the RPC. */
+ spin_unlock(&vnode->lock);
+
+ ret = afs_set_lock(vnode, key, type); /* RPC */
+
+ spin_lock(&vnode->lock);
+ switch (ret) {
+ case -EKEYREJECTED:
+ case -EKEYEXPIRED:
+ case -EKEYREVOKED:
+ case -EPERM:
+ case -EACCES:
+ /* Permission-type failures pass the error on to afs_next_locker(). */
+ fl->fl_u.afs.state = ret;
+ trace_afs_flock_ev(vnode, fl, afs_flock_fail_perm, ret);
+ list_del_init(&fl->fl_u.afs.link);
+ afs_next_locker(vnode, ret);
+ goto error_unlock;
+
+ case -ENOENT:
+ fl->fl_u.afs.state = ret;
+ trace_afs_flock_ev(vnode, fl, afs_flock_fail_other, ret);
+ list_del_init(&fl->fl_u.afs.link);
+ afs_kill_lockers_enoent(vnode);
+ goto error_unlock;
+
+ default:
+ fl->fl_u.afs.state = ret;
+ trace_afs_flock_ev(vnode, fl, afs_flock_fail_other, ret);
+ list_del_init(&fl->fl_u.afs.link);
+ afs_next_locker(vnode, 0);
+ goto error_unlock;
+
+ case -EWOULDBLOCK:
+ /* The server doesn't have a lock-waiting queue, so the client
+ * will have to retry. The server will break the outstanding
+ * callbacks on a file when a lock is released.
+ */
+ ASSERT(list_empty(&vnode->granted_locks));
+ ASSERTCMP(vnode->pending_locks.next, ==, &fl->fl_u.afs.link);
+ goto lock_is_contended;
+
+ case 0:
+ afs_set_lock_state(vnode, AFS_VNODE_LOCK_GRANTED);
+ trace_afs_flock_ev(vnode, fl, afs_flock_acquired, type);
+ afs_grant_locks(vnode);
+ goto vnode_is_locked_u;
+ }
+
+vnode_is_locked_u:
+ spin_unlock(&vnode->lock);
+vnode_is_locked:
+ /* the lock has been granted by the server... */
+ ASSERTCMP(fl->fl_u.afs.state, ==, AFS_LOCK_GRANTED);
+
+skip_server_lock:
+ /* ... but the VFS still needs to distribute access on this client. */
+ trace_afs_flock_ev(vnode, fl, afs_flock_vfs_locking, 0);
+ ret = locks_lock_file_wait(file, fl);
+ trace_afs_flock_ev(vnode, fl, afs_flock_vfs_lock, ret);
+ if (ret < 0)
+ goto vfs_rejected_lock;
+
+ /* Again, make sure we've got a callback on this file and, again, make
+ * sure that our view of the data version is up to date (we ignore
+ * errors incurred here and deal with the consequences elsewhere).
+ */
+ afs_validate(vnode, key);
+ _leave(" = 0");
+ return 0;
+
+lock_is_contended:
+ /* Reached with vnode->lock held. */
+ if (!(fl->fl_flags & FL_SLEEP)) {
+ list_del_init(&fl->fl_u.afs.link);
+ afs_next_locker(vnode, 0);
+ ret = -EAGAIN;
+ goto error_unlock;
+ }
+
+ /* Have the lock manager work item run in 5 seconds' time. */
+ afs_set_lock_state(vnode, AFS_VNODE_LOCK_WAITING_FOR_CB);
+ trace_afs_flock_ev(vnode, fl, afs_flock_would_block, ret);
+ queue_delayed_work(afs_lock_manager, &vnode->lock_work, HZ * 5);
+
+need_to_wait:
+ /* We're going to have to wait. Either this client doesn't have a lock
+ * on the server yet and we need to wait for a callback to occur, or
+ * the client does have a lock on the server, but it's shared and we
+ * need an exclusive lock.
+ */
+ spin_unlock(&vnode->lock);
+
+ trace_afs_flock_ev(vnode, fl, afs_flock_waiting, 0);
+ ret = wait_event_interruptible(fl->fl_wait,
+ fl->fl_u.afs.state != AFS_LOCK_PENDING);
+ trace_afs_flock_ev(vnode, fl, afs_flock_waited, ret);
+
+ /* A negative state is an error code stored by whoever woke us. */
+ if (fl->fl_u.afs.state >= 0 && fl->fl_u.afs.state != AFS_LOCK_GRANTED) {
+ spin_lock(&vnode->lock);
+
+ switch (fl->fl_u.afs.state) {
+ case AFS_LOCK_YOUR_TRY:
+ fl->fl_u.afs.state = AFS_LOCK_PENDING;
+ goto try_to_lock;
+ case AFS_LOCK_PENDING:
+ /* NOTE(review): wait_event_interruptible() returns 0 or
+ * -ERESTARTSYS, so ret > 0 looks unreachable here and a
+ * still-pending request always leaves via error_unlock
+ * - confirm.
+ */
+ if (ret > 0) {
+ /* We need to retry the lock. We may not be
+ * notified by the server if it just expired
+ * rather than being released.
+ */
+ ASSERTCMP(vnode->lock_state, ==, AFS_VNODE_LOCK_WAITING_FOR_CB);
+ afs_set_lock_state(vnode, AFS_VNODE_LOCK_SETTING);
+ fl->fl_u.afs.state = AFS_LOCK_PENDING;
+ goto try_to_lock;
+ }
+ goto error_unlock;
+ case AFS_LOCK_GRANTED:
+ default:
+ break;
+ }
+
+ spin_unlock(&vnode->lock);
+ }
+
+ if (fl->fl_u.afs.state == AFS_LOCK_GRANTED)
+ goto vnode_is_locked;
+ /* Otherwise the state is returned as the result. */
+ ret = fl->fl_u.afs.state;
+ goto error;
+
+vfs_rejected_lock:
+ /* The VFS rejected the lock we just obtained, so we have to discard
+ * what we just got. We defer this to the lock manager work item to
+ * deal with.
+ */
+ _debug("vfs refused %d", ret);
+ /* Nothing to undo if the server was never asked for a lock. */
+ if (no_server_lock)
+ goto error;
+ spin_lock(&vnode->lock);
+ list_del_init(&fl->fl_u.afs.link);
+ afs_defer_unlock(vnode);
+
+error_unlock:
+ spin_unlock(&vnode->lock);
+error:
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Release a lock on a file.
+ *
+ * Pending writes are flushed and then the unlock request is passed to the
+ * VFS; no lock RPC is issued directly from here.  Returns the result of the
+ * VFS unlock.
+ */
+static int afs_do_unlk(struct file *file, struct file_lock *fl)
+{
+	struct inode *inode = locks_inode(file);
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	int err;
+
+	_enter("{%llx:%llu},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
+
+	trace_afs_flock_op(vnode, fl, afs_flock_op_unlock);
+
+	/* Flush all pending writes before doing anything with locks; the
+	 * result of the flush is not checked.
+	 */
+	vfs_fsync(file, 0);
+
+	err = locks_lock_file_wait(file, fl);
+	_leave(" = %d [%u]", err, vnode->lock_state);
+	return err;
+}
+
+/*
+ * Report a lock that conflicts with the one described by @fl, if there is
+ * one.
+ *
+ * Local lock records are consulted first.  If they show nothing, the file
+ * status is refetched and a non-zero lock count is reported as a whole-file
+ * read lock (positive count) or write lock (negative count) with pid 0.
+ *
+ * Returns 0 on success, -ENOENT if the vnode is marked deleted, or the error
+ * from fetching the status.
+ */
+static int afs_do_getlk(struct file *file, struct file_lock *fl)
+{
+	struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
+	struct key *key = afs_file_key(file);
+	int ret = 0, nr_locks;
+
+	_enter("");
+
+	if (vnode->lock_state == AFS_VNODE_LOCK_DELETED)
+		return -ENOENT;
+
+	fl->fl_type = F_UNLCK;
+
+	/* check local lock records first */
+	posix_test_lock(file, fl);
+	if (fl->fl_type != F_UNLCK)
+		goto out;
+
+	/* no local locks; consult the server */
+	ret = afs_fetch_status(vnode, key, false, NULL);
+	if (ret < 0)
+		goto out;
+
+	nr_locks = READ_ONCE(vnode->status.lock_count);
+	if (nr_locks != 0) {
+		fl->fl_type = (nr_locks > 0) ? F_RDLCK : F_WRLCK;
+		fl->fl_start = 0;
+		fl->fl_end = OFFSET_MAX;
+		fl->fl_pid = 0;
+	}
+
+	ret = 0;
+out:
+	_leave(" = %d [%hd]", ret, fl->fl_type);
+	return ret;
+}
+
+/*
+ * Handle a POSIX lock request on a file.
+ *
+ * Queries go to afs_do_getlk(); otherwise the request is dispatched to the
+ * unlock or set-lock path according to fl->fl_type and the outcome is
+ * traced.  Returns 0 or a negative error code.
+ */
+int afs_lock(struct file *file, int cmd, struct file_lock *fl)
+{
+	struct inode *inode = locks_inode(file);
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	enum afs_flock_operation outcome;
+	int ret;
+
+	_enter("{%llx:%llu},%d,{t=%x,fl=%x,r=%Ld:%Ld}",
+	       vnode->fid.vid, vnode->fid.vnode, cmd,
+	       fl->fl_type, fl->fl_flags,
+	       (long long) fl->fl_start, (long long) fl->fl_end);
+
+	if (IS_GETLK(cmd))
+		return afs_do_getlk(file, fl);
+
+	/* Tag the request so that its trace events can be correlated. */
+	fl->fl_u.afs.debug_id = atomic_inc_return(&afs_file_lock_debug_id);
+	trace_afs_flock_op(vnode, fl, afs_flock_op_lock);
+
+	ret = (fl->fl_type == F_UNLCK) ? afs_do_unlk(file, fl)
+				       : afs_do_setlk(file, fl);
+
+	/* Translate the result into a trace operation code. */
+	if (ret == 0)
+		outcome = afs_flock_op_return_ok;
+	else if (ret == -EAGAIN)
+		outcome = afs_flock_op_return_eagain;
+	else if (ret == -EDEADLK)
+		outcome = afs_flock_op_return_edeadlk;
+	else
+		outcome = afs_flock_op_return_error;
+
+	trace_afs_flock_op(vnode, fl, outcome);
+	return ret;
+}
+
+/*
+ * Handle a BSD flock() request on a file.
+ *
+ * Requests that don't carry FL_FLOCK are refused with -ENOLCK.  Everything
+ * else takes the same unlock/set-lock paths as POSIX locks and the outcome
+ * is traced.  Returns 0 or a negative error code.
+ */
+int afs_flock(struct file *file, int cmd, struct file_lock *fl)
+{
+	struct inode *inode = locks_inode(file);
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	enum afs_flock_operation outcome;
+	int ret;
+
+	_enter("{%llx:%llu},%d,{t=%x,fl=%x}",
+	       vnode->fid.vid, vnode->fid.vnode, cmd,
+	       fl->fl_type, fl->fl_flags);
+
+	/* This entry point only services flock-style requests; anything
+	 * without FL_FLOCK set is rejected.
+	 */
+	if (!(fl->fl_flags & FL_FLOCK))
+		return -ENOLCK;
+
+	/* Tag the request so that its trace events can be correlated. */
+	fl->fl_u.afs.debug_id = atomic_inc_return(&afs_file_lock_debug_id);
+	trace_afs_flock_op(vnode, fl, afs_flock_op_flock);
+
+	/* we're simulating flock() locks using posix locks on the server */
+	ret = (fl->fl_type == F_UNLCK) ? afs_do_unlk(file, fl)
+				       : afs_do_setlk(file, fl);
+
+	/* Translate the result into a trace operation code. */
+	if (ret == 0)
+		outcome = afs_flock_op_return_ok;
+	else if (ret == -EAGAIN)
+		outcome = afs_flock_op_return_eagain;
+	else if (ret == -EDEADLK)
+		outcome = afs_flock_op_return_edeadlk;
+	else
+		outcome = afs_flock_op_return_error;
+
+	trace_afs_flock_op(vnode, fl, outcome);
+	return ret;
+}
+
+/*
+ * The POSIX lock management core VFS code copies the lock record and adds
+ * the copy into its own list, so the copy has to be linked into the vnode's
+ * lock queue in the same place as the original (which will be deleted
+ * shortly after).  The copy gets its own debug ID.
+ */
+static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl)
+{
+	struct inode *inode = locks_inode(fl->fl_file);
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+
+	_enter("");
+
+	new->fl_u.afs.debug_id = atomic_inc_return(&afs_file_lock_debug_id);
+
+	spin_lock(&vnode->lock);
+	trace_afs_flock_op(vnode, new, afs_flock_op_copy_lock);
+	/* Insert the copy directly after the original record. */
+	list_add(&new->fl_u.afs.link, &fl->fl_u.afs.link);
+	spin_unlock(&vnode->lock);
+}
+
+/*
+ * A lock record is being removed from the VFS's list, so take it off the
+ * vnode's queue too.  If that leaves no granted locks, the server unlock is
+ * deferred to the lock manager.
+ */
+static void afs_fl_release_private(struct file_lock *fl)
+{
+	struct inode *inode = locks_inode(fl->fl_file);
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+
+	_enter("");
+
+	spin_lock(&vnode->lock);
+
+	trace_afs_flock_op(vnode, fl, afs_flock_op_release_lock);
+	list_del_init(&fl->fl_u.afs.link);
+
+	/* afs_defer_unlock() must be called with vnode->lock held. */
+	if (list_empty(&vnode->granted_locks))
+		afs_defer_unlock(vnode);
+
+	_debug("state %u for %p", vnode->lock_state, vnode);
+	spin_unlock(&vnode->lock);
+}
diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c
new file mode 100644
index 000000000..7a3803ce3
--- /dev/null
+++ b/fs/afs/fs_operation.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Fileserver-directed operation handling.
+ *
+ * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include "internal.h"
+
+/* Source of the debug_id assigned to each operation at allocation. */
+static atomic_t afs_operation_debug_counter;
+
+/*
+ * Create an operation against a volume.
+ *
+ * If @key is NULL, a key is requested for the volume's cell; otherwise an
+ * extra ref is taken on the key supplied.  The operation also holds a ref on
+ * the volume.
+ *
+ * Returns the new operation, ERR_PTR(-ENOMEM) if it can't be allocated, or
+ * the error from the key request.
+ */
+struct afs_operation *afs_alloc_operation(struct key *key, struct afs_volume *volume)
+{
+	struct afs_operation *new_op;
+
+	_enter("");
+
+	new_op = kzalloc(sizeof(*new_op), GFP_KERNEL);
+	if (!new_op)
+		return ERR_PTR(-ENOMEM);
+
+	if (key) {
+		key_get(key);
+	} else {
+		key = afs_request_key(volume->cell);
+		if (IS_ERR(key)) {
+			kfree(new_op);
+			return ERR_CAST(key);
+		}
+	}
+
+	new_op->key		= key;
+	new_op->volume		= afs_get_volume(volume, afs_volume_trace_get_new_op);
+	new_op->net		= volume->cell->net;
+	new_op->cb_v_break	= volume->cb_v_break;
+	new_op->debug_id	= atomic_inc_return(&afs_operation_debug_counter);
+
+	/* Initial error values, in place until the operation is run. */
+	new_op->error		= -EDESTADDRREQ;
+	new_op->ac.error	= SHRT_MAX;
+
+	_leave(" = [op=%08x]", new_op->debug_id);
+	return new_op;
+}
+
+/*
+ * Lock the vnode(s) being operated upon.
+ *
+ * Takes the io_lock of op->file[0].vnode and, if op->file[1] names a
+ * different vnode with need_io_lock set, that vnode's io_lock too. What was
+ * taken is recorded in op->flags for afs_drop_io_locks().
+ *
+ * Returns true if the locks were obtained. Returns false with no locks held,
+ * op->error set to -ERESTARTSYS and AFS_OPERATION_STOP set if interrupted
+ * whilst waiting.
+ */
+static bool afs_get_io_locks(struct afs_operation *op)
+{
+ struct afs_vnode *vnode = op->file[0].vnode;
+ struct afs_vnode *vnode2 = op->file[1].vnode;
+
+ _enter("");
+
+ /* An uninterruptible operation takes only the first vnode's lock and
+ * cannot fail here.
+ */
+ if (op->flags & AFS_OPERATION_UNINTR) {
+ mutex_lock(&vnode->io_lock);
+ op->flags |= AFS_OPERATION_LOCK_0;
+ _leave(" = t [1]");
+ return true;
+ }
+
+ /* Ignore the second vnode unless it is distinct and wants locking. */
+ if (!vnode2 || !op->file[1].need_io_lock || vnode == vnode2)
+ vnode2 = NULL;
+
+ /* Take the two mutexes in a consistent order, decided by address. */
+ if (vnode2 > vnode)
+ swap(vnode, vnode2);
+
+ if (mutex_lock_interruptible(&vnode->io_lock) < 0) {
+ op->error = -ERESTARTSYS;
+ op->flags |= AFS_OPERATION_STOP;
+ _leave(" = f [I 0]");
+ return false;
+ }
+ op->flags |= AFS_OPERATION_LOCK_0;
+
+ if (vnode2) {
+ if (mutex_lock_interruptible_nested(&vnode2->io_lock, 1) < 0) {
+ op->error = -ERESTARTSYS;
+ op->flags |= AFS_OPERATION_STOP;
+ /* Back out the first lock so that nothing is left held. */
+ mutex_unlock(&vnode->io_lock);
+ op->flags &= ~AFS_OPERATION_LOCK_0;
+ _leave(" = f [I 1]");
+ return false;
+ }
+ op->flags |= AFS_OPERATION_LOCK_1;
+ }
+
+ _leave(" = t [2]");
+ return true;
+}
+
+/*
+ * Release whichever vnode io_locks op->flags says are held, second file's
+ * first.
+ */
+static void afs_drop_io_locks(struct afs_operation *op)
+{
+	_enter("");
+
+	if (op->flags & AFS_OPERATION_LOCK_1)
+		mutex_unlock(&op->file[1].vnode->io_lock);
+
+	if (op->flags & AFS_OPERATION_LOCK_0)
+		mutex_unlock(&op->file[0].vnode->io_lock);
+}
+
+/*
+ * Snapshot a vnode's state into its operation parameter slot before the RPC
+ * is issued: the FID, the data version and the callback break counter.  Slots
+ * with no vnode attached are left alone.
+ */
+static void afs_prepare_vnode(struct afs_operation *op, struct afs_vnode_param *vp,
+			      unsigned int index)
+{
+	struct afs_vnode *target = vp->vnode;
+
+	if (target) {
+		vp->fid = target->fid;
+		vp->dv_before = target->status.data_version;
+		vp->cb_break_before = afs_calc_vnode_cb_break(target);
+
+		/* Flag the operation if the vnode has any lock state. */
+		if (target->lock_state != AFS_VNODE_LOCK_NONE)
+			op->flags |= AFS_OPERATION_CUR_ONLY;
+
+		/* Mark the vnode as being modified for the duration. */
+		if (vp->modification)
+			set_bit(AFS_VNODE_MODIFYING, &target->flags);
+	}
+
+	if (vp->fid.vnode) {
+		_debug("PREP[%u] {%llx:%llu.%u}",
+		       index, vp->fid.vid, vp->fid.vnode, vp->fid.unique);
+	}
+}
+
+/*
+ * Begin an operation on the fileserver.
+ *
+ * Fileserver operations are serialised on the server by vnode, so they are
+ * serialised here too using the io_lock.  Returns false if the locks could
+ * not be obtained, true once both vnode slots have been prepared.
+ */
+bool afs_begin_vnode_operation(struct afs_operation *op)
+{
+	struct afs_vnode *vnode = op->file[0].vnode;
+
+	ASSERT(vnode);
+
+	_enter("");
+
+	if (op->file[0].need_io_lock && !afs_get_io_locks(op))
+		return false;
+
+	afs_prepare_vnode(op, &op->file[0], 0);
+	afs_prepare_vnode(op, &op->file[1], 1);
+
+	/* Resample the volume's callback break counter. */
+	op->cb_v_break = op->volume->cb_v_break;
+	_leave(" = true");
+	return true;
+}
+
+/*
+ * Tidy up at the end of an operation: dump diagnostics for addressing
+ * failures, unlock the vnode(s) and convert an abort into an error code.
+ */
+static void afs_end_vnode_operation(struct afs_operation *op)
+{
+	_enter("");
+
+	switch (op->error) {
+	case -EDESTADDRREQ:
+	case -EADDRNOTAVAIL:
+	case -ENETUNREACH:
+	case -EHOSTUNREACH:
+		afs_dump_edestaddrreq(op);
+		break;
+	default:
+		break;
+	}
+
+	afs_drop_io_locks(op);
+
+	/* Map the abort code recorded in the address cursor to an error. */
+	if (op->error == -ECONNABORTED)
+		op->error = afs_abort_to_error(op->ac.abort_code);
+}
+
+/*
+ * Run an operation to completion.
+ *
+ * The RPC is issued to each fileserver that afs_select_fileserver() offers
+ * until it stops offering one. The operation's success/aborted/failed
+ * callbacks are then invoked according to op->error, the vnode operation is
+ * ended and, on success, the edit_dir callback is run if there is one.
+ */
+void afs_wait_for_operation(struct afs_operation *op)
+{
+ _enter("");
+
+ while (afs_select_fileserver(op)) {
+ op->cb_s_break = op->server->cb_s_break;
+ /* Use the YFS RPC if the server is flagged as YFS and the
+ * operation provides one; otherwise fall back to the AFS RPC.
+ */
+ if (test_bit(AFS_SERVER_FL_IS_YFS, &op->server->flags) &&
+ op->ops->issue_yfs_rpc)
+ op->ops->issue_yfs_rpc(op);
+ else if (op->ops->issue_afs_rpc)
+ op->ops->issue_afs_rpc(op);
+ else
+ op->ac.error = -ENOTSUPP;
+
+ /* Wait only if a call was actually set up. */
+ if (op->call)
+ op->error = afs_wait_for_call_to_complete(op->call, &op->ac);
+ }
+
+ switch (op->error) {
+ case 0:
+ _debug("success");
+ /* NOTE(review): unlike ->aborted and ->failed, ->success is
+ * called without a NULL check - presumably every ops table
+ * provides it; confirm.
+ */
+ op->ops->success(op);
+ break;
+ case -ECONNABORTED:
+ if (op->ops->aborted)
+ op->ops->aborted(op);
+ fallthrough;
+ default:
+ if (op->ops->failed)
+ op->ops->failed(op);
+ break;
+ }
+
+ /* This drops the io_locks, so edit_dir below runs without them. */
+ afs_end_vnode_operation(op);
+
+ if (op->error == 0 && op->ops->edit_dir) {
+ _debug("edit_dir");
+ op->ops->edit_dir(op);
+ }
+ _leave("");
+}
+
+/*
+ * Dispose of an operation.
+ *
+ * Runs the operation's put callback if there is one, clears the MODIFYING
+ * flag on and drops the inode refs for the vnodes that asked for it, releases
+ * the address cursor, server list, volume and key, and frees the operation.
+ *
+ * Returns the operation's final error value.
+ */
+int afs_put_operation(struct afs_operation *op)
+{
+ /* Sample the result now as op is freed before we return. */
+ int i, ret = op->error;
+
+ _enter("op=%08x,%d", op->debug_id, ret);
+
+ if (op->ops && op->ops->put)
+ op->ops->put(op);
+ if (op->file[0].modification)
+ clear_bit(AFS_VNODE_MODIFYING, &op->file[0].vnode->flags);
+ /* Don't clear the flag twice if both slots name the same vnode. */
+ if (op->file[1].modification && op->file[1].vnode != op->file[0].vnode)
+ clear_bit(AFS_VNODE_MODIFYING, &op->file[1].vnode->flags);
+ if (op->file[0].put_vnode)
+ iput(&op->file[0].vnode->netfs.inode);
+ if (op->file[1].put_vnode)
+ iput(&op->file[1].vnode->netfs.inode);
+
+ /* more_files[] holds the slots beyond the two in file[]. */
+ if (op->more_files) {
+ for (i = 0; i < op->nr_files - 2; i++)
+ if (op->more_files[i].put_vnode)
+ iput(&op->more_files[i].vnode->netfs.inode);
+ kfree(op->more_files);
+ }
+
+ afs_end_cursor(&op->ac);
+ afs_put_serverlist(op->net, op->server_list);
+ afs_put_volume(op->net, op->volume, afs_volume_trace_put_put_op);
+ key_put(op->key);
+ kfree(op);
+ return ret;
+}
+
+/*
+ * Run an operation synchronously from start to finish and dispose of it,
+ * returning its final error value.
+ *
+ * The result of afs_begin_vnode_operation() is not checked here.
+ */
+int afs_do_sync_operation(struct afs_operation *op)
+{
+	int ret;
+
+	afs_begin_vnode_operation(op);
+	afs_wait_for_operation(op);
+
+	/* op has been freed by the time this returns. */
+	ret = afs_put_operation(op);
+	return ret;
+}
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
new file mode 100644
index 000000000..daaf3810c
--- /dev/null
+++ b/fs/afs/fs_probe.c
@@ -0,0 +1,478 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AFS fileserver probing
+ *
+ * Copyright (C) 2018, 2020 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include "afs_fs.h"
+#include "internal.h"
+#include "protocol_afs.h"
+#include "protocol_yfs.h"
+
+/*
+ * Intervals, in jiffies, between probes of a server on the fast (30s) and
+ * slow (5min) probe queues.
+ */
+static unsigned int afs_fs_probe_fast_poll_interval = 30 * HZ;
+static unsigned int afs_fs_probe_slow_poll_interval = 5 * 60 * HZ;
+
+/*
+ * Start the probe polling timer so that it fires no later than the server's
+ * next probe time.  The timer has to be supplied with an inc on the
+ * outstanding server count, which is given back if timer_reduce() returns
+ * true.
+ */
+static void afs_schedule_fs_probe(struct afs_net *net,
+				  struct afs_server *server, bool fast)
+{
+	unsigned long due;
+
+	if (!net->live)
+		return;
+
+	if (fast)
+		due = server->probed_at + afs_fs_probe_fast_poll_interval;
+	else
+		due = server->probed_at + afs_fs_probe_slow_poll_interval;
+
+	afs_inc_servers_outstanding(net);
+	if (timer_reduce(&net->fs_probe_timer, due))
+		afs_dec_servers_outstanding(net);
+}
+
+/*
+ * Handle the completion of a set of probes.
+ *
+ * A server that responded goes onto the slow probe queue.  One that didn't
+ * has its RTT reset, loses its RESPONDING flag and goes onto the fast queue.
+ * The probe timer is then scheduled accordingly.
+ */
+static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server)
+{
+	bool responded = server->probe.responded;
+	struct list_head *queue;
+
+	write_seqlock(&net->fs_lock);
+	if (!responded) {
+		server->rtt = UINT_MAX;
+		clear_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
+		queue = &net->fs_probe_fast;
+	} else {
+		queue = &net->fs_probe_slow;
+	}
+	list_add_tail(&server->probe_link, queue);
+	write_sequnlock(&net->fs_lock);
+
+	afs_schedule_fs_probe(net, server, !responded);
+}
+
+/*
+ * Handle the completion of a single probe.  When the last outstanding probe
+ * of the set completes, the set is finished off.  Waiters on the server's
+ * probe queue are woken in either case.
+ */
+static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server)
+{
+	bool was_last;
+
+	_enter("");
+
+	was_last = atomic_dec_and_test(&server->probe_outstanding);
+	if (was_last)
+		afs_finished_fs_probe(net, server);
+
+	wake_up_all(&server->probe_wq);
+}
+
+/*
+ * Handle inability to send a probe due to ENOMEM when trying to allocate a
+ * call struct.  The failure is recorded as a local one, the address is
+ * marked failed and the probe is accounted as done.
+ */
+static void afs_fs_probe_not_done(struct afs_net *net,
+				  struct afs_server *server,
+				  struct afs_addr_cursor *ac)
+{
+	struct afs_addr_list *addrs = ac->alist;
+	unsigned int slot = ac->index;
+
+	_enter("");
+
+	trace_afs_io_error(0, -ENOMEM, afs_io_error_fs_probe_fail);
+
+	spin_lock(&server->probe_lock);
+	server->probe.local_failure = true;
+	/* Don't overwrite an error that has already been recorded. */
+	if (server->probe.error == 0)
+		server->probe.error = -ENOMEM;
+	set_bit(slot, &addrs->failed);
+	spin_unlock(&server->probe_lock);
+
+	afs_done_one_fs_probe(net, server);
+}
+
+/*
+ * Process the result of probing a fileserver. This is called after successful
+ * or failed delivery of an FS.GetCapabilities operation.
+ *
+ * The outcome for the probed address is recorded in the address list and in
+ * server->probe under server->probe_lock, and the probe is then accounted as
+ * done.
+ */
+void afs_fileserver_probe_result(struct afs_call *call)
+{
+ struct afs_addr_list *alist = call->alist;
+ struct afs_server *server = call->server;
+ unsigned int index = call->addr_ix;
+ unsigned int rtt_us = 0, cap0;
+ int ret = call->error;
+
+ _enter("%pU,%u", &server->uuid, index);
+
+ spin_lock(&server->probe_lock);
+
+ switch (ret) {
+ case 0:
+ server->probe.error = 0;
+ goto responded;
+ case -ECONNABORTED:
+ /* An abort still counts as a response. Only record it if no
+ * address has responded yet.
+ */
+ if (!server->probe.responded) {
+ server->probe.abort_code = call->abort_code;
+ server->probe.error = ret;
+ }
+ goto responded;
+ case -ENOMEM:
+ case -ENONET:
+ /* Recorded as a local failure: the address is not marked failed. */
+ clear_bit(index, &alist->responded);
+ server->probe.local_failure = true;
+ trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
+ goto out;
+ case -ECONNRESET: /* Responded, but call expired. */
+ case -ERFKILL:
+ case -EADDRNOTAVAIL:
+ case -ENETUNREACH:
+ case -EHOSTUNREACH:
+ case -EHOSTDOWN:
+ case -ECONNREFUSED:
+ case -ETIMEDOUT:
+ case -ETIME:
+ default:
+ clear_bit(index, &alist->responded);
+ set_bit(index, &alist->failed);
+ /* Only replace the recorded error if nothing has responded and
+ * the current value is unset or a timeout.
+ */
+ if (!server->probe.responded &&
+ (server->probe.error == 0 ||
+ server->probe.error == -ETIMEDOUT ||
+ server->probe.error == -ETIME))
+ server->probe.error = ret;
+ trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
+ goto out;
+ }
+
+responded:
+ clear_bit(index, &alist->failed);
+
+ if (call->service_id == YFS_FS_SERVICE) {
+ server->probe.is_yfs = true;
+ set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
+ alist->addrs[index].srx_service = call->service_id;
+ } else {
+ server->probe.not_yfs = true;
+ /* Don't downgrade if a YFS response was seen in this probe set. */
+ if (!server->probe.is_yfs) {
+ clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
+ alist->addrs[index].srx_service = call->service_id;
+ }
+ /* call->tmp is converted from network byte order and tested for
+ * the 64-bit files capability.
+ */
+ cap0 = ntohl(call->tmp);
+ if (cap0 & AFS3_VICED_CAPABILITY_64BITFILES)
+ set_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
+ else
+ clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
+ }
+
+ /* Track the lowest RTT seen in this set and prefer that address. */
+ rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us);
+ if (rtt_us < server->probe.rtt) {
+ server->probe.rtt = rtt_us;
+ server->rtt = rtt_us;
+ alist->preferred = index;
+ }
+
+ smp_wmb(); /* Set rtt before responded. */
+ server->probe.responded = true;
+ set_bit(index, &alist->responded);
+ set_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
+out:
+ spin_unlock(&server->probe_lock);
+
+ _debug("probe %pU [%u] %pISpc rtt=%u ret=%d",
+ &server->uuid, index, &alist->addrs[index].transport,
+ rtt_us, ret);
+
+ return afs_done_one_fs_probe(call->net, server);
+}
+
+/*
+ * Probe one or all of a fileserver's addresses to find out the best route and
+ * to query its capabilities.
+ *
+ * If @all is false, only the address list's preferred address is probed,
+ * unless the preferred index is out of range, in which case every address is
+ * probed.  server->probe is reset and server->probe_outstanding is set to the
+ * number of probes about to be dispatched.  A probe that cannot be sent is
+ * accounted for through afs_fs_probe_not_done().
+ */
+void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server,
+			     struct key *key, bool all)
+{
+	struct afs_addr_cursor ac = {
+		.index = 0,
+	};
+
+	_enter("%pU", &server->uuid);
+
+	/* Pin the current address list so that it can be used unlocked. */
+	read_lock(&server->fs_lock);
+	ac.alist = rcu_dereference_protected(server->addresses,
+					     lockdep_is_held(&server->fs_lock));
+	afs_get_addrlist(ac.alist);
+	read_unlock(&server->fs_lock);
+
+	/* Decide whether every address must be probed before the outstanding
+	 * count is set.  Otherwise an out-of-range preferred index would
+	 * leave the count at 1 whilst nr_addrs probes are dispatched, and the
+	 * probe set would be treated as finished after the first reply.
+	 */
+	ac.index = ac.alist->preferred;
+	if (ac.index < 0 || ac.index >= ac.alist->nr_addrs)
+		all = true;
+
+	server->probed_at = jiffies;
+	atomic_set(&server->probe_outstanding, all ? ac.alist->nr_addrs : 1);
+	memset(&server->probe, 0, sizeof(server->probe));
+	server->probe.rtt = UINT_MAX;
+
+	if (all) {
+		for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++)
+			if (!afs_fs_get_capabilities(net, server, &ac, key))
+				afs_fs_probe_not_done(net, server, &ac);
+	} else {
+		if (!afs_fs_get_capabilities(net, server, &ac, key))
+			afs_fs_probe_not_done(net, server, &ac);
+	}
+
+	afs_put_addrlist(ac.alist);
+}
+
+/*
+ * Wait for the first as-yet untried fileserver to respond.
+ *
+ * @untried is a bitmask indexed by position in @slist. The wait ends when one
+ * of those servers has responded, when none of them has a probe outstanding
+ * any more or when a signal is pending. slist->preferred is then set to the
+ * responding server with the lowest RTT amongst those waited for, if any.
+ *
+ * Returns 0 normally, -ENOMEM if the wait entries can't be allocated, or
+ * -ERESTARTSYS if a signal is pending and no responding server was found.
+ */
+int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried)
+{
+ struct wait_queue_entry *waits;
+ struct afs_server *server;
+ unsigned int rtt = UINT_MAX, rtt_s;
+ bool have_responders = false;
+ int pref = -1, i;
+
+ _enter("%u,%lx", slist->nr_servers, untried);
+
+ /* Only wait for servers that have a probe outstanding. */
+ for (i = 0; i < slist->nr_servers; i++) {
+ if (test_bit(i, &untried)) {
+ server = slist->servers[i].server;
+ if (!atomic_read(&server->probe_outstanding))
+ __clear_bit(i, &untried);
+ if (server->probe.responded)
+ have_responders = true;
+ }
+ }
+ /* Nothing to wait for if a server already responded or none remain. */
+ if (have_responders || !untried)
+ return 0;
+
+ /* One wait entry per server; only the untried slots get used. */
+ waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL);
+ if (!waits)
+ return -ENOMEM;
+
+ for (i = 0; i < slist->nr_servers; i++) {
+ if (test_bit(i, &untried)) {
+ server = slist->servers[i].server;
+ init_waitqueue_entry(&waits[i], current);
+ add_wait_queue(&server->probe_wq, &waits[i]);
+ }
+ }
+
+ for (;;) {
+ bool still_probing = false;
+
+ /* Set the task state before checking the conditions. */
+ set_current_state(TASK_INTERRUPTIBLE);
+ for (i = 0; i < slist->nr_servers; i++) {
+ if (test_bit(i, &untried)) {
+ server = slist->servers[i].server;
+ if (server->probe.responded)
+ goto stop;
+ if (atomic_read(&server->probe_outstanding))
+ still_probing = true;
+ }
+ }
+
+ if (!still_probing || signal_pending(current))
+ goto stop;
+ schedule();
+ }
+
+stop:
+ set_current_state(TASK_RUNNING);
+
+ /* Pick the responding server with the lowest RTT and detach from all
+ * the wait queues.
+ */
+ for (i = 0; i < slist->nr_servers; i++) {
+ if (test_bit(i, &untried)) {
+ server = slist->servers[i].server;
+ rtt_s = READ_ONCE(server->rtt);
+ if (test_bit(AFS_SERVER_FL_RESPONDING, &server->flags) &&
+ rtt_s < rtt) {
+ pref = i;
+ rtt = rtt_s;
+ }
+
+ remove_wait_queue(&server->probe_wq, &waits[i]);
+ }
+ }
+
+ kfree(waits);
+
+ if (pref == -1 && signal_pending(current))
+ return -ERESTARTSYS;
+
+ if (pref >= 0)
+ slist->preferred = pref;
+ return 0;
+}
+
+/*
+ * Probe timer.  We have an increment on fs_outstanding that we need to pass
+ * along to the work item.  It is dropped here instead if the namespace is no
+ * longer live or queue_work() returns false.
+ */
+void afs_fs_probe_timer(struct timer_list *timer)
+{
+	struct afs_net *net = container_of(timer, struct afs_net, fs_probe_timer);
+
+	/* The count has been handed on to the work item. */
+	if (net->live && queue_work(afs_wq, &net->fs_prober))
+		return;
+
+	afs_dec_servers_outstanding(net);
+}
+
+/*
+ * Dispatch a probe to a server.
+ *
+ * Called with net->fs_lock write-locked; the lock is released before the
+ * probe is sent.  The probe is sent without a key.
+ */
+static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server, bool all)
+	__releases(&net->fs_lock)
+{
+	/* We remove it from the queues here - it will be added back to
+	 * one of the queues on the completion of the probe.
+	 */
+	list_del_init(&server->probe_link);
+
+	/* Hold a ref on the server across the unlocked section. */
+	afs_get_server(server, afs_server_trace_get_probe);
+	write_sequnlock(&net->fs_lock);
+
+	afs_fs_probe_fileserver(net, server, NULL, all);
+	afs_put_server(net, server, afs_server_trace_put_probe);
+}
+
+/*
+ * Probe a server immediately without waiting for its due time to come
+ * round.  This is used when all of the addresses have been tried.  Nothing
+ * is done if the server isn't currently on a probe queue.
+ */
+void afs_probe_fileserver(struct afs_net *net, struct afs_server *server)
+{
+	write_seqlock(&net->fs_lock);
+
+	if (list_empty(&server->probe_link)) {
+		write_sequnlock(&net->fs_lock);
+		return;
+	}
+
+	/* This releases net->fs_lock for us. */
+	afs_dispatch_fs_probe(net, server, true);
+}
+
+/*
+ * Probe dispatcher to regularly dispatch probes to keep NAT alive.
+ *
+ * Work item that sends probes to the servers at the head of the fast and slow
+ * probe queues that are due. If servers are still due when it stops, it
+ * requeues itself; otherwise, if a server is due in the future, it sets the
+ * probe timer for then. The servers-outstanding count that this work item
+ * was handed is passed on to the requeued work or the timer, or else dropped.
+ */
+void afs_fs_probe_dispatcher(struct work_struct *work)
+{
+ struct afs_net *net = container_of(work, struct afs_net, fs_prober);
+ struct afs_server *fast, *slow, *server;
+ unsigned long nowj, timer_at, poll_at;
+ bool first_pass = true, set_timer = false;
+
+ if (!net->live) {
+ afs_dec_servers_outstanding(net);
+ return;
+ }
+
+ _enter("");
+
+ if (list_empty(&net->fs_probe_fast) && list_empty(&net->fs_probe_slow)) {
+ afs_dec_servers_outstanding(net);
+ _leave(" [none]");
+ return;
+ }
+
+again:
+ write_seqlock(&net->fs_lock);
+
+ fast = slow = server = NULL;
+ nowj = jiffies;
+ timer_at = nowj + MAX_JIFFY_OFFSET;
+
+ /* Look at the head of the fast queue: if it isn't due yet, note when
+ * it will be and disregard it for now.
+ */
+ if (!list_empty(&net->fs_probe_fast)) {
+ fast = list_first_entry(&net->fs_probe_fast, struct afs_server, probe_link);
+ poll_at = fast->probed_at + afs_fs_probe_fast_poll_interval;
+ if (time_before(nowj, poll_at)) {
+ timer_at = poll_at;
+ set_timer = true;
+ fast = NULL;
+ }
+ }
+
+ /* Likewise for the slow queue, keeping the earlier of the two times. */
+ if (!list_empty(&net->fs_probe_slow)) {
+ slow = list_first_entry(&net->fs_probe_slow, struct afs_server, probe_link);
+ poll_at = slow->probed_at + afs_fs_probe_slow_poll_interval;
+ if (time_before(nowj, poll_at)) {
+ if (time_before(poll_at, timer_at))
+ timer_at = poll_at;
+ set_timer = true;
+ slow = NULL;
+ }
+ }
+
+ /* A due server on the fast queue takes priority. */
+ server = fast ?: slow;
+ if (server)
+ _debug("probe %pU", &server->uuid);
+
+ /* Always dispatch at least one probe, then keep going until a
+ * reschedule is needed. A server taken from the fast queue has all of
+ * its addresses probed. afs_dispatch_fs_probe() drops fs_lock.
+ */
+ if (server && (first_pass || !need_resched())) {
+ afs_dispatch_fs_probe(net, server, server == fast);
+ first_pass = false;
+ goto again;
+ }
+
+ write_sequnlock(&net->fs_lock);
+
+ if (server) {
+ /* More work to do, but we need to yield: requeue ourselves. */
+ if (!queue_work(afs_wq, &net->fs_prober))
+ afs_dec_servers_outstanding(net);
+ _leave(" [requeue]");
+ } else if (set_timer) {
+ if (timer_reduce(&net->fs_probe_timer, timer_at))
+ afs_dec_servers_outstanding(net);
+ _leave(" [timer]");
+ } else {
+ afs_dec_servers_outstanding(net);
+ _leave(" [quiesce]");
+ }
+}
+
+/*
+ * Wait for a probe on a particular fileserver to complete for 2s.
+ *
+ * The wait is interruptible by signals only if @is_intr is true.
+ *
+ * Returns 0 if the server has responded, -ERESTARTSYS if interruptible and a
+ * signal is pending, -ETIME if the 2s ran out, or -EDESTADDRREQ if no probe
+ * is outstanding any more and there was no response.
+ */
+int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr)
+{
+ struct wait_queue_entry wait;
+ unsigned long timo = 2 * HZ;
+
+ /* No probe in progress, so just report the current state. */
+ if (atomic_read(&server->probe_outstanding) == 0)
+ goto dont_wait;
+
+ init_wait_entry(&wait, 0);
+ for (;;) {
+ prepare_to_wait_event(&server->probe_wq, &wait,
+ is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
+ if (timo == 0 ||
+ server->probe.responded ||
+ atomic_read(&server->probe_outstanding) == 0 ||
+ (is_intr && signal_pending(current)))
+ break;
+ /* Sleep for what remains of the timeout. */
+ timo = schedule_timeout(timo);
+ }
+
+ finish_wait(&server->probe_wq, &wait);
+
+dont_wait:
+ if (server->probe.responded)
+ return 0;
+ if (is_intr && signal_pending(current))
+ return -ERESTARTSYS;
+ if (timo == 0)
+ return -ETIME;
+ return -EDESTADDRREQ;
+}
+
+/*
+ * Clean up the probing when the namespace is killed off.  If the probe timer
+ * was still pending, the servers-outstanding count it was carrying is
+ * dropped.
+ */
+void afs_fs_probe_cleanup(struct afs_net *net)
+{
+	int was_pending = del_timer_sync(&net->fs_probe_timer);
+
+	if (was_pending)
+		afs_dec_servers_outstanding(net);
+}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
new file mode 100644
index 000000000..7d37f63ef
--- /dev/null
+++ b/fs/afs/fsclient.c
@@ -0,0 +1,2083 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AFS File Server client stubs
+ *
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/circ_buf.h>
+#include <linux/iversion.h>
+#include <linux/netfs.h>
+#include "internal.h"
+#include "afs_fs.h"
+#include "xdr_fs.h"
+
+/*
+ * Decode an AFSFid block: three consecutive big-endian 32-bit words holding
+ * the vid, vnode and unique fields.  The caller's cursor is advanced past
+ * them.
+ */
+static void xdr_decode_AFSFid(const __be32 **_bp, struct afs_fid *fid)
+{
+	const __be32 *words = *_bp;
+
+	fid->vid	= ntohl(words[0]);
+	fid->vnode	= ntohl(words[1]);
+	fid->unique	= ntohl(words[2]);
+
+	*_bp = words + 3;
+}
+
+/*
+ * Dump a bad file status record to the kernel log: 21 32-bit words starting
+ * at @bp, printed as five rows of four words followed by one final word.
+ */
+static void xdr_dump_bad(const __be32 *bp)
+{
+	__be32 row[4];
+	unsigned int n;
+
+	pr_notice("AFS XDR: Bad status record\n");
+
+	/* Copy each row out with memcpy() rather than dereferencing @bp. */
+	for (n = 0; n < 5; n++) {
+		memcpy(row, &bp[n * 4], sizeof(row));
+		pr_notice("%03x: %08x %08x %08x %08x\n",
+			  n * 16,
+			  ntohl(row[0]), ntohl(row[1]), ntohl(row[2]), ntohl(row[3]));
+	}
+
+	/* The trailing word at byte offset 0x50. */
+	memcpy(row, &bp[5 * 4], sizeof(row[0]));
+	pr_notice("0x50: %08x\n", ntohl(row[0]));
+}
+
+/*
+ * Decode an AFSFetchStatus block into scb->status.
+ *
+ * On success scb->have_status is set.  For FS.InlineBulkStatus replies that
+ * carry a non-zero abort code, only status->abort_code is filled in and
+ * scb->have_error is set instead.  A record with an unexpected interface
+ * version or file type is dumped and flagged as a protocol error on @call.
+ *
+ * In every case the cursor *_bp is advanced by the size of one record.
+ */
+static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
+				      struct afs_call *call,
+				      struct afs_status_cb *scb)
+{
+	const struct afs_xdr_AFSFetchStatus *xdr = (const void *)*_bp;
+	struct afs_file_status *status = &scb->status;
+	u32 abort_code = ntohl(xdr->abort_code);
+	bool inline_abort;
+	u32 type;
+
+	/* Per-record abort codes are only honoured for FS.InlineBulkStatus. */
+	inline_abort = (call->operation_ID == afs_FS_InlineBulkStatus) &&
+		       abort_code != 0;
+
+	/* The OpenAFS fileserver has a bug in FS.InlineBulkStatus whereby it
+	 * doesn't set the interface version in the error case, so a zero
+	 * version is tolerated when an inline abort is being reported.
+	 */
+	if (xdr->if_version != htonl(AFS_FSTATUS_VERSION) &&
+	    !(inline_abort && xdr->if_version == htonl(0))) {
+		pr_warn("Unknown AFSFetchStatus version %u\n", ntohl(xdr->if_version));
+		goto bad;
+	}
+
+	if (inline_abort) {
+		status->abort_code = abort_code;
+		scb->have_error = true;
+		goto advance;
+	}
+
+	type = ntohl(xdr->type);
+	if (type != AFS_FTYPE_FILE &&
+	    type != AFS_FTYPE_DIR &&
+	    type != AFS_FTYPE_SYMLINK)
+		goto bad;
+	status->type = type;
+
+	status->nlink		= ntohl(xdr->nlink);
+	status->author		= ntohl(xdr->author);
+	status->owner		= ntohl(xdr->owner);
+	status->caller_access	= ntohl(xdr->caller_access); /* Ticket dependent */
+	status->anon_access	= ntohl(xdr->anon_access);
+	status->mode		= ntohl(xdr->mode) & S_IALLUGO;
+	status->group		= ntohl(xdr->group);
+	status->lock_count	= ntohl(xdr->lock_count);
+
+	/* Timestamps arrive as whole seconds only. */
+	status->mtime_client.tv_sec  = ntohl(xdr->mtime_client);
+	status->mtime_client.tv_nsec = 0;
+	status->mtime_server.tv_sec  = ntohl(xdr->mtime_server);
+	status->mtime_server.tv_nsec = 0;
+
+	/* 64-bit quantities are split into separate high and low words. */
+	status->size = ((u64)ntohl(xdr->size_hi) << 32) |
+		       (u64)ntohl(xdr->size_lo);
+	status->data_version = ((u64)ntohl(xdr->data_version_hi) << 32) |
+			       (u64)ntohl(xdr->data_version_lo);
+
+	scb->have_status = true;
+	goto advance;
+
+bad:
+	xdr_dump_bad(*_bp);
+	afs_protocol_error(call, afs_eproto_bad_status);
+advance:
+	*_bp = (const void *)*_bp + sizeof(*xdr);
+}
+
+/*
+ * Turn a relative expiry (in seconds) into an absolute time by adding it to
+ * the call's issue time, converted from nanoseconds to whole seconds.
+ */
+static time64_t xdr_decode_expiry(struct afs_call *call, u32 expiry)
+{
+	time64_t issued_secs = ktime_divns(call->issue_time, NSEC_PER_SEC);
+
+	return issued_secs + expiry;
+}
+
+/*
+ * Decode an AFSCallBack block (three words: version, expiry, type).  Only the
+ * expiry is used; it is converted to an absolute time relative to the call's
+ * issue time.  Sets scb->have_cb and advances the cursor past the block.
+ */
+static void xdr_decode_AFSCallBack(const __be32 **_bp,
+				   struct afs_call *call,
+				   struct afs_status_cb *scb)
+{
+	const __be32 *words = *_bp;
+
+	/* words[0] is the version and words[2] the type; both are skipped. */
+	scb->callback.expires_at = xdr_decode_expiry(call, ntohl(words[1]));
+	scb->have_cb = true;
+
+	*_bp = words + 3;
+}
+
+/*
+ * Decode an AFSVolSync block: six words, of which only the first (the
+ * creation value) is kept; the remaining five are spare and skipped.
+ *
+ * @volsync may be NULL, in which case the block is simply stepped over.
+ */
+static void xdr_decode_AFSVolSync(const __be32 **_bp,
+				  struct afs_volsync *volsync)
+{
+	const __be32 *words = *_bp;
+
+	/* creation + spare2..spare6 */
+	*_bp = words + 6;
+
+	if (!volsync)
+		return;
+	volsync->creation = ntohl(words[0]);
+}
+
+/*
+ * Encode the requested attributes into an AFSStoreStatus block.
+ *
+ * Six words are written at *_bp: a mask saying which of the following fields
+ * are to be applied, then mtime, owner, group, mode and a zero segment size.
+ * Fields not selected by attr->ia_valid are sent as zero with their mask bit
+ * clear.  The cursor is advanced past the block.
+ */
+static void xdr_encode_AFS_StoreStatus(__be32 **_bp, struct iattr *attr)
+{
+	__be32 *out = *_bp;
+	u32 mask = 0, mtime = 0, owner = 0, group = 0, mode = 0;
+
+	if (attr->ia_valid & ATTR_MTIME) {
+		mtime = attr->ia_mtime.tv_sec;
+		mask |= AFS_SET_MTIME;
+	}
+	if (attr->ia_valid & ATTR_UID) {
+		owner = from_kuid(&init_user_ns, attr->ia_uid);
+		mask |= AFS_SET_OWNER;
+	}
+	if (attr->ia_valid & ATTR_GID) {
+		group = from_kgid(&init_user_ns, attr->ia_gid);
+		mask |= AFS_SET_GROUP;
+	}
+	if (attr->ia_valid & ATTR_MODE) {
+		mode = attr->ia_mode & S_IALLUGO;
+		mask |= AFS_SET_MODE;
+	}
+
+	out[0] = htonl(mask);
+	out[1] = htonl(mtime);
+	out[2] = htonl(owner);
+	out[3] = htonl(group);
+	out[4] = htonl(mode);
+	out[5] = 0;		/* segment size */
+
+	*_bp = out + 6;
+}
+
+/*
+ * Decode an AFSFetchVolumeStatus block: twelve consecutive 32-bit words copied
+ * into @vs in wire order.  The two date fields of @vs are not taken from the
+ * wire and are zeroed.  The cursor is advanced past the block.
+ */
+static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp,
+					    struct afs_volume_status *vs)
+{
+	const __be32 *words = *_bp;
+
+	vs->vid			= ntohl(words[0]);
+	vs->parent_id		= ntohl(words[1]);
+	vs->online		= ntohl(words[2]);
+	vs->in_service		= ntohl(words[3]);
+	vs->blessed		= ntohl(words[4]);
+	vs->needs_salvage	= ntohl(words[5]);
+	vs->type		= ntohl(words[6]);
+	vs->min_quota		= ntohl(words[7]);
+	vs->max_quota		= ntohl(words[8]);
+	vs->blocks_in_use	= ntohl(words[9]);
+	vs->part_blocks_avail	= ntohl(words[10]);
+	vs->part_max_blocks	= ntohl(words[11]);
+
+	vs->vol_copy_date	= 0;
+	vs->vol_backup_date	= 0;
+
+	*_bp = words + 12;
+}
+
+/*
+ * Deliver reply data to an FS.FetchStatus call.
+ *
+ * Returns a negative value straight from afs_transfer_reply() if that does
+ * not succeed; otherwise decodes status, callback and volume sync from the
+ * call buffer into the selected vnode parameter and returns 0.
+ */
+static int afs_deliver_fs_fetch_status(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_status_cb *scb = &op->file[op->fetch_status.which].scb;
+	const __be32 *cursor;
+	int err = afs_transfer_reply(call);
+
+	if (err < 0)
+		return err;
+
+	/* The whole reply is in the buffer now, so unpack it in wire order. */
+	cursor = call->buffer;
+	xdr_decode_AFSFetchStatus(&cursor, call, scb);
+	xdr_decode_AFSCallBack(&cursor, call, scb);
+	xdr_decode_AFSVolSync(&cursor, &op->volsync);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * FS.FetchStatus operation type.  Names afs_deliver_fs_fetch_status() as the
+ * reply handler and afs_flat_call_destructor() as the destructor.
+ */
+static const struct afs_call_type afs_RXFSFetchStatus = {
+ .name = "FS.FetchStatus",
+ .op = afs_FS_FetchStatus,
+ .deliver = afs_deliver_fs_fetch_status,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Fetch the status information for a file.
+ *
+ * Builds a four-word FS.FetchStatus request (opcode plus the fid of the vnode
+ * selected by op->fetch_status.which) and hands it to afs_make_op_call().  If
+ * the call cannot be allocated, afs_op_nomem() is invoked on @op instead.
+ */
+void afs_fs_fetch_status(struct afs_operation *op)
+{
+	struct afs_vnode_param *target = &op->file[op->fetch_status.which];
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), target->fid.vid, target->fid.vnode);
+
+	/* Reply: AFSFetchStatus (21) + AFSCallBack (3) + AFSVolSync (6) words */
+	call = afs_alloc_flat_call(op->net, &afs_RXFSFetchStatus,
+				   4 * 4, (21 + 3 + 6) * 4);
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	*bp++ = htonl(FSFETCHSTATUS);
+	*bp++ = htonl(target->fid.vid);
+	*bp++ = htonl(target->fid.vnode);
+	*bp++ = htonl(target->fid.unique);
+
+	trace_afs_make_fs_call(call, &target->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * deliver reply data to an FS.FetchData
+ *
+ * This handler is shared by the FS.FetchData and FS.FetchData64 call types.
+ * It is written as a staged state machine keyed on call->unmarshall: each
+ * stage asks afs_extract_data() for its piece of the reply and returns that
+ * function's result unchanged if it is negative.  NOTE(review): presumably the
+ * handler is then re-entered at the same stage once more data is available -
+ * confirm against the caller.
+ *
+ * Stages:
+ *  0 - reset req->actual_len and set up extraction of the length field
+ *  1 - read the data length into req->actual_len
+ *  2 - read file data into req->iter, capped at req->len
+ *  3 - discard any data beyond req->len
+ *  4 - read and decode the trailing status/callback/volsync metadata
+ *  5 - done
+ *
+ * Returns 0 once the whole reply has been consumed.
+ */
+static int afs_deliver_fs_fetch_data(struct afs_call *call)
+{
+ struct afs_operation *op = call->op;
+ struct afs_vnode_param *vp = &op->file[0];
+ struct afs_read *req = op->fetch.req;
+ const __be32 *bp;
+ int ret;
+
+ _enter("{%u,%zu,%zu/%llu}",
+ call->unmarshall, call->iov_len, iov_iter_count(call->iter),
+ req->actual_len);
+
+ switch (call->unmarshall) {
+ case 0:
+ req->actual_len = 0;
+ call->unmarshall++;
+ /* The 64-bit variant sends a 64-bit length; the 32-bit variant
+ * sends a single word, with tmp_u zeroed beforehand.
+ * NOTE(review): presumably tmp_u overlays the upper half of
+ * tmp64 so that the be64 read below yields the right value -
+ * confirm against the afs_call definition.
+ */
+ if (call->operation_ID == FSFETCHDATA64) {
+ afs_extract_to_tmp64(call);
+ } else {
+ call->tmp_u = htonl(0);
+ afs_extract_to_tmp(call);
+ }
+ fallthrough;
+
+ /* Extract the returned data length into
+ * ->actual_len. This may indicate more or less data than was
+ * requested will be returned.
+ */
+ case 1:
+ _debug("extract data length");
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ req->actual_len = be64_to_cpu(call->tmp64);
+ _debug("DATA length: %llu", req->actual_len);
+
+ /* No payload at all: skip straight to the metadata. */
+ if (req->actual_len == 0)
+ goto no_more_data;
+
+ /* Receive into the caller's iterator, but never more than was
+ * asked for.
+ */
+ call->iter = req->iter;
+ call->iov_len = min(req->actual_len, req->len);
+ call->unmarshall++;
+ fallthrough;
+
+ /* extract the returned data */
+ case 2:
+ _debug("extract data %zu/%llu",
+ iov_iter_count(call->iter), req->actual_len);
+
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ /* Switch back to the call's own iterator for what follows. */
+ call->iter = &call->def_iter;
+ if (req->actual_len <= req->len)
+ goto no_more_data;
+
+ /* Discard any excess data the server gave us */
+ afs_extract_discard(call, req->actual_len - req->len);
+ call->unmarshall = 3;
+ fallthrough;
+
+ case 3:
+ _debug("extract discard %zu/%llu",
+ iov_iter_count(call->iter), req->actual_len - req->len);
+
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ /* Reached by falling through from stage 3 or by jumping here from
+ * stages 1 and 2 when there is nothing (more) to read or discard.
+ * Sets up extraction of 21 + 3 + 6 words of trailing metadata.
+ */
+ no_more_data:
+ call->unmarshall = 4;
+ afs_extract_to_buf(call, (21 + 3 + 6) * 4);
+ fallthrough;
+
+ /* extract the metadata */
+ case 4:
+ ret = afs_extract_data(call, false);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer;
+ xdr_decode_AFSFetchStatus(&bp, call, &vp->scb);
+ xdr_decode_AFSCallBack(&bp, call, &vp->scb);
+ xdr_decode_AFSVolSync(&bp, &op->volsync);
+
+ /* Pass the decoded version and size back through the read request. */
+ req->data_version = vp->scb.status.data_version;
+ req->file_size = vp->scb.status.size;
+
+ call->unmarshall++;
+ fallthrough;
+
+ case 5:
+ break;
+ }
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * FS.FetchData operation type (32-bit position and length).  Replies are
+ * handled by afs_deliver_fs_fetch_data().
+ */
+static const struct afs_call_type afs_RXFSFetchData = {
+ .name = "FS.FetchData",
+ .op = afs_FS_FetchData,
+ .deliver = afs_deliver_fs_fetch_data,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * FS.FetchData64 operation type.  Shares afs_deliver_fs_fetch_data() with the
+ * FS.FetchData operation type.
+ */
+static const struct afs_call_type afs_RXFSFetchData64 = {
+ .name = "FS.FetchData64",
+ .op = afs_FS_FetchData64,
+ .deliver = afs_deliver_fs_fetch_data,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Fetch data from a very large file using FS.FetchData64.
+ *
+ * The request is eight words: opcode, fid (3 words), the 64-bit start
+ * position as upper then lower halves, and the length as a zero upper word
+ * followed by the lower 32 bits of req->len.  If the call cannot be allocated,
+ * afs_op_nomem() is invoked on @op instead.
+ */
+static void afs_fs_fetch_data64(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[0];
+	struct afs_read *req = op->fetch.req;
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter("");
+
+	/* Reply: AFSFetchStatus (21) + AFSCallBack (3) + AFSVolSync (6) words */
+	call = afs_alloc_flat_call(op->net, &afs_RXFSFetchData64, 32, (21 + 3 + 6) * 4);
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* Record the call's debug ID in the read request, exactly as
+	 * afs_fs_fetch_data() does for the 32-bit variant, so both paths
+	 * leave the request in the same state.
+	 */
+	req->call_debug_id = call->debug_id;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp[0] = htonl(FSFETCHDATA64);
+	bp[1] = htonl(vp->fid.vid);
+	bp[2] = htonl(vp->fid.vnode);
+	bp[3] = htonl(vp->fid.unique);
+	bp[4] = htonl(upper_32_bits(req->pos));
+	bp[5] = htonl(lower_32_bits(req->pos));
+	bp[6] = 0;
+	bp[7] = htonl(lower_32_bits(req->len));
+
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Fetch data from a file.
+ *
+ * Servers flagged AFS_SERVER_FL_HAS_FS64 are sent FS.FetchData64 instead.
+ * Otherwise a six-word FS.FetchData request is built (opcode, fid, and the
+ * lower 32 bits of the position and length) and the call's debug ID is
+ * recorded in the read request.  If the call cannot be allocated,
+ * afs_op_nomem() is invoked on @op instead.
+ */
+void afs_fs_fetch_data(struct afs_operation *op)
+{
+	struct afs_vnode_param *target = &op->file[0];
+	struct afs_read *req = op->fetch.req;
+	struct afs_call *call;
+	__be32 *bp;
+
+	if (test_bit(AFS_SERVER_FL_HAS_FS64, &op->server->flags))
+		return afs_fs_fetch_data64(op);
+
+	_enter("");
+
+	/* Reply: AFSFetchStatus (21) + AFSCallBack (3) + AFSVolSync (6) words */
+	call = afs_alloc_flat_call(op->net, &afs_RXFSFetchData,
+				   6 * 4, (21 + 3 + 6) * 4);
+	if (!call)
+		return afs_op_nomem(op);
+
+	req->call_debug_id = call->debug_id;
+
+	/* marshall the parameters */
+	bp = call->request;
+	*bp++ = htonl(FSFETCHDATA);
+	*bp++ = htonl(target->fid.vid);
+	*bp++ = htonl(target->fid.vnode);
+	*bp++ = htonl(target->fid.unique);
+	*bp++ = htonl(lower_32_bits(req->pos));
+	*bp++ = htonl(lower_32_bits(req->len));
+
+	trace_afs_make_fs_call(call, &target->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to an FS.CreateFile or an FS.MakeDir.
+ *
+ * The reply holds the new vnode's fid and status, the parent directory's
+ * updated status, a callback for the new vnode and the volume sync record, in
+ * that order.  Returns a negative value from afs_transfer_reply() unchanged,
+ * or 0 once everything has been decoded.
+ */
+static int afs_deliver_fs_create_vnode(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_vnode_param *parent = &op->file[0];
+	struct afs_vnode_param *created = &op->file[1];
+	const __be32 *cursor;
+	int err = afs_transfer_reply(call);
+
+	if (err < 0)
+		return err;
+
+	/* The whole reply is in the buffer now, so unpack it in wire order. */
+	cursor = call->buffer;
+	xdr_decode_AFSFid(&cursor, &created->fid);
+	xdr_decode_AFSFetchStatus(&cursor, call, &created->scb);
+	xdr_decode_AFSFetchStatus(&cursor, call, &parent->scb);
+	xdr_decode_AFSCallBack(&cursor, call, &created->scb);
+	xdr_decode_AFSVolSync(&cursor, &op->volsync);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * FS.CreateFile operation type.  Replies are handled by
+ * afs_deliver_fs_create_vnode(), which is shared with FS.MakeDir.
+ */
+static const struct afs_call_type afs_RXFSCreateFile = {
+ .name = "FS.CreateFile",
+ .op = afs_FS_CreateFile,
+ .deliver = afs_deliver_fs_create_vnode,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Create a file.
+ *
+ * Request layout: opcode, parent directory fid (3 words), the new name as a
+ * length-prefixed string zero-padded to a 4-byte boundary, then a six-word
+ * store-status block setting the mode (from op->create.mode) and mtime.  If
+ * the call cannot be allocated, afs_op_nomem() is invoked on @op instead.
+ */
+void afs_fs_create_file(struct afs_operation *op)
+{
+	const struct qstr *name = &op->dentry->d_name;
+	struct afs_vnode_param *parent = &op->file[0];
+	struct afs_call *call;
+	size_t name_len, pad_len, req_len;
+	__be32 *bp;
+
+	_enter("");
+
+	name_len = name->len;
+	pad_len = (4 - (name_len & 3)) & 3;
+	req_len = (5 * 4) + name_len + pad_len + (6 * 4);
+
+	/* Reply: fid (3) + two AFSFetchStatus (21 each) + callback (3) +
+	 * volsync (6) words.
+	 */
+	call = afs_alloc_flat_call(op->net, &afs_RXFSCreateFile,
+				   req_len, (3 + 21 + 21 + 3 + 6) * 4);
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	*bp++ = htonl(FSCREATEFILE);
+	*bp++ = htonl(parent->fid.vid);
+	*bp++ = htonl(parent->fid.vnode);
+	*bp++ = htonl(parent->fid.unique);
+
+	/* Name: length word, raw bytes, zero fill up to the next word. */
+	*bp++ = htonl(name_len);
+	memcpy(bp, name->name, name_len);
+	bp = (void *) bp + name_len;
+	if (pad_len) {
+		memset(bp, 0, pad_len);
+		bp = (void *) bp + pad_len;
+	}
+
+	/* Store-status block for the new file. */
+	bp[0] = htonl(AFS_SET_MODE | AFS_SET_MTIME);
+	bp[1] = htonl(op->mtime.tv_sec);		/* mtime */
+	bp[2] = 0;					/* owner */
+	bp[3] = 0;					/* group */
+	bp[4] = htonl(op->create.mode & S_IALLUGO);	/* unix mode */
+	bp[5] = 0;					/* segment size */
+
+	trace_afs_make_fs_call1(call, &parent->fid, name);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * FS.MakeDir operation type.  Replies are handled by
+ * afs_deliver_fs_create_vnode(), which is shared with FS.CreateFile.
+ */
+static const struct afs_call_type afs_RXFSMakeDir = {
+ .name = "FS.MakeDir",
+ .op = afs_FS_MakeDir,
+ .deliver = afs_deliver_fs_create_vnode,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Create a new directory.
+ *
+ * Request layout: opcode, parent directory fid (3 words), the new name as a
+ * length-prefixed string zero-padded to a 4-byte boundary, then a six-word
+ * store-status block setting the mode (from op->create.mode) and mtime.  If
+ * the call cannot be allocated, afs_op_nomem() is invoked on @op instead.
+ */
+void afs_fs_make_dir(struct afs_operation *op)
+{
+	const struct qstr *name = &op->dentry->d_name;
+	struct afs_vnode_param *parent = &op->file[0];
+	struct afs_call *call;
+	size_t name_len, pad_len, req_len;
+	__be32 *bp;
+
+	_enter("");
+
+	name_len = name->len;
+	pad_len = (4 - (name_len & 3)) & 3;
+	req_len = (5 * 4) + name_len + pad_len + (6 * 4);
+
+	/* Reply: fid (3) + two AFSFetchStatus (21 each) + callback (3) +
+	 * volsync (6) words.
+	 */
+	call = afs_alloc_flat_call(op->net, &afs_RXFSMakeDir,
+				   req_len, (3 + 21 + 21 + 3 + 6) * 4);
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	*bp++ = htonl(FSMAKEDIR);
+	*bp++ = htonl(parent->fid.vid);
+	*bp++ = htonl(parent->fid.vnode);
+	*bp++ = htonl(parent->fid.unique);
+
+	/* Name: length word, raw bytes, zero fill up to the next word. */
+	*bp++ = htonl(name_len);
+	memcpy(bp, name->name, name_len);
+	bp = (void *) bp + name_len;
+	if (pad_len) {
+		memset(bp, 0, pad_len);
+		bp = (void *) bp + pad_len;
+	}
+
+	/* Store-status block for the new directory. */
+	bp[0] = htonl(AFS_SET_MODE | AFS_SET_MTIME);
+	bp[1] = htonl(op->mtime.tv_sec);		/* mtime */
+	bp[2] = 0;					/* owner */
+	bp[3] = 0;					/* group */
+	bp[4] = htonl(op->create.mode & S_IALLUGO);	/* unix mode */
+	bp[5] = 0;					/* segment size */
+
+	trace_afs_make_fs_call1(call, &parent->fid, name);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to any operation that returns status and volume sync.
+ *
+ * The status is decoded into op->file[0].  Returns a negative value from
+ * afs_transfer_reply() unchanged, or 0 once the reply has been decoded.
+ */
+static int afs_deliver_fs_file_status_and_vol(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_status_cb *scb = &op->file[0].scb;
+	const __be32 *cursor;
+	int err = afs_transfer_reply(call);
+
+	if (err < 0)
+		return err;
+
+	/* The whole reply is in the buffer now, so unpack it in wire order. */
+	cursor = call->buffer;
+	xdr_decode_AFSFetchStatus(&cursor, call, scb);
+	xdr_decode_AFSVolSync(&cursor, &op->volsync);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * FS.RemoveFile operation type.  Replies are handled by
+ * afs_deliver_fs_file_status_and_vol().
+ */
+static const struct afs_call_type afs_RXFSRemoveFile = {
+ .name = "FS.RemoveFile",
+ .op = afs_FS_RemoveFile,
+ .deliver = afs_deliver_fs_file_status_and_vol,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Remove a file.
+ *
+ * Request layout: opcode, parent directory fid (3 words), then the name to
+ * remove as a length-prefixed string zero-padded to a 4-byte boundary.  If
+ * the call cannot be allocated, afs_op_nomem() is invoked on @op instead.
+ */
+void afs_fs_remove_file(struct afs_operation *op)
+{
+	const struct qstr *name = &op->dentry->d_name;
+	struct afs_vnode_param *parent = &op->file[0];
+	struct afs_call *call;
+	size_t name_len, pad_len, req_len;
+	__be32 *bp;
+
+	_enter("");
+
+	name_len = name->len;
+	pad_len = (4 - (name_len & 3)) & 3;
+	req_len = (5 * 4) + name_len + pad_len;
+
+	/* Reply: AFSFetchStatus (21) + AFSVolSync (6) words */
+	call = afs_alloc_flat_call(op->net, &afs_RXFSRemoveFile,
+				   req_len, (21 + 6) * 4);
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp[0] = htonl(FSREMOVEFILE);
+	bp[1] = htonl(parent->fid.vid);
+	bp[2] = htonl(parent->fid.vnode);
+	bp[3] = htonl(parent->fid.unique);
+	bp[4] = htonl(name_len);
+	bp += 5;
+
+	/* Name bytes, then zero fill up to the next word. */
+	memcpy(bp, name->name, name_len);
+	bp = (void *) bp + name_len;
+	if (pad_len) {
+		memset(bp, 0, pad_len);
+		bp = (void *) bp + pad_len;
+	}
+
+	trace_afs_make_fs_call1(call, &parent->fid, name);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * FS.RemoveDir operation type.  Replies are handled by
+ * afs_deliver_fs_file_status_and_vol().
+ */
+static const struct afs_call_type afs_RXFSRemoveDir = {
+ .name = "FS.RemoveDir",
+ .op = afs_FS_RemoveDir,
+ .deliver = afs_deliver_fs_file_status_and_vol,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Remove a directory.
+ *
+ * Request layout: opcode, parent directory fid (3 words), then the name to
+ * remove as a length-prefixed string zero-padded to a 4-byte boundary.  If
+ * the call cannot be allocated, afs_op_nomem() is invoked on @op instead.
+ */
+void afs_fs_remove_dir(struct afs_operation *op)
+{
+	const struct qstr *name = &op->dentry->d_name;
+	struct afs_vnode_param *parent = &op->file[0];
+	struct afs_call *call;
+	size_t name_len, pad_len, req_len;
+	__be32 *bp;
+
+	_enter("");
+
+	name_len = name->len;
+	pad_len = (4 - (name_len & 3)) & 3;
+	req_len = (5 * 4) + name_len + pad_len;
+
+	/* Reply: AFSFetchStatus (21) + AFSVolSync (6) words */
+	call = afs_alloc_flat_call(op->net, &afs_RXFSRemoveDir,
+				   req_len, (21 + 6) * 4);
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp[0] = htonl(FSREMOVEDIR);
+	bp[1] = htonl(parent->fid.vid);
+	bp[2] = htonl(parent->fid.vnode);
+	bp[3] = htonl(parent->fid.unique);
+	bp[4] = htonl(name_len);
+	bp += 5;
+
+	/* Name bytes, then zero fill up to the next word. */
+	memcpy(bp, name->name, name_len);
+	bp = (void *) bp + name_len;
+	if (pad_len) {
+		memset(bp, 0, pad_len);
+		bp = (void *) bp + pad_len;
+	}
+
+	trace_afs_make_fs_call1(call, &parent->fid, name);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to an FS.Link.
+ *
+ * The reply carries the linked vnode's status (op->file[1]) followed by the
+ * directory's status (op->file[0]) and the volume sync record.  Returns a
+ * negative value from afs_transfer_reply() unchanged, or 0 once decoded.
+ */
+static int afs_deliver_fs_link(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_vnode_param *dir = &op->file[0];
+	struct afs_vnode_param *target = &op->file[1];
+	const __be32 *cursor;
+	int err;
+
+	_enter("{%u}", call->unmarshall);
+
+	err = afs_transfer_reply(call);
+	if (err < 0)
+		return err;
+
+	/* The whole reply is in the buffer now, so unpack it in wire order. */
+	cursor = call->buffer;
+	xdr_decode_AFSFetchStatus(&cursor, call, &target->scb);
+	xdr_decode_AFSFetchStatus(&cursor, call, &dir->scb);
+	xdr_decode_AFSVolSync(&cursor, &op->volsync);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * FS.Link operation type.  Replies are handled by afs_deliver_fs_link().
+ */
+static const struct afs_call_type afs_RXFSLink = {
+ .name = "FS.Link",
+ .op = afs_FS_Link,
+ .deliver = afs_deliver_fs_link,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Make a hard link.
+ *
+ * Request layout: opcode, directory fid (3 words), the link name as a
+ * length-prefixed string zero-padded to a 4-byte boundary, then the fid of
+ * the vnode being linked to (3 words).  If the call cannot be allocated,
+ * afs_op_nomem() is invoked on @op instead.
+ */
+void afs_fs_link(struct afs_operation *op)
+{
+	const struct qstr *name = &op->dentry->d_name;
+	struct afs_vnode_param *dir = &op->file[0];
+	struct afs_vnode_param *target = &op->file[1];
+	struct afs_call *call;
+	size_t name_len, pad_len, req_len;
+	__be32 *bp;
+
+	_enter("");
+
+	name_len = name->len;
+	pad_len = (4 - (name_len & 3)) & 3;
+	req_len = (5 * 4) + name_len + pad_len + (3 * 4);
+
+	/* Reply: two AFSFetchStatus (21 each) + AFSVolSync (6) words */
+	call = afs_alloc_flat_call(op->net, &afs_RXFSLink, req_len, (21 + 21 + 6) * 4);
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp[0] = htonl(FSLINK);
+	bp[1] = htonl(dir->fid.vid);
+	bp[2] = htonl(dir->fid.vnode);
+	bp[3] = htonl(dir->fid.unique);
+	bp[4] = htonl(name_len);
+	bp += 5;
+
+	/* Name bytes, then zero fill up to the next word. */
+	memcpy(bp, name->name, name_len);
+	bp = (void *) bp + name_len;
+	if (pad_len) {
+		memset(bp, 0, pad_len);
+		bp = (void *) bp + pad_len;
+	}
+
+	bp[0] = htonl(target->fid.vid);
+	bp[1] = htonl(target->fid.vnode);
+	bp[2] = htonl(target->fid.unique);
+
+	trace_afs_make_fs_call1(call, &target->fid, name);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to an FS.Symlink.
+ *
+ * The reply carries the new symlink's fid and status (op->file[1]), the
+ * directory's status (op->file[0]) and the volume sync record.  Returns a
+ * negative value from afs_transfer_reply() unchanged, or 0 once decoded.
+ */
+static int afs_deliver_fs_symlink(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_vnode_param *dir = &op->file[0];
+	struct afs_vnode_param *created = &op->file[1];
+	const __be32 *cursor;
+	int err;
+
+	_enter("{%u}", call->unmarshall);
+
+	err = afs_transfer_reply(call);
+	if (err < 0)
+		return err;
+
+	/* The whole reply is in the buffer now, so unpack it in wire order. */
+	cursor = call->buffer;
+	xdr_decode_AFSFid(&cursor, &created->fid);
+	xdr_decode_AFSFetchStatus(&cursor, call, &created->scb);
+	xdr_decode_AFSFetchStatus(&cursor, call, &dir->scb);
+	xdr_decode_AFSVolSync(&cursor, &op->volsync);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * FS.Symlink operation type.  Replies are handled by
+ * afs_deliver_fs_symlink().
+ */
+static const struct afs_call_type afs_RXFSSymlink = {
+ .name = "FS.Symlink",
+ .op = afs_FS_Symlink,
+ .deliver = afs_deliver_fs_symlink,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Create a symbolic link.
+ *
+ * Request layout: opcode, directory fid (3 words), the link name and then
+ * the link contents (op->create.symlink), each as a length-prefixed string
+ * zero-padded to a 4-byte boundary, followed by a six-word store-status block
+ * setting mode S_IRWXUGO and the mtime.  If the call cannot be allocated,
+ * afs_op_nomem() is invoked on @op instead.
+ */
+void afs_fs_symlink(struct afs_operation *op)
+{
+	const struct qstr *name = &op->dentry->d_name;
+	struct afs_vnode_param *dir = &op->file[0];
+	struct afs_call *call;
+	size_t name_len, name_pad, body_len, body_pad, req_len;
+	__be32 *bp;
+
+	_enter("");
+
+	name_len = name->len;
+	name_pad = (4 - (name_len & 3)) & 3;
+
+	body_len = strlen(op->create.symlink);
+	body_pad = (4 - (body_len & 3)) & 3;
+
+	req_len = (6 * 4) + name_len + name_pad + body_len + body_pad + (6 * 4);
+
+	/* Reply: fid (3) + two AFSFetchStatus (21 each) + AFSVolSync (6) words */
+	call = afs_alloc_flat_call(op->net, &afs_RXFSSymlink, req_len,
+				   (3 + 21 + 21 + 6) * 4);
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	*bp++ = htonl(FSSYMLINK);
+	*bp++ = htonl(dir->fid.vid);
+	*bp++ = htonl(dir->fid.vnode);
+	*bp++ = htonl(dir->fid.unique);
+
+	/* Link name: length word, raw bytes, zero fill to the next word. */
+	*bp++ = htonl(name_len);
+	memcpy(bp, name->name, name_len);
+	bp = (void *) bp + name_len;
+	if (name_pad) {
+		memset(bp, 0, name_pad);
+		bp = (void *) bp + name_pad;
+	}
+
+	/* Link contents, encoded the same way. */
+	*bp++ = htonl(body_len);
+	memcpy(bp, op->create.symlink, body_len);
+	bp = (void *) bp + body_len;
+	if (body_pad) {
+		memset(bp, 0, body_pad);
+		bp = (void *) bp + body_pad;
+	}
+
+	/* Store-status block for the new symlink. */
+	bp[0] = htonl(AFS_SET_MODE | AFS_SET_MTIME);
+	bp[1] = htonl(op->mtime.tv_sec);	/* mtime */
+	bp[2] = 0;				/* owner */
+	bp[3] = 0;				/* group */
+	bp[4] = htonl(S_IRWXUGO);		/* unix mode */
+	bp[5] = 0;				/* segment size */
+
+	trace_afs_make_fs_call1(call, &dir->fid, name);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to an FS.Rename.
+ *
+ * The reply carries the status of the original directory (op->file[0]), the
+ * status of the new directory (op->file[1]) and the volume sync record.
+ * Returns a negative value from afs_transfer_reply() unchanged, or 0 once
+ * decoded.
+ */
+static int afs_deliver_fs_rename(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_status_cb *old_dir_scb = &op->file[0].scb;
+	struct afs_status_cb *new_dir_scb = &op->file[1].scb;
+	const __be32 *cursor;
+	int err = afs_transfer_reply(call);
+
+	if (err < 0)
+		return err;
+
+	cursor = call->buffer;
+	/* If the two dirs are the same, we have two copies of the same status
+	 * report, so we just decode it twice.
+	 */
+	xdr_decode_AFSFetchStatus(&cursor, call, old_dir_scb);
+	xdr_decode_AFSFetchStatus(&cursor, call, new_dir_scb);
+	xdr_decode_AFSVolSync(&cursor, &op->volsync);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * FS.Rename operation type.  Replies are handled by afs_deliver_fs_rename().
+ */
+static const struct afs_call_type afs_RXFSRename = {
+ .name = "FS.Rename",
+ .op = afs_FS_Rename,
+ .deliver = afs_deliver_fs_rename,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Rename/move a file or directory.
+ *
+ * Request layout: opcode, original directory fid (3 words) and original
+ * name, then new directory fid (3 words) and new name.  Each name is a
+ * length-prefixed string zero-padded to a 4-byte boundary.  If the call
+ * cannot be allocated, afs_op_nomem() is invoked on @op instead.
+ */
+void afs_fs_rename(struct afs_operation *op)
+{
+	struct afs_vnode_param *old_dir = &op->file[0];
+	struct afs_vnode_param *new_dir = &op->file[1];
+	const struct qstr *orig_name = &op->dentry->d_name;
+	const struct qstr *new_name = &op->dentry_2->d_name;
+	struct afs_call *call;
+	size_t req_len, old_len, old_pad, new_len, new_pad;
+	__be32 *bp;
+
+	_enter("");
+
+	old_len = orig_name->len;
+	old_pad = (4 - (old_len & 3)) & 3;
+
+	new_len = new_name->len;
+	new_pad = (4 - (new_len & 3)) & 3;
+
+	req_len = (4 * 4) +			/* opcode + original dir fid */
+		  4 + old_len + old_pad +	/* original name */
+		  (3 * 4) +			/* new dir fid */
+		  4 + new_len + new_pad;	/* new name */
+
+	/* Reply: two AFSFetchStatus (21 each) + AFSVolSync (6) words */
+	call = afs_alloc_flat_call(op->net, &afs_RXFSRename, req_len, (21 + 21 + 6) * 4);
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp[0] = htonl(FSRENAME);
+	bp[1] = htonl(old_dir->fid.vid);
+	bp[2] = htonl(old_dir->fid.vnode);
+	bp[3] = htonl(old_dir->fid.unique);
+	bp[4] = htonl(old_len);
+	bp += 5;
+	memcpy(bp, orig_name->name, old_len);
+	bp = (void *) bp + old_len;
+	if (old_pad) {
+		memset(bp, 0, old_pad);
+		bp = (void *) bp + old_pad;
+	}
+
+	bp[0] = htonl(new_dir->fid.vid);
+	bp[1] = htonl(new_dir->fid.vnode);
+	bp[2] = htonl(new_dir->fid.unique);
+	bp[3] = htonl(new_len);
+	bp += 4;
+	memcpy(bp, new_name->name, new_len);
+	bp = (void *) bp + new_len;
+	if (new_pad) {
+		memset(bp, 0, new_pad);
+		bp = (void *) bp + new_pad;
+	}
+
+	trace_afs_make_fs_call2(call, &old_dir->fid, orig_name, new_name);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to FS.StoreData or FS.StoreStatus.
+ *
+ * The reply carries the file's updated status (decoded into op->file[0]) and
+ * the volume sync record.  Returns a negative value from
+ * afs_transfer_reply() unchanged, or 0 once decoded.
+ */
+static int afs_deliver_fs_store_data(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_status_cb *scb = &op->file[0].scb;
+	const __be32 *cursor;
+	int err;
+
+	_enter("");
+
+	err = afs_transfer_reply(call);
+	if (err < 0)
+		return err;
+
+	/* The whole reply is in the buffer now, so unpack it in wire order. */
+	cursor = call->buffer;
+	xdr_decode_AFSFetchStatus(&cursor, call, scb);
+	xdr_decode_AFSVolSync(&cursor, &op->volsync);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * FS.StoreData operation type (32-bit position, size and file length).
+ * Replies are handled by afs_deliver_fs_store_data().
+ */
+static const struct afs_call_type afs_RXFSStoreData = {
+ .name = "FS.StoreData",
+ .op = afs_FS_StoreData,
+ .deliver = afs_deliver_fs_store_data,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * FS.StoreData64 operation type.  Shares afs_deliver_fs_store_data() with
+ * the FS.StoreData operation type.
+ */
+static const struct afs_call_type afs_RXFSStoreData64 = {
+ .name = "FS.StoreData64",
+ .op = afs_FS_StoreData64,
+ .deliver = afs_deliver_fs_store_data,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Store data to a very large file using FS.StoreData64.
+ *
+ * Request layout (16 words): opcode, fid (3), a store-status block that sets
+ * only the mtime (6), then the write position, write size and resulting file
+ * size, each as an upper/lower pair of 32-bit words.  The data itself comes
+ * from op->store.write_iter.  If the call cannot be allocated, afs_op_nomem()
+ * is invoked on @op instead.
+ */
+static void afs_fs_store_data64(struct afs_operation *op)
+{
+	struct afs_vnode_param *target = &op->file[0];
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), target->fid.vid, target->fid.vnode);
+
+	/* Reply: AFSFetchStatus (21) + AFSVolSync (6) words */
+	call = afs_alloc_flat_call(op->net, &afs_RXFSStoreData64,
+				   (4 + 6 + 3 * 2) * 4,
+				   (21 + 6) * 4);
+	if (!call)
+		return afs_op_nomem(op);
+
+	call->write_iter = op->store.write_iter;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp[0]  = htonl(FSSTOREDATA64);
+	bp[1]  = htonl(target->fid.vid);
+	bp[2]  = htonl(target->fid.vnode);
+	bp[3]  = htonl(target->fid.unique);
+
+	bp[4]  = htonl(AFS_SET_MTIME);		/* mask */
+	bp[5]  = htonl(op->mtime.tv_sec);	/* mtime */
+	bp[6]  = 0;				/* owner */
+	bp[7]  = 0;				/* group */
+	bp[8]  = 0;				/* unix mode */
+	bp[9]  = 0;				/* segment size */
+
+	bp[10] = htonl(upper_32_bits(op->store.pos));
+	bp[11] = htonl(lower_32_bits(op->store.pos));
+	bp[12] = htonl(upper_32_bits(op->store.size));
+	bp[13] = htonl(lower_32_bits(op->store.size));
+	bp[14] = htonl(upper_32_bits(op->store.i_size));
+	bp[15] = htonl(lower_32_bits(op->store.i_size));
+
+	trace_afs_make_fs_call(call, &target->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Write data to a file on the server.
+ *
+ * Servers flagged AFS_SERVER_FL_HAS_FS64 are sent FS.StoreData64 instead.
+ * Otherwise a 13-word FS.StoreData request is built: opcode, fid (3), a
+ * store-status block that sets only the mtime (6), then the lower 32 bits of
+ * the write position, write size and resulting file size.  The data itself
+ * comes from op->store.write_iter.  If the call cannot be allocated,
+ * afs_op_nomem() is invoked on @op instead.
+ */
+void afs_fs_store_data(struct afs_operation *op)
+{
+	struct afs_vnode_param *target = &op->file[0];
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), target->fid.vid, target->fid.vnode);
+
+	_debug("size %llx, at %llx, i_size %llx",
+	       (unsigned long long)op->store.size,
+	       (unsigned long long)op->store.pos,
+	       (unsigned long long)op->store.i_size);
+
+	if (test_bit(AFS_SERVER_FL_HAS_FS64, &op->server->flags))
+		return afs_fs_store_data64(op);
+
+	/* Reply: AFSFetchStatus (21) + AFSVolSync (6) words */
+	call = afs_alloc_flat_call(op->net, &afs_RXFSStoreData,
+				   (4 + 6 + 3) * 4,
+				   (21 + 6) * 4);
+	if (!call)
+		return afs_op_nomem(op);
+
+	call->write_iter = op->store.write_iter;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp[0]  = htonl(FSSTOREDATA);
+	bp[1]  = htonl(target->fid.vid);
+	bp[2]  = htonl(target->fid.vnode);
+	bp[3]  = htonl(target->fid.unique);
+
+	bp[4]  = htonl(AFS_SET_MTIME);		/* mask */
+	bp[5]  = htonl(op->mtime.tv_sec);	/* mtime */
+	bp[6]  = 0;				/* owner */
+	bp[7]  = 0;				/* group */
+	bp[8]  = 0;				/* unix mode */
+	bp[9]  = 0;				/* segment size */
+
+	bp[10] = htonl(lower_32_bits(op->store.pos));
+	bp[11] = htonl(lower_32_bits(op->store.size));
+	bp[12] = htonl(lower_32_bits(op->store.i_size));
+
+	trace_afs_make_fs_call(call, &target->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * FS.StoreStatus operation type.  Replies are handled by
+ * afs_deliver_fs_store_data(), the same handler the FS.StoreData types use.
+ */
+static const struct afs_call_type afs_RXFSStoreStatus = {
+ .name = "FS.StoreStatus",
+ .op = afs_FS_StoreStatus,
+ .deliver = afs_deliver_fs_store_data,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * FS.StoreData operation type as used by afs_fs_setattr_size() to change a
+ * file's attributes and size together.  The field values are the same as in
+ * afs_RXFSStoreData.
+ */
+static const struct afs_call_type afs_RXFSStoreData_as_Status = {
+ .name = "FS.StoreData",
+ .op = afs_FS_StoreData,
+ .deliver = afs_deliver_fs_store_data,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * FS.StoreData64 operation type as used by afs_fs_setattr_size64() to change
+ * a file's attributes and size together.  The field values are the same as in
+ * afs_RXFSStoreData64.
+ */
+static const struct afs_call_type afs_RXFSStoreData64_as_Status = {
+ .name = "FS.StoreData64",
+ .op = afs_FS_StoreData64,
+ .deliver = afs_deliver_fs_store_data,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Set the attributes on a very large file, using FS.StoreData64 rather than
+ * FS.StoreStatus so as to alter the file size also.
+ *
+ * The request is a zero-length write positioned at the new size, with the new
+ * size also given as the resulting file length; the other attributes travel
+ * in the store-status block.  The caller must have ATTR_SIZE set in
+ * attr->ia_valid.  If the call cannot be allocated, afs_op_nomem() is invoked
+ * on @op instead.
+ */
+static void afs_fs_setattr_size64(struct afs_operation *op)
+{
+	struct afs_vnode_param *target = &op->file[0];
+	struct iattr *attr = op->setattr.attr;
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), target->fid.vid, target->fid.vnode);
+
+	ASSERT(attr->ia_valid & ATTR_SIZE);
+
+	/* Reply: AFSFetchStatus (21) + AFSVolSync (6) words */
+	call = afs_alloc_flat_call(op->net, &afs_RXFSStoreData64_as_Status,
+				   (4 + 6 + 3 * 2) * 4,
+				   (21 + 6) * 4);
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp[0] = htonl(FSSTOREDATA64);
+	bp[1] = htonl(target->fid.vid);
+	bp[2] = htonl(target->fid.vnode);
+	bp[3] = htonl(target->fid.unique);
+	bp += 4;
+
+	xdr_encode_AFS_StoreStatus(&bp, attr);
+
+	bp[0] = htonl(upper_32_bits(attr->ia_size));	/* position of start of write */
+	bp[1] = htonl(lower_32_bits(attr->ia_size));
+	bp[2] = 0;					/* size of write */
+	bp[3] = 0;
+	bp[4] = htonl(upper_32_bits(attr->ia_size));	/* new file length */
+	bp[5] = htonl(lower_32_bits(attr->ia_size));
+
+	trace_afs_make_fs_call(call, &target->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Set the attributes on a file, using FS.StoreData rather than FS.StoreStatus
+ * so as to alter the file size also.
+ *
+ * Servers flagged AFS_SERVER_FL_HAS_FS64 are sent FS.StoreData64 instead.
+ * Otherwise the request is a zero-length write positioned at the new size,
+ * with the new size also given as the resulting file length.  Only the lower
+ * 32 bits of attr->ia_size are sent on this path.  The caller must have
+ * ATTR_SIZE set in attr->ia_valid.  If the call cannot be allocated,
+ * afs_op_nomem() is invoked on @op instead.
+ */
+static void afs_fs_setattr_size(struct afs_operation *op)
+{
+	struct afs_vnode_param *target = &op->file[0];
+	struct iattr *attr = op->setattr.attr;
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), target->fid.vid, target->fid.vnode);
+
+	ASSERT(attr->ia_valid & ATTR_SIZE);
+	if (test_bit(AFS_SERVER_FL_HAS_FS64, &op->server->flags))
+		return afs_fs_setattr_size64(op);
+
+	/* Reply: AFSFetchStatus (21) + AFSVolSync (6) words */
+	call = afs_alloc_flat_call(op->net, &afs_RXFSStoreData_as_Status,
+				   (4 + 6 + 3) * 4,
+				   (21 + 6) * 4);
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp[0] = htonl(FSSTOREDATA);
+	bp[1] = htonl(target->fid.vid);
+	bp[2] = htonl(target->fid.vnode);
+	bp[3] = htonl(target->fid.unique);
+	bp += 4;
+
+	xdr_encode_AFS_StoreStatus(&bp, attr);
+
+	bp[0] = htonl(lower_32_bits(attr->ia_size));	/* position of start of write */
+	bp[1] = 0;					/* size of write */
+	bp[2] = htonl(lower_32_bits(attr->ia_size));	/* new file length */
+
+	trace_afs_make_fs_call(call, &target->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Set the attributes on a file, using FS.StoreData if there's a change in file
+ * size, and FS.StoreStatus otherwise.
+ *
+ * The FS.StoreStatus request is ten words: opcode, fid (3) and the six-word
+ * store-status block built from op->setattr.attr.  If the call cannot be
+ * allocated, afs_op_nomem() is invoked on @op instead.
+ */
+void afs_fs_setattr(struct afs_operation *op)
+{
+	struct afs_vnode_param *target = &op->file[0];
+	struct iattr *attr = op->setattr.attr;
+	struct afs_call *call;
+	__be32 *bp;
+
+	/* A size change has to go through the StoreData path. */
+	if (attr->ia_valid & ATTR_SIZE)
+		return afs_fs_setattr_size(op);
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), target->fid.vid, target->fid.vnode);
+
+	/* Reply: AFSFetchStatus (21) + AFSVolSync (6) words */
+	call = afs_alloc_flat_call(op->net, &afs_RXFSStoreStatus,
+				   (4 + 6) * 4,
+				   (21 + 6) * 4);
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp[0] = htonl(FSSTORESTATUS);
+	bp[1] = htonl(target->fid.vid);
+	bp[2] = htonl(target->fid.vnode);
+	bp[3] = htonl(target->fid.unique);
+	bp += 4;
+
+	xdr_encode_AFS_StoreStatus(&bp, attr);
+
+	trace_afs_make_fs_call(call, &target->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to an FS.GetVolumeStatus call.
+ *
+ * The reply is consumed by a state machine keyed on call->unmarshall so that
+ * delivery can stop when afs_extract_data() returns a negative value and pick
+ * up again at the same state on the next invocation.  The states decode, in
+ * order:
+ *
+ *  - a 12-word volume status record, stored in op->volstatus.vs;
+ *  - the volume name, the offline message and the message of the day, each
+ *    sent as a length word followed by the text padded to a 4-byte multiple.
+ *
+ * Each length must be below AFSNAMEMAX or a protocol error is raised.  The
+ * three strings are NUL-terminated in call->buffer and only passed to
+ * _debug(); nothing here copies them elsewhere.
+ *
+ * Returns 0 once the whole reply has been consumed, otherwise the negative
+ * value from afs_extract_data() or afs_protocol_error().
+ */
+static int afs_deliver_fs_get_volume_status(struct afs_call *call)
+{
+ struct afs_operation *op = call->op;
+ const __be32 *bp;
+ char *p;
+ u32 size;
+ int ret;
+
+ _enter("{%u}", call->unmarshall);
+
+ switch (call->unmarshall) {
+ case 0:
+ call->unmarshall++;
+ afs_extract_to_buf(call, 12 * 4);
+ fallthrough;
+
+ /* extract the returned status record */
+ case 1:
+ _debug("extract status");
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer;
+ xdr_decode_AFSFetchVolumeStatus(&bp, &op->volstatus.vs);
+ call->unmarshall++;
+ afs_extract_to_tmp(call);
+ fallthrough;
+
+ /* extract the volume name length */
+ case 2:
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ /* The length is kept in call->count so that it survives a resume. */
+ call->count = ntohl(call->tmp);
+ _debug("volname length: %u", call->count);
+ if (call->count >= AFSNAMEMAX)
+ return afs_protocol_error(call, afs_eproto_volname_len);
+ size = (call->count + 3) & ~3; /* It's padded */
+ afs_extract_to_buf(call, size);
+ call->unmarshall++;
+ fallthrough;
+
+ /* extract the volume name */
+ case 3:
+ _debug("extract volname");
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ /* NUL-terminate the text so that it can be printed below. */
+ p = call->buffer;
+ p[call->count] = 0;
+ _debug("volname '%s'", p);
+ afs_extract_to_tmp(call);
+ call->unmarshall++;
+ fallthrough;
+
+ /* extract the offline message length */
+ case 4:
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ call->count = ntohl(call->tmp);
+ _debug("offline msg length: %u", call->count);
+ if (call->count >= AFSNAMEMAX)
+ return afs_protocol_error(call, afs_eproto_offline_msg_len);
+ size = (call->count + 3) & ~3; /* It's padded */
+ afs_extract_to_buf(call, size);
+ call->unmarshall++;
+ fallthrough;
+
+ /* extract the offline message */
+ case 5:
+ _debug("extract offline");
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ p = call->buffer;
+ p[call->count] = 0;
+ _debug("offline '%s'", p);
+
+ afs_extract_to_tmp(call);
+ call->unmarshall++;
+ fallthrough;
+
+ /* extract the message of the day length */
+ case 6:
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ call->count = ntohl(call->tmp);
+ _debug("motd length: %u", call->count);
+ if (call->count >= AFSNAMEMAX)
+ return afs_protocol_error(call, afs_eproto_motd_len);
+ size = (call->count + 3) & ~3; /* It's padded */
+ afs_extract_to_buf(call, size);
+ call->unmarshall++;
+ fallthrough;
+
+ /* extract the message of the day */
+ case 7:
+ _debug("extract motd");
+ /* Last item in the reply: no further data is requested after it. */
+ ret = afs_extract_data(call, false);
+ if (ret < 0)
+ return ret;
+
+ p = call->buffer;
+ p[call->count] = 0;
+ _debug("motd '%s'", p);
+
+ call->unmarshall++;
+ fallthrough;
+
+ case 8:
+ break;
+ }
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * Call type descriptor for the FS.GetVolumeStatus RPC.
+ */
+static const struct afs_call_type afs_RXFSGetVolumeStatus = {
+	.op		= afs_FS_GetVolumeStatus,
+	.name		= "FS.GetVolumeStatus",
+	.deliver	= afs_deliver_fs_get_volume_status,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Ask the fileserver for the status of the volume that holds op->file[0].
+ */
+void afs_fs_get_volume_status(struct afs_operation *op)
+{
+	struct afs_vnode_param *target = &op->file[0];
+	struct afs_call *call;
+	__be32 *xdr;
+
+	_enter("");
+
+	/* The reply buffer is sized for the larger of the 12-word status
+	 * record and AFSOPAQUEMAX + 1 bytes.
+	 */
+	call = afs_alloc_flat_call(op->net, &afs_RXFSGetVolumeStatus, 2 * 4,
+				   max(12 * 4, AFSOPAQUEMAX + 1));
+	if (!call) {
+		afs_op_nomem(op);
+		return;
+	}
+
+	/* The request is just the opcode and the volume ID. */
+	xdr = call->request;
+	*xdr++ = htonl(FSGETVOLUMESTATUS);
+	*xdr++ = htonl(target->fid.vid);
+
+	trace_afs_make_fs_call(call, &target->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to an FS.SetLock, FS.ExtendLock or FS.ReleaseLock call.
+ * Once afs_transfer_reply() reports success, the AFSVolSync record in the
+ * reply buffer is decoded into the operation's volsync.
+ */
+static int afs_deliver_fs_xxxx_lock(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	const __be32 *xdr;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret >= 0) {
+		/* The whole reply is in the buffer now; unmarshall it. */
+		xdr = call->buffer;
+		xdr_decode_AFSVolSync(&xdr, &op->volsync);
+
+		_leave(" = 0 [done]");
+		ret = 0;
+	}
+
+	return ret;
+}
+
+/*
+ * Call type descriptor for the FS.SetLock RPC.
+ */
+static const struct afs_call_type afs_RXFSSetLock = {
+	.op		= afs_FS_SetLock,
+	.name		= "FS.SetLock",
+	.deliver	= afs_deliver_fs_xxxx_lock,
+	.destructor	= afs_flat_call_destructor,
+	.done		= afs_lock_op_done,
+};
+
+/*
+ * Call type descriptor for the FS.ExtendLock RPC.
+ */
+static const struct afs_call_type afs_RXFSExtendLock = {
+	.op		= afs_FS_ExtendLock,
+	.name		= "FS.ExtendLock",
+	.deliver	= afs_deliver_fs_xxxx_lock,
+	.destructor	= afs_flat_call_destructor,
+	.done		= afs_lock_op_done,
+};
+
+/*
+ * Call type descriptor for the FS.ReleaseLock RPC.  Unlike the SetLock and
+ * ExtendLock descriptors, no ->done hook is installed.
+ */
+static const struct afs_call_type afs_RXFSReleaseLock = {
+	.op		= afs_FS_ReleaseLock,
+	.name		= "FS.ReleaseLock",
+	.deliver	= afs_deliver_fs_xxxx_lock,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Request a lock of type op->lock.type on the file in op->file[0].
+ */
+void afs_fs_set_lock(struct afs_operation *op)
+{
+	struct afs_vnode_param *target = &op->file[0];
+	struct afs_call *call;
+	__be32 *xdr;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(op->net, &afs_RXFSSetLock, 5 * 4, 6 * 4);
+	if (!call) {
+		afs_op_nomem(op);
+		return;
+	}
+
+	/* Five request words: opcode, FID triple, lock type. */
+	xdr = call->request;
+	xdr[0] = htonl(FSSETLOCK);
+	xdr[1] = htonl(target->fid.vid);
+	xdr[2] = htonl(target->fid.vnode);
+	xdr[3] = htonl(target->fid.unique);
+	xdr[4] = htonl(op->lock.type);
+
+	trace_afs_make_fs_calli(call, &target->fid, op->lock.type);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Extend the lock held on the file in op->file[0].
+ */
+void afs_fs_extend_lock(struct afs_operation *op)
+{
+	struct afs_vnode_param *target = &op->file[0];
+	struct afs_call *call;
+	__be32 *xdr;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(op->net, &afs_RXFSExtendLock, 4 * 4, 6 * 4);
+	if (!call) {
+		afs_op_nomem(op);
+		return;
+	}
+
+	/* Four request words: opcode followed by the FID triple. */
+	xdr = call->request;
+	xdr[0] = htonl(FSEXTENDLOCK);
+	xdr[1] = htonl(target->fid.vid);
+	xdr[2] = htonl(target->fid.vnode);
+	xdr[3] = htonl(target->fid.unique);
+
+	trace_afs_make_fs_call(call, &target->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Release the lock held on the file in op->file[0].
+ */
+void afs_fs_release_lock(struct afs_operation *op)
+{
+	struct afs_vnode_param *target = &op->file[0];
+	struct afs_call *call;
+	__be32 *xdr;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(op->net, &afs_RXFSReleaseLock, 4 * 4, 6 * 4);
+	if (!call) {
+		afs_op_nomem(op);
+		return;
+	}
+
+	/* Four request words: opcode followed by the FID triple. */
+	xdr = call->request;
+	xdr[0] = htonl(FSRELEASELOCK);
+	xdr[1] = htonl(target->fid.vid);
+	xdr[2] = htonl(target->fid.vnode);
+	xdr[3] = htonl(target->fid.unique);
+
+	trace_afs_make_fs_call(call, &target->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to an FS.GiveUpAllCallBacks operation.  Nothing is
+ * decoded here; the result of afs_transfer_reply() is passed straight back.
+ */
+static int afs_deliver_fs_give_up_all_callbacks(struct afs_call *call)
+{
+	int ret = afs_transfer_reply(call);
+
+	return ret;
+}
+
+/*
+ * Call type descriptor for the FS.GiveUpAllCallBacks RPC.
+ */
+static const struct afs_call_type afs_RXFSGiveUpAllCallBacks = {
+	.op		= afs_FS_GiveUpAllCallBacks,
+	.name		= "FS.GiveUpAllCallBacks",
+	.deliver	= afs_deliver_fs_give_up_all_callbacks,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Flush all the callbacks we have on a server by sending it an
+ * FS.GiveUpAllCallBacks call and waiting for that call to complete.
+ *
+ * Returns -ENOMEM if the call cannot be allocated, otherwise the result of
+ * afs_wait_for_call_to_complete().
+ */
+int afs_fs_give_up_all_callbacks(struct afs_net *net,
+				 struct afs_server *server,
+				 struct afs_addr_cursor *ac,
+				 struct key *key)
+{
+	struct afs_call *call;
+	__be32 *xdr;
+	int ret;
+
+	_enter("");
+
+	/* One request word (the opcode) and no reply buffer. */
+	call = afs_alloc_flat_call(net, &afs_RXFSGiveUpAllCallBacks, 1 * 4, 0);
+	if (!call)
+		return -ENOMEM;
+
+	call->key = key;
+
+	xdr = call->request;
+	xdr[0] = htonl(FSGIVEUPALLCALLBACKS);
+
+	call->server = afs_use_server(server, afs_server_trace_give_up_cb);
+	afs_make_call(ac, call, GFP_NOFS);
+
+	ret = afs_wait_for_call_to_complete(call, ac);
+	return ret;
+}
+
+/*
+ * Deliver reply data to an FS.GetCapabilities operation.
+ *
+ * The reply is a word count followed by that many capability words.  The
+ * count is saved in call->count and call->count2, the first capability word
+ * is left in call->tmp and the remaining words are discarded.  A count of
+ * zero clears call->tmp and finishes immediately.
+ *
+ * Delivery is a state machine keyed on call->unmarshall, so the function may
+ * return early with the negative value from afs_extract_data() and be called
+ * again later to resume at the same state.  Returns 0 when done.
+ */
+static int afs_deliver_fs_get_capabilities(struct afs_call *call)
+{
+	u32 count;
+	int ret;
+
+	_enter("{%u,%zu}", call->unmarshall, iov_iter_count(call->iter));
+
+	switch (call->unmarshall) {
+	case 0:
+		afs_extract_to_tmp(call);
+		call->unmarshall++;
+		fallthrough;
+
+		/* Extract the capabilities word count */
+	case 1:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		count = ntohl(call->tmp);
+		call->count = count;
+		call->count2 = count;
+		if (count == 0) {
+			call->unmarshall = 4;
+			call->tmp = 0;
+			break;
+		}
+
+		/* Extract the first word of the capabilities to call->tmp */
+		afs_extract_to_tmp(call);
+		call->unmarshall++;
+		fallthrough;
+
+	case 2:
+		ret = afs_extract_data(call, false);
+		if (ret < 0)
+			return ret;
+
+		/* Take the word count from the call rather than from the
+		 * local variable: if delivery was suspended and has resumed
+		 * in this state, the local was never assigned on this
+		 * invocation.
+		 */
+		afs_extract_discard(call,
+				    (call->count - 1) * sizeof(__be32));
+		call->unmarshall++;
+		fallthrough;
+
+		/* Extract remaining capabilities words */
+	case 3:
+		ret = afs_extract_data(call, false);
+		if (ret < 0)
+			return ret;
+
+		call->unmarshall++;
+		break;
+	}
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * Call type descriptor for the FS.GetCapabilities RPC.  Completion is
+ * reported through afs_fileserver_probe_result().
+ */
+static const struct afs_call_type afs_RXFSGetCapabilities = {
+	.op		= afs_FS_GetCapabilities,
+	.name		= "FS.GetCapabilities",
+	.deliver	= afs_deliver_fs_get_capabilities,
+	.destructor	= afs_flat_call_destructor,
+	.done		= afs_fileserver_probe_result,
+};
+
+/*
+ * Probe a fileserver for the capabilities that it supports.  This RPC can
+ * reply with up to 196 words.
+ *
+ * The operation is asynchronous: if a call could be allocated, true is
+ * returned and the result is delivered through ->done(); otherwise false is
+ * returned to indicate that we didn't even try.
+ */
+bool afs_fs_get_capabilities(struct afs_net *net, struct afs_server *server,
+			     struct afs_addr_cursor *ac, struct key *key)
+{
+	struct afs_call *call;
+	__be32 *xdr;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(net, &afs_RXFSGetCapabilities, 1 * 4, 16 * 4);
+	if (!call)
+		return false;
+
+	/* Set the call up as an asynchronous probe. */
+	call->key		= key;
+	call->server		= afs_use_server(server, afs_server_trace_get_caps);
+	call->upgrade		= true;
+	call->async		= true;
+	call->max_lifespan	= AFS_PROBE_MAX_LIFESPAN;
+
+	/* The request is the opcode alone. */
+	xdr = call->request;
+	xdr[0] = htonl(FSGETCAPABILITIES);
+
+	trace_afs_make_fs_call(call, NULL);
+	afs_make_call(ac, call, GFP_NOFS);
+	afs_put_call(call);
+	return true;
+}
+
+/*
+ * Deliver reply data to an FS.InlineBulkStatus call.
+ *
+ * The reply is consumed by a state machine keyed on call->unmarshall so that
+ * delivery can stop when afs_extract_data() returns a negative value and be
+ * resumed later.  The reply is decoded as:
+ *
+ *  - a status count, which must equal op->nr_files;
+ *  - that many 21-word AFSFetchStatus records;
+ *  - a callback count, which must also equal op->nr_files;
+ *  - that many 3-word AFSCallBack records;
+ *  - a 6-word AFSVolSync record, stored in op->volsync.
+ *
+ * Record n is decoded into op->file[0].scb or op->file[1].scb for n = 0 or 1
+ * and into op->more_files[n - 2].scb otherwise; call->count holds n.
+ *
+ * Returns 0 when the reply is complete, otherwise the negative value from
+ * afs_extract_data() or afs_protocol_error().
+ */
+static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
+{
+ struct afs_operation *op = call->op;
+ struct afs_status_cb *scb;
+ const __be32 *bp;
+ u32 tmp;
+ int ret;
+
+ _enter("{%u}", call->unmarshall);
+
+ switch (call->unmarshall) {
+ case 0:
+ afs_extract_to_tmp(call);
+ call->unmarshall++;
+ fallthrough;
+
+ /* Extract the file status count and array in two steps */
+ case 1:
+ _debug("extract status count");
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ tmp = ntohl(call->tmp);
+ _debug("status count: %u/%u", tmp, op->nr_files);
+ if (tmp != op->nr_files)
+ return afs_protocol_error(call, afs_eproto_ibulkst_count);
+
+ call->count = 0;
+ call->unmarshall++;
+ /* Re-entered by goto for each further status record. */
+ more_counts:
+ afs_extract_to_buf(call, 21 * sizeof(__be32));
+ fallthrough;
+
+ case 2:
+ _debug("extract status array %u", call->count);
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ /* Pick the destination for record number call->count. */
+ switch (call->count) {
+ case 0:
+ scb = &op->file[0].scb;
+ break;
+ case 1:
+ scb = &op->file[1].scb;
+ break;
+ default:
+ scb = &op->more_files[call->count - 2].scb;
+ break;
+ }
+
+ bp = call->buffer;
+ xdr_decode_AFSFetchStatus(&bp, call, scb);
+
+ call->count++;
+ if (call->count < op->nr_files)
+ goto more_counts;
+
+ call->count = 0;
+ call->unmarshall++;
+ afs_extract_to_tmp(call);
+ fallthrough;
+
+ /* Extract the callback count and array in two steps */
+ case 3:
+ _debug("extract CB count");
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ tmp = ntohl(call->tmp);
+ _debug("CB count: %u", tmp);
+ if (tmp != op->nr_files)
+ return afs_protocol_error(call, afs_eproto_ibulkst_cb_count);
+ call->count = 0;
+ call->unmarshall++;
+ /* Re-entered by goto for each further callback record. */
+ more_cbs:
+ afs_extract_to_buf(call, 3 * sizeof(__be32));
+ fallthrough;
+
+ case 4:
+ _debug("extract CB array");
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ _debug("unmarshall CB array");
+ switch (call->count) {
+ case 0:
+ scb = &op->file[0].scb;
+ break;
+ case 1:
+ scb = &op->file[1].scb;
+ break;
+ default:
+ scb = &op->more_files[call->count - 2].scb;
+ break;
+ }
+
+ bp = call->buffer;
+ xdr_decode_AFSCallBack(&bp, call, scb);
+ call->count++;
+ if (call->count < op->nr_files)
+ goto more_cbs;
+
+ /* All callbacks done; the trailing AFSVolSync record is next. */
+ afs_extract_to_buf(call, 6 * sizeof(__be32));
+ call->unmarshall++;
+ fallthrough;
+
+ case 5:
+ ret = afs_extract_data(call, false);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer;
+ xdr_decode_AFSVolSync(&bp, &op->volsync);
+
+ call->unmarshall++;
+ fallthrough;
+
+ case 6:
+ break;
+ }
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * Completion hook for FS.InlineBulkStatus.  If the call was aborted with
+ * RX_INVALID_OPERATION, flag the server with AFS_SERVER_FL_NO_IBULK and, when
+ * the call belongs to an operation, flag that operation's volume with
+ * AFS_VOLUME_MAYBE_NO_IBULK.
+ */
+static void afs_done_fs_inline_bulk_status(struct afs_call *call)
+{
+	if (call->error != -ECONNABORTED)
+		return;
+	if (call->abort_code != RX_INVALID_OPERATION)
+		return;
+
+	set_bit(AFS_SERVER_FL_NO_IBULK, &call->server->flags);
+	if (call->op)
+		set_bit(AFS_VOLUME_MAYBE_NO_IBULK, &call->op->volume->flags);
+}
+
+/*
+ * Call type descriptor for the FS.InlineBulkStatus RPC.
+ */
+static const struct afs_call_type afs_RXFSInlineBulkStatus = {
+	.op		= afs_FS_InlineBulkStatus,
+	.name		= "FS.InlineBulkStatus",
+	.deliver	= afs_deliver_fs_inline_bulk_status,
+	.destructor	= afs_flat_call_destructor,
+	.done		= afs_done_fs_inline_bulk_status,
+};
+
+/*
+ * Fetch the status information for up to 50 files in a single call.
+ *
+ * The FIDs sent are op->file[0], op->file[1] and then the first
+ * op->nr_files - 2 entries of op->more_files[].  If the server is already
+ * flagged AFS_SERVER_FL_NO_IBULK, no call is made and op->error is set to
+ * -ENOTSUPP.
+ */
+void afs_fs_inline_bulk_status(struct afs_operation *op)
+{
+	struct afs_vnode_param *dir = &op->file[0];
+	struct afs_vnode_param *first = &op->file[1];
+	struct afs_call *call;
+	__be32 *xdr;
+	int i;
+
+	if (test_bit(AFS_SERVER_FL_NO_IBULK, &op->server->flags)) {
+		op->error = -ENOTSUPP;
+		return;
+	}
+
+	_enter(",%x,{%llx:%llu},%u",
+	       key_serial(op->key), first->fid.vid, first->fid.vnode, op->nr_files);
+
+	/* Request: opcode, FID count, then three words per FID. */
+	call = afs_alloc_flat_call(op->net, &afs_RXFSInlineBulkStatus,
+				   (2 + op->nr_files * 3) * 4,
+				   21 * 4);
+	if (!call) {
+		afs_op_nomem(op);
+		return;
+	}
+
+	xdr = call->request;
+	*xdr++ = htonl(FSINLINEBULKSTATUS);
+	*xdr++ = htonl(op->nr_files);
+
+	*xdr++ = htonl(dir->fid.vid);
+	*xdr++ = htonl(dir->fid.vnode);
+	*xdr++ = htonl(dir->fid.unique);
+
+	*xdr++ = htonl(first->fid.vid);
+	*xdr++ = htonl(first->fid.vnode);
+	*xdr++ = htonl(first->fid.unique);
+
+	i = 0;
+	while (i < op->nr_files - 2) {
+		*xdr++ = htonl(op->more_files[i].fid.vid);
+		*xdr++ = htonl(op->more_files[i].fid.vnode);
+		*xdr++ = htonl(op->more_files[i].fid.unique);
+		i++;
+	}
+
+	trace_afs_make_fs_call(call, &first->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to an FS.FetchACL call.
+ *
+ * The reply is consumed by a state machine keyed on call->unmarshall so that
+ * delivery can stop when afs_extract_data() returns a negative value and be
+ * resumed later.  It decodes, in order:
+ *
+ *  - the ACL length, kept unrounded in call->count2;
+ *  - the ACL data, padded to a multiple of 4 bytes, read into a freshly
+ *    allocated struct afs_acl that is attached to op->acl;
+ *  - an AFSFetchStatus record for op->file[0] followed by an AFSVolSync
+ *    record, (21 + 6) words in total.
+ *
+ * Returns 0 when the reply is complete, -ENOMEM if the ACL buffer cannot be
+ * allocated, or the negative value from afs_extract_data().
+ *
+ * NOTE(review): the length word comes from the server and is not range
+ * checked before being rounded up and handed to kmalloc(); a value close to
+ * the top of the unsigned int range looks like it would wrap in round_up()
+ * while acl->size keeps the unrounded value - confirm and consider bounding.
+ */
+static int afs_deliver_fs_fetch_acl(struct afs_call *call)
+{
+ struct afs_operation *op = call->op;
+ struct afs_vnode_param *vp = &op->file[0];
+ struct afs_acl *acl;
+ const __be32 *bp;
+ unsigned int size;
+ int ret;
+
+ _enter("{%u}", call->unmarshall);
+
+ switch (call->unmarshall) {
+ case 0:
+ afs_extract_to_tmp(call);
+ call->unmarshall++;
+ fallthrough;
+
+ /* extract the returned data length */
+ case 1:
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ /* count2 keeps the real length; size is the padded wire length. */
+ size = call->count2 = ntohl(call->tmp);
+ size = round_up(size, 4);
+
+ acl = kmalloc(struct_size(acl, data, size), GFP_KERNEL);
+ if (!acl)
+ return -ENOMEM;
+ /* The ACL is attached to the operation, not to the call. */
+ op->acl = acl;
+ acl->size = call->count2;
+ afs_extract_begin(call, acl->data, size);
+ call->unmarshall++;
+ fallthrough;
+
+ /* extract the returned data */
+ case 2:
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ afs_extract_to_buf(call, (21 + 6) * 4);
+ call->unmarshall++;
+ fallthrough;
+
+ /* extract the metadata */
+ case 3:
+ ret = afs_extract_data(call, false);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer;
+ xdr_decode_AFSFetchStatus(&bp, call, &vp->scb);
+ xdr_decode_AFSVolSync(&bp, &op->volsync);
+
+ call->unmarshall++;
+ fallthrough;
+
+ case 4:
+ break;
+ }
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * Call type descriptor for the FS.FetchACL RPC.
+ *
+ * The call is created with afs_alloc_flat_call() and is given a reply
+ * buffer, so it uses the same flat-call destructor as every other flat call
+ * type in this file.  The ACL itself is attached to the operation (op->acl)
+ * rather than to the call, so the destructor does not touch it.
+ */
+static const struct afs_call_type afs_RXFSFetchACL = {
+	.name		= "FS.FetchACL",
+	.op		= afs_FS_FetchACL,
+	.deliver	= afs_deliver_fs_fetch_acl,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Fetch the ACL for the file in op->file[0].
+ */
+void afs_fs_fetch_acl(struct afs_operation *op)
+{
+	struct afs_vnode_param *target = &op->file[0];
+	struct afs_call *call;
+	__be32 *xdr;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), target->fid.vid, target->fid.vnode);
+
+	/* 16 request bytes: the opcode and the three FID words. */
+	call = afs_alloc_flat_call(op->net, &afs_RXFSFetchACL, 16, (21 + 6) * 4);
+	if (!call) {
+		afs_op_nomem(op);
+		return;
+	}
+
+	xdr = call->request;
+	*xdr++ = htonl(FSFETCHACL);
+	*xdr++ = htonl(target->fid.vid);
+	*xdr++ = htonl(target->fid.vnode);
+	*xdr++ = htonl(target->fid.unique);
+
+	trace_afs_make_fs_call(call, &target->fid);
+	afs_make_op_call(op, call, GFP_KERNEL);
+}
+
+/*
+ * Call type descriptor for the FS.StoreACL RPC.
+ */
+static const struct afs_call_type afs_RXFSStoreACL = {
+	.op		= afs_FS_StoreACL,
+	.name		= "FS.StoreACL",
+	.deliver	= afs_deliver_fs_file_status_and_vol,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Store the ACL in op->acl on the file in op->file[0].
+ *
+ * The ACL is sent as its length followed by its data, with the data
+ * zero-padded up to a multiple of 4 bytes.
+ */
+void afs_fs_store_acl(struct afs_operation *op)
+{
+	struct afs_vnode_param *target = &op->file[0];
+	const struct afs_acl *acl = op->acl;
+	struct afs_call *call;
+	size_t padded, pad;
+	__be32 *xdr;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), target->fid.vid, target->fid.vnode);
+
+	padded = round_up(acl->size, 4);
+
+	call = afs_alloc_flat_call(op->net, &afs_RXFSStoreACL,
+				   5 * 4 + padded, (21 + 6) * 4);
+	if (!call) {
+		afs_op_nomem(op);
+		return;
+	}
+
+	/* Five header words: opcode, FID triple and the ACL length. */
+	xdr = call->request;
+	*xdr++ = htonl(FSSTOREACL);
+	*xdr++ = htonl(target->fid.vid);
+	*xdr++ = htonl(target->fid.vnode);
+	*xdr++ = htonl(target->fid.unique);
+	*xdr++ = htonl(acl->size);
+
+	/* ACL body, then zero-fill whatever padding is needed. */
+	memcpy(xdr, acl->data, acl->size);
+	pad = padded - acl->size;
+	if (pad != 0)
+		memset((void *)xdr + acl->size, 0, pad);
+
+	trace_afs_make_fs_call(call, &target->fid);
+	afs_make_op_call(op, call, GFP_KERNEL);
+}
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
new file mode 100644
index 000000000..5921dd368
--- /dev/null
+++ b/fs/afs/inode.c
@@ -0,0 +1,972 @@
+/*
+ * Copyright (c) 2002 Red Hat, Inc. All rights reserved.
+ *
+ * This software may be freely redistributed under the terms of the
+ * GNU General Public License.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Authors: David Woodhouse <dwmw2@infradead.org>
+ * David Howells <dhowells@redhat.com>
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/sched.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/iversion.h>
+#include "internal.h"
+#include "afs_fs.h"
+
+/*
+ * Inode operations installed on ordinary AFS symlinks by
+ * afs_inode_init_from_status(); link resolution is left to page_get_link().
+ */
+static const struct inode_operations afs_symlink_inode_operations = {
+ .get_link = page_get_link,
+};
+
+/*
+ * Log the details of a vnode whose status carries an undefined file type,
+ * along with its parent directory's FID if one is supplied.  A stack dump is
+ * emitted the first time this is called, but not on later calls.
+ */
+static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *parent_vnode)
+{
+	static unsigned long dumped_once;
+
+	pr_warn("kAFS: AFS vnode with undefined type %u\n", vnode->status.type);
+	pr_warn("kAFS: A=%d m=%o s=%llx v=%llx\n",
+		vnode->status.abort_code,
+		vnode->status.mode,
+		vnode->status.size,
+		vnode->status.data_version);
+	pr_warn("kAFS: vnode %llx:%llx:%x\n",
+		vnode->fid.vid,
+		vnode->fid.vnode,
+		vnode->fid.unique);
+
+	if (parent_vnode) {
+		pr_warn("kAFS: dir %llx:%llx:%x\n",
+			parent_vnode->fid.vid,
+			parent_vnode->fid.vnode,
+			parent_vnode->fid.unique);
+	}
+
+	/* Only the first caller gets to dump the stack. */
+	if (test_and_set_bit(0, &dumped_once))
+		return;
+
+	dump_stack();
+}
+
+/*
+ * Set parameters for the netfs library: initialise the netfs context embedded
+ * in the vnode with the AFS request operations table (afs_req_ops).
+ */
+static void afs_set_netfs_context(struct afs_vnode *vnode)
+{
+ netfs_inode_init(&vnode->netfs, &afs_req_ops);
+}
+
+/*
+ * Initialise an inode from the vnode status.
+ *
+ * Under vnode->cb_lock, this copies the callback break counters from the
+ * operation and the status record from vp->scb.status into the vnode, then
+ * fills in the inode's times, ownership, link count, mode and operation
+ * tables according to the file type.  A symlink whose permission bits are
+ * exactly 0644 is set up as a mountpoint (S_AUTOMOUNT, directory mode 0555).
+ * The size, data version and callback expiry are set last.
+ *
+ * Returns 0 on success.  An unrecognised file type is logged through
+ * dump_vnode() and the result of afs_protocol_error() is returned instead.
+ */
+static int afs_inode_init_from_status(struct afs_operation *op,
+ struct afs_vnode_param *vp,
+ struct afs_vnode *vnode)
+{
+ struct afs_file_status *status = &vp->scb.status;
+ struct inode *inode = AFS_VNODE_TO_I(vnode);
+ struct timespec64 t;
+
+ _enter("{%llx:%llu.%u} %s",
+ vp->fid.vid, vp->fid.vnode, vp->fid.unique,
+ op->type ? op->type->name : "???");
+
+ _debug("FS: ft=%d lk=%d sz=%llu ver=%Lu mod=%hu",
+ status->type,
+ status->nlink,
+ (unsigned long long) status->size,
+ status->data_version,
+ status->mode);
+
+ write_seqlock(&vnode->cb_lock);
+
+ vnode->cb_v_break = op->cb_v_break;
+ vnode->cb_s_break = op->cb_s_break;
+ vnode->status = *status;
+
+ /* All three inode timestamps start out as the client mtime. */
+ t = status->mtime_client;
+ inode->i_ctime = t;
+ inode->i_mtime = t;
+ inode->i_atime = t;
+ inode->i_flags |= S_NOATIME;
+ inode->i_uid = make_kuid(&init_user_ns, status->owner);
+ inode->i_gid = make_kgid(&init_user_ns, status->group);
+ set_nlink(&vnode->netfs.inode, status->nlink);
+
+ switch (status->type) {
+ case AFS_FTYPE_FILE:
+ inode->i_mode = S_IFREG | (status->mode & S_IALLUGO);
+ inode->i_op = &afs_file_inode_operations;
+ inode->i_fop = &afs_file_operations;
+ inode->i_mapping->a_ops = &afs_file_aops;
+ mapping_set_large_folios(inode->i_mapping);
+ break;
+ case AFS_FTYPE_DIR:
+ inode->i_mode = S_IFDIR | (status->mode & S_IALLUGO);
+ inode->i_op = &afs_dir_inode_operations;
+ inode->i_fop = &afs_dir_file_operations;
+ inode->i_mapping->a_ops = &afs_dir_aops;
+ mapping_set_large_folios(inode->i_mapping);
+ break;
+ case AFS_FTYPE_SYMLINK:
+ /* Symlinks with a mode of 0644 are actually mountpoints. */
+ if ((status->mode & 0777) == 0644) {
+ inode->i_flags |= S_AUTOMOUNT;
+
+ set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
+
+ inode->i_mode = S_IFDIR | 0555;
+ inode->i_op = &afs_mntpt_inode_operations;
+ inode->i_fop = &afs_mntpt_file_operations;
+ inode->i_mapping->a_ops = &afs_symlink_aops;
+ } else {
+ /* Unlike the file and directory cases, the mode is not
+ * masked with S_IALLUGO here.
+ */
+ inode->i_mode = S_IFLNK | status->mode;
+ inode->i_op = &afs_symlink_inode_operations;
+ inode->i_mapping->a_ops = &afs_symlink_aops;
+ }
+ inode_nohighmem(inode);
+ break;
+ default:
+ /* Pass the directory along only if it is a different vnode. */
+ dump_vnode(vnode, op->file[0].vnode != vnode ? op->file[0].vnode : NULL);
+ write_sequnlock(&vnode->cb_lock);
+ return afs_protocol_error(NULL, afs_eproto_file_type);
+ }
+
+ afs_set_i_size(vnode, status->size);
+ afs_set_netfs_context(vnode);
+
+ vnode->invalid_before = status->data_version;
+ inode_set_iversion_raw(&vnode->netfs.inode, status->data_version);
+
+ if (!vp->scb.have_cb) {
+ /* it's a symlink we just created (the fileserver
+ * didn't give us a callback) */
+ vnode->cb_expires_at = ktime_get_real_seconds();
+ } else {
+ vnode->cb_expires_at = vp->scb.callback.expires_at;
+ vnode->cb_server = op->server;
+ set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+ }
+
+ write_sequnlock(&vnode->cb_lock);
+ return 0;
+}
+
+/*
+ * Update the core inode struct from a returned status record.
+ *
+ * The link count, owner, group and permission bits are copied to the inode
+ * where they differ from the cached status, the mtime is always updated and
+ * the cached status is replaced by the new record.
+ *
+ * If the returned data version is not vp->dv_before + vp->dv_delta, the
+ * cached contents are treated as stale: a directory loses
+ * AFS_VNODE_DIR_VALID, any other type gets AFS_VNODE_ZAP_DATA, and the size
+ * is refreshed.  A warning is logged in that case if a callback promise was
+ * outstanding.
+ *
+ * A status whose file type differs from the cached one is reported as a
+ * protocol error and otherwise ignored.
+ *
+ * afs_vnode_commit_status() calls this with vnode->cb_lock write-locked.
+ */
+static void afs_apply_status(struct afs_operation *op,
+			     struct afs_vnode_param *vp)
+{
+	struct afs_file_status *status = &vp->scb.status;
+	struct afs_vnode *vnode = vp->vnode;
+	struct inode *inode = &vnode->netfs.inode;
+	struct timespec64 t;
+	umode_t mode;
+	bool data_changed = false;
+	bool change_size = vp->set_size;
+
+	_enter("{%llx:%llu.%u} %s",
+	       vp->fid.vid, vp->fid.vnode, vp->fid.unique,
+	       op->type ? op->type->name : "???");
+
+	BUG_ON(test_bit(AFS_VNODE_UNSET, &vnode->flags));
+
+	if (status->type != vnode->status.type) {
+		/* Report the change as old type -> new type, matching the
+		 * wording of the message.
+		 */
+		pr_warn("Vnode %llx:%llx:%x changed type %u to %u\n",
+			vnode->fid.vid,
+			vnode->fid.vnode,
+			vnode->fid.unique,
+			vnode->status.type, status->type);
+		afs_protocol_error(NULL, afs_eproto_bad_status);
+		return;
+	}
+
+	if (status->nlink != vnode->status.nlink)
+		set_nlink(inode, status->nlink);
+
+	if (status->owner != vnode->status.owner)
+		inode->i_uid = make_kuid(&init_user_ns, status->owner);
+
+	if (status->group != vnode->status.group)
+		inode->i_gid = make_kgid(&init_user_ns, status->group);
+
+	if (status->mode != vnode->status.mode) {
+		/* Replace only the permission bits; keep the file type. */
+		mode = inode->i_mode;
+		mode &= ~S_IALLUGO;
+		mode |= status->mode & S_IALLUGO;
+		WRITE_ONCE(inode->i_mode, mode);
+	}
+
+	t = status->mtime_client;
+	inode->i_mtime = t;
+	if (vp->update_ctime)
+		inode->i_ctime = op->ctime;
+
+	if (vnode->status.data_version != status->data_version)
+		data_changed = true;
+
+	vnode->status = *status;
+
+	if (vp->dv_before + vp->dv_delta != status->data_version) {
+		if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
+			pr_warn("kAFS: vnode modified {%llx:%llu} %llx->%llx %s (op=%x)\n",
+				vnode->fid.vid, vnode->fid.vnode,
+				(unsigned long long)vp->dv_before + vp->dv_delta,
+				(unsigned long long)status->data_version,
+				op->type ? op->type->name : "???",
+				op->debug_id);
+
+		vnode->invalid_before = status->data_version;
+		if (vnode->status.type == AFS_FTYPE_DIR) {
+			if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+				afs_stat_v(vnode, n_inval);
+		} else {
+			set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
+		}
+		change_size = true;
+		data_changed = true;
+	} else if (vnode->status.type == AFS_FTYPE_DIR) {
+		/* Expected directory change is handled elsewhere so
+		 * that we can locally edit the directory and save on a
+		 * download.
+		 */
+		if (test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+			data_changed = false;
+		change_size = true;
+	}
+
+	if (data_changed) {
+		inode_set_iversion_raw(inode, status->data_version);
+
+		/* Only update the size if the data version jumped.  If the
+		 * file is being modified locally, then we might have our own
+		 * idea of what the size should be that's not the same as
+		 * what's on the server.
+		 */
+		vnode->netfs.remote_i_size = status->size;
+		if (change_size) {
+			afs_set_i_size(vnode, status->size);
+			inode->i_ctime = t;
+			inode->i_atime = t;
+		}
+	}
+}
+
+/*
+ * Apply a callback to a vnode.  Nothing is recorded if afs_cb_is_broken()
+ * reports a break relative to vp->cb_break_before.
+ */
+static void afs_apply_callback(struct afs_operation *op,
+			       struct afs_vnode_param *vp)
+{
+	struct afs_vnode *vnode = vp->vnode;
+	struct afs_callback *promise = &vp->scb.callback;
+
+	if (afs_cb_is_broken(vp->cb_break_before, vnode))
+		return;
+
+	vnode->cb_expires_at = promise->expires_at;
+	vnode->cb_server = op->server;
+	set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+}
+
+/*
+ * Apply the received status and callback to an inode all in the same critical
+ * section to avoid races with afs_validate().
+ *
+ * With vnode->cb_lock write-locked, exactly one of the following is done:
+ *
+ *  - if the status record carries an error and the abort code is VNOVNODE,
+ *    the vnode is marked deleted, its link count is cleared and its callback
+ *    is broken;
+ *  - if a status record is present it is applied, together with the callback
+ *    if there is one, unless it is a speculative result that must be ignored;
+ *  - if the operation unlinked the vnode and no directory conflict was
+ *    flagged, the link count is dropped and the vnode is marked deleted once
+ *    the count reaches zero.
+ *
+ * After the lock is released, the permits cache is updated if a status record
+ * was present.
+ */
+void afs_vnode_commit_status(struct afs_operation *op, struct afs_vnode_param *vp)
+{
+ struct afs_vnode *vnode = vp->vnode;
+
+ _enter("");
+
+ write_seqlock(&vnode->cb_lock);
+
+ if (vp->scb.have_error) {
+ /* A YFS server will return this from RemoveFile2 and AFS and
+ * YFS will return this from InlineBulkStatus.
+ */
+ if (vp->scb.status.abort_code == VNOVNODE) {
+ set_bit(AFS_VNODE_DELETED, &vnode->flags);
+ clear_nlink(&vnode->netfs.inode);
+ __afs_break_callback(vnode, afs_cb_break_for_deleted);
+ op->flags &= ~AFS_OPERATION_DIR_CONFLICT;
+ }
+ } else if (vp->scb.have_status) {
+ if (vp->speculative &&
+ (test_bit(AFS_VNODE_MODIFYING, &vnode->flags) ||
+ vp->dv_before != vnode->status.data_version))
+ /* Ignore the result of a speculative bulk status fetch
+ * if it splits around a modification op, thereby
+ * appearing to regress the data version.
+ */
+ goto out;
+ afs_apply_status(op, vp);
+ if (vp->scb.have_cb)
+ afs_apply_callback(op, vp);
+ } else if (vp->op_unlinked && !(op->flags & AFS_OPERATION_DIR_CONFLICT)) {
+ drop_nlink(&vnode->netfs.inode);
+ if (vnode->netfs.inode.i_nlink == 0) {
+ set_bit(AFS_VNODE_DELETED, &vnode->flags);
+ __afs_break_callback(vnode, afs_cb_break_for_deleted);
+ }
+ }
+
+out:
+ write_sequnlock(&vnode->cb_lock);
+
+ /* This also runs when a speculative result was skipped via "out". */
+ if (vp->scb.have_status)
+ afs_cache_permit(vnode, op->key, vp->cb_break_before, &vp->scb);
+}
+
+/*
+ * Success handler for a fetch-status operation.  A vnode whose inode is still
+ * marked I_NEW is initialised from the returned status, with the result
+ * stored in op->error; an existing vnode has the status committed to it.
+ */
+static void afs_fetch_status_success(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[op->fetch_status.which];
+	struct afs_vnode *vnode = vp->vnode;
+	int ret;
+
+	if (!(vnode->netfs.inode.i_state & I_NEW)) {
+		afs_vnode_commit_status(op, vp);
+		return;
+	}
+
+	ret = afs_inode_init_from_status(op, vp, vnode);
+	op->error = ret;
+	if (ret != 0)
+		return;
+
+	afs_cache_permit(vnode, op->key, vp->cb_break_before, &vp->scb);
+}
+
+/*
+ * Operation table for fetching a vnode's status, with AFS and YFS RPC
+ * variants.
+ */
+const struct afs_operation_ops afs_fetch_status_operation = {
+	.issue_yfs_rpc	= yfs_fs_fetch_status,
+	.issue_afs_rpc	= afs_fs_fetch_status,
+	.aborted	= afs_check_for_remote_deletion,
+	.success	= afs_fetch_status_success,
+};
+
+/*
+ * Fetch file status from the volume.
+ *
+ * If _caller_access is not NULL, it receives the caller_access field of the
+ * status record held in the operation.  The is_new argument is not used by
+ * this function.  Returns the result of afs_put_operation(), or the error
+ * from afs_alloc_operation() if no operation could be allocated.
+ */
+int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool is_new,
+		     afs_access_t *_caller_access)
+{
+	struct afs_operation *op;
+
+	_enter("%s,{%llx:%llu.%u,S=%lx}",
+	       vnode->volume->name,
+	       vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique,
+	       vnode->flags);
+
+	op = afs_alloc_operation(key, vnode->volume);
+	if (IS_ERR(op))
+		return PTR_ERR(op);
+
+	afs_op_set_vnode(op, 0, vnode);
+	op->nr_files	= 1;
+	op->ops		= &afs_fetch_status_operation;
+
+	afs_begin_vnode_operation(op);
+	afs_wait_for_operation(op);
+
+	/* Read the access mask before the operation is put. */
+	if (_caller_access != NULL)
+		*_caller_access = op->file[0].scb.status.caller_access;
+
+	return afs_put_operation(op);
+}
+
+/*
+ * ilookup() comparator: returns 1 if the inode's vnode number (both halves)
+ * and uniquifier match the afs_fid passed as the opaque argument, else 0.
+ * The volume ID is not compared.
+ */
+int afs_ilookup5_test_by_fid(struct inode *inode, void *opaque)
+{
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct afs_fid *wanted = opaque;
+
+	if (wanted->vnode != vnode->fid.vnode)
+		return 0;
+	if (wanted->vnode_hi != vnode->fid.vnode_hi)
+		return 0;
+	if (wanted->unique != vnode->fid.unique)
+		return 0;
+	return 1;
+}
+
+/*
+ * iget5() comparator: matches the inode against the FID held in the
+ * afs_vnode_param passed as the opaque argument.
+ */
+static int afs_iget5_test(struct inode *inode, void *opaque)
+{
+	struct afs_vnode_param *wanted = opaque;
+	int match;
+
+	match = afs_ilookup5_test_by_fid(inode, &wanted->fid);
+	return match;
+}
+
+/*
+ * iget5() inode initialiser: bind a new inode to the superblock's volume and
+ * to the FID held in the afs_vnode_param passed as the opaque argument.
+ * Always returns 0.
+ */
+static int afs_iget5_set(struct inode *inode, void *opaque)
+{
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct afs_super_info *sbi = AFS_FS_S(inode->i_sb);
+	struct afs_vnode_param *src = opaque;
+
+	vnode->fid	= src->fid;
+	vnode->volume	= sbi->volume;
+
+	/* YFS supports 96-bit vnode IDs, but Linux only supports
+	 * 64-bit inode numbers.
+	 */
+	inode->i_generation	= vnode->fid.unique;
+	inode->i_ino		= vnode->fid.vnode;
+	return 0;
+}
+
+/*
+ * Get a cache cookie for an inode.
+ *
+ * Only regular files get a cookie; for any other type the cache pointer is
+ * set to NULL.  Nothing is done when CONFIG_AFS_FSCACHE is disabled.
+ */
+static void afs_get_inode_cache(struct afs_vnode *vnode)
+{
+#ifdef CONFIG_AFS_FSCACHE
+	struct afs_vnode_cache_aux aux;
+	struct {
+		__be32 vnode_id;
+		__be32 unique;
+		__be32 vnode_id_ext[2];	/* Allow for a 96-bit key */
+	} __packed key;
+
+	if (vnode->status.type == AFS_FTYPE_FILE) {
+		/* Index key: low vnode word, uniquifier, then the upper
+		 * vnode bits.
+		 */
+		key.vnode_id		= htonl(vnode->fid.vnode);
+		key.unique		= htonl(vnode->fid.unique);
+		key.vnode_id_ext[0]	= htonl(vnode->fid.vnode >> 32);
+		key.vnode_id_ext[1]	= htonl(vnode->fid.vnode_hi);
+		afs_set_cache_aux(vnode, &aux);
+
+		/* The type test in the advice argument always selects 0 on
+		 * this path, as only regular files reach it.
+		 */
+		afs_vnode_set_cache(vnode,
+				    fscache_acquire_cookie(
+					    vnode->volume->cache,
+					    vnode->status.type == AFS_FTYPE_FILE ?
+					    0 : FSCACHE_ADV_SINGLE_CHUNK,
+					    &key, sizeof(key),
+					    &aux, sizeof(aux),
+					    i_size_read(&vnode->netfs.inode)));
+	} else {
+		vnode->netfs.cache = NULL;
+	}
+#endif
+}
+
+/*
+ * Look up or create the inode for the FID in vp, on the superblock that the
+ * operation's first vnode (op->file[0]) belongs to.
+ *
+ * An inode that already exists is returned as it is.  A new one is
+ * initialised from the status record in vp, given a cache cookie and
+ * unlocked.  Returns the inode, ERR_PTR(-ENOMEM) if none could be obtained,
+ * or an ERR_PTR carrying the initialisation error.
+ */
+struct inode *afs_iget(struct afs_operation *op, struct afs_vnode_param *vp)
+{
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct super_block *sb = dvp->vnode->netfs.inode.i_sb;
+	struct afs_vnode *vnode;
+	struct inode *inode;
+	int ret;
+
+	_enter(",{%llx:%llu.%u},,", vp->fid.vid, vp->fid.vnode, vp->fid.unique);
+
+	inode = iget5_locked(sb, vp->fid.vnode, afs_iget5_test, afs_iget5_set, vp);
+	if (!inode) {
+		_leave(" = -ENOMEM");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	vnode = AFS_FS_I(inode);
+
+	_debug("GOT INODE %p { vl=%llx vn=%llx, u=%x }",
+	       inode, vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
+
+	/* Only a freshly created inode needs setting up. */
+	if (inode->i_state & I_NEW) {
+		ret = afs_inode_init_from_status(op, vp, vnode);
+		if (ret < 0) {
+			iget_failed(inode);
+			_leave(" = %d [bad]", ret);
+			return ERR_PTR(ret);
+		}
+
+		afs_get_inode_cache(vnode);
+
+		clear_bit(AFS_VNODE_UNSET, &vnode->flags);
+		unlock_new_inode(inode);
+	}
+
+	_leave(" = %p", inode);
+	return inode;
+}
+
+/*
+ * Initialise a new root inode; passed to iget5_locked() as the "set" callback
+ * by afs_root_iget().
+ *
+ * The volume comes from the superblock's afs_super_info and the file ID is
+ * fixed at vnode 1, unique 1.  @opaque is not used.  Always returns 0.
+ */
+static int afs_iget5_set_root(struct inode *inode, void *opaque)
+{
+	struct afs_super_info *as = AFS_FS_S(inode->i_sb);
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+
+	vnode->volume		= as->volume;
+	vnode->fid.vid		= as->volume->vid;
+	vnode->fid.vnode	= 1;
+	vnode->fid.unique	= 1;
+	inode->i_ino		= 1;
+	inode->i_generation	= 1;
+	return 0;
+}
+
+/*
+ * Set up the root inode for a volume.  This is always vnode 1, unique 1 within
+ * the volume.
+ *
+ * The inode is obtained with iget5_locked() and is required to be new (this
+ * is enforced with BUG_ON).  Its status is then fetched with a synchronous
+ * fetch-status operation using @key.
+ *
+ * Returns the unlocked inode on success, ERR_PTR(-ENOMEM) if no inode could
+ * be obtained, or an ERR_PTR() carrying the error from allocating or running
+ * the operation.
+ */
+struct inode *afs_root_iget(struct super_block *sb, struct key *key)
+{
+	struct afs_super_info *as = AFS_FS_S(sb);
+	struct afs_operation *op;
+	struct afs_vnode *vnode;
+	struct inode *inode;
+	int ret;
+
+	_enter(",{%llx},,", as->volume->vid);
+
+	inode = iget5_locked(sb, 1, NULL, afs_iget5_set_root, NULL);
+	if (!inode) {
+		_leave(" = -ENOMEM");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	_debug("GOT ROOT INODE %p { vl=%llx }", inode, as->volume->vid);
+
+	BUG_ON(!(inode->i_state & I_NEW));
+
+	vnode = AFS_FS_I(inode);
+	vnode->cb_v_break = as->volume->cb_v_break;
+	afs_set_netfs_context(vnode);
+
+	op = afs_alloc_operation(key, as->volume);
+	if (IS_ERR(op)) {
+		ret = PTR_ERR(op);
+		goto error;
+	}
+
+	afs_op_set_vnode(op, 0, vnode);
+
+	op->nr_files	= 1;
+	op->ops		= &afs_fetch_status_operation;
+	ret = afs_do_sync_operation(op);
+	if (ret < 0)
+		goto error;
+
+	afs_get_inode_cache(vnode);
+
+	clear_bit(AFS_VNODE_UNSET, &vnode->flags);
+	unlock_new_inode(inode);
+	_leave(" = %p", inode);
+	return inode;
+
+error:
+	iget_failed(inode);
+	_leave(" = %d [bad]", ret);
+	return ERR_PTR(ret);
+}
+
+/*
+ * Discard locally held data for a vnode whose contents have changed on the
+ * server.
+ * - might also want to ditch all the outstanding writes and dirty pages
+ */
+static void afs_zap_data(struct afs_vnode *vnode)
+{
+	struct inode *inode = &vnode->netfs.inode;
+
+	_enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
+
+	afs_invalidate_cache(vnode, 0);
+
+	/* A directory or symlink has its pages discarded completely. */
+	if (!S_ISREG(inode->i_mode)) {
+		invalidate_inode_pages2(inode->i_mapping);
+		return;
+	}
+
+	/* For a regular file, nuke all the non-dirty pages that aren't
+	 * locked, mapped or being written back.
+	 */
+	invalidate_remote_inode(inode);
+}
+
+/*
+ * Check to see if we have a server currently serving this volume and that it
+ * hasn't been reinitialised or dropped from the list.
+ *
+ * Returns true straight away if the cell's fs_s_break counter still matches
+ * the value recorded in the vnode.  Otherwise the volume's server list is
+ * searched under the RCU read lock for the vnode's callback server, and the
+ * result is whether that server's cb_s_break still matches the vnode's.
+ * Returns false if the server isn't in the list.
+ */
+static bool afs_check_server_good(struct afs_vnode *vnode)
+{
+	struct afs_server_list *servers;
+	bool still_good = false;
+	int ix;
+
+	if (atomic_read(&vnode->volume->cell->fs_s_break) == vnode->cb_fs_s_break)
+		return true;
+
+	rcu_read_lock();
+
+	servers = rcu_dereference(vnode->volume->servers);
+	for (ix = 0; ix < servers->nr_servers; ix++) {
+		struct afs_server *candidate = servers->servers[ix].server;
+
+		if (candidate != vnode->cb_server)
+			continue;
+
+		still_good = (candidate->cb_s_break == vnode->cb_s_break);
+		break;
+	}
+
+	rcu_read_unlock();
+	return still_good;
+}
+
+/*
+ * Check the validity of a vnode/inode.
+ *
+ * The callback state is sampled under the read side of vnode->cb_lock. If
+ * no reason to clear it is found, true is returned. Otherwise the write side
+ * of the lock is taken to act on the reason found and false is returned.
+ */
+bool afs_check_validity(struct afs_vnode *vnode)
+{
+ enum afs_cb_break_reason need_clear = afs_cb_break_no_break;
+ time64_t now = ktime_get_real_seconds();
+ unsigned int cb_break;
+ int seq = 0;
+
+ /* NOTE(review): need_clear is not reset at the top of each pass, so a
+ * reason recorded on an earlier pass is kept if the loop retries -
+ * confirm this is intended.
+ */
+ do {
+ read_seqbegin_or_lock(&vnode->cb_lock, &seq);
+ /* Remember the break counter as sampled so that it can be compared
+ * again once the write lock is held.
+ */
+ cb_break = vnode->cb_break;
+
+ if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
+ if (vnode->cb_v_break != vnode->volume->cb_v_break)
+ need_clear = afs_cb_break_for_v_break;
+ else if (!afs_check_server_good(vnode))
+ need_clear = afs_cb_break_for_s_reinit;
+ else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
+ need_clear = afs_cb_break_for_zap;
+ /* Treat the promise as lapsed 10 before its expiry time (presumably
+ * seconds, as 'now' comes from ktime_get_real_seconds()).
+ */
+ else if (vnode->cb_expires_at - 10 <= now)
+ need_clear = afs_cb_break_for_lapsed;
+ } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
+ /* No promise, but the vnode is marked deleted: nothing to clear. */
+ ;
+ } else {
+ need_clear = afs_cb_break_no_promise;
+ }
+
+ } while (need_seqretry(&vnode->cb_lock, seq));
+
+ done_seqretry(&vnode->cb_lock, seq);
+
+ if (need_clear == afs_cb_break_no_break)
+ return true;
+
+ write_seqlock(&vnode->cb_lock);
+ /* With no promise, just resync the vnode's volume break counter.
+ * Otherwise break the callback only if cb_break hasn't changed since it
+ * was sampled above; if it has changed, only trace the miss.
+ */
+ if (need_clear == afs_cb_break_no_promise)
+ vnode->cb_v_break = vnode->volume->cb_v_break;
+ else if (cb_break == vnode->cb_break)
+ __afs_break_callback(vnode, need_clear);
+ else
+ trace_afs_cb_miss(&vnode->fid, need_clear);
+ write_sequnlock(&vnode->cb_lock);
+ return false;
+}
+
+/*
+ * validate a vnode/inode
+ * - there are several things we need to check
+ * - parent dir data changes (rm, rmdir, rename, mkdir, create, link,
+ * symlink)
+ * - parent dir metadata changed (security changes)
+ * - dentry data changed (write, truncate)
+ * - dentry metadata changed (security changes)
+ *
+ * Returns 0 if the vnode is considered valid (including when it was already
+ * flagged as deleted on entry), -ESTALE if it turns out to have been deleted
+ * whilst being revalidated, or the error from afs_fetch_status().
+ */
+int afs_validate(struct afs_vnode *vnode, struct key *key)
+{
+ int ret;
+
+ _enter("{v={%llx:%llu} fl=%lx},%x",
+ vnode->fid.vid, vnode->fid.vnode, vnode->flags,
+ key_serial(key));
+
+ /* Already known to be deleted: make sure the link count is zero and
+ * don't consult the server.
+ */
+ if (unlikely(test_bit(AFS_VNODE_DELETED, &vnode->flags))) {
+ if (vnode->netfs.inode.i_nlink)
+ clear_nlink(&vnode->netfs.inode);
+ goto valid;
+ }
+
+ /* Fast path: a promise is held and it checks out. */
+ if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags) &&
+ afs_check_validity(vnode))
+ goto valid;
+
+ down_write(&vnode->validate_lock);
+
+ /* if the promise has expired, we need to check the server again to get
+ * a new promise - note that if the (parent) directory's metadata was
+ * changed then the security may be different and we may no longer have
+ * access */
+ if (!test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
+ _debug("not promised");
+ ret = afs_fetch_status(vnode, key, false, NULL);
+ if (ret < 0) {
+ /* -ENOENT from the fetch marks the vnode deleted and is
+ * reported to the caller as -ESTALE.
+ */
+ if (ret == -ENOENT) {
+ set_bit(AFS_VNODE_DELETED, &vnode->flags);
+ ret = -ESTALE;
+ }
+ goto error_unlock;
+ }
+ _debug("new promise [fl=%lx]", vnode->flags);
+ }
+
+ if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
+ _debug("file already deleted");
+ ret = -ESTALE;
+ goto error_unlock;
+ }
+
+ /* if the vnode's data version number changed then its contents are
+ * different */
+ if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
+ afs_zap_data(vnode);
+ up_write(&vnode->validate_lock);
+valid:
+ _leave(" = 0");
+ return 0;
+
+error_unlock:
+ up_write(&vnode->validate_lock);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * read the attributes of an inode
+ *
+ * The vnode is revalidated first if it has a volume, the caller did not pass
+ * AT_STATX_DONT_SYNC and no callback promise is currently held. The stat
+ * data is then filled in under the read side of vnode->cb_lock.
+ *
+ * @mnt_userns and @request_mask are not used in this function; the attributes
+ * are filled in with &init_user_ns.
+ *
+ * Returns 0 on success or the error from afs_request_key() or afs_validate().
+ */
+int afs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int query_flags)
+{
+ struct inode *inode = d_inode(path->dentry);
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+ struct key *key;
+ int ret, seq = 0;
+
+ _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation);
+
+ if (vnode->volume &&
+ !(query_flags & AT_STATX_DONT_SYNC) &&
+ !test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
+ key = afs_request_key(vnode->volume->cell);
+ if (IS_ERR(key))
+ return PTR_ERR(key);
+ ret = afs_validate(vnode, key);
+ key_put(key);
+ if (ret < 0)
+ return ret;
+ }
+
+ do {
+ read_seqbegin_or_lock(&vnode->cb_lock, &seq);
+ generic_fillattr(&init_user_ns, inode, stat);
+ /* Report one link fewer for a silly-deleted vnode, without going
+ * below zero.
+ */
+ if (test_bit(AFS_VNODE_SILLY_DELETED, &vnode->flags) &&
+ stat->nlink > 0)
+ stat->nlink -= 1;
+
+ /* Lie about the size of directories. We maintain a locally
+ * edited copy and may make different allocation decisions on
+ * it, but we need to give userspace the server's size.
+ */
+ if (S_ISDIR(inode->i_mode))
+ stat->size = vnode->netfs.remote_i_size;
+ } while (need_seqretry(&vnode->cb_lock, seq));
+
+ done_seqretry(&vnode->cb_lock, seq);
+ return 0;
+}
+
+/*
+ * Decide how to drop an AFS inode: a vnode flagged AFS_VNODE_PSEUDODIR is
+ * handed to generic_delete_inode(); everything else goes through
+ * generic_drop_inode().
+ */
+int afs_drop_inode(struct inode *inode)
+{
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+
+	_enter("");
+
+	if (!test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags))
+		return generic_drop_inode(inode);
+
+	return generic_delete_inode(inode);
+}
+
+/*
+ * clear an AFS inode
+ *
+ * Truncates the inode's pages, clears fscache writeback state, releases any
+ * writeback keys still attached to the vnode, relinquishes the cache cookie
+ * and drops the permit cache, silly-rename key and lock key.
+ */
+void afs_evict_inode(struct inode *inode)
+{
+ struct afs_vnode_cache_aux aux;
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+
+ _enter("{%llx:%llu.%d}",
+ vnode->fid.vid,
+ vnode->fid.vnode,
+ vnode->fid.unique);
+
+ _debug("CLEAR INODE %p", inode);
+
+ ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
+
+ truncate_inode_pages_final(&inode->i_data);
+
+ afs_set_cache_aux(vnode, &aux);
+ fscache_clear_inode_writeback(afs_vnode_cache(vnode), inode, &aux);
+ clear_inode(inode);
+
+ /* Unlink and put every writeback key still on the vnode's list. */
+ while (!list_empty(&vnode->wb_keys)) {
+ struct afs_wb_key *wbk = list_entry(vnode->wb_keys.next,
+ struct afs_wb_key, vnode_link);
+ list_del(&wbk->vnode_link);
+ afs_put_wb_key(wbk);
+ }
+
+ /* The second argument is true if the vnode is flagged as deleted. */
+ fscache_relinquish_cookie(afs_vnode_cache(vnode),
+ test_bit(AFS_VNODE_DELETED, &vnode->flags));
+
+ afs_prune_wb_keys(vnode);
+ afs_put_permits(rcu_access_pointer(vnode->permit_cache));
+ key_put(vnode->silly_key);
+ vnode->silly_key = NULL;
+ key_put(vnode->lock_key);
+ vnode->lock_key = NULL;
+ _leave("");
+}
+
+/*
+ * Success handler for the setattr operation.  Records the pre-commit i_size
+ * in the operation, commits the new status to the vnode and, if the file was
+ * extended, tells the page cache about the new size.
+ */
+static void afs_setattr_success(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[0];
+	struct inode *inode = &vp->vnode->netfs.inode;
+	loff_t prev_size = i_size_read(inode);
+	loff_t new_size;
+
+	op->setattr.old_i_size = prev_size;
+	afs_vnode_commit_status(op, vp);
+	/* inode->i_size has now been changed. */
+
+	if (!(op->setattr.attr->ia_valid & ATTR_SIZE))
+		return;
+
+	new_size = op->setattr.attr->ia_size;
+	if (new_size > prev_size)
+		pagecache_isize_extended(inode, prev_size, new_size);
+}
+
+/*
+ * Local-edit step for the setattr operation (installed as ->edit_dir).  If
+ * the size was changed, truncate the page cache when the file shrank and
+ * resize the cache cookie to the size reported in the returned status.
+ */
+static void afs_setattr_edit_file(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[0];
+	struct inode *inode = &vp->vnode->netfs.inode;
+	loff_t new_size, prev_size;
+
+	if (!(op->setattr.attr->ia_valid & ATTR_SIZE))
+		return;
+
+	new_size = op->setattr.attr->ia_size;
+	prev_size = op->setattr.old_i_size;
+
+	if (new_size < prev_size)
+		truncate_pagecache(inode, new_size);
+	if (new_size != prev_size)
+		fscache_resize_cookie(afs_vnode_cache(vp->vnode),
+				      vp->scb.status.size);
+}
+
+/*
+ * Operation table used by afs_setattr(): the AFS and YFS RPC issuers plus the
+ * local handlers defined above. Note that the file-editing handler is
+ * installed in the ->edit_dir slot.
+ */
+static const struct afs_operation_ops afs_setattr_operation = {
+ .issue_afs_rpc = afs_fs_setattr,
+ .issue_yfs_rpc = yfs_fs_setattr,
+ .success = afs_setattr_success,
+ .edit_dir = afs_setattr_edit_file,
+};
+
+/*
+ * set the attributes of an inode
+ *
+ * Returns 0 without doing anything if none of the supported attribute bits is
+ * set in attr->ia_valid. A size change on anything other than a regular file
+ * is rejected with -EISDIR. Otherwise the change is either applied purely
+ * locally (see below) or sent to the server as a synchronous operation, and
+ * the result of that is returned.
+ *
+ * @mnt_userns is not used in this function.
+ */
+int afs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr)
+{
+ const unsigned int supported =
+ ATTR_SIZE | ATTR_MODE | ATTR_UID | ATTR_GID |
+ ATTR_MTIME | ATTR_MTIME_SET | ATTR_TIMES_SET | ATTR_TOUCH;
+ struct afs_operation *op;
+ struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
+ struct inode *inode = &vnode->netfs.inode;
+ loff_t i_size;
+ int ret;
+
+ _enter("{%llx:%llu},{n=%pd},%x",
+ vnode->fid.vid, vnode->fid.vnode, dentry,
+ attr->ia_valid);
+
+ if (!(attr->ia_valid & supported)) {
+ _leave(" = 0 [unsupported]");
+ return 0;
+ }
+
+ i_size = i_size_read(inode);
+ if (attr->ia_valid & ATTR_SIZE) {
+ if (!S_ISREG(inode->i_mode))
+ return -EISDIR;
+
+ ret = inode_newsize_ok(inode, attr->ia_size);
+ if (ret)
+ return ret;
+
+ /* The size isn't actually changing, so drop the request. */
+ if (attr->ia_size == i_size)
+ attr->ia_valid &= ~ATTR_SIZE;
+ }
+
+ /* Paired with fscache_unuse_cookie() at out_unlock. */
+ fscache_use_cookie(afs_vnode_cache(vnode), true);
+
+ /* Prevent any new writebacks from starting whilst we do this. */
+ down_write(&vnode->validate_lock);
+
+ if ((attr->ia_valid & ATTR_SIZE) && S_ISREG(inode->i_mode)) {
+ loff_t size = attr->ia_size;
+
+ /* Wait for any outstanding writes to the server to complete */
+ loff_t from = min(size, i_size);
+ loff_t to = max(size, i_size);
+ ret = filemap_fdatawait_range(inode->i_mapping, from, to);
+ if (ret < 0)
+ goto out_unlock;
+
+ /* Don't talk to the server if we're just shortening in-memory
+ * writes that haven't gone to the server yet.
+ *
+ * This applies only if nothing besides the size (and possibly
+ * ATTR_MTIME) is being set and the new size lies between the size
+ * held in vnode->status and the current i_size.
+ */
+ if (!(attr->ia_valid & (supported & ~ATTR_SIZE & ~ATTR_MTIME)) &&
+ attr->ia_size < i_size &&
+ attr->ia_size > vnode->status.size) {
+ truncate_pagecache(inode, attr->ia_size);
+ fscache_resize_cookie(afs_vnode_cache(vnode),
+ attr->ia_size);
+ i_size_write(inode, attr->ia_size);
+ ret = 0;
+ goto out_unlock;
+ }
+ }
+
+ /* Use the key of the file the caller supplied, if there is one;
+ * otherwise pass NULL as the key.
+ */
+ op = afs_alloc_operation(((attr->ia_valid & ATTR_FILE) ?
+ afs_file_key(attr->ia_file) : NULL),
+ vnode->volume);
+ if (IS_ERR(op)) {
+ ret = PTR_ERR(op);
+ goto out_unlock;
+ }
+
+ afs_op_set_vnode(op, 0, vnode);
+ op->setattr.attr = attr;
+
+ if (attr->ia_valid & ATTR_SIZE) {
+ op->file[0].dv_delta = 1;
+ op->file[0].set_size = true;
+ }
+ op->ctime = attr->ia_ctime;
+ op->file[0].update_ctime = 1;
+ op->file[0].modification = true;
+
+ op->ops = &afs_setattr_operation;
+ ret = afs_do_sync_operation(op);
+
+out_unlock:
+ up_write(&vnode->validate_lock);
+ fscache_unuse_cookie(afs_vnode_cache(vnode), NULL, NULL);
+ _leave(" = %d", ret);
+ return ret;
+}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
new file mode 100644
index 000000000..a25fdc3e5
--- /dev/null
+++ b/fs/afs/internal.h
@@ -0,0 +1,1785 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* internal AFS stuff
+ *
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/ktime.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/rxrpc.h>
+#include <linux/key.h>
+#include <linux/workqueue.h>
+#include <linux/sched.h>
+#include <linux/fscache.h>
+#include <linux/backing-dev.h>
+#include <linux/uuid.h>
+#include <linux/mm_types.h>
+#include <linux/dns_resolver.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+
+#include "afs.h"
+#include "afs_vl.h"
+
+#define AFS_CELL_MAX_ADDRS 15
+
+struct pagevec;
+struct afs_call;
+struct afs_vnode;
+
+/*
+ * Partial file-locking emulation mode. (The problem being that AFS3 only
+ * allows whole-file locks and no upgrading/downgrading).
+ *
+ * Stored in struct afs_fs_context and struct afs_super_info.
+ */
+enum afs_flock_mode {
+ afs_flock_mode_unset,
+ afs_flock_mode_local, /* Local locking only */
+ afs_flock_mode_openafs, /* Don't get server lock for a partial lock */
+ afs_flock_mode_strict, /* Always get a server lock for a partial lock */
+ afs_flock_mode_write, /* Get an exclusive server lock for a partial lock */
+};
+
+/*
+ * Parameters describing the volume to mount and how to mount it
+ * (presumably filled in whilst parsing mount parameters - confirm against
+ * the users of this struct).
+ */
+struct afs_fs_context {
+ bool force; /* T to force cell type */
+ bool autocell; /* T if set auto mount operation */
+ bool dyn_root; /* T if dynamic root */
+ bool no_cell; /* T if the source is "none" (for dynroot) */
+ enum afs_flock_mode flock_mode; /* Partial file-locking emulation mode */
+ afs_voltype_t type; /* type of volume requested */
+ unsigned int volnamesz; /* size of volume name */
+ const char *volname; /* name of volume to mount */
+ struct afs_net *net; /* the AFS net namespace stuff */
+ struct afs_cell *cell; /* cell in which to find volume */
+ struct afs_volume *volume; /* volume record */
+ struct key *key; /* key to use for secure mounting */
+};
+
+/*
+ * State of an afs_call (held in afs_call::state). The _CL_ states are used
+ * on the client side of a call and the _SV_ states on the server side.
+ */
+enum afs_call_state {
+ AFS_CALL_CL_REQUESTING, /* Client: Request is being sent */
+ AFS_CALL_CL_AWAIT_REPLY, /* Client: Awaiting reply */
+ AFS_CALL_CL_PROC_REPLY, /* Client: rxrpc call complete; processing reply */
+ AFS_CALL_SV_AWAIT_OP_ID, /* Server: Awaiting op ID */
+ AFS_CALL_SV_AWAIT_REQUEST, /* Server: Awaiting request data */
+ AFS_CALL_SV_REPLYING, /* Server: Replying */
+ AFS_CALL_SV_AWAIT_ACK, /* Server: Awaiting final ACK */
+ AFS_CALL_COMPLETE, /* Completed or failed */
+};
+
+/*
+ * List of server addresses.
+ *
+ * The failed and responded fields are bitmasks held in an unsigned long, and
+ * AFS_MAX_ADDRESSES is defined as the number of bits in an unsigned long.
+ */
+struct afs_addr_list {
+ struct rcu_head rcu;
+ refcount_t usage;
+ u32 version; /* Version */
+ unsigned char max_addrs; /* presumably the capacity of addrs[] - confirm */
+ unsigned char nr_addrs; /* presumably the number of entries in use - confirm */
+ unsigned char preferred; /* Preferred address */
+ unsigned char nr_ipv4; /* Number of IPv4 addresses */
+ enum dns_record_source source:8;
+ enum dns_lookup_status status:8;
+ unsigned long failed; /* Mask of addrs that failed locally/ICMP */
+ unsigned long responded; /* Mask of addrs that responded */
+ struct sockaddr_rxrpc addrs[];
+#define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8))
+};
+
+/*
+ * a record of an in-progress RxRPC call
+ *
+ * The state field takes values from enum afs_call_state, and type points to
+ * the afs_call_type table giving the handlers for this kind of call.
+ */
+struct afs_call {
+ const struct afs_call_type *type; /* type of call */
+ struct afs_addr_list *alist; /* Address is alist[addr_ix] */
+ wait_queue_head_t waitq; /* processes awaiting completion */
+ struct work_struct async_work; /* async I/O processor */
+ struct work_struct work; /* actual work processor */
+ struct rxrpc_call *rxcall; /* RxRPC call handle */
+ struct key *key; /* security for this call */
+ struct afs_net *net; /* The network namespace */
+ struct afs_server *server; /* The fileserver record if fs op (pins ref) */
+ struct afs_vlserver *vlserver; /* The vlserver record if vl op */
+ void *request; /* request data (first part) */
+ size_t iov_len; /* Size of *iter to be used */
+ struct iov_iter def_iter; /* Default buffer/data iterator */
+ struct iov_iter *write_iter; /* Iterator defining write to be made */
+ struct iov_iter *iter; /* Iterator currently in use */
+ union { /* Convenience for ->def_iter */
+ struct kvec kvec[1];
+ struct bio_vec bvec[1];
+ };
+ void *buffer; /* reply receive buffer */
+ /* Result of the call; the members share storage, so only one of them
+ * is meaningful for any given call.
+ */
+ union {
+ long ret0; /* Value to reply with instead of 0 */
+ struct afs_addr_list *ret_alist;
+ struct afs_vldb_entry *ret_vldb;
+ char *ret_str;
+ };
+ struct afs_operation *op;
+ unsigned int server_index;
+ refcount_t ref;
+ enum afs_call_state state;
+ spinlock_t state_lock;
+ int error; /* error code */
+ u32 abort_code; /* Remote abort ID or 0 */
+ unsigned int max_lifespan; /* Maximum lifespan to set if not 0 */
+ unsigned request_size; /* size of request data */
+ unsigned reply_max; /* maximum size of reply */
+ unsigned count2; /* count used in unmarshalling */
+ unsigned char unmarshall; /* unmarshalling phase */
+ unsigned char addr_ix; /* Address in ->alist */
+ bool drop_ref; /* T if need to drop ref for incoming call */
+ bool need_attention; /* T if RxRPC poked us */
+ bool async; /* T if asynchronous */
+ bool upgrade; /* T to request service upgrade */
+ bool intr; /* T if interruptible */
+ bool unmarshalling_error; /* T if an unmarshalling error occurred */
+ u16 service_id; /* Actual service ID (after upgrade) */
+ unsigned int debug_id; /* Trace ID */
+ u32 operation_ID; /* operation ID for an incoming call */
+ u32 count; /* count for use in unmarshalling */
+ /* tmp_u and tmp overlay tmp64 as a pair of packed 32-bit words. */
+ union { /* place to extract temporary data */
+ struct {
+ __be32 tmp_u;
+ __be32 tmp;
+ } __attribute__((packed));
+ __be64 tmp64;
+ };
+ ktime_t issue_time; /* Time of issue of operation */
+};
+
+/*
+ * Description of a kind of call: its name, operation number and the handlers
+ * to invoke for it. Pointed to by afs_call::type.
+ */
+struct afs_call_type {
+ const char *name;
+ unsigned int op; /* Really enum afs_fs_operation */
+
+ /* deliver request or reply data to a call
+ * - returning an error will cause the call to be aborted
+ */
+ int (*deliver)(struct afs_call *call);
+
+ /* clean up a call */
+ void (*destructor)(struct afs_call *call);
+
+ /* Work function */
+ void (*work)(struct work_struct *work);
+
+ /* Call done function (gets called immediately on success or failure) */
+ void (*done)(struct afs_call *call);
+};
+
+/*
+ * Key available for writeback on a file.
+ *
+ * Records are refcounted and chained onto a vnode's wb_keys list through
+ * vnode_link.
+ */
+struct afs_wb_key {
+ refcount_t usage;
+ struct key *key;
+ struct list_head vnode_link; /* Link in vnode->wb_keys */
+};
+
+/*
+ * AFS open file information record. Pointed to by file->private_data.
+ *
+ * Use afs_file_key() to get the key from a struct file.
+ */
+struct afs_file {
+ struct key *key; /* The key this file was opened with */
+ struct afs_wb_key *wb; /* Writeback key record for this file */
+};
+
+/*
+ * Return the key stored in the afs_file record that file->private_data
+ * points to.
+ */
+static inline struct key *afs_file_key(struct file *file)
+{
+	struct afs_file *open_info = file->private_data;
+	struct key *key = open_info->key;
+
+	return key;
+}
+
+/*
+ * Record of an outstanding read operation on a vnode.
+ *
+ * The record is refcounted (usage) and carries optional done and cleanup
+ * callbacks that take the record itself as their argument.
+ */
+struct afs_read {
+ loff_t pos; /* Where to start reading */
+ loff_t len; /* How much we're asking for */
+ loff_t actual_len; /* How much we're actually getting */
+ loff_t file_size; /* File size returned by server */
+ struct key *key; /* The key to use to reissue the read */
+ struct afs_vnode *vnode; /* The file being read into. */
+ struct netfs_io_subrequest *subreq; /* Fscache helper read request this belongs to */
+ afs_dataversion_t data_version; /* Version number returned by server */
+ refcount_t usage;
+ unsigned int call_debug_id;
+ unsigned int nr_pages;
+ int error;
+ void (*done)(struct afs_read *);
+ void (*cleanup)(struct afs_read *);
+ struct iov_iter *iter; /* Iterator representing the buffer */
+ struct iov_iter def_iter; /* Default iterator */
+};
+
+/*
+ * AFS superblock private data
+ * - there's one superblock per volume
+ * - reached from a superblock through AFS_FS_S(), i.e. sb->s_fs_info
+ */
+struct afs_super_info {
+ struct net *net_ns; /* Network namespace */
+ struct afs_cell *cell; /* The cell in which the volume resides */
+ struct afs_volume *volume; /* volume record */
+ enum afs_flock_mode flock_mode:8; /* File locking emulation mode */
+ bool dyn_root; /* True if dynamic root */
+};
+
+/*
+ * Get the AFS private data attached to a superblock (sb->s_fs_info).
+ */
+static inline struct afs_super_info *AFS_FS_S(struct super_block *sb)
+{
+	struct afs_super_info *as = sb->s_fs_info;
+
+	return as;
+}
+
+extern struct file_system_type afs_fs_type;
+
+/*
+ * Set of substitutes for @sys.
+ *
+ * Holds up to AFS_NR_SYSNAME substitution strings; the set is refcounted.
+ */
+struct afs_sysnames {
+#define AFS_NR_SYSNAME 16
+ char *subs[AFS_NR_SYSNAME];
+ refcount_t usage;
+ unsigned short nr; /* presumably the number of subs[] in use - confirm */
+ char blank[1];
+};
+
+/*
+ * AFS network namespace record.
+ *
+ * Groups the per-namespace state: the AF_RXRPC socket, the cell database,
+ * the known fileservers and their probe lists, lock management,
+ * miscellaneous state and statistics counters.
+ */
+struct afs_net {
+ struct net *net; /* Backpointer to the owning net namespace */
+ struct afs_uuid uuid;
+ bool live; /* F if this namespace is being removed */
+
+ /* AF_RXRPC I/O stuff */
+ struct socket *socket;
+ struct afs_call *spare_incoming_call;
+ struct work_struct charge_preallocation_work;
+ struct mutex socket_mutex;
+ atomic_t nr_outstanding_calls;
+ atomic_t nr_superblocks;
+
+ /* Cell database */
+ struct rb_root cells;
+ struct afs_cell *ws_cell;
+ struct work_struct cells_manager;
+ struct timer_list cells_timer;
+ atomic_t cells_outstanding;
+ struct rw_semaphore cells_lock;
+ struct mutex cells_alias_lock;
+
+ struct mutex proc_cells_lock;
+ struct hlist_head proc_cells;
+
+ /* Known servers. Theoretically each fileserver can only be in one
+ * cell, but in practice, people create aliases and subsets and there's
+ * no easy way to distinguish them.
+ */
+ seqlock_t fs_lock; /* For fs_servers, fs_probe_*, fs_proc */
+ struct rb_root fs_servers; /* afs_server (by server UUID or address) */
+ struct list_head fs_probe_fast; /* List of afs_server to probe at 30s intervals */
+ struct list_head fs_probe_slow; /* List of afs_server to probe at 5m intervals */
+ struct hlist_head fs_proc; /* procfs servers list */
+
+ struct hlist_head fs_addresses4; /* afs_server (by lowest IPv4 addr) */
+ struct hlist_head fs_addresses6; /* afs_server (by lowest IPv6 addr) */
+ seqlock_t fs_addr_lock; /* For fs_addresses[46] */
+
+ struct work_struct fs_manager;
+ struct timer_list fs_timer;
+
+ struct work_struct fs_prober;
+ struct timer_list fs_probe_timer;
+ atomic_t servers_outstanding;
+
+ /* File locking renewal management */
+ struct mutex lock_manager_mutex;
+
+ /* Misc */
+ struct super_block *dynroot_sb; /* Dynamic root mount superblock */
+ struct proc_dir_entry *proc_afs; /* /proc/net/afs directory */
+ struct afs_sysnames *sysnames;
+ rwlock_t sysnames_lock;
+
+ /* Statistics counters */
+ atomic_t n_lookup; /* Number of lookups done */
+ atomic_t n_reval; /* Number of dentries needing revalidation */
+ atomic_t n_inval; /* Number of invalidations by the server */
+ atomic_t n_relpg; /* Number of invalidations by release_folio */
+ atomic_t n_read_dir; /* Number of directory pages read */
+ atomic_t n_dir_cr; /* Number of directory entry creation edits */
+ atomic_t n_dir_rm; /* Number of directory entry removal edits */
+ atomic_t n_stores; /* Number of store ops */
+ atomic_long_t n_store_bytes; /* Number of bytes stored */
+ atomic_long_t n_fetch_bytes; /* Number of bytes fetched */
+ atomic_t n_fetches; /* Number of data fetch ops */
+};
+
+extern const char afs_init_sysname[];
+
+/*
+ * State of a cell record (held in afs_cell::state).
+ */
+enum afs_cell_state {
+ AFS_CELL_UNSET,
+ AFS_CELL_ACTIVATING,
+ AFS_CELL_ACTIVE,
+ AFS_CELL_DEACTIVATING,
+ AFS_CELL_INACTIVE,
+ AFS_CELL_FAILED,
+ AFS_CELL_REMOVED,
+};
+
+/*
+ * AFS cell record.
+ *
+ * This is a tricky concept to get right as it is possible to create aliases
+ * simply by pointing AFSDB/SRV records for two names at the same set of VL
+ * servers; it is also possible to do things like setting up two sets of VL
+ * servers, one of which provides a superset of the volumes provided by the
+ * other (for internal/external division, for example).
+ *
+ * Cells only exist in the sense that (a) a cell's name maps to a set of VL
+ * servers and (b) a cell's name is used by the client to select the key to use
+ * for authentication and encryption. The cell name is not typically used in
+ * the protocol.
+ *
+ * Two cells are determined to be aliases if they have an explicit alias (YFS
+ * only), share any VL servers in common or have at least one volume in common.
+ * "In common" means that the address list of the VL servers or the fileservers
+ * share at least one endpoint.
+ */
+struct afs_cell {
+ /* The RCU head shares storage with the tree node. */
+ union {
+ struct rcu_head rcu;
+ struct rb_node net_node; /* Node in net->cells */
+ };
+ struct afs_net *net;
+ struct afs_cell *alias_of; /* The cell this is an alias of */
+ struct afs_volume *root_volume; /* The root.cell volume if there is one */
+ struct key *anonymous_key; /* anonymous user key for this cell */
+ struct work_struct manager; /* Manager for init/deinit/dns */
+ struct hlist_node proc_link; /* /proc cell list link */
+ time64_t dns_expiry; /* Time AFSDB/SRV record expires */
+ time64_t last_inactive; /* Time of last drop of usage count */
+ refcount_t ref; /* Struct refcount */
+ atomic_t active; /* Active usage counter */
+ unsigned long flags;
+#define AFS_CELL_FL_NO_GC 0 /* The cell was added manually, don't auto-gc */
+#define AFS_CELL_FL_DO_LOOKUP 1 /* DNS lookup requested */
+#define AFS_CELL_FL_CHECK_ALIAS 2 /* Need to check for aliases */
+ enum afs_cell_state state;
+ short error;
+ enum dns_record_source dns_source:8; /* Latest source of data from lookup */
+ enum dns_lookup_status dns_status:8; /* Latest status of data from lookup */
+ unsigned int dns_lookup_count; /* Counter of DNS lookups */
+ unsigned int debug_id;
+
+ /* The volumes belonging to this cell */
+ struct rb_root volumes; /* Tree of volumes in this cell */
+ struct hlist_head proc_volumes; /* procfs volume list */
+ seqlock_t volume_lock; /* For volumes */
+
+ /* Active fileserver interaction state. */
+ struct rb_root fs_servers; /* afs_server (by server UUID) */
+ seqlock_t fs_lock; /* For fs_servers */
+ struct rw_semaphore fs_open_mmaps_lock;
+ struct list_head fs_open_mmaps; /* List of vnodes that are mmapped */
+ atomic_t fs_s_break; /* Counter of CB.InitCallBackState messages */
+
+ /* VL server list. */
+ rwlock_t vl_servers_lock; /* Lock on vl_servers */
+ struct afs_vlserver_list __rcu *vl_servers;
+
+ u8 name_len; /* Length of name */
+ char *name; /* Cell name, case-flattened and NUL-padded */
+};
+
+/*
+ * Volume Location server record.
+ *
+ * The record is refcounted and ends with the server name as a flexible
+ * array. The AFS_VLSERVER_FL_* values are defined alongside the flags field
+ * and the AFS_VLSERVER_PROBE_* masks alongside probe.flags.
+ */
+struct afs_vlserver {
+ struct rcu_head rcu;
+ struct afs_addr_list __rcu *addresses; /* List of addresses for this VL server */
+ unsigned long flags;
+#define AFS_VLSERVER_FL_PROBED 0 /* The VL server has been probed */
+#define AFS_VLSERVER_FL_PROBING 1 /* VL server is being probed */
+#define AFS_VLSERVER_FL_IS_YFS 2 /* Server is YFS not AFS */
+#define AFS_VLSERVER_FL_RESPONDING 3 /* VL server is responding */
+ rwlock_t lock; /* Lock on addresses */
+ refcount_t ref;
+ unsigned int rtt; /* Server's current RTT in uS */
+
+ /* Probe state */
+ wait_queue_head_t probe_wq;
+ atomic_t probe_outstanding;
+ spinlock_t probe_lock;
+ struct {
+ unsigned int rtt; /* RTT in uS */
+ u32 abort_code;
+ short error;
+ unsigned short flags;
+#define AFS_VLSERVER_PROBE_RESPONDED 0x01 /* At least one response (may be abort) */
+#define AFS_VLSERVER_PROBE_IS_YFS 0x02 /* The peer appears to be YFS */
+#define AFS_VLSERVER_PROBE_NOT_YFS 0x04 /* The peer appears not to be YFS */
+#define AFS_VLSERVER_PROBE_LOCAL_FAILURE 0x08 /* A local failure prevented a probe */
+ } probe;
+
+ u16 port;
+ u16 name_len; /* Length of name */
+ char name[]; /* Server name, case-flattened */
+};
+
+/*
+ * Weighted list of Volume Location servers.
+ *
+ * Each entry pairs a VL server with its SRV-style priority and weight;
+ * entries are held in afs_vlserver_list::servers[].
+ */
+struct afs_vlserver_entry {
+ u16 priority; /* Preference (as SRV) */
+ u16 weight; /* Weight (as SRV) */
+ enum dns_record_source source:8;
+ enum dns_lookup_status status:8;
+ struct afs_vlserver *server;
+};
+
+/*
+ * Refcounted list of VL server entries, ending with the entries as a flexible
+ * array. A cell points to one of these through afs_cell::vl_servers.
+ */
+struct afs_vlserver_list {
+ struct rcu_head rcu;
+ refcount_t ref;
+ u8 nr_servers; /* presumably the number of entries in servers[] - confirm */
+ u8 index; /* Server currently in use */
+ u8 preferred; /* Preferred server */
+ enum dns_record_source source:8;
+ enum dns_lookup_status status:8;
+ rwlock_t lock;
+ struct afs_vlserver_entry servers[];
+};
+
+/*
+ * Cached VLDB entry.
+ *
+ * This is pointed to by cell->vldb_entries, indexed by name.
+ *
+ * NOTE(review): struct afs_cell as declared in this header has no
+ * vldb_entries member - the sentence above may be out of date; confirm.
+ *
+ * The per-server arrays (fs_server, addr_version, fs_mask) are each sized
+ * AFS_NMAXNSERVERS; the AFS_VOL_VTM_* masks apply to fs_mask[].
+ */
+struct afs_vldb_entry {
+ afs_volid_t vid[3]; /* Volume IDs for R/W, R/O and Bak volumes */
+
+ unsigned long flags;
+#define AFS_VLDB_HAS_RW 0 /* - R/W volume exists */
+#define AFS_VLDB_HAS_RO 1 /* - R/O volume exists */
+#define AFS_VLDB_HAS_BAK 2 /* - Backup volume exists */
+#define AFS_VLDB_QUERY_VALID 3 /* - Record is valid */
+#define AFS_VLDB_QUERY_ERROR 4 /* - VL server returned error */
+
+ uuid_t fs_server[AFS_NMAXNSERVERS];
+ u32 addr_version[AFS_NMAXNSERVERS]; /* Registration change counters */
+ u8 fs_mask[AFS_NMAXNSERVERS];
+#define AFS_VOL_VTM_RW 0x01 /* R/W version of the volume is available (on this server) */
+#define AFS_VOL_VTM_RO 0x02 /* R/O version of the volume is available (on this server) */
+#define AFS_VOL_VTM_BAK 0x04 /* backup version of the volume is available (on this server) */
+ short error;
+ u8 nr_servers; /* Number of server records */
+ u8 name_len;
+ u8 name[AFS_MAXVOLNAME + 1]; /* NUL-padded volume name */
+};
+
+/*
+ * Record of fileserver with which we're actively communicating.
+ *
+ * The AFS_SERVER_FL_* values are defined alongside the flags field. The
+ * record carries both an object refcount (ref) and an active user count
+ * (active).
+ */
+struct afs_server {
+ struct rcu_head rcu;
+ /* The server ID, viewable as either a uuid_t or a struct afs_uuid. */
+ union {
+ uuid_t uuid; /* Server ID */
+ struct afs_uuid _uuid;
+ };
+
+ struct afs_addr_list __rcu *addresses;
+ struct afs_cell *cell; /* Cell to which belongs (pins ref) */
+ struct rb_node uuid_rb; /* Link in net->fs_servers */
+ struct afs_server __rcu *uuid_next; /* Next server with same UUID */
+ struct afs_server *uuid_prev; /* Previous server with same UUID */
+ struct list_head probe_link; /* Link in net->fs_probe_fast or net->fs_probe_slow (presumably - confirm) */
+ struct hlist_node addr4_link; /* Link in net->fs_addresses4 */
+ struct hlist_node addr6_link; /* Link in net->fs_addresses6 */
+ struct hlist_node proc_link; /* Link in net->fs_proc */
+ struct work_struct initcb_work; /* Work for CB.InitCallBackState* */
+ struct afs_server *gc_next; /* Next server in manager's list */
+ time64_t unuse_time; /* Time at which last unused */
+ unsigned long flags;
+#define AFS_SERVER_FL_RESPONDING 0 /* The server is responding */
+#define AFS_SERVER_FL_UPDATING 1
+#define AFS_SERVER_FL_NEEDS_UPDATE 2 /* Fileserver address list is out of date */
+#define AFS_SERVER_FL_NOT_READY 4 /* The record is not ready for use */
+#define AFS_SERVER_FL_NOT_FOUND 5 /* VL server says no such server */
+#define AFS_SERVER_FL_VL_FAIL 6 /* Failed to access VL server */
+#define AFS_SERVER_FL_MAY_HAVE_CB 8 /* May have callbacks on this fileserver */
+#define AFS_SERVER_FL_IS_YFS 16 /* Server is YFS not AFS */
+#define AFS_SERVER_FL_NO_IBULK 17 /* Fileserver doesn't support FS.InlineBulkStatus */
+#define AFS_SERVER_FL_NO_RM2 18 /* Fileserver doesn't support YFS.RemoveFile2 */
+#define AFS_SERVER_FL_HAS_FS64 19 /* Fileserver supports FS.{Fetch,Store}Data64 */
+ refcount_t ref; /* Object refcount */
+ atomic_t active; /* Active user count */
+ u32 addr_version; /* Address list version */
+ unsigned int rtt; /* Server's current RTT in uS */
+ unsigned int debug_id; /* Debugging ID for traces */
+
+ /* file service access */
+ rwlock_t fs_lock; /* access lock */
+
+ /* callback promise management */
+ unsigned cb_s_break; /* Break-everything counter. */
+
+ /* Probe state */
+ unsigned long probed_at; /* Time last probe was dispatched (jiffies) */
+ wait_queue_head_t probe_wq;
+ atomic_t probe_outstanding;
+ spinlock_t probe_lock;
+ struct {
+ unsigned int rtt; /* RTT in uS */
+ u32 abort_code;
+ short error;
+ bool responded:1;
+ bool is_yfs:1;
+ bool not_yfs:1;
+ bool local_failure:1;
+ } probe;
+};
+
+/*
+ * Replaceable volume server list.
+ *
+ * One entry per server; entries are held in afs_server_list::servers[].
+ */
+struct afs_server_entry {
+ struct afs_server *server;
+};
+
+/*
+ * Refcounted list of the servers for a volume, ending with the entries as a
+ * flexible array. A volume points to one of these through
+ * afs_volume::servers.
+ */
+struct afs_server_list {
+ struct rcu_head rcu;
+ afs_volid_t vids[AFS_MAXTYPES]; /* Volume IDs */
+ refcount_t usage;
+ unsigned char nr_servers; /* Number of entries in servers[] iterated by users */
+ unsigned char preferred; /* Preferred server */
+ unsigned short vnovol_mask; /* Servers to be skipped due to VNOVOL */
+ unsigned int seq; /* Set to ->servers_seq when installed */
+ rwlock_t lock;
+ struct afs_server_entry servers[];
+};
+
+/*
+ * Live AFS volume management.
+ *
+ * Note that ->rcu and ->vid sit in an anonymous union and so share storage.
+ * The AFS_VOLUME_* constants defined inline below accompany ->flags; the
+ * ->cache member only exists when CONFIG_AFS_FSCACHE is enabled.
+ */
+struct afs_volume {
+ union {
+ struct rcu_head rcu;
+ afs_volid_t vid; /* volume ID */
+ };
+ refcount_t ref; /* Object refcount */
+ time64_t update_at; /* Time at which to next update */
+ struct afs_cell *cell; /* Cell to which belongs (pins ref) */
+ struct rb_node cell_node; /* Link in cell->volumes */
+ struct hlist_node proc_link; /* Link in cell->proc_volumes */
+ struct super_block __rcu *sb; /* Superblock on which inodes reside */
+ unsigned long flags;
+#define AFS_VOLUME_NEEDS_UPDATE 0 /* - T if an update needs performing */
+#define AFS_VOLUME_UPDATING 1 /* - T if an update is in progress */
+#define AFS_VOLUME_WAIT 2 /* - T if users must wait for update */
+#define AFS_VOLUME_DELETED 3 /* - T if volume appears deleted */
+#define AFS_VOLUME_OFFLINE 4 /* - T if volume offline notice given */
+#define AFS_VOLUME_BUSY 5 /* - T if volume busy notice given */
+#define AFS_VOLUME_MAYBE_NO_IBULK 6 /* - T if some servers don't have InlineBulkStatus */
+#define AFS_VOLUME_RM_TREE 7 /* - Set if volume removed from cell->volumes */
+#ifdef CONFIG_AFS_FSCACHE
+ struct fscache_volume *cache; /* Caching cookie */
+#endif
+ struct afs_server_list __rcu *servers; /* List of servers on which volume resides */
+ rwlock_t servers_lock; /* Lock for ->servers */
+ unsigned int servers_seq; /* Incremented each time ->servers changes */
+
+ unsigned cb_v_break; /* Break-everything counter. */
+ rwlock_t cb_v_break_lock; /* Presumably guards ->cb_v_break - confirm */
+
+ afs_voltype_t type; /* type of volume */
+ char type_force; /* force volume type (suppress R/O -> R/W) */
+ u8 name_len; /* Presumably the used length of name[] - confirm */
+ u8 name[AFS_MAXVOLNAME + 1]; /* NUL-padded volume name */
+};
+
+enum afs_lock_state { /* Server-side file lock state of a vnode (afs_vnode::lock_state) */
+ AFS_VNODE_LOCK_NONE, /* The vnode has no lock on the server */
+ AFS_VNODE_LOCK_WAITING_FOR_CB, /* We're waiting for the server to break the callback */
+ AFS_VNODE_LOCK_SETTING, /* We're asking the server for a lock */
+ AFS_VNODE_LOCK_GRANTED, /* We have a lock on the server */
+ AFS_VNODE_LOCK_EXTENDING, /* We're extending a lock on the server */
+ AFS_VNODE_LOCK_NEED_UNLOCK, /* We need to unlock on the server */
+ AFS_VNODE_LOCK_UNLOCKING, /* We're telling the server to unlock */
+ AFS_VNODE_LOCK_DELETED, /* The vnode has been deleted whilst we have a lock */
+};
+
+/*
+ * AFS inode private data.
+ *
+ * Note that afs_alloc_inode() *must* reset anything that could incorrectly
+ * leak from one inode to another.
+ *
+ * The VFS inode lives inside the embedded netfs context and is reached as
+ * ->netfs.inode.  The AFS_VNODE_* constants defined inline below accompany
+ * ->flags.  ->lock_state and ->lock_type are stored as 8-bit bitfields.
+ */
+struct afs_vnode {
+ struct netfs_inode netfs; /* Netfslib context and vfs inode */
+ struct afs_volume *volume; /* volume on which vnode resides */
+ struct afs_fid fid; /* the file identifier for this inode */
+ struct afs_file_status status; /* AFS status info for this file */
+ afs_dataversion_t invalid_before; /* Child dentries are invalid before this */
+ struct afs_permits __rcu *permit_cache; /* cache of permits so far obtained */
+ struct mutex io_lock; /* Lock for serialising I/O on this vnode */
+ struct rw_semaphore validate_lock; /* lock for validating this vnode */
+ struct rw_semaphore rmdir_lock; /* Lock for rmdir vs sillyrename */
+ struct key *silly_key; /* Silly rename key */
+ spinlock_t wb_lock; /* lock for wb_keys */
+ spinlock_t lock; /* waitqueue/flags lock */
+ unsigned long flags;
+#define AFS_VNODE_CB_PROMISED 0 /* Set if vnode has a callback promise */
+#define AFS_VNODE_UNSET 1 /* set if vnode attributes not yet set */
+#define AFS_VNODE_DIR_VALID 2 /* Set if dir contents are valid */
+#define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */
+#define AFS_VNODE_DELETED 4 /* set if vnode deleted on server */
+#define AFS_VNODE_MOUNTPOINT 5 /* set if vnode is a mountpoint symlink */
+#define AFS_VNODE_AUTOCELL 6 /* set if Vnode is an auto mount point */
+#define AFS_VNODE_PSEUDODIR 7 /* set if Vnode is a pseudo directory */
+#define AFS_VNODE_NEW_CONTENT 8 /* Set if file has new content (create/trunc-0) */
+#define AFS_VNODE_SILLY_DELETED 9 /* Set if file has been silly-deleted */
+#define AFS_VNODE_MODIFYING 10 /* Set if we're performing a modification op */
+
+ struct list_head wb_keys; /* List of keys available for writeback */
+ struct list_head pending_locks; /* locks waiting to be granted */
+ struct list_head granted_locks; /* locks granted on this file */
+ struct delayed_work lock_work; /* work to be done in locking */
+ struct key *lock_key; /* Key to be used in lock ops */
+ ktime_t locked_at; /* Time at which lock obtained */
+ enum afs_lock_state lock_state : 8;
+ afs_lock_type_t lock_type : 8;
+
+ /* outstanding callback notification on this file */
+ struct work_struct cb_work; /* Work for mmap'd files */
+ struct list_head cb_mmap_link; /* Link in cell->fs_open_mmaps */
+ void *cb_server; /* Server with callback/filelock */
+ atomic_t cb_nr_mmap; /* Number of mmaps */
+ unsigned int cb_fs_s_break; /* Mass server break counter (cell->fs_s_break) */
+ unsigned int cb_s_break; /* Mass break counter on ->cb_server */
+ unsigned int cb_v_break; /* Mass break counter on ->volume */
+ unsigned int cb_break; /* Break counter on vnode */
+ seqlock_t cb_lock; /* Lock for ->cb_server, ->status, ->cb_*break */
+
+ time64_t cb_expires_at; /* time at which callback expires */
+};
+
+/*
+ * Fetch the fscache cookie from a vnode's netfs context.  Yields NULL when
+ * CONFIG_AFS_FSCACHE is not enabled.
+ */
+static inline struct fscache_cookie *afs_vnode_cache(struct afs_vnode *vnode)
+{
+	struct fscache_cookie *cookie = NULL;
+
+#ifdef CONFIG_AFS_FSCACHE
+	cookie = netfs_i_cookie(&vnode->netfs);
+#endif
+	return cookie;
+}
+
+/*
+ * Store @cookie in the vnode's netfs context and, if a cookie was actually
+ * supplied, call mapping_set_release_always() on the inode's mapping.  This
+ * compiles to nothing when CONFIG_AFS_FSCACHE is not enabled.
+ */
+static inline void afs_vnode_set_cache(struct afs_vnode *vnode,
+				       struct fscache_cookie *cookie)
+{
+#ifdef CONFIG_AFS_FSCACHE
+	vnode->netfs.cache = cookie;
+	if (!cookie)
+		return;
+	mapping_set_release_always(vnode->netfs.inode.i_mapping);
+#endif
+}
+
+/*
+ * cached security record for one user's attempt to access a vnode
+ *
+ * Records of this type form the permits[] array of struct afs_permits.
+ */
+struct afs_permit {
+ struct key *key; /* RxRPC ticket holding a security context */
+ afs_access_t access; /* CallerAccess value for this key */
+};
+
+/*
+ * Immutable cache of CallerAccess records from attempts to access vnodes.
+ * These may be shared between multiple vnodes.
+ *
+ * A vnode points at its current set through the RCU-annotated
+ * afs_vnode::permit_cache pointer.  The records themselves are held in the
+ * trailing flexible array.
+ */
+struct afs_permits {
+ struct rcu_head rcu;
+ struct hlist_node hash_node; /* Link in hash */
+ unsigned long h; /* Hash value for this permit list */
+ refcount_t usage; /* Reference count */
+ unsigned short nr_permits; /* Number of records */
+ bool invalidated; /* Invalidated due to key change */
+ struct afs_permit permits[]; /* List of permits sorted by key pointer */
+};
+
+/*
+ * Error prioritisation and accumulation.
+ *
+ * afs_prioritise_error() takes a pointer to one of these along with an error
+ * and an abort code (presumably folding them into this record - confirm
+ * against misc.c).
+ */
+struct afs_error {
+ short error; /* Accumulated error */
+ bool responded; /* T if server responded */
+};
+
+/*
+ * Cursor for iterating over a server's address list.
+ *
+ * One of these is embedded in both struct afs_vl_cursor and struct
+ * afs_operation (as ->ac) and is what afs_make_call() is handed.
+ */
+struct afs_addr_cursor {
+ struct afs_addr_list *alist; /* Current address list (pins ref) */
+ unsigned long tried; /* Tried addresses */
+ signed char index; /* Current address */
+ bool responded; /* T if the current address responded */
+ unsigned short nr_iterations; /* Number of address iterations */
+ short error; /* Error code (presumably from the last attempt - confirm) */
+ u32 abort_code; /* Abort code (presumably from the last attempt - confirm) */
+};
+
+/*
+ * Cursor for iterating over a set of volume location servers.
+ *
+ * Embeds an address cursor (->ac) for the addresses of the server currently
+ * selected.  The AFS_VL_CURSOR_* values defined inline below accompany
+ * ->flags (presumably as OR-able bit masks, given their values - confirm).
+ */
+struct afs_vl_cursor {
+ struct afs_addr_cursor ac;
+ struct afs_cell *cell; /* The cell we're querying */
+ struct afs_vlserver_list *server_list; /* Current server list (pins ref) */
+ struct afs_vlserver *server; /* Server on which this resides */
+ struct key *key; /* Key for the server */
+ unsigned long untried; /* Bitmask of untried servers */
+ short index; /* Current server */
+ short error;
+ unsigned short flags;
+#define AFS_VL_CURSOR_STOP 0x0001 /* Set to cease iteration */
+#define AFS_VL_CURSOR_RETRY 0x0002 /* Set to do a retry */
+#define AFS_VL_CURSOR_RETRIED 0x0004 /* Set if started a retry */
+ unsigned short nr_iterations; /* Number of server iterations */
+};
+
+/*
+ * Fileserver operation methods.
+ *
+ * A table of callbacks attached to an operation through afs_operation::ops.
+ * Every method receives the operation itself.  When each one is invoked is
+ * decided by code outside this header; presumably issue_afs_rpc and
+ * issue_yfs_rpc are alternatives chosen by server flavour - confirm.
+ */
+struct afs_operation_ops {
+ void (*issue_afs_rpc)(struct afs_operation *op);
+ void (*issue_yfs_rpc)(struct afs_operation *op);
+ void (*success)(struct afs_operation *op);
+ void (*aborted)(struct afs_operation *op);
+ void (*failed)(struct afs_operation *op);
+ void (*edit_dir)(struct afs_operation *op);
+ void (*put)(struct afs_operation *op);
+};
+
+struct afs_vnode_param { /* Per-vnode slot of an afs_operation (->file[], ->more_files) */
+ struct afs_vnode *vnode;
+ struct afs_fid fid; /* Fid to access */
+ struct afs_status_cb scb; /* Returned status and callback promise */
+ afs_dataversion_t dv_before; /* Data version before the call */
+ unsigned int cb_break_before; /* cb_break + cb_s_break before the call
+ * NOTE(review): afs_calc_vnode_cb_break() and
+ * afs_cb_is_broken() sum cb_break with cb_v_break,
+ * not cb_s_break - confirm which is meant */
+ u8 dv_delta; /* Expected change in data version */
+ bool put_vnode:1; /* T if we have a ref on the vnode */
+ bool need_io_lock:1; /* T if we need the I/O lock on this */
+ bool update_ctime:1; /* Need to update the ctime */
+ bool set_size:1; /* Must update i_size */
+ bool op_unlinked:1; /* True if file was unlinked by op */
+ bool speculative:1; /* T if speculative status fetch (no vnode lock) */
+ bool modification:1; /* Set if the content gets modified */
+};
+
+/*
+ * Fileserver operation wrapper, handling server and address rotation
+ * asynchronously. May make simultaneous calls to multiple servers.
+ *
+ * The anonymous union holds arguments specific to the kind of operation;
+ * its members overlap, so only the one belonging to the operation in
+ * progress is meaningful.  The AFS_OPERATION_* values defined inline at the
+ * end are bit masks for ->flags (afs_make_op_call() tests
+ * AFS_OPERATION_UNINTR with '&').
+ */
+struct afs_operation {
+ struct afs_net *net; /* Network namespace */
+ struct key *key; /* Key for the cell */
+ const struct afs_call_type *type; /* Type of call done */
+ const struct afs_operation_ops *ops;
+
+ /* Parameters/results for the operation */
+ struct afs_volume *volume; /* Volume being accessed */
+ struct afs_vnode_param file[2];
+ struct afs_vnode_param *more_files;
+ struct afs_volsync volsync;
+ struct dentry *dentry; /* Dentry to be altered */
+ struct dentry *dentry_2; /* Second dentry to be altered */
+ struct timespec64 mtime; /* Modification time to record */
+ struct timespec64 ctime; /* Change time to set */
+ short nr_files; /* Number of entries in file[], more_files */
+ short error; /* Error code; afs_op_nomem() stores -ENOMEM here */
+ unsigned int debug_id;
+
+ unsigned int cb_v_break; /* Volume break counter before op */
+ unsigned int cb_s_break; /* Server break counter before op */
+
+ union {
+ struct {
+ int which; /* Which ->file[] to fetch for */
+ } fetch_status;
+ struct {
+ int reason; /* enum afs_edit_dir_reason */
+ mode_t mode;
+ const char *symlink;
+ } create;
+ struct {
+ bool need_rehash;
+ } unlink;
+ struct {
+ struct dentry *rehash;
+ struct dentry *tmp;
+ bool new_negative;
+ } rename;
+ struct {
+ struct afs_read *req;
+ } fetch;
+ struct {
+ afs_lock_type_t type;
+ } lock;
+ struct {
+ struct iov_iter *write_iter;
+ loff_t pos;
+ loff_t size;
+ loff_t i_size;
+ bool laundering; /* Laundering page, PG_writeback not set */
+ } store;
+ struct {
+ struct iattr *attr;
+ loff_t old_i_size;
+ } setattr;
+ struct afs_acl *acl;
+ struct yfs_acl *yacl;
+ struct {
+ struct afs_volume_status vs;
+ struct kstatfs *buf;
+ } volstatus;
+ };
+
+ /* Fileserver iteration state */
+ struct afs_addr_cursor ac;
+ struct afs_server_list *server_list; /* Current server list (pins ref) */
+ struct afs_server *server; /* Server we're using (ref pinned by server_list) */
+ struct afs_call *call; /* Call in progress; set by afs_make_op_call() */
+ unsigned long untried; /* Bitmask of untried servers */
+ short index; /* Current server */
+ unsigned short nr_iterations; /* Number of server iterations */
+
+ unsigned int flags;
+#define AFS_OPERATION_STOP 0x0001 /* Set to cease iteration */
+#define AFS_OPERATION_VBUSY 0x0002 /* Set if seen VBUSY */
+#define AFS_OPERATION_VMOVED 0x0004 /* Set if seen VMOVED */
+#define AFS_OPERATION_VNOVOL 0x0008 /* Set if seen VNOVOL */
+#define AFS_OPERATION_CUR_ONLY 0x0010 /* Set if current server only (file lock held) */
+#define AFS_OPERATION_NO_VSLEEP 0x0020 /* Set to prevent sleep on VBUSY, VOFFLINE, ... */
+#define AFS_OPERATION_UNINTR 0x0040 /* Set if op is uninterruptible */
+#define AFS_OPERATION_DOWNGRADE 0x0080 /* Set to retry with downgraded opcode */
+#define AFS_OPERATION_LOCK_0 0x0100 /* Set if have io_lock on file[0] */
+#define AFS_OPERATION_LOCK_1 0x0200 /* Set if have io_lock on file[1] */
+#define AFS_OPERATION_TRIED_ALL 0x0400 /* Set if we've tried all the fileservers */
+#define AFS_OPERATION_RETRY_SERVER 0x0800 /* Set if we should retry the current server */
+#define AFS_OPERATION_DIR_CONFLICT 0x1000 /* Set if we detected a 3rd-party dir change */
+};
+
+/*
+ * Cache auxiliary data.
+ *
+ * Holds the vnode's data version in big-endian form; afs_set_cache_aux()
+ * fills it in from vnode->status.data_version.  The struct is packed.
+ */
+struct afs_vnode_cache_aux {
+ __be64 data_version;
+} __packed;
+
+/*
+ * Fill in the cache auxiliary record for a vnode: the data version from the
+ * vnode's status is converted to big-endian and stored in @aux.
+ */
+static inline void afs_set_cache_aux(struct afs_vnode *vnode,
+				     struct afs_vnode_cache_aux *aux)
+{
+	__be64 be_version = cpu_to_be64(vnode->status.data_version);
+
+	aux->data_version = be_version;
+}
+
+/*
+ * Invalidate the vnode's cache object.  A fresh auxiliary record is built
+ * from the vnode's current status and passed to fscache_invalidate() together
+ * with the inode's current size and the caller's @flags.
+ */
+static inline void afs_invalidate_cache(struct afs_vnode *vnode, unsigned int flags)
+{
+	struct afs_vnode_cache_aux aux_data;
+
+	afs_set_cache_aux(vnode, &aux_data);
+
+	fscache_invalidate(afs_vnode_cache(vnode),
+			   &aux_data,
+			   i_size_read(&vnode->netfs.inode),
+			   flags);
+}
+
+/*
+ * We use folio->private to hold the amount of the folio that we've written to,
+ * splitting the field into two parts. However, we need to represent a range
+ * 0...FOLIO_SIZE, so we reduce the resolution if the size of the folio
+ * exceeds what we can encode.
+ *
+ * Layout, as decoded by afs_folio_dirty_from() and afs_folio_dirty_to():
+ * the low __AFS_FOLIO_PRIV_MASK bits hold the scaled start of the dirty
+ * region; the same number of bits starting at __AFS_FOLIO_PRIV_SHIFT hold
+ * the scaled offset of the last dirty byte (end - 1); and
+ * __AFS_FOLIO_PRIV_MMAPPED is a separate flag bit sitting immediately above
+ * the low field.  The widths depend on CONFIG_64BIT.
+ */
+#ifdef CONFIG_64BIT
+#define __AFS_FOLIO_PRIV_MASK 0x7fffffffUL
+#define __AFS_FOLIO_PRIV_SHIFT 32
+#define __AFS_FOLIO_PRIV_MMAPPED 0x80000000UL
+#else
+#define __AFS_FOLIO_PRIV_MASK 0x7fffUL
+#define __AFS_FOLIO_PRIV_SHIFT 16
+#define __AFS_FOLIO_PRIV_MMAPPED 0x8000UL
+#endif
+
+/*
+ * Work out by how many bits dirty-region offsets must be shifted down to fit
+ * in the private-field encoding for this folio.  The result is zero unless
+ * the folio's shift exceeds (__AFS_FOLIO_PRIV_SHIFT - 1).
+ */
+static inline unsigned int afs_folio_dirty_resolution(struct folio *folio)
+{
+	int excess = folio_shift(folio) - (__AFS_FOLIO_PRIV_SHIFT - 1);
+
+	if (excess <= 0)
+		return 0;
+	return excess;
+}
+
+/*
+ * Decode the start of the dirty region from a folio private value.  The start
+ * is held in the low field and is inclusive; it is scaled back up by the
+ * folio's dirty resolution.
+ */
+static inline size_t afs_folio_dirty_from(struct folio *folio, unsigned long priv)
+{
+	unsigned long start = priv & __AFS_FOLIO_PRIV_MASK;
+	unsigned int res = afs_folio_dirty_resolution(folio);
+
+	return start << res;
+}
+
+/*
+ * Decode the end of the dirty region from a folio private value.  The high
+ * field stores one less than the scaled end, so one is added back before
+ * scaling up; the value returned lies immediately beyond the region.
+ */
+static inline size_t afs_folio_dirty_to(struct folio *folio, unsigned long priv)
+{
+	unsigned long last = priv >> __AFS_FOLIO_PRIV_SHIFT;
+	unsigned int res = afs_folio_dirty_resolution(folio);
+
+	last &= __AFS_FOLIO_PRIV_MASK;
+	return (last + 1) << res;
+}
+
+/*
+ * Encode a dirty region [@from, @to) as a folio private value.  Both bounds
+ * are scaled down by the folio's dirty resolution; the upper bound is stored
+ * as (@to - 1) in the high field and the lower bound in the low field.
+ */
+static inline unsigned long afs_folio_dirty(struct folio *folio, size_t from, size_t to)
+{
+	const unsigned int res = afs_folio_dirty_resolution(folio);
+	size_t first = from >> res;
+	size_t last = (to - 1) >> res;
+
+	return (last << __AFS_FOLIO_PRIV_SHIFT) | first;
+}
+
+/*
+ * Return @priv with the __AFS_FOLIO_PRIV_MMAPPED flag bit set.
+ */
+static inline unsigned long afs_folio_dirty_mmapped(unsigned long priv)
+{
+	priv |= __AFS_FOLIO_PRIV_MMAPPED;
+	return priv;
+}
+
+/*
+ * Report whether the __AFS_FOLIO_PRIV_MMAPPED flag bit is set in @priv.
+ */
+static inline bool afs_is_folio_dirty_mmapped(unsigned long priv)
+{
+	return (priv & __AFS_FOLIO_PRIV_MMAPPED) != 0;
+}
+
+#include <trace/events/afs.h>
+
+/*****************************************************************************/
+/*
+ * addr_list.c
+ */
+/*
+ * Take a reference on an address list.  A NULL @alist is tolerated and is
+ * simply handed back.
+ */
+static inline struct afs_addr_list *afs_get_addrlist(struct afs_addr_list *alist)
+{
+	if (!alist)
+		return NULL;
+
+	refcount_inc(&alist->usage);
+	return alist;
+}
+extern struct afs_addr_list *afs_alloc_addrlist(unsigned int,
+ unsigned short,
+ unsigned short);
+extern void afs_put_addrlist(struct afs_addr_list *);
+extern struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *,
+ const char *, size_t, char,
+ unsigned short, unsigned short);
+extern struct afs_vlserver_list *afs_dns_query(struct afs_cell *, time64_t *);
+extern bool afs_iterate_addresses(struct afs_addr_cursor *);
+extern int afs_end_cursor(struct afs_addr_cursor *);
+
+extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32, u16);
+extern void afs_merge_fs_addr6(struct afs_addr_list *, __be32 *, u16);
+
+/*
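+ * cache.c (NOTE(review): no cache.c appears in this commit's diffstat - confirm the declaration below is still needed)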
+ */
+#ifdef CONFIG_AFS_FSCACHE
+extern struct fscache_netfs afs_cache_netfs;
+#endif
+
+/*
+ * callback.c
+ */
+extern void afs_invalidate_mmap_work(struct work_struct *);
+extern void afs_server_init_callback_work(struct work_struct *work);
+extern void afs_init_callback_state(struct afs_server *);
+extern void __afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason);
+extern void afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason);
+extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break *);
+
+/*
+ * Combine the vnode's own break counter with the volume break counter value
+ * recorded on the vnode (vnode->cb_v_break) into a single number.
+ */
+static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode)
+{
+	unsigned int vnode_breaks = vnode->cb_break;
+	unsigned int volume_breaks = vnode->cb_v_break;
+
+	return vnode_breaks + volume_breaks;
+}
+
+/*
+ * Return true if @cb_break no longer matches the sum of the vnode's break
+ * counter and the live break counter on the vnode's volume.
+ */
+static inline bool afs_cb_is_broken(unsigned int cb_break,
+				    const struct afs_vnode *vnode)
+{
+	unsigned int current_break = vnode->cb_break + vnode->volume->cb_v_break;
+
+	return current_break != cb_break;
+}
+
+/*
+ * cell.c
+ */
+extern int afs_cell_init(struct afs_net *, const char *);
+extern struct afs_cell *afs_find_cell(struct afs_net *, const char *, unsigned,
+ enum afs_cell_trace);
+extern struct afs_cell *afs_lookup_cell(struct afs_net *, const char *, unsigned,
+ const char *, bool);
+extern struct afs_cell *afs_use_cell(struct afs_cell *, enum afs_cell_trace);
+extern void afs_unuse_cell(struct afs_net *, struct afs_cell *, enum afs_cell_trace);
+extern struct afs_cell *afs_get_cell(struct afs_cell *, enum afs_cell_trace);
+extern void afs_see_cell(struct afs_cell *, enum afs_cell_trace);
+extern void afs_put_cell(struct afs_cell *, enum afs_cell_trace);
+extern void afs_queue_cell(struct afs_cell *, enum afs_cell_trace);
+extern void afs_manage_cells(struct work_struct *);
+extern void afs_cells_timer(struct timer_list *);
+extern void __net_exit afs_cell_purge(struct afs_net *);
+
+/*
+ * cmservice.c
+ */
+extern bool afs_cm_incoming_call(struct afs_call *);
+
+/*
+ * dir.c
+ */
+extern const struct file_operations afs_dir_file_operations;
+extern const struct inode_operations afs_dir_inode_operations;
+extern const struct address_space_operations afs_dir_aops;
+extern const struct dentry_operations afs_fs_dentry_operations;
+
+extern void afs_d_release(struct dentry *);
+extern void afs_check_for_remote_deletion(struct afs_operation *);
+
+/*
+ * dir_edit.c
+ */
+extern void afs_edit_dir_add(struct afs_vnode *, struct qstr *, struct afs_fid *,
+ enum afs_edit_dir_reason);
+extern void afs_edit_dir_remove(struct afs_vnode *, struct qstr *, enum afs_edit_dir_reason);
+
+/*
+ * dir_silly.c
+ */
+extern int afs_sillyrename(struct afs_vnode *, struct afs_vnode *,
+ struct dentry *, struct key *);
+extern int afs_silly_iput(struct dentry *, struct inode *);
+
+/*
+ * dynroot.c
+ */
+extern const struct inode_operations afs_dynroot_inode_operations;
+extern const struct dentry_operations afs_dynroot_dentry_operations;
+
+extern struct inode *afs_try_auto_mntpt(struct dentry *, struct inode *);
+extern int afs_dynroot_mkdir(struct afs_net *, struct afs_cell *);
+extern void afs_dynroot_rmdir(struct afs_net *, struct afs_cell *);
+extern int afs_dynroot_populate(struct super_block *);
+extern void afs_dynroot_depopulate(struct super_block *);
+
+/*
+ * file.c
+ */
+extern const struct address_space_operations afs_file_aops;
+extern const struct address_space_operations afs_symlink_aops;
+extern const struct inode_operations afs_file_inode_operations;
+extern const struct file_operations afs_file_operations;
+extern const struct netfs_request_ops afs_req_ops;
+
+extern int afs_cache_wb_key(struct afs_vnode *, struct afs_file *);
+extern void afs_put_wb_key(struct afs_wb_key *);
+extern int afs_open(struct inode *, struct file *);
+extern int afs_release(struct inode *, struct file *);
+extern int afs_fetch_data(struct afs_vnode *, struct afs_read *);
+extern struct afs_read *afs_alloc_read(gfp_t);
+extern void afs_put_read(struct afs_read *);
+extern int afs_write_inode(struct inode *, struct writeback_control *);
+
+/*
+ * Take an extra reference on a read request and hand the same pointer back.
+ * @req is dereferenced unconditionally, so it must not be NULL.
+ */
+static inline struct afs_read *afs_get_read(struct afs_read *req)
+{
+	struct afs_read *held = req;
+
+	refcount_inc(&held->usage);
+	return held;
+}
+
+/*
+ * flock.c
+ */
+extern struct workqueue_struct *afs_lock_manager;
+
+extern void afs_lock_op_done(struct afs_call *);
+extern void afs_lock_work(struct work_struct *);
+extern void afs_lock_may_be_available(struct afs_vnode *);
+extern int afs_lock(struct file *, int, struct file_lock *);
+extern int afs_flock(struct file *, int, struct file_lock *);
+
+/*
+ * fsclient.c
+ */
+extern void afs_fs_fetch_status(struct afs_operation *);
+extern void afs_fs_fetch_data(struct afs_operation *);
+extern void afs_fs_create_file(struct afs_operation *);
+extern void afs_fs_make_dir(struct afs_operation *);
+extern void afs_fs_remove_file(struct afs_operation *);
+extern void afs_fs_remove_dir(struct afs_operation *);
+extern void afs_fs_link(struct afs_operation *);
+extern void afs_fs_symlink(struct afs_operation *);
+extern void afs_fs_rename(struct afs_operation *);
+extern void afs_fs_store_data(struct afs_operation *);
+extern void afs_fs_setattr(struct afs_operation *);
+extern void afs_fs_get_volume_status(struct afs_operation *);
+extern void afs_fs_set_lock(struct afs_operation *);
+extern void afs_fs_extend_lock(struct afs_operation *);
+extern void afs_fs_release_lock(struct afs_operation *);
+extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *,
+ struct afs_addr_cursor *, struct key *);
+extern bool afs_fs_get_capabilities(struct afs_net *, struct afs_server *,
+ struct afs_addr_cursor *, struct key *);
+extern void afs_fs_inline_bulk_status(struct afs_operation *);
+
+struct afs_acl { /* ACL buffer with trailing flexible data array */
+ u32 size; /* Presumably the number of bytes in data[] - confirm */
+ u8 data[];
+};
+
+extern void afs_fs_fetch_acl(struct afs_operation *);
+extern void afs_fs_store_acl(struct afs_operation *);
+
+/*
+ * fs_operation.c
+ */
+extern struct afs_operation *afs_alloc_operation(struct key *, struct afs_volume *);
+extern int afs_put_operation(struct afs_operation *);
+extern bool afs_begin_vnode_operation(struct afs_operation *);
+extern void afs_wait_for_operation(struct afs_operation *);
+extern int afs_do_sync_operation(struct afs_operation *);
+
+/*
+ * Mark an operation as having failed for lack of memory by storing -ENOMEM
+ * in its error field.
+ */
+static inline void afs_op_nomem(struct afs_operation *op)
+{
+	op->error = -ENOMEM;
+}
+
+/*
+ * Attach @vnode to slot @n of the operation's file[] array and flag that the
+ * I/O lock is needed on it.  @n is not range-checked here.
+ */
+static inline void afs_op_set_vnode(struct afs_operation *op, unsigned int n,
+				    struct afs_vnode *vnode)
+{
+	struct afs_vnode_param *vp = &op->file[n];
+
+	vp->vnode = vnode;
+	vp->need_io_lock = true;
+}
+
+/*
+ * Copy *@fid into slot @n of the operation's file[] array.  @n is not
+ * range-checked here.
+ */
+static inline void afs_op_set_fid(struct afs_operation *op, unsigned int n,
+				  const struct afs_fid *fid)
+{
+	struct afs_vnode_param *vp = &op->file[n];
+
+	vp->fid = *fid;
+}
+
+/*
+ * fs_probe.c
+ */
+extern void afs_fileserver_probe_result(struct afs_call *);
+extern void afs_fs_probe_fileserver(struct afs_net *, struct afs_server *, struct key *, bool);
+extern int afs_wait_for_fs_probes(struct afs_server_list *, unsigned long);
+extern void afs_probe_fileserver(struct afs_net *, struct afs_server *);
+extern void afs_fs_probe_dispatcher(struct work_struct *);
+extern int afs_wait_for_one_fs_probe(struct afs_server *, bool);
+extern void afs_fs_probe_cleanup(struct afs_net *);
+
+/*
+ * inode.c
+ */
+extern const struct afs_operation_ops afs_fetch_status_operation;
+
+extern void afs_vnode_commit_status(struct afs_operation *, struct afs_vnode_param *);
+extern int afs_fetch_status(struct afs_vnode *, struct key *, bool, afs_access_t *);
+extern int afs_ilookup5_test_by_fid(struct inode *, void *);
+extern struct inode *afs_iget_pseudo_dir(struct super_block *, bool);
+extern struct inode *afs_iget(struct afs_operation *, struct afs_vnode_param *);
+extern struct inode *afs_root_iget(struct super_block *, struct key *);
+extern bool afs_check_validity(struct afs_vnode *);
+extern int afs_validate(struct afs_vnode *, struct key *);
+extern int afs_getattr(struct user_namespace *mnt_userns, const struct path *,
+ struct kstat *, u32, unsigned int);
+extern int afs_setattr(struct user_namespace *mnt_userns, struct dentry *, struct iattr *);
+extern void afs_evict_inode(struct inode *);
+extern int afs_drop_inode(struct inode *);
+
+/*
+ * main.c
+ */
+extern struct workqueue_struct *afs_wq;
+extern int afs_net_id;
+
+/*
+ * Look up the AFS data attached to network namespace @net, using
+ * net_generic() with afs_net_id.
+ */
+static inline struct afs_net *afs_net(struct net *net)
+{
+	struct afs_net *anet = net_generic(net, afs_net_id);
+
+	return anet;
+}
+
+/*
+ * Map a superblock to its afs_net through the net_ns recorded in the AFS
+ * superblock info.
+ */
+static inline struct afs_net *afs_sb2net(struct super_block *sb)
+{
+	return afs_net(AFS_FS_S(sb)->net_ns);
+}
+
+/*
+ * Map a dentry to its afs_net by way of the dentry's superblock.
+ */
+static inline struct afs_net *afs_d2net(struct dentry *dentry)
+{
+	struct super_block *sb = dentry->d_sb;
+
+	return afs_sb2net(sb);
+}
+
+/*
+ * Map an inode to its afs_net by way of the inode's superblock.
+ */
+static inline struct afs_net *afs_i2net(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+
+	return afs_sb2net(sb);
+}
+
+/*
+ * Map a vnode to its afs_net by way of the VFS inode embedded in the vnode's
+ * netfs context.
+ */
+static inline struct afs_net *afs_v2net(struct afs_vnode *vnode)
+{
+	struct inode *inode = &vnode->netfs.inode;
+
+	return afs_i2net(inode);
+}
+
+/*
+ * Map a socket to its afs_net: the socket's network namespace is obtained
+ * with sock_net() and looked up through net_generic() with afs_net_id.
+ */
+static inline struct afs_net *afs_sock2net(struct sock *sk)
+{
+	struct net *net = sock_net(sk);
+
+	return net_generic(net, afs_net_id);
+}
+
+/*
+ * Atomically increment the counter at @s.
+ */
+static inline void __afs_stat(atomic_t *s)
+{
+	atomic_inc(s);
+}
+
+/* Increment counter member @n of the afs_net that @vnode maps to. */
+#define afs_stat_v(vnode, n) __afs_stat(&afs_v2net(vnode)->n)
+
+/*
+ * misc.c
+ */
+extern int afs_abort_to_error(u32);
+extern void afs_prioritise_error(struct afs_error *, int, u32);
+
+/*
+ * mntpt.c
+ */
+extern const struct inode_operations afs_mntpt_inode_operations;
+extern const struct inode_operations afs_autocell_inode_operations;
+extern const struct file_operations afs_mntpt_file_operations;
+
+extern struct vfsmount *afs_d_automount(struct path *);
+extern void afs_mntpt_kill_timer(void);
+
+/*
+ * proc.c
+ *
+ * With CONFIG_PROC_FS enabled these are real functions declared extern;
+ * otherwise they are replaced by inline stubs that do nothing, with the
+ * int-returning ones reporting 0.
+ */
+#ifdef CONFIG_PROC_FS
+extern int __net_init afs_proc_init(struct afs_net *);
+extern void __net_exit afs_proc_cleanup(struct afs_net *);
+extern int afs_proc_cell_setup(struct afs_cell *);
+extern void afs_proc_cell_remove(struct afs_cell *);
+extern void afs_put_sysnames(struct afs_sysnames *);
+#else
+static inline int afs_proc_init(struct afs_net *net) { return 0; }
+static inline void afs_proc_cleanup(struct afs_net *net) {}
+static inline int afs_proc_cell_setup(struct afs_cell *cell) { return 0; }
+static inline void afs_proc_cell_remove(struct afs_cell *cell) {}
+static inline void afs_put_sysnames(struct afs_sysnames *sysnames) {}
+#endif
+
+/*
+ * rotate.c
+ */
+extern bool afs_select_fileserver(struct afs_operation *);
+extern void afs_dump_edestaddrreq(const struct afs_operation *);
+
+/*
+ * rxrpc.c
+ */
+extern struct workqueue_struct *afs_async_calls;
+
+extern int __net_init afs_open_socket(struct afs_net *);
+extern void __net_exit afs_close_socket(struct afs_net *);
+extern void afs_charge_preallocation(struct work_struct *);
+extern void afs_put_call(struct afs_call *);
+extern void afs_make_call(struct afs_addr_cursor *, struct afs_call *, gfp_t);
+extern long afs_wait_for_call_to_complete(struct afs_call *, struct afs_addr_cursor *);
+extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
+ const struct afs_call_type *,
+ size_t, size_t);
+extern void afs_flat_call_destructor(struct afs_call *);
+extern void afs_send_empty_reply(struct afs_call *);
+extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
+extern int afs_extract_data(struct afs_call *, bool);
+extern int afs_protocol_error(struct afs_call *, enum afs_eproto_cause);
+
+/*
+ * Tie a call to an operation and dispatch it.  The operation records the call
+ * and its type; the call records the operation and borrows its key; the call
+ * is made interruptible unless the operation carries AFS_OPERATION_UNINTR.
+ * The call is then issued through the operation's address cursor.
+ */
+static inline void afs_make_op_call(struct afs_operation *op, struct afs_call *call,
+				    gfp_t gfp)
+{
+	/* Operation side of the link */
+	op->call = call;
+	op->type = call->type;
+
+	/* Call side of the link */
+	call->op = op;
+	call->key = op->key;
+	call->intr = (op->flags & AFS_OPERATION_UNINTR) == 0;
+
+	afs_make_call(&op->ac, call, gfp);
+}
+
+/*
+ * Prepare to extract @size bytes into @buf: the size is noted in
+ * call->iov_len, the call's first kvec is pointed at the buffer and the
+ * call's default iterator is set up over that single kvec.
+ */
+static inline void afs_extract_begin(struct afs_call *call, void *buf, size_t size)
+{
+	call->iov_len = size;
+
+	call->kvec[0].iov_base = buf;
+	call->kvec[0].iov_len = size;
+
+	iov_iter_kvec(&call->def_iter, ITER_DEST, call->kvec, 1, size);
+}
+
+/*
+ * Prepare to extract sizeof(call->tmp) bytes into call->tmp.
+ */
+static inline void afs_extract_to_tmp(struct afs_call *call)
+{
+	const size_t len = sizeof(call->tmp);
+
+	call->iov_len = len;
+	afs_extract_begin(call, &call->tmp, len);
+}
+
+/*
+ * Prepare to extract sizeof(call->tmp64) bytes into call->tmp64.
+ */
+static inline void afs_extract_to_tmp64(struct afs_call *call)
+{
+	const size_t len = sizeof(call->tmp64);
+
+	call->iov_len = len;
+	afs_extract_begin(call, &call->tmp64, len);
+}
+
+/*
+ * Prepare to throw away @size bytes: the size is noted in call->iov_len and
+ * the call's default iterator is set up as a discard iterator of that length.
+ */
+static inline void afs_extract_discard(struct afs_call *call, size_t size)
+{
+	struct iov_iter *iter = &call->def_iter;
+
+	call->iov_len = size;
+	iov_iter_discard(iter, ITER_DEST, size);
+}
+
+/*
+ * Prepare to extract @size bytes into call->buffer.  No check is made here
+ * that the buffer is large enough.
+ */
+static inline void afs_extract_to_buf(struct afs_call *call, size_t size)
+{
+	void *dest = call->buffer;
+
+	call->iov_len = size;
+	afs_extract_begin(call, dest, size);
+}
+
+/*
+ * Extract reply data for a call by invoking afs_extract_data() with its
+ * second argument false; that function's result is returned unchanged.
+ */
+static inline int afs_transfer_reply(struct afs_call *call)
+{
+	int ret = afs_extract_data(call, false);
+
+	return ret;
+}
+
+/*
+ * Sample the call's state once, without taking the state lock, and report
+ * whether it equals @state.
+ */
+static inline bool afs_check_call_state(struct afs_call *call,
+					enum afs_call_state state)
+{
+	return state == READ_ONCE(call->state);
+}
+
+/*
+ * Move a call from state @from to state @to under the call's state lock.  The
+ * transition (and its tracepoint) only happens if the call is currently in
+ * @from.  Returns true if the state was changed.
+ */
+static inline bool afs_set_call_state(struct afs_call *call,
+				      enum afs_call_state from,
+				      enum afs_call_state to)
+{
+	bool changed;
+
+	spin_lock_bh(&call->state_lock);
+	changed = (call->state == from);
+	if (changed) {
+		call->state = to;
+		trace_afs_call_state(call, from, to, 0, 0);
+	}
+	spin_unlock_bh(&call->state_lock);
+
+	return changed;
+}
+
+/*
+ * Mark a call complete, recording @error and @remote_abort on it.  Nothing is
+ * done if the call is already in the AFS_CALL_COMPLETE state.  The state
+ * change is made under the call's state lock; the completion tracepoint and
+ * any ref drop happen after the lock has been released.
+ */
+static inline void afs_set_call_complete(struct afs_call *call,
+					 int error, u32 remote_abort)
+{
+	enum afs_call_state prev;
+
+	spin_lock_bh(&call->state_lock);
+	prev = call->state;
+	if (prev == AFS_CALL_COMPLETE) {
+		/* Someone else already completed the call. */
+		spin_unlock_bh(&call->state_lock);
+		return;
+	}
+
+	call->abort_code = remote_abort;
+	call->error = error;
+	call->state = AFS_CALL_COMPLETE;
+	trace_afs_call_state(call, prev, AFS_CALL_COMPLETE,
+			     error, remote_abort);
+	spin_unlock_bh(&call->state_lock);
+
+	trace_afs_call_done(call);
+
+	/* Asynchronous calls have two refs to release - one from the alloc and
+	 * one queued with the work item - and we can't just deallocate the
+	 * call because the work item may be queued again.
+	 */
+	if (call->drop_ref)
+		afs_put_call(call);
+}
+
+/*
+ * security.c
+ */
+extern void afs_put_permits(struct afs_permits *);
+extern void afs_clear_permits(struct afs_vnode *);
+extern void afs_cache_permit(struct afs_vnode *, struct key *, unsigned int,
+ struct afs_status_cb *);
+extern void afs_zap_permits(struct rcu_head *);
+extern struct key *afs_request_key(struct afs_cell *);
+extern struct key *afs_request_key_rcu(struct afs_cell *);
+extern int afs_check_permit(struct afs_vnode *, struct key *, afs_access_t *);
+extern int afs_permission(struct user_namespace *, struct inode *, int);
+extern void __exit afs_clean_up_permit_cache(void);
+
+/*
+ * server.c
+ */
+extern spinlock_t afs_server_peer_lock;
+
+extern struct afs_server *afs_find_server(struct afs_net *,
+ const struct sockaddr_rxrpc *);
+extern struct afs_server *afs_find_server_by_uuid(struct afs_net *, const uuid_t *);
+extern struct afs_server *afs_lookup_server(struct afs_cell *, struct key *, const uuid_t *, u32);
+extern struct afs_server *afs_get_server(struct afs_server *, enum afs_server_trace);
+extern struct afs_server *afs_use_server(struct afs_server *, enum afs_server_trace);
+extern void afs_unuse_server(struct afs_net *, struct afs_server *, enum afs_server_trace);
+extern void afs_unuse_server_notime(struct afs_net *, struct afs_server *, enum afs_server_trace);
+extern void afs_put_server(struct afs_net *, struct afs_server *, enum afs_server_trace);
+extern void afs_manage_servers(struct work_struct *);
+extern void afs_servers_timer(struct timer_list *);
+extern void afs_fs_probe_timer(struct timer_list *);
+extern void __net_exit afs_purge_servers(struct afs_net *);
+extern bool afs_check_server_record(struct afs_operation *, struct afs_server *);
+
+/*
+ * Atomically increment the namespace's servers_outstanding counter.
+ */
+static inline void afs_inc_servers_outstanding(struct afs_net *net)
+{
+	atomic_t *counter = &net->servers_outstanding;
+
+	atomic_inc(counter);
+}
+
+/*
+ * Atomically decrement the namespace's servers_outstanding counter and, if it
+ * reaches zero, wake anyone waiting on that variable.
+ */
+static inline void afs_dec_servers_outstanding(struct afs_net *net)
+{
+	if (!atomic_dec_and_test(&net->servers_outstanding))
+		return;
+
+	wake_up_var(&net->servers_outstanding);
+}
+
+/*
+ * Report whether the server's probe_link list node is currently empty, i.e.
+ * not linked into any list.
+ *
+ * NOTE(review): the name implies that a server being probed is one that has
+ * been taken off its probe list - confirm against fs_probe.c.
+ */
+static inline bool afs_is_probing_server(struct afs_server *server)
+{
+	const struct list_head *link = &server->probe_link;
+
+	return list_empty(link);
+}
+
+/*
+ * server_list.c
+ */
+/*
+ * Take a reference on a server list and hand the same pointer back.  @slist
+ * is dereferenced unconditionally, so it must not be NULL.
+ */
+static inline struct afs_server_list *afs_get_serverlist(struct afs_server_list *slist)
+{
+	struct afs_server_list *held = slist;
+
+	refcount_inc(&held->usage);
+	return held;
+}
+
+extern void afs_put_serverlist(struct afs_net *, struct afs_server_list *);
+extern struct afs_server_list *afs_alloc_server_list(struct afs_cell *, struct key *,
+ struct afs_vldb_entry *,
+ u8);
+extern bool afs_annotate_server_list(struct afs_server_list *, struct afs_server_list *);
+
+/*
+ * super.c
+ */
+extern int __init afs_fs_init(void);
+extern void afs_fs_exit(void);
+
+/*
+ * vlclient.c
+ */
+extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *,
+ const char *, int);
+extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *, const uuid_t *);
+extern struct afs_call *afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *,
+ struct key *, struct afs_vlserver *, unsigned int);
+extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *, const uuid_t *);
+extern char *afs_yfsvl_get_cell_name(struct afs_vl_cursor *);
+
+/*
+ * vl_alias.c
+ */
+extern int afs_cell_detect_alias(struct afs_cell *, struct key *);
+
+/*
+ * vl_probe.c
+ */
+extern void afs_vlserver_probe_result(struct afs_call *);
+extern int afs_send_vl_probes(struct afs_net *, struct key *, struct afs_vlserver_list *);
+extern int afs_wait_for_vl_probes(struct afs_vlserver_list *, unsigned long);
+
+/*
+ * vl_rotate.c
+ */
+extern bool afs_begin_vlserver_operation(struct afs_vl_cursor *,
+ struct afs_cell *, struct key *);
+extern bool afs_select_vlserver(struct afs_vl_cursor *);
+extern bool afs_select_current_vlserver(struct afs_vl_cursor *);
+extern int afs_end_vlserver_operation(struct afs_vl_cursor *);
+
+/*
+ * vlserver_list.c
+ */
+/*
+ * Take an extra reference on a VL server record and return it.  @vlserver
+ * must not be NULL.
+ */
+static inline struct afs_vlserver *afs_get_vlserver(struct afs_vlserver *vlserver)
+{
+	refcount_t *ref = &vlserver->ref;
+
+	refcount_inc(ref);
+	return vlserver;
+}
+
+/*
+ * Take an extra reference on a VL server list and return it.  A NULL list is
+ * tolerated and simply passed back.
+ */
+static inline struct afs_vlserver_list *afs_get_vlserverlist(struct afs_vlserver_list *vllist)
+{
+	if (!vllist)
+		return NULL;
+
+	refcount_inc(&vllist->ref);
+	return vllist;
+}
+
+extern struct afs_vlserver *afs_alloc_vlserver(const char *, size_t, unsigned short);
+extern void afs_put_vlserver(struct afs_net *, struct afs_vlserver *);
+extern struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int);
+extern void afs_put_vlserverlist(struct afs_net *, struct afs_vlserver_list *);
+extern struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *,
+ const void *, size_t);
+
+/*
+ * volume.c
+ */
+extern struct afs_volume *afs_create_volume(struct afs_fs_context *);
+extern int afs_activate_volume(struct afs_volume *);
+extern void afs_deactivate_volume(struct afs_volume *);
+bool afs_try_get_volume(struct afs_volume *volume, enum afs_volume_trace reason);
+extern struct afs_volume *afs_get_volume(struct afs_volume *, enum afs_volume_trace);
+extern void afs_put_volume(struct afs_net *, struct afs_volume *, enum afs_volume_trace);
+extern int afs_check_volume_status(struct afs_volume *, struct afs_operation *);
+
+/*
+ * write.c
+ */
+#ifdef CONFIG_AFS_FSCACHE
+bool afs_dirty_folio(struct address_space *, struct folio *);
+#else
+#define afs_dirty_folio filemap_dirty_folio
+#endif
+extern int afs_write_begin(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len,
+ struct page **pagep, void **fsdata);
+extern int afs_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata);
+extern int afs_writepage(struct page *, struct writeback_control *);
+extern int afs_writepages(struct address_space *, struct writeback_control *);
+extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
+extern int afs_fsync(struct file *, loff_t, loff_t, int);
+extern vm_fault_t afs_page_mkwrite(struct vm_fault *vmf);
+extern void afs_prune_wb_keys(struct afs_vnode *);
+int afs_launder_folio(struct folio *);
+
+/*
+ * xattr.c
+ */
+extern const struct xattr_handler *afs_xattr_handlers[];
+
+/*
+ * yfsclient.c
+ */
+extern void yfs_fs_fetch_data(struct afs_operation *);
+extern void yfs_fs_create_file(struct afs_operation *);
+extern void yfs_fs_make_dir(struct afs_operation *);
+extern void yfs_fs_remove_file2(struct afs_operation *);
+extern void yfs_fs_remove_file(struct afs_operation *);
+extern void yfs_fs_remove_dir(struct afs_operation *);
+extern void yfs_fs_link(struct afs_operation *);
+extern void yfs_fs_symlink(struct afs_operation *);
+extern void yfs_fs_rename(struct afs_operation *);
+extern void yfs_fs_store_data(struct afs_operation *);
+extern void yfs_fs_setattr(struct afs_operation *);
+extern void yfs_fs_get_volume_status(struct afs_operation *);
+extern void yfs_fs_set_lock(struct afs_operation *);
+extern void yfs_fs_extend_lock(struct afs_operation *);
+extern void yfs_fs_release_lock(struct afs_operation *);
+extern void yfs_fs_fetch_status(struct afs_operation *);
+extern void yfs_fs_inline_bulk_status(struct afs_operation *);
+
+struct yfs_acl { /* ACLs and associated values for the YFS opaque-ACL operations declared below */
+ struct afs_acl *acl; /* Dir/file/symlink ACL */
+ struct afs_acl *vol_acl; /* Whole volume ACL */
+ u32 inherit_flag; /* True if ACL is inherited from parent dir */
+ u32 num_cleaned; /* Number of ACEs removed due to subject removal */
+ unsigned int flags; /* Mask of the YFS_ACL_WANT_* bits defined below */
+#define YFS_ACL_WANT_ACL 0x01 /* Set if caller wants ->acl */
+#define YFS_ACL_WANT_VOL_ACL 0x02 /* Set if caller wants ->vol_acl */
+};
+
+extern void yfs_free_opaque_acl(struct yfs_acl *);
+extern void yfs_fs_fetch_opaque_acl(struct afs_operation *);
+extern void yfs_fs_store_opaque_acl2(struct afs_operation *);
+
+/*
+ * Miscellaneous inline functions.
+ */
+/*
+ * Map a VFS inode to the afs_vnode that embeds it (as netfs.inode).
+ */
+static inline struct afs_vnode *AFS_FS_I(struct inode *inode)
+{
+	struct afs_vnode *vnode;
+
+	vnode = container_of(inode, struct afs_vnode, netfs.inode);
+	return vnode;
+}
+
+/*
+ * Map an afs_vnode to the VFS inode embedded within it (netfs.inode).
+ */
+static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode)
+{
+	struct inode *inode = &vnode->netfs.inode;
+
+	return inode;
+}
+
+/*
+ * Record on a dentry that it has been changed by an operation.  Provided the
+ * operation did not fail, d_fsdata is overwritten with the data version number
+ * held in the directory's status record.  The stored value is allowed to go
+ * backwards since that merely causes a revalidation.
+ */
+static inline void afs_update_dentry_version(struct afs_operation *op,
+					     struct afs_vnode_param *dir_vp,
+					     struct dentry *dentry)
+{
+	/* Leave the dentry alone if the operation failed. */
+	if (op->error)
+		return;
+
+	dentry->d_fsdata = (void *)(unsigned long)dir_vp->scb.status.data_version;
+}
+
+/*
+ * Set both the file size and the block count on a vnode's inode.  The block
+ * count is an estimate in 512-byte units: the size is rounded up to whole
+ * kilobytes (for consistency with other AFS clients) and then doubled.
+ */
+static inline void afs_set_i_size(struct afs_vnode *vnode, u64 size)
+{
+	struct inode *inode = &vnode->netfs.inode;
+	u64 nr_kib = (size + 1023) >> 10;
+
+	i_size_write(inode, size);
+	inode->i_blocks = nr_kib << 1;
+}
+
+/*
+ * Detect a conflicting operation on a directory we have just unlinked from.
+ * If the directory's data version is not exactly the version seen before the
+ * operation plus the expected delta, someone else slipped a link or an unlink
+ * in, so nlink on an AFS (but not YFS) file cannot be trusted; flag the
+ * operation accordingly.
+ */
+static inline void afs_check_dir_conflict(struct afs_operation *op,
+					  struct afs_vnode_param *dvp)
+{
+	/* The version moved on by exactly what we expected: no conflict. */
+	if (dvp->dv_before + dvp->dv_delta == dvp->scb.status.data_version)
+		return;
+
+	op->flags |= AFS_OPERATION_DIR_CONFLICT;
+}
+
+/*
+ * Emit the afs_io_error tracepoint for a call, noting where the problem was
+ * detected, and return -EIO for the caller to propagate.
+ */
+static inline int afs_io_error(struct afs_call *call, enum afs_io_error where)
+{
+	const int err = -EIO;
+
+	trace_afs_io_error(call->debug_id, err, where);
+	return err;
+}
+
+/*
+ * Emit the afs_file_error tracepoint for a vnode, noting where the problem
+ * was detected, and return -EIO for the caller to propagate.
+ */
+static inline int afs_bad(struct afs_vnode *vnode, enum afs_file_error where)
+{
+	const int err = -EIO;
+
+	trace_afs_file_error(vnode, err, where);
+	return err;
+}
+
+/*****************************************************************************/
+/*
+ * debug tracing
+ *
+ * dbgprintk() prints a message through printk(), prefixed with the current
+ * task's comm and terminated with a newline.  kenter(), kleave() and kdebug()
+ * build on it and always print; kenter() and kleave() also insert the name of
+ * the calling function.
+ *
+ * _enter(), _leave() and _debug() are selected at build time:
+ *  - if __KDEBUG is defined they forward unconditionally to kenter(),
+ *    kleave() and kdebug();
+ *  - else if CONFIG_AFS_DEBUG is set they forward only when the matching
+ *    AFS_DEBUG_KENTER / AFS_DEBUG_KLEAVE / AFS_DEBUG_KDEBUG bit is set in
+ *    afs_debug;
+ *  - otherwise they expand to no_printk() with the same format and arguments.
+ */
+extern unsigned afs_debug;
+
+#define dbgprintk(FMT,...) \
+ printk("[%-6.6s] "FMT"\n", current->comm ,##__VA_ARGS__)
+
+#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
+#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
+#define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__)
+
+
+#if defined(__KDEBUG)
+#define _enter(FMT,...) kenter(FMT,##__VA_ARGS__)
+#define _leave(FMT,...) kleave(FMT,##__VA_ARGS__)
+#define _debug(FMT,...) kdebug(FMT,##__VA_ARGS__)
+
+#elif defined(CONFIG_AFS_DEBUG)
+#define AFS_DEBUG_KENTER 0x01
+#define AFS_DEBUG_KLEAVE 0x02
+#define AFS_DEBUG_KDEBUG 0x04
+
+#define _enter(FMT,...) \
+do { \
+ if (unlikely(afs_debug & AFS_DEBUG_KENTER)) \
+ kenter(FMT,##__VA_ARGS__); \
+} while (0)
+
+#define _leave(FMT,...) \
+do { \
+ if (unlikely(afs_debug & AFS_DEBUG_KLEAVE)) \
+ kleave(FMT,##__VA_ARGS__); \
+} while (0)
+
+#define _debug(FMT,...) \
+do { \
+ if (unlikely(afs_debug & AFS_DEBUG_KDEBUG)) \
+ kdebug(FMT,##__VA_ARGS__); \
+} while (0)
+
+#else
+#define _enter(FMT,...) no_printk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
+#define _leave(FMT,...) no_printk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
+#define _debug(FMT,...) no_printk(" "FMT ,##__VA_ARGS__)
+#endif
+
+/*
+ * debug assertion checking
+ *
+ * The "#if 1" below means the checking variants are always compiled in; the
+ * #else branch holds empty stubs that would be used if it were switched off.
+ *
+ * When a check fails, each macro logs "AFS: Assertion failed" at KERN_ERR and
+ * then calls BUG():
+ *  - ASSERT(X): fails if X is false.
+ *  - ASSERTCMP(X, OP, Y): fails if "X OP Y" is false; also logs both operands
+ *    in decimal and hex.
+ *  - ASSERTRANGE(L, OP1, N, OP2, H): fails unless both "L OP1 N" and
+ *    "N OP2 H" hold; also logs all three operands.
+ *  - ASSERTIF(C, X) / ASSERTIFCMP(C, X, OP, Y): as ASSERT / ASSERTCMP, but
+ *    only checked when C is true.
+ *
+ * The operands are cast to unsigned long for logging, and they are evaluated
+ * again when logged, so arguments should be free of side effects.
+ */
+#if 1 // defined(__KDEBUGALL)
+
+#define ASSERT(X) \
+do { \
+ if (unlikely(!(X))) { \
+ printk(KERN_ERR "\n"); \
+ printk(KERN_ERR "AFS: Assertion failed\n"); \
+ BUG(); \
+ } \
+} while(0)
+
+#define ASSERTCMP(X, OP, Y) \
+do { \
+ if (unlikely(!((X) OP (Y)))) { \
+ printk(KERN_ERR "\n"); \
+ printk(KERN_ERR "AFS: Assertion failed\n"); \
+ printk(KERN_ERR "%lu " #OP " %lu is false\n", \
+ (unsigned long)(X), (unsigned long)(Y)); \
+ printk(KERN_ERR "0x%lx " #OP " 0x%lx is false\n", \
+ (unsigned long)(X), (unsigned long)(Y)); \
+ BUG(); \
+ } \
+} while(0)
+
+#define ASSERTRANGE(L, OP1, N, OP2, H) \
+do { \
+ if (unlikely(!((L) OP1 (N)) || !((N) OP2 (H)))) { \
+ printk(KERN_ERR "\n"); \
+ printk(KERN_ERR "AFS: Assertion failed\n"); \
+ printk(KERN_ERR "%lu "#OP1" %lu "#OP2" %lu is false\n", \
+ (unsigned long)(L), (unsigned long)(N), \
+ (unsigned long)(H)); \
+ printk(KERN_ERR "0x%lx "#OP1" 0x%lx "#OP2" 0x%lx is false\n", \
+ (unsigned long)(L), (unsigned long)(N), \
+ (unsigned long)(H)); \
+ BUG(); \
+ } \
+} while(0)
+
+#define ASSERTIF(C, X) \
+do { \
+ if (unlikely((C) && !(X))) { \
+ printk(KERN_ERR "\n"); \
+ printk(KERN_ERR "AFS: Assertion failed\n"); \
+ BUG(); \
+ } \
+} while(0)
+
+#define ASSERTIFCMP(C, X, OP, Y) \
+do { \
+ if (unlikely((C) && !((X) OP (Y)))) { \
+ printk(KERN_ERR "\n"); \
+ printk(KERN_ERR "AFS: Assertion failed\n"); \
+ printk(KERN_ERR "%lu " #OP " %lu is false\n", \
+ (unsigned long)(X), (unsigned long)(Y)); \
+ printk(KERN_ERR "0x%lx " #OP " 0x%lx is false\n", \
+ (unsigned long)(X), (unsigned long)(Y)); \
+ BUG(); \
+ } \
+} while(0)
+
+#else
+
+#define ASSERT(X) \
+do { \
+} while(0)
+
+#define ASSERTCMP(X, OP, Y) \
+do { \
+} while(0)
+
+#define ASSERTRANGE(L, OP1, N, OP2, H) \
+do { \
+} while(0)
+
+#define ASSERTIF(C, X) \
+do { \
+} while(0)
+
+#define ASSERTIFCMP(C, X, OP, Y) \
+do { \
+} while(0)
+
+#endif /* __KDEBUGALL */
diff --git a/fs/afs/main.c b/fs/afs/main.c
new file mode 100644
index 000000000..eae288c8d
--- /dev/null
+++ b/fs/afs/main.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AFS client file system
+ *
+ * Copyright (C) 2002,5 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/completion.h>
+#include <linux/sched.h>
+#include <linux/random.h>
+#include <linux/proc_fs.h>
+#define CREATE_TRACE_POINTS
+#include "internal.h"
+
+MODULE_DESCRIPTION("AFS Client File System");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+
+unsigned afs_debug; /* debug mask, exposed as the "debug" module parameter */
+module_param_named(debug, afs_debug, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(debug, "AFS debugging mask");
+
+static char *rootcell; /* "rootcell" module parameter; handed to afs_cell_init() for each namespace */
+
+module_param(rootcell, charp, 0);
+MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list");
+
+struct workqueue_struct *afs_wq; /* the "afs" workqueue allocated in afs_init() */
+static struct proc_dir_entry *afs_proc_symlink; /* the "fs/afs" /proc symlink created in afs_init() */
+
+#if defined(CONFIG_ALPHA) /* choose the initial sysname string from the architecture being built; afs_net_init() installs it as subs[0] */
+const char afs_init_sysname[] = "alpha_linux26";
+#elif defined(CONFIG_X86_64)
+const char afs_init_sysname[] = "amd64_linux26";
+#elif defined(CONFIG_ARM)
+const char afs_init_sysname[] = "arm_linux26";
+#elif defined(CONFIG_ARM64)
+const char afs_init_sysname[] = "aarch64_linux26";
+#elif defined(CONFIG_X86_32)
+const char afs_init_sysname[] = "i386_linux26";
+#elif defined(CONFIG_IA64)
+const char afs_init_sysname[] = "ia64_linux26";
+#elif defined(CONFIG_PPC64)
+const char afs_init_sysname[] = "ppc64_linux26";
+#elif defined(CONFIG_PPC32)
+const char afs_init_sysname[] = "ppc_linux26";
+#elif defined(CONFIG_S390)
+#ifdef CONFIG_64BIT
+const char afs_init_sysname[] = "s390x_linux26";
+#else
+const char afs_init_sysname[] = "s390_linux26";
+#endif
+#elif defined(CONFIG_SPARC64)
+const char afs_init_sysname[] = "sparc64_linux26";
+#elif defined(CONFIG_SPARC32)
+const char afs_init_sysname[] = "sparc_linux26";
+#else /* no architecture matched */
+const char afs_init_sysname[] = "unknown_linux26";
+#endif
+
+/*
+ * Initialise an AFS network namespace record.
+ *
+ * Fills in the afs_net belonging to @net_ns: marks it live, gives it a random
+ * UUID, and initialises its locks, lists, trees, work items and timers.  It
+ * then installs a default sysnames list holding afs_init_sysname, registers
+ * the /proc entries, initialises the cell database from the "rootcell" module
+ * parameter and finally opens the transport socket.
+ *
+ * Returns 0 on success or a negative error code.  On failure the steps that
+ * had already succeeded are undone through the error_* labels (each label
+ * falls through to the ones below it) and net->live is cleared.
+ */
+static int __net_init afs_net_init(struct net *net_ns)
+{
+ struct afs_sysnames *sysnames;
+ struct afs_net *net = afs_net(net_ns);
+ int ret;
+
+ net->net = net_ns;
+ net->live = true;
+ generate_random_uuid((unsigned char *)&net->uuid);
+
+ INIT_WORK(&net->charge_preallocation_work, afs_charge_preallocation);
+ mutex_init(&net->socket_mutex);
+
+ net->cells = RB_ROOT;
+ init_rwsem(&net->cells_lock);
+ INIT_WORK(&net->cells_manager, afs_manage_cells);
+ timer_setup(&net->cells_timer, afs_cells_timer, 0);
+
+ mutex_init(&net->cells_alias_lock);
+ mutex_init(&net->proc_cells_lock);
+ INIT_HLIST_HEAD(&net->proc_cells);
+
+ seqlock_init(&net->fs_lock);
+ net->fs_servers = RB_ROOT;
+ INIT_LIST_HEAD(&net->fs_probe_fast);
+ INIT_LIST_HEAD(&net->fs_probe_slow);
+ INIT_HLIST_HEAD(&net->fs_proc);
+
+ INIT_HLIST_HEAD(&net->fs_addresses4);
+ INIT_HLIST_HEAD(&net->fs_addresses6);
+ seqlock_init(&net->fs_addr_lock);
+
+ INIT_WORK(&net->fs_manager, afs_manage_servers);
+ timer_setup(&net->fs_timer, afs_servers_timer, 0);
+ INIT_WORK(&net->fs_prober, afs_fs_probe_dispatcher);
+ timer_setup(&net->fs_probe_timer, afs_fs_probe_timer, 0);
+ atomic_set(&net->servers_outstanding, 1); /* NOTE(review): starts at 1, not 0 - presumably a bias dropped at purge time; confirm in server.c */
+
+ ret = -ENOMEM;
+ sysnames = kzalloc(sizeof(*sysnames), GFP_KERNEL);
+ if (!sysnames)
+ goto error_sysnames;
+ sysnames->subs[0] = (char *)&afs_init_sysname; /* single default entry: the build-time arch string */
+ sysnames->nr = 1;
+ refcount_set(&sysnames->usage, 1);
+ net->sysnames = sysnames;
+ rwlock_init(&net->sysnames_lock);
+
+ /* Register the /proc stuff */
+ ret = afs_proc_init(net);
+ if (ret < 0)
+ goto error_proc;
+
+ /* Initialise the cell DB */
+ ret = afs_cell_init(net, rootcell);
+ if (ret < 0)
+ goto error_cell_init;
+
+ /* Create the RxRPC transport */
+ ret = afs_open_socket(net);
+ if (ret < 0)
+ goto error_open_socket;
+
+ return 0;
+
+error_open_socket: /* cell DB was set up: same purge sequence as afs_net_exit(), minus closing the socket */
+ net->live = false;
+ afs_fs_probe_cleanup(net);
+ afs_cell_purge(net);
+ afs_purge_servers(net);
+error_cell_init:
+ net->live = false;
+ afs_proc_cleanup(net);
+error_proc:
+ afs_put_sysnames(net->sysnames);
+error_sysnames:
+ net->live = false;
+ return ret;
+}
+
+/*
+ * Clean up and destroy an AFS network namespace record.
+ *
+ * Clears net->live first, then cleans up the fileserver probes, purges the
+ * cells and servers, closes the socket, removes the /proc entries and drops
+ * the reference on the sysnames list held in net->sysnames.
+ */
+static void __net_exit afs_net_exit(struct net *net_ns)
+{
+ struct afs_net *net = afs_net(net_ns);
+
+ net->live = false;
+ afs_fs_probe_cleanup(net);
+ afs_cell_purge(net);
+ afs_purge_servers(net);
+ afs_close_socket(net);
+ afs_proc_cleanup(net);
+ afs_put_sysnames(net->sysnames);
+}
+
+static struct pernet_operations afs_net_ops = { /* per-network-namespace hooks; registered in afs_init() */
+ .init = afs_net_init,
+ .exit = afs_net_exit,
+ .id = &afs_net_id,
+ .size = sizeof(struct afs_net), /* size of the per-namespace struct afs_net */
+};
+
+/*
+ * initialise the AFS client FS module
+ *
+ * Allocates the "afs", "kafsd" and "kafs_lockd" workqueues, registers the
+ * per-namespace operations, registers the filesystems and creates the /proc
+ * symlink "fs/afs" pointing at "../self/net/afs".
+ *
+ * On success the non-negative result of afs_fs_init() is returned.  On
+ * failure everything set up so far is torn down in reverse order through the
+ * error_* labels (each falls through to the next), rcu_barrier() is called,
+ * the error is logged and the negative error code is returned.
+ */
+static int __init afs_init(void)
+{
+ int ret = -ENOMEM; /* error used if any workqueue allocation fails */
+
+ printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 registering.\n");
+
+ afs_wq = alloc_workqueue("afs", 0, 0);
+ if (!afs_wq)
+ goto error_afs_wq;
+ afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM, 0);
+ if (!afs_async_calls)
+ goto error_async;
+ afs_lock_manager = alloc_workqueue("kafs_lockd", WQ_MEM_RECLAIM, 0);
+ if (!afs_lock_manager)
+ goto error_lockmgr;
+
+ ret = register_pernet_device(&afs_net_ops);
+ if (ret < 0)
+ goto error_net;
+
+ /* register the filesystems */
+ ret = afs_fs_init();
+ if (ret < 0)
+ goto error_fs;
+
+ afs_proc_symlink = proc_symlink("fs/afs", NULL, "../self/net/afs");
+ if (!afs_proc_symlink) {
+ ret = -ENOMEM;
+ goto error_proc;
+ }
+
+ return ret;
+
+error_proc:
+ afs_fs_exit();
+error_fs:
+ unregister_pernet_device(&afs_net_ops);
+error_net:
+ destroy_workqueue(afs_lock_manager);
+error_lockmgr:
+ destroy_workqueue(afs_async_calls);
+error_async:
+ destroy_workqueue(afs_wq);
+error_afs_wq:
+ rcu_barrier();
+ printk(KERN_ERR "kAFS: failed to register: %d\n", ret);
+ return ret;
+}
+
+/* XXX late_initcall is kludgy, but the only alternative seems to create
+ * a transport upon the first mount, which is worse. Or is it?
+ */
+late_initcall(afs_init); /* must be called after net/ to create socket */
+
+/*
+ * clean up on module removal
+ *
+ * Undoes afs_init() in reverse order: removes the /proc symlink, unregisters
+ * the filesystems and the per-namespace operations and destroys the three
+ * workqueues.  It then cleans up the permit cache and calls rcu_barrier().
+ */
+static void __exit afs_exit(void)
+{
+ printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 unregistering.\n");
+
+ proc_remove(afs_proc_symlink);
+ afs_fs_exit();
+ unregister_pernet_device(&afs_net_ops);
+ destroy_workqueue(afs_lock_manager);
+ destroy_workqueue(afs_async_calls);
+ destroy_workqueue(afs_wq);
+ afs_clean_up_permit_cache();
+ rcu_barrier();
+}
+
+module_exit(afs_exit);
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
new file mode 100644
index 000000000..805328ca5
--- /dev/null
+++ b/fs/afs/misc.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* miscellaneous bits
+ *
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include "internal.h"
+#include "afs_fs.h"
+#include "protocol_uae.h"
+
+/*
+ * convert an AFS abort code to a Linux error number
+ *
+ * @abort_code is looked up in the table below, which is grouped by where the
+ * code comes from: low errno values inserted into the abort namespace, VICE
+ * "special error" codes, Volume Location server errors, the Unified AFS error
+ * table, RXKAD security abort codes and RXGEN_OPCODE.
+ *
+ * Returns the corresponding negative errno; any code not listed maps to
+ * -EREMOTEIO.
+ */
+int afs_abort_to_error(u32 abort_code)
+{
+ switch (abort_code) {
+ /* Low errno codes inserted into abort namespace */
+ case 13: return -EACCES;
+ case 27: return -EFBIG;
+ case 30: return -EROFS;
+
+ /* VICE "special error" codes; 101 - 111 */
+ case VSALVAGE: return -EIO;
+ case VNOVNODE: return -ENOENT;
+ case VNOVOL: return -ENOMEDIUM;
+ case VVOLEXISTS: return -EEXIST;
+ case VNOSERVICE: return -EIO;
+ case VOFFLINE: return -ENOENT;
+ case VONLINE: return -EEXIST;
+ case VDISKFULL: return -ENOSPC;
+ case VOVERQUOTA: return -EDQUOT;
+ case VBUSY: return -EBUSY;
+ case VMOVED: return -ENXIO;
+
+ /* Volume Location server errors */
+ case AFSVL_IDEXIST: return -EEXIST;
+ case AFSVL_IO: return -EREMOTEIO;
+ case AFSVL_NAMEEXIST: return -EEXIST;
+ case AFSVL_CREATEFAIL: return -EREMOTEIO;
+ case AFSVL_NOENT: return -ENOMEDIUM;
+ case AFSVL_EMPTY: return -ENOMEDIUM;
+ case AFSVL_ENTDELETED: return -ENOMEDIUM;
+ case AFSVL_BADNAME: return -EINVAL;
+ case AFSVL_BADINDEX: return -EINVAL;
+ case AFSVL_BADVOLTYPE: return -EINVAL;
+ case AFSVL_BADSERVER: return -EINVAL;
+ case AFSVL_BADPARTITION: return -EINVAL;
+ case AFSVL_REPSFULL: return -EFBIG;
+ case AFSVL_NOREPSERVER: return -ENOENT;
+ case AFSVL_DUPREPSERVER: return -EEXIST;
+ case AFSVL_RWNOTFOUND: return -ENOENT;
+ case AFSVL_BADREFCOUNT: return -EINVAL;
+ case AFSVL_SIZEEXCEEDED: return -EINVAL;
+ case AFSVL_BADENTRY: return -EINVAL;
+ case AFSVL_BADVOLIDBUMP: return -EINVAL;
+ case AFSVL_IDALREADYHASHED: return -EINVAL;
+ case AFSVL_ENTRYLOCKED: return -EBUSY;
+ case AFSVL_BADVOLOPER: return -EBADRQC;
+ case AFSVL_BADRELLOCKTYPE: return -EINVAL;
+ case AFSVL_RERELEASE: return -EREMOTEIO;
+ case AFSVL_BADSERVERFLAG: return -EINVAL;
+ case AFSVL_PERM: return -EACCES;
+ case AFSVL_NOMEM: return -EREMOTEIO;
+
+ /* Unified AFS error table */
+ case UAEPERM: return -EPERM;
+ case UAENOENT: return -ENOENT;
+ case UAEAGAIN: return -EAGAIN;
+ case UAEACCES: return -EACCES;
+ case UAEBUSY: return -EBUSY;
+ case UAEEXIST: return -EEXIST;
+ case UAENOTDIR: return -ENOTDIR;
+ case UAEISDIR: return -EISDIR;
+ case UAEFBIG: return -EFBIG;
+ case UAENOSPC: return -ENOSPC;
+ case UAEROFS: return -EROFS;
+ case UAEMLINK: return -EMLINK;
+ case UAEDEADLK: return -EDEADLK;
+ case UAENAMETOOLONG: return -ENAMETOOLONG;
+ case UAENOLCK: return -ENOLCK;
+ case UAENOTEMPTY: return -ENOTEMPTY;
+ case UAELOOP: return -ELOOP;
+ case UAEOVERFLOW: return -EOVERFLOW;
+ case UAENOMEDIUM: return -ENOMEDIUM;
+ case UAEDQUOT: return -EDQUOT;
+
+ /* RXKAD abort codes; from include/rxrpc/packet.h. ET "RXK" == 0x1260B00 */
+ case RXKADINCONSISTENCY: return -EPROTO;
+ case RXKADPACKETSHORT: return -EPROTO;
+ case RXKADLEVELFAIL: return -EKEYREJECTED;
+ case RXKADTICKETLEN: return -EKEYREJECTED;
+ case RXKADOUTOFSEQUENCE: return -EPROTO;
+ case RXKADNOAUTH: return -EKEYREJECTED;
+ case RXKADBADKEY: return -EKEYREJECTED;
+ case RXKADBADTICKET: return -EKEYREJECTED;
+ case RXKADUNKNOWNKEY: return -EKEYREJECTED;
+ case RXKADEXPIRED: return -EKEYEXPIRED;
+ case RXKADSEALEDINCON: return -EKEYREJECTED;
+ case RXKADDATALEN: return -EKEYREJECTED;
+ case RXKADILLEGALLEVEL: return -EKEYREJECTED;
+
+ case RXGEN_OPCODE: return -ENOTSUPP;
+
+ default: return -EREMOTEIO;
+ }
+}
+
+/*
+ * Select the error to report from a set of errors.
+ *
+ * @e holds the error selected so far and @error is a new candidate.
+ * @abort_code is only consulted when @error is -ECONNABORTED.
+ *
+ * The switch is a priority ladder.  Execution enters at the case matching
+ * @error (or at "default" for any error not listed) and falls through towards
+ * -ECONNRESET.  At each rung it returns without changing @e if e->error is
+ * already one of the errors named on the next rung down.  An error nearer the
+ * bottom of the ladder is therefore never displaced by one nearer the top.
+ * If the bottom is reached and e->responded is not set, @error replaces
+ * e->error.
+ *
+ * -ECONNABORTED is first translated with afs_abort_to_error(); it and
+ * -ENETRESET always overwrite e->error and set e->responded.  An @error of 0
+ * leaves @e untouched.
+ */
+void afs_prioritise_error(struct afs_error *e, int error, u32 abort_code)
+{
+ switch (error) {
+ case 0:
+ return;
+ default:
+ if (e->error == -ETIMEDOUT ||
+ e->error == -ETIME)
+ return;
+ fallthrough;
+ case -ETIMEDOUT:
+ case -ETIME:
+ if (e->error == -ENOMEM ||
+ e->error == -ENONET)
+ return;
+ fallthrough;
+ case -ENOMEM:
+ case -ENONET:
+ if (e->error == -ERFKILL)
+ return;
+ fallthrough;
+ case -ERFKILL:
+ if (e->error == -EADDRNOTAVAIL)
+ return;
+ fallthrough;
+ case -EADDRNOTAVAIL:
+ if (e->error == -ENETUNREACH)
+ return;
+ fallthrough;
+ case -ENETUNREACH:
+ if (e->error == -EHOSTUNREACH)
+ return;
+ fallthrough;
+ case -EHOSTUNREACH:
+ if (e->error == -EHOSTDOWN)
+ return;
+ fallthrough;
+ case -EHOSTDOWN:
+ if (e->error == -ECONNREFUSED)
+ return;
+ fallthrough;
+ case -ECONNREFUSED:
+ if (e->error == -ECONNRESET)
+ return;
+ fallthrough;
+ case -ECONNRESET: /* Responded, but call expired. */
+ if (e->responded)
+ return;
+ e->error = error;
+ return;
+
+ case -ECONNABORTED:
+ error = afs_abort_to_error(abort_code);
+ fallthrough;
+ case -ENETRESET: /* Responded, but we seem to have changed address */
+ e->responded = true;
+ e->error = error;
+ return;
+ }
+}
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
new file mode 100644
index 000000000..97f50e9fd
--- /dev/null
+++ b/fs/afs/mntpt.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* mountpoint management
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/gfp.h>
+#include <linux/fs_context.h>
+#include "internal.h"
+
+
+static struct dentry *afs_mntpt_lookup(struct inode *dir,
+ struct dentry *dentry,
+ unsigned int flags);
+static int afs_mntpt_open(struct inode *inode, struct file *file);
+static void afs_mntpt_expiry_timed_out(struct work_struct *work);
+
+const struct file_operations afs_mntpt_file_operations = { /* file operations for a mountpoint */
+ .open = afs_mntpt_open, /* always fails with -EREMOTE */
+ .llseek = noop_llseek,
+};
+
+const struct inode_operations afs_mntpt_inode_operations = { /* inode operations for a mountpoint */
+ .lookup = afs_mntpt_lookup, /* always fails with -EREMOTE */
+ .readlink = page_readlink,
+ .getattr = afs_getattr,
+};
+
+const struct inode_operations afs_autocell_inode_operations = { /* NOTE(review): presumably for autocell directories, going by the name - confirm at the point of use */
+ .getattr = afs_getattr,
+};
+
+static LIST_HEAD(afs_vfsmounts); /* expiry list that afs_d_automount() puts new mounts on */
+static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out);
+
+static unsigned long afs_mntpt_expiry_timeout = 10 * 60; /* in seconds; multiplied by HZ when the work is queued */
+
+static const char afs_root_volume[] = "root.cell"; /* volume name used when mounting through a pseudo directory */
+
+/*
+ * Lookup is not a valid operation on a mountpoint: trace the attempt and
+ * refuse it with -EREMOTE.
+ */
+static struct dentry *afs_mntpt_lookup(struct inode *dir,
+				       struct dentry *dentry,
+				       unsigned int flags)
+{
+	const int err = -EREMOTE;
+
+	_enter("%p,%p{%pd2}", dir, dentry, dentry);
+	return ERR_PTR(err);
+}
+
+/*
+ * Open is not a valid operation on a mountpoint: trace the attempt and
+ * refuse it with -EREMOTE.
+ */
+static int afs_mntpt_open(struct inode *inode, struct file *file)
+{
+	const int err = -EREMOTE;
+
+	_enter("%p,%p{%pD2}", inode, file, file);
+	return err;
+}
+
+/*
+ * Set the parameters for the proposed superblock.
+ *
+ * @fc is the filesystem context for the submount and @mntpt is the mountpoint
+ * dentry.  The context is switched to the network namespace of the superblock
+ * that holds the mountpoint.  If that superblock's volume is of type
+ * AFSVL_RWVOL, the context type is set to AFSVL_RWVOL and ctx->force is set.
+ * Any cell the context already holds is released.
+ *
+ * The target is then worked out in one of two ways:
+ *  - Pseudo directory (AFS_VNODE_PSEUDODIR set): the dentry name is taken as
+ *    the cell name and the volume is "root.cell".  A leading '.' is stripped
+ *    from the name and selects AFSVL_RWVOL with ctx->force set.
+ *  - Otherwise: the context takes a use on the source superblock's cell (if
+ *    it has one) and page 0 of the mountpoint inode is read.  The content
+ *    must end in '.', and everything before that final '.' is passed to
+ *    vfs_parse_fs_string() as the "source" option.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
+{
+ struct afs_fs_context *ctx = fc->fs_private;
+ struct afs_super_info *src_as = AFS_FS_S(mntpt->d_sb);
+ struct afs_vnode *vnode = AFS_FS_I(d_inode(mntpt));
+ struct afs_cell *cell;
+ const char *p;
+ int ret;
+
+ if (fc->net_ns != src_as->net_ns) {
+ put_net(fc->net_ns);
+ fc->net_ns = get_net(src_as->net_ns);
+ }
+
+ if (src_as->volume && src_as->volume->type == AFSVL_RWVOL) {
+ ctx->type = AFSVL_RWVOL;
+ ctx->force = true;
+ }
+ if (ctx->cell) {
+ afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_mntpt);
+ ctx->cell = NULL;
+ }
+ if (test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags)) {
+ /* if the directory is a pseudo directory, use the d_name */
+ unsigned size = mntpt->d_name.len;
+
+ if (size < 2) /* names shorter than two characters are rejected */
+ return -ENOENT;
+
+ p = mntpt->d_name.name;
+ if (mntpt->d_name.name[0] == '.') {
+ size--;
+ p++;
+ ctx->type = AFSVL_RWVOL;
+ ctx->force = true;
+ }
+ if (size > AFS_MAXCELLNAME)
+ return -ENAMETOOLONG;
+
+ cell = afs_lookup_cell(ctx->net, p, size, NULL, false);
+ if (IS_ERR(cell)) {
+ pr_err("kAFS: unable to lookup cell '%pd'\n", mntpt);
+ return PTR_ERR(cell);
+ }
+ ctx->cell = cell;
+
+ ctx->volname = afs_root_volume;
+ ctx->volnamesz = sizeof(afs_root_volume) - 1; /* length without the trailing NUL */
+ } else {
+ /* read the contents of the AFS special symlink */
+ struct page *page;
+ loff_t size = i_size_read(d_inode(mntpt));
+ char *buf;
+
+ if (src_as->cell)
+ ctx->cell = afs_use_cell(src_as->cell, afs_cell_trace_use_mntpt);
+
+ if (size < 2 || size > PAGE_SIZE - 1) /* content must fit in the single page read below */
+ return -EINVAL;
+
+ page = read_mapping_page(d_inode(mntpt)->i_mapping, 0, NULL);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
+ buf = kmap(page);
+ ret = -EINVAL; /* result if the content does not end in '.' */
+ if (buf[size - 1] == '.')
+ ret = vfs_parse_fs_string(fc, "source", buf, size - 1);
+ kunmap(page);
+ put_page(page);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * Create the vfsmount for an automount point.  A submount context is set up
+ * for @mntpt and its parameters are filled in; if that works, the mount is
+ * performed.  The context is always released before returning.
+ *
+ * Returns the new vfsmount or an error pointer.
+ */
+static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
+{
+	struct vfsmount *mnt;
+	struct fs_context *fc;
+	int err;
+
+	BUG_ON(!d_inode(mntpt));
+
+	fc = fs_context_for_submount(&afs_fs_type, mntpt);
+	if (IS_ERR(fc))
+		return ERR_CAST(fc);
+
+	/* Only attempt the mount if the parameters could be set. */
+	err = afs_mntpt_set_params(fc, mntpt);
+	mnt = err ? ERR_PTR(err) : fc_mount(fc);
+
+	put_fs_context(fc);
+	return mnt;
+}
+
+/*
+ * Handle an automount point: mount whatever the mountpoint at @path refers
+ * to, put the new mount on the AFS expiry list and make sure the expiry work
+ * is scheduled.
+ *
+ * Returns the new vfsmount, with an extra reference held, or an error
+ * pointer.
+ */
+struct vfsmount *afs_d_automount(struct path *path)
+{
+	struct dentry *mntpt = path->dentry;
+	struct vfsmount *newmnt;
+
+	_enter("{%pd}", mntpt);
+
+	newmnt = afs_mntpt_do_automount(mntpt);
+	if (IS_ERR(newmnt))
+		return newmnt;
+
+	/* The extra reference prevents immediate expiration. */
+	mntget(newmnt);
+	mnt_set_expiry(newmnt, &afs_vfsmounts);
+	queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer,
+			   afs_mntpt_expiry_timeout * HZ);
+
+	_leave(" = %p", newmnt);
+	return newmnt;
+}
+
+/*
+ * Work function run when the mountpoint expiry timer goes off.  While there
+ * are still mounts on the expiry list, mark them for expiry and requeue the
+ * work for another timeout period; once the list is empty the work is not
+ * requeued.
+ */
+static void afs_mntpt_expiry_timed_out(struct work_struct *work)
+{
+	_enter("");
+
+	if (list_empty(&afs_vfsmounts))
+		goto out;
+
+	mark_mounts_for_expiry(&afs_vfsmounts);
+	queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer,
+			   afs_mntpt_expiry_timeout * HZ);
+out:
+	_leave("");
+}
+
+/*
+ * kill the AFS mountpoint timer if it's still running
+ *
+ * Asserts that no mounts remain on the expiry list, then cancels the delayed
+ * expiry work synchronously.
+ */
+void afs_mntpt_kill_timer(void)
+{
+ _enter("");
+
+ ASSERT(list_empty(&afs_vfsmounts));
+ cancel_delayed_work_sync(&afs_mntpt_expiry_timer);
+}
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
new file mode 100644
index 000000000..2a0c83d71
--- /dev/null
+++ b/fs/afs/proc.c
@@ -0,0 +1,705 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* /proc interface for AFS
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/sched.h>
+#include <linux/uaccess.h>
+#include "internal.h"
+
+/*
+ * Per-open private data for a cell's "vlservers" proc file.  The vlserver
+ * list pointer is stored by ->start() and read back by ->next() and ->show().
+ */
+struct afs_vl_seq_net_private {
+ struct seq_net_private seq; /* Must be first */
+ struct afs_vlserver_list *vllist;
+};
+
+/* Map a net-namespaced seq_file to the AFS state of its network namespace. */
+static inline struct afs_net *afs_seq2net(struct seq_file *m)
+{
+	struct net *ns = seq_file_net(m);
+
+	return afs_net(ns);
+}
+
+/* As afs_seq2net(), but for files using the single-show net seq_file variant. */
+static inline struct afs_net *afs_seq2net_single(struct seq_file *m)
+{
+	struct net *ns = seq_file_single_net(m);
+
+	return afs_net(ns);
+}
+
+/*
+ * Display the list of cells known to the namespace.
+ *
+ * The start token produces the column header; every other element is a
+ * cell's proc_link node and produces one line for that cell.
+ */
+static int afs_proc_cells_show(struct seq_file *m, void *v)
+{
+	struct afs_vlserver_list *vllist;
+	struct afs_cell *cell;
+	unsigned int nr_vl = 0;
+
+	if (v == SEQ_START_TOKEN) {
+		/* display header on line 1 */
+		seq_puts(m, "USE ACT TTL SV ST NAME\n");
+		return 0;
+	}
+
+	cell = hlist_entry(v, struct afs_cell, proc_link);
+	vllist = rcu_dereference(cell->vl_servers);
+	if (vllist)
+		nr_vl = vllist->nr_servers;
+
+	/* display one cell per line on subsequent lines */
+	seq_printf(m, "%3u %3u %6lld %2u %2u %s\n",
+		   refcount_read(&cell->ref),
+		   atomic_read(&cell->active),
+		   cell->dns_expiry - ktime_get_real_seconds(),
+		   nr_vl,
+		   cell->state,
+		   cell->name);
+	return 0;
+}
+
+/*
+ * Start walking the namespace's cell list; takes the RCU read lock, which
+ * afs_proc_cells_stop() drops.
+ */
+static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos)
+	__acquires(rcu)
+{
+	struct afs_net *net;
+
+	rcu_read_lock();
+	net = afs_seq2net(m);
+	return seq_hlist_start_head_rcu(&net->proc_cells, *_pos);
+}
+
+/* Advance to the next entry on the namespace's cell list. */
+static void *afs_proc_cells_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct afs_net *net = afs_seq2net(m);
+
+	return seq_hlist_next_rcu(v, &net->proc_cells, pos);
+}
+
+/* Finish walking the cell list: drop the RCU read lock taken by ->start(). */
+static void afs_proc_cells_stop(struct seq_file *m, void *v)
+ __releases(rcu)
+{
+ rcu_read_unlock();
+}
+
+/* seq_file iterator for the "cells" file. */
+static const struct seq_operations afs_proc_cells_ops = {
+ .start = afs_proc_cells_start,
+ .next = afs_proc_cells_next,
+ .stop = afs_proc_cells_stop,
+ .show = afs_proc_cells_show,
+};
+
+/*
+ * handle writes to /proc/fs/afs/cells
+ * - to add cells: echo "add <cellname> <IP>[:<IP>][:<IP>]"
+ *
+ * @buf is tokenised in place into a command, a cell name and an optional
+ * argument list.  Only the "add" command is recognised.
+ *
+ * Returns 0 on success, the error from afs_lookup_cell() if the lookup fails,
+ * or -EINVAL for a malformed or unrecognised command.
+ */
+static int afs_proc_cells_write(struct file *file, char *buf, size_t size)
+{
+ struct seq_file *m = file->private_data;
+ struct afs_net *net = afs_seq2net(m);
+ char *name, *args;
+ int ret;
+
+ /* trim to first NL */
+ name = memchr(buf, '\n', size);
+ if (name)
+ *name = 0;
+
+ /* split into command, name and argslist */
+ name = strchr(buf, ' ');
+ if (!name)
+ goto inval;
+ /* Terminate the command and skip the run of spaces before the name */
+ do {
+ *name++ = 0;
+ } while(*name == ' ');
+ if (!*name)
+ goto inval;
+
+ /* The argument list is optional; args stays NULL if there isn't one */
+ args = strchr(name, ' ');
+ if (args) {
+ do {
+ *args++ = 0;
+ } while(*args == ' ');
+ if (!*args)
+ goto inval;
+ }
+
+ /* determine command to perform */
+ _debug("cmd=%s name=%s args=%s", buf, name, args);
+
+ if (strcmp(buf, "add") == 0) {
+ struct afs_cell *cell;
+
+ cell = afs_lookup_cell(net, name, strlen(name), args, true);
+ if (IS_ERR(cell)) {
+ ret = PTR_ERR(cell);
+ goto done;
+ }
+
+ /* If the NO_GC flag was already set, drop a use on the cell here
+ * (presumably the one obtained by the lookup above - confirm
+ * against afs_lookup_cell()).
+ */
+ if (test_and_set_bit(AFS_CELL_FL_NO_GC, &cell->flags))
+ afs_unuse_cell(net, cell, afs_cell_trace_unuse_no_pin);
+ } else {
+ goto inval;
+ }
+
+ ret = 0;
+
+done:
+ _leave(" = %d", ret);
+ return ret;
+
+inval:
+ ret = -EINVAL;
+ printk("kAFS: Invalid Command on /proc/fs/afs/cells file\n");
+ goto done;
+}
+
+/*
+ * Display the name of the current workstation cell.
+ *
+ * net->ws_cell is sampled with cells_lock held for reading; nothing is
+ * printed if no workstation cell is set.
+ */
+static int afs_proc_rootcell_show(struct seq_file *m, void *v)
+{
+	struct afs_net *net = afs_seq2net_single(m);
+	struct afs_cell *ws;
+
+	down_read(&net->cells_lock);
+	ws = net->ws_cell;
+	if (ws)
+		seq_printf(m, "%s\n", ws->name);
+	up_read(&net->cells_lock);
+	return 0;
+}
+
+/*
+ * Set the current workstation cell and optionally supply its list of volume
+ * location servers.
+ *
+ *	echo "cell.name:192.168.231.14" >/proc/fs/afs/rootcell
+ *
+ * Input that starts with '.' or contains a '/' anywhere in the written data
+ * is rejected with -EINVAL.  Otherwise the buffer is cut at the first newline
+ * and handed to afs_cell_init(), whose result is returned.
+ */
+static int afs_proc_rootcell_write(struct file *file, char *buf, size_t size)
+{
+	struct seq_file *m = file->private_data;
+	struct afs_net *net = afs_seq2net_single(m);
+	char *nl;
+	int ret = -EINVAL;
+
+	if (buf[0] != '.' && !memchr(buf, '/', size)) {
+		/* trim to first NL */
+		nl = memchr(buf, '\n', size);
+		if (nl)
+			*nl = 0;
+
+		/* determine command to perform */
+		_debug("rootcell=%s", buf);
+
+		ret = afs_cell_init(net, buf);
+	}
+
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/* Two-letter volume type tags (plus NUL), indexed by the AFSVL_* volume type. */
+static const char afs_vol_types[3][3] = {
+ [AFSVL_RWVOL] = "RW",
+ [AFSVL_ROVOL] = "RO",
+ [AFSVL_BACKVOL] = "BK",
+};
+
+/*
+ * Display the list of volumes known to a cell.
+ *
+ * The start token produces the column header; every other element is a
+ * volume's proc_link node and produces one line giving the volume's refcount,
+ * ID, type tag and name.
+ */
+static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
+{
+	struct afs_volume *vol;
+
+	/* Display header on line 1 */
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(m, "USE VID TY NAME\n");
+		return 0;
+	}
+
+	/* Only convert v to a volume once we know it isn't the start token */
+	vol = hlist_entry(v, struct afs_volume, proc_link);
+
+	/* refcount_read() is unsigned, so print it as such (as the cells file does) */
+	seq_printf(m, "%3u %08llx %s %s\n",
+		   refcount_read(&vol->ref), vol->vid,
+		   afs_vol_types[vol->type],
+		   vol->name);
+
+	return 0;
+}
+
+/*
+ * Iterate over a cell's volume list.  The walk is done under the RCU read
+ * lock, taken in ->start() and dropped in ->stop(); the sparse annotations
+ * name that context, matching the other iterators in this file.
+ */
+static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
+	__acquires(rcu)
+{
+	struct afs_cell *cell = pde_data(file_inode(m->file));
+
+	rcu_read_lock();
+	return seq_hlist_start_head_rcu(&cell->proc_volumes, *_pos);
+}
+
+/* Advance to the next volume on the cell's list. */
+static void *afs_proc_cell_volumes_next(struct seq_file *m, void *v,
+					loff_t *_pos)
+{
+	struct afs_cell *cell = pde_data(file_inode(m->file));
+
+	return seq_hlist_next_rcu(v, &cell->proc_volumes, _pos);
+}
+
+/* Finish the walk: drop the RCU read lock taken by ->start(). */
+static void afs_proc_cell_volumes_stop(struct seq_file *m, void *v)
+	__releases(rcu)
+{
+	rcu_read_unlock();
+}
+
+/* seq_file iterator for a cell's "volumes" file. */
+static const struct seq_operations afs_proc_cell_volumes_ops = {
+ .start = afs_proc_cell_volumes_start,
+ .next = afs_proc_cell_volumes_next,
+ .stop = afs_proc_cell_volumes_stop,
+ .show = afs_proc_cell_volumes_show,
+};
+
+/*
+ * Display strings for DNS record sources, indexed by DNS_RECORD_* value.  The
+ * table has one slot beyond the defined sources, holding "[weird]".
+ */
+static const char *const dns_record_sources[NR__dns_record_source + 1] = {
+ [DNS_RECORD_UNAVAILABLE] = "unav",
+ [DNS_RECORD_FROM_CONFIG] = "cfg",
+ [DNS_RECORD_FROM_DNS_A] = "A",
+ [DNS_RECORD_FROM_DNS_AFSDB] = "AFSDB",
+ [DNS_RECORD_FROM_DNS_SRV] = "SRV",
+ [DNS_RECORD_FROM_NSS] = "nss",
+ [NR__dns_record_source] = "[weird]"
+};
+
+/*
+ * Display strings for DNS lookup statuses, indexed by DNS_LOOKUP_* value.  The
+ * table has one slot beyond the defined statuses, holding "[weird]".
+ */
+static const char *const dns_lookup_statuses[NR__dns_lookup_status + 1] = {
+ [DNS_LOOKUP_NOT_DONE] = "no-lookup",
+ [DNS_LOOKUP_GOOD] = "good",
+ [DNS_LOOKUP_GOOD_WITH_BAD] = "good/bad",
+ [DNS_LOOKUP_BAD] = "bad",
+ [DNS_LOOKUP_GOT_NOT_FOUND] = "not-found",
+ [DNS_LOOKUP_GOT_LOCAL_FAILURE] = "local-failure",
+ [DNS_LOOKUP_GOT_TEMP_FAILURE] = "temp-failure",
+ [DNS_LOOKUP_GOT_NS_FAILURE] = "ns-failure",
+ [NR__dns_lookup_status] = "[weird]"
+};
+
+/*
+ * Display the list of Volume Location servers we're using for a cell.
+ *
+ * The start token produces a summary line for the list sampled by ->start()
+ * (index 0 of each string table is used if there is no list).  Every other
+ * element is a vlserver list entry, for which the server name, priority,
+ * weight, record source/status, addresses and probe state are shown.  The
+ * source and status come from the server's address list when it has one and
+ * from the list entry otherwise.
+ */
+static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
+{
+	const struct afs_vl_seq_net_private *priv = m->private;
+	const struct afs_vlserver_list *vllist = priv->vllist;
+	const struct afs_vlserver_entry *entry = v;
+	const struct afs_vlserver *vlserver;
+	const struct afs_addr_list *alist;
+	int source, status, i;
+
+	if (v == SEQ_START_TOKEN) {
+		source = vllist ? vllist->source : 0;
+		status = vllist ? vllist->status : 0;
+		seq_printf(m, "# source %s, status %s\n",
+			   dns_record_sources[source],
+			   dns_lookup_statuses[status]);
+		return 0;
+	}
+
+	vlserver = entry->server;
+	alist = rcu_dereference(vlserver->addresses);
+	if (alist) {
+		source = alist->source;
+		status = alist->status;
+	} else {
+		source = entry->source;
+		status = entry->status;
+	}
+
+	seq_printf(m, "%s [p=%hu w=%hu s=%s,%s]:\n",
+		   vlserver->name, entry->priority, entry->weight,
+		   dns_record_sources[source],
+		   dns_lookup_statuses[status]);
+
+	if (alist) {
+		for (i = 0; i < alist->nr_addrs; i++)
+			seq_printf(m, " %c %pISpc\n",
+				   alist->preferred == i ? '>' : '-',
+				   &alist->addrs[i].transport);
+	}
+
+	seq_printf(m, " info: fl=%lx rtt=%d\n", vlserver->flags, vlserver->rtt);
+	seq_printf(m, " probe: fl=%x e=%d ac=%d out=%d\n",
+		   vlserver->probe.flags, vlserver->probe.error,
+		   vlserver->probe.abort_code,
+		   atomic_read(&vlserver->probe_outstanding));
+	return 0;
+}
+
+/*
+ * Start iterating over a cell's vlserver list.
+ *
+ * Takes the RCU read lock (dropped by ->stop()), samples cell->vl_servers and
+ * stashes it in the seq_file private data for ->next() and ->show().
+ * Position 0 is the header token; position N > 0 is server N - 1.
+ *
+ * Returns the start token, a pointer to a list entry, or NULL when the
+ * position is past the end of the list or there is no list.
+ */
+static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
+	__acquires(rcu)
+{
+	struct afs_vl_seq_net_private *priv = m->private;
+	struct afs_vlserver_list *vllist;
+	struct afs_cell *cell = pde_data(file_inode(m->file));
+	loff_t pos = *_pos;
+
+	rcu_read_lock();
+
+	vllist = rcu_dereference(cell->vl_servers);
+	priv->vllist = vllist;
+
+	if (pos < 0)
+		*_pos = pos = 0;
+	if (pos == 0)
+		return SEQ_START_TOKEN;
+
+	/* ->next() and ->show() both cope with there being no list; so must
+	 * we, rather than dereferencing a NULL pointer on a restarted read.
+	 */
+	if (!vllist || pos - 1 >= vllist->nr_servers)
+		return NULL;
+
+	return &vllist->servers[pos - 1];
+}
+
+/*
+ * Step to the next vlserver list entry.  The position is always advanced;
+ * NULL is returned if there is no list or the new position is past its end.
+ */
+static void *afs_proc_cell_vlservers_next(struct seq_file *m, void *v,
+					  loff_t *_pos)
+{
+	struct afs_vl_seq_net_private *priv = m->private;
+	struct afs_vlserver_list *vllist = priv->vllist;
+	loff_t pos = *_pos + 1;
+
+	*_pos = pos;
+	if (vllist && pos - 1 < vllist->nr_servers)
+		return &vllist->servers[pos - 1];
+
+	return NULL;
+}
+
+/* Finish the vlserver walk: drop the RCU read lock taken by ->start(). */
+static void afs_proc_cell_vlservers_stop(struct seq_file *m, void *v)
+ __releases(rcu)
+{
+ rcu_read_unlock();
+}
+
+/* seq_file iterator for a cell's "vlservers" file. */
+static const struct seq_operations afs_proc_cell_vlservers_ops = {
+ .start = afs_proc_cell_vlservers_start,
+ .next = afs_proc_cell_vlservers_next,
+ .stop = afs_proc_cell_vlservers_stop,
+ .show = afs_proc_cell_vlservers_show,
+};
+
+/*
+ * Display the list of fileservers we're using within a namespace.
+ *
+ * The start token produces the column header; every other element is a
+ * server's proc_link node, for which the UUID, reference counts, flags, probe
+ * state and address list are shown.
+ */
+static int afs_proc_servers_show(struct seq_file *m, void *v)
+{
+	struct afs_server *server;
+	struct afs_addr_list *alist;
+	int i;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(m, "UUID REF ACT\n");
+		return 0;
+	}
+
+	server = hlist_entry(v, struct afs_server, proc_link);
+	alist = rcu_dereference(server->addresses);
+	seq_printf(m, "%pU %3d %3d\n",
+		   &server->uuid,
+		   refcount_read(&server->ref),
+		   atomic_read(&server->active));
+	seq_printf(m, " - info: fl=%lx rtt=%u brk=%x\n",
+		   server->flags, server->rtt, server->cb_s_break);
+	/* Convert the elapsed jiffies to seconds before narrowing to int so
+	 * that a large delta isn't truncated ahead of the division.
+	 */
+	seq_printf(m, " - probe: last=%d out=%d\n",
+		   (int)((jiffies - server->probed_at) / HZ),
+		   atomic_read(&server->probe_outstanding));
+	seq_printf(m, " - ALIST v=%u rsp=%lx f=%lx\n",
+		   alist->version, alist->responded, alist->failed);
+	for (i = 0; i < alist->nr_addrs; i++)
+		seq_printf(m, " [%x] %pISpc%s\n",
+			   i, &alist->addrs[i].transport,
+			   alist->preferred == i ? "*" : "");
+	return 0;
+}
+
+/*
+ * Start walking the namespace's fileserver list; takes the RCU read lock,
+ * which afs_proc_servers_stop() drops.
+ */
+static void *afs_proc_servers_start(struct seq_file *m, loff_t *_pos)
+	__acquires(rcu)
+{
+	struct afs_net *net;
+
+	rcu_read_lock();
+	net = afs_seq2net(m);
+	return seq_hlist_start_head_rcu(&net->fs_proc, *_pos);
+}
+
+/* Advance to the next entry on the namespace's fileserver list. */
+static void *afs_proc_servers_next(struct seq_file *m, void *v, loff_t *_pos)
+{
+	struct afs_net *net = afs_seq2net(m);
+
+	return seq_hlist_next_rcu(v, &net->fs_proc, _pos);
+}
+
+/* Finish the fileserver walk: drop the RCU read lock taken by ->start(). */
+static void afs_proc_servers_stop(struct seq_file *m, void *v)
+ __releases(rcu)
+{
+ rcu_read_unlock();
+}
+
+/* seq_file iterator for the "servers" file. */
+static const struct seq_operations afs_proc_servers_ops = {
+ .start = afs_proc_servers_start,
+ .next = afs_proc_servers_next,
+ .stop = afs_proc_servers_stop,
+ .show = afs_proc_servers_show,
+};
+
+/*
+ * Display the list of strings that may be substituted for the @sys pathname
+ * macro.
+ *
+ * The iterator cookie @v is the substitution index plus one; nothing is
+ * printed if the index is outside the current list.
+ */
+static int afs_proc_sysname_show(struct seq_file *m, void *v)
+{
+	struct afs_sysnames *names = afs_seq2net(m)->sysnames;
+	unsigned int idx = (unsigned long)v - 1;
+
+	if (idx >= names->nr)
+		return 0;
+
+	seq_printf(m, "%s\n", names->subs[idx]);
+	return 0;
+}
+
+/*
+ * Start iterating over the sysname list.  Takes sysnames_lock for reading
+ * (dropped by ->stop()) and returns the position plus one as the cookie, or
+ * NULL if the position is past the end of the list.
+ */
+static void *afs_proc_sysname_start(struct seq_file *m, loff_t *pos)
+	__acquires(&net->sysnames_lock)
+{
+	struct afs_net *net = afs_seq2net(m);
+
+	read_lock(&net->sysnames_lock);
+
+	if (*pos >= net->sysnames->nr)
+		return NULL;
+
+	return (void *)(unsigned long)(*pos + 1);
+}
+
+/*
+ * Advance the position and return the new cookie (position plus one), or
+ * NULL once the position is past the end of the list.
+ */
+static void *afs_proc_sysname_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct afs_sysnames *names = afs_seq2net(m)->sysnames;
+
+	++*pos;
+	if (*pos >= names->nr)
+		return NULL;
+
+	return (void *)(unsigned long)(*pos + 1);
+}
+
+/* Finish the sysname walk: drop the read lock taken by ->start(). */
+static void afs_proc_sysname_stop(struct seq_file *m, void *v)
+	__releases(&net->sysnames_lock)
+{
+	read_unlock(&afs_seq2net(m)->sysnames_lock);
+}
+
+/* seq_file iterator for the "sysname" file. */
+static const struct seq_operations afs_proc_sysname_ops = {
+ .start = afs_proc_sysname_start,
+ .next = afs_proc_sysname_next,
+ .stop = afs_proc_sysname_stop,
+ .show = afs_proc_sysname_show,
+};
+
+/*
+ * Allow the @sys substitution to be configured.
+ *
+ * @buf is split in place on spaces, tabs and newlines into a list of
+ * substitution names.  A new afs_sysnames set is built from them and, if
+ * every name is acceptable, swapped in for net->sysnames under the write
+ * lock.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, -ENAMETOOLONG if a
+ * name is AFSNAMEMAX or longer, -EINVAL for a disallowed name and -EFBIG if
+ * more than AFS_NR_SYSNAME names are given.
+ */
+static int afs_proc_sysname_write(struct file *file, char *buf, size_t size)
+{
+ struct afs_sysnames *sysnames, *kill;
+ struct seq_file *m = file->private_data;
+ struct afs_net *net = afs_seq2net(m);
+ char *s, *p, *sub;
+ int ret, len;
+
+ sysnames = kzalloc(sizeof(*sysnames), GFP_KERNEL);
+ if (!sysnames)
+ return -ENOMEM;
+ refcount_set(&sysnames->usage, 1);
+ /* Until the swap below, the set to dispose of on exit is the new one */
+ kill = sysnames;
+
+ p = buf;
+ while ((s = strsep(&p, " \t\n"))) {
+ len = strlen(s);
+ /* Consecutive separators yield empty tokens; skip them */
+ if (len == 0)
+ continue;
+ ret = -ENAMETOOLONG;
+ if (len >= AFSNAMEMAX)
+ goto error;
+
+ if (len >= 4 &&
+ s[len - 4] == '@' &&
+ s[len - 3] == 's' &&
+ s[len - 2] == 'y' &&
+ s[len - 1] == 's')
+ /* Protect against recursion */
+ goto invalid;
+
+ /* Reject "." and ".." */
+ if (s[0] == '.' &&
+ (len < 2 || (len == 2 && s[1] == '.')))
+ goto invalid;
+
+ if (memchr(s, '/', len))
+ goto invalid;
+
+ ret = -EFBIG;
+ if (sysnames->nr >= AFS_NR_SYSNAME)
+ goto out;
+
+ /* Share the built-in default string rather than copying it;
+ * afs_put_sysnames() knows not to free it.
+ */
+ if (strcmp(s, afs_init_sysname) == 0) {
+ sub = (char *)afs_init_sysname;
+ } else {
+ ret = -ENOMEM;
+ sub = kmemdup(s, len + 1, GFP_KERNEL);
+ if (!sub)
+ goto out;
+ }
+
+ sysnames->subs[sysnames->nr] = sub;
+ sysnames->nr++;
+ }
+
+ /* No names given: install a single entry pointing at ->blank */
+ if (sysnames->nr == 0) {
+ sysnames->subs[0] = sysnames->blank;
+ sysnames->nr++;
+ }
+
+ /* Swap in the new set; the old one becomes the set to dispose of */
+ write_lock(&net->sysnames_lock);
+ kill = net->sysnames;
+ net->sysnames = sysnames;
+ write_unlock(&net->sysnames_lock);
+ ret = 0;
+out:
+ afs_put_sysnames(kill);
+ return ret;
+
+invalid:
+ ret = -EINVAL;
+error:
+ goto out;
+}
+
+/*
+ * Drop a reference on a sysname set (NULL is tolerated).  When the last
+ * reference goes, every substitution string is freed except those that point
+ * at afs_init_sysname or at the set's own ->blank, then the set itself is
+ * freed.
+ */
+void afs_put_sysnames(struct afs_sysnames *sysnames)
+{
+	char *sub;
+	int i;
+
+	if (!sysnames || !refcount_dec_and_test(&sysnames->usage))
+		return;
+
+	for (i = 0; i < sysnames->nr; i++) {
+		sub = sysnames->subs[i];
+		if (sub == afs_init_sysname || sub == sysnames->blank)
+			continue;
+		kfree(sub);
+	}
+	kfree(sysnames);
+}
+
+/*
+ * Display general per-net namespace statistics
+ *
+ * Each line reports a group of the namespace's event counters: directory
+ * management, directory reads, directory edits, and file fetches and stores
+ * (operation count and byte count).
+ */
+static int afs_proc_stats_show(struct seq_file *m, void *v)
+{
+ struct afs_net *net = afs_seq2net_single(m);
+
+ seq_puts(m, "kAFS statistics\n");
+
+ seq_printf(m, "dir-mgmt: look=%u reval=%u inval=%u relpg=%u\n",
+ atomic_read(&net->n_lookup),
+ atomic_read(&net->n_reval),
+ atomic_read(&net->n_inval),
+ atomic_read(&net->n_relpg));
+
+ seq_printf(m, "dir-data: rdpg=%u\n",
+ atomic_read(&net->n_read_dir));
+
+ seq_printf(m, "dir-edit: cr=%u rm=%u\n",
+ atomic_read(&net->n_dir_cr),
+ atomic_read(&net->n_dir_rm));
+
+ seq_printf(m, "file-rd : n=%u nb=%lu\n",
+ atomic_read(&net->n_fetches),
+ atomic_long_read(&net->n_fetch_bytes));
+ seq_printf(m, "file-wr : n=%u nb=%lu\n",
+ atomic_read(&net->n_stores),
+ atomic_long_read(&net->n_store_bytes));
+ return 0;
+}
+
+/*
+ * initialise /proc/fs/afs/<cell>/
+ *
+ * Creates the cell's directory under the namespace's afs proc directory and
+ * populates it with the "vlservers" and "volumes" files.  If either file
+ * can't be created, the whole subtree is removed again.
+ *
+ * Returns 0 on success or -ENOMEM on any failure.
+ */
+int afs_proc_cell_setup(struct afs_cell *cell)
+{
+	struct afs_net *net = cell->net;
+	struct proc_dir_entry *dir;
+
+	_enter("%p{%s},%p", cell, cell->name, net->proc_afs);
+
+	dir = proc_net_mkdir(net->net, cell->name, net->proc_afs);
+	if (!dir)
+		goto error_dir;
+
+	if (!proc_create_net_data("vlservers", 0444, dir,
+				  &afs_proc_cell_vlservers_ops,
+				  sizeof(struct afs_vl_seq_net_private),
+				  cell))
+		goto error_tree;
+
+	if (!proc_create_net_data("volumes", 0444, dir,
+				  &afs_proc_cell_volumes_ops,
+				  sizeof(struct seq_net_private),
+				  cell))
+		goto error_tree;
+
+	_leave(" = 0");
+	return 0;
+
+error_tree:
+	remove_proc_subtree(cell->name, net->proc_afs);
+error_dir:
+	_leave(" = -ENOMEM");
+	return -ENOMEM;
+}
+
+/*
+ * remove /proc/fs/afs/<cell>/
+ *
+ * The cell's directory, looked up by name under the namespace's afs proc
+ * directory, is removed along with everything beneath it.
+ */
+void afs_proc_cell_remove(struct afs_cell *cell)
+{
+ struct afs_net *net = cell->net;
+
+ _enter("");
+ remove_proc_subtree(cell->name, net->proc_afs);
+ _leave("");
+}
+
+/*
+ * initialise the /proc/fs/afs/ directory
+ *
+ * Creates the "afs" directory under the namespace's proc_net directory and
+ * populates it with the "cells", "rootcell", "servers", "stats" and "sysname"
+ * files.  net->proc_afs is only set once everything has been created; on any
+ * failure the directory is removed again.
+ *
+ * Returns 0 on success or -ENOMEM on any failure.
+ */
+int afs_proc_init(struct afs_net *net)
+{
+	struct proc_dir_entry *dir;
+
+	_enter("");
+
+	dir = proc_net_mkdir(net->net, "afs", net->net->proc_net);
+	if (!dir)
+		goto error_dir;
+
+	if (!proc_create_net_data_write("cells", 0644, dir,
+					&afs_proc_cells_ops,
+					afs_proc_cells_write,
+					sizeof(struct seq_net_private),
+					NULL))
+		goto error_tree;
+
+	if (!proc_create_net_single_write("rootcell", 0644, dir,
+					  afs_proc_rootcell_show,
+					  afs_proc_rootcell_write,
+					  NULL))
+		goto error_tree;
+
+	if (!proc_create_net("servers", 0444, dir, &afs_proc_servers_ops,
+			     sizeof(struct seq_net_private)))
+		goto error_tree;
+
+	if (!proc_create_net_single("stats", 0444, dir, afs_proc_stats_show, NULL))
+		goto error_tree;
+
+	if (!proc_create_net_data_write("sysname", 0644, dir,
+					&afs_proc_sysname_ops,
+					afs_proc_sysname_write,
+					sizeof(struct seq_net_private),
+					NULL))
+		goto error_tree;
+
+	net->proc_afs = dir;
+	_leave(" = 0");
+	return 0;
+
+error_tree:
+	proc_remove(dir);
+error_dir:
+	_leave(" = -ENOMEM");
+	return -ENOMEM;
+}
+
+/*
+ * clean up the /proc/fs/afs/ directory
+ *
+ * Removes the namespace's afs proc directory and clears the cached pointer
+ * to it.
+ */
+void afs_proc_cleanup(struct afs_net *net)
+{
+ proc_remove(net->proc_afs);
+ net->proc_afs = NULL;
+}
diff --git a/fs/afs/protocol_afs.h b/fs/afs/protocol_afs.h
new file mode 100644
index 000000000..0c39358c8
--- /dev/null
+++ b/fs/afs/protocol_afs.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* AFS protocol bits
+ *
+ * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+
+/* Upper bound on the size of a capability set, counted in words */
+#define AFSCAPABILITIESMAX 196 /* Maximum number of words in a capability set */
+
+/* AFS3 Fileserver capabilities word 0 (single-bit flags) */
+#define AFS3_VICED_CAPABILITY_ERRORTRANS 0x0001 /* Uses UAE errors */
+#define AFS3_VICED_CAPABILITY_64BITFILES 0x0002 /* FetchData64 & StoreData64 supported */
+#define AFS3_VICED_CAPABILITY_WRITELOCKACL 0x0004 /* Can lock a file even without lock perm */
+#define AFS3_VICED_CAPABILITY_SANEACLS 0x0008 /* ACLs reviewed for sanity - don't use */
diff --git a/fs/afs/protocol_uae.h b/fs/afs/protocol_uae.h
new file mode 100644
index 000000000..1b3d1060b
--- /dev/null
+++ b/fs/afs/protocol_uae.h
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Universal AFS Error codes (UAE).
+ *
+ * Copyright (C) 2003, Daria Phoebe Brashear
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ */
+
+/*
+ * Universal AFS Error code values.  The codes are numbered consecutively from
+ * 0x2f6df00; the comment on each gives the condition it represents.
+ */
+enum {
+ UAEPERM = 0x2f6df00, /* Operation not permitted */
+ UAENOENT = 0x2f6df01, /* No such file or directory */
+ UAESRCH = 0x2f6df02, /* No such process */
+ UAEINTR = 0x2f6df03, /* Interrupted system call */
+ UAEIO = 0x2f6df04, /* I/O error */
+ UAENXIO = 0x2f6df05, /* No such device or address */
+ UAE2BIG = 0x2f6df06, /* Arg list too long */
+ UAENOEXEC = 0x2f6df07, /* Exec format error */
+ UAEBADF = 0x2f6df08, /* Bad file number */
+ UAECHILD = 0x2f6df09, /* No child processes */
+ UAEAGAIN = 0x2f6df0a, /* Try again */
+ UAENOMEM = 0x2f6df0b, /* Out of memory */
+ UAEACCES = 0x2f6df0c, /* Permission denied */
+ UAEFAULT = 0x2f6df0d, /* Bad address */
+ UAENOTBLK = 0x2f6df0e, /* Block device required */
+ UAEBUSY = 0x2f6df0f, /* Device or resource busy */
+ UAEEXIST = 0x2f6df10, /* File exists */
+ UAEXDEV = 0x2f6df11, /* Cross-device link */
+ UAENODEV = 0x2f6df12, /* No such device */
+ UAENOTDIR = 0x2f6df13, /* Not a directory */
+ UAEISDIR = 0x2f6df14, /* Is a directory */
+ UAEINVAL = 0x2f6df15, /* Invalid argument */
+ UAENFILE = 0x2f6df16, /* File table overflow */
+ UAEMFILE = 0x2f6df17, /* Too many open files */
+ UAENOTTY = 0x2f6df18, /* Not a typewriter */
+ UAETXTBSY = 0x2f6df19, /* Text file busy */
+ UAEFBIG = 0x2f6df1a, /* File too large */
+ UAENOSPC = 0x2f6df1b, /* No space left on device */
+ UAESPIPE = 0x2f6df1c, /* Illegal seek */
+ UAEROFS = 0x2f6df1d, /* Read-only file system */
+ UAEMLINK = 0x2f6df1e, /* Too many links */
+ UAEPIPE = 0x2f6df1f, /* Broken pipe */
+ UAEDOM = 0x2f6df20, /* Math argument out of domain of func */
+ UAERANGE = 0x2f6df21, /* Math result not representable */
+ UAEDEADLK = 0x2f6df22, /* Resource deadlock would occur */
+ UAENAMETOOLONG = 0x2f6df23, /* File name too long */
+ UAENOLCK = 0x2f6df24, /* No record locks available */
+ UAENOSYS = 0x2f6df25, /* Function not implemented */
+ UAENOTEMPTY = 0x2f6df26, /* Directory not empty */
+ UAELOOP = 0x2f6df27, /* Too many symbolic links encountered */
+ UAEWOULDBLOCK = 0x2f6df28, /* Operation would block */
+ UAENOMSG = 0x2f6df29, /* No message of desired type */
+ UAEIDRM = 0x2f6df2a, /* Identifier removed */
+ UAECHRNG = 0x2f6df2b, /* Channel number out of range */
+ UAEL2NSYNC = 0x2f6df2c, /* Level 2 not synchronized */
+ UAEL3HLT = 0x2f6df2d, /* Level 3 halted */
+ UAEL3RST = 0x2f6df2e, /* Level 3 reset */
+ UAELNRNG = 0x2f6df2f, /* Link number out of range */
+ UAEUNATCH = 0x2f6df30, /* Protocol driver not attached */
+ UAENOCSI = 0x2f6df31, /* No CSI structure available */
+ UAEL2HLT = 0x2f6df32, /* Level 2 halted */
+ UAEBADE = 0x2f6df33, /* Invalid exchange */
+ UAEBADR = 0x2f6df34, /* Invalid request descriptor */
+ UAEXFULL = 0x2f6df35, /* Exchange full */
+ UAENOANO = 0x2f6df36, /* No anode */
+ UAEBADRQC = 0x2f6df37, /* Invalid request code */
+ UAEBADSLT = 0x2f6df38, /* Invalid slot */
+ UAEBFONT = 0x2f6df39, /* Bad font file format */
+ UAENOSTR = 0x2f6df3a, /* Device not a stream */
+ UAENODATA = 0x2f6df3b, /* No data available */
+ UAETIME = 0x2f6df3c, /* Timer expired */
+ UAENOSR = 0x2f6df3d, /* Out of streams resources */
+ UAENONET = 0x2f6df3e, /* Machine is not on the network */
+ UAENOPKG = 0x2f6df3f, /* Package not installed */
+ UAEREMOTE = 0x2f6df40, /* Object is remote */
+ UAENOLINK = 0x2f6df41, /* Link has been severed */
+ UAEADV = 0x2f6df42, /* Advertise error */
+ UAESRMNT = 0x2f6df43, /* Srmount error */
+ UAECOMM = 0x2f6df44, /* Communication error on send */
+ UAEPROTO = 0x2f6df45, /* Protocol error */
+ UAEMULTIHOP = 0x2f6df46, /* Multihop attempted */
+ UAEDOTDOT = 0x2f6df47, /* RFS specific error */
+ UAEBADMSG = 0x2f6df48, /* Not a data message */
+ UAEOVERFLOW = 0x2f6df49, /* Value too large for defined data type */
+ UAENOTUNIQ = 0x2f6df4a, /* Name not unique on network */
+ UAEBADFD = 0x2f6df4b, /* File descriptor in bad state */
+ UAEREMCHG = 0x2f6df4c, /* Remote address changed */
+ UAELIBACC = 0x2f6df4d, /* Can not access a needed shared library */
+ UAELIBBAD = 0x2f6df4e, /* Accessing a corrupted shared library */
+ UAELIBSCN = 0x2f6df4f, /* .lib section in a.out corrupted */
+ UAELIBMAX = 0x2f6df50, /* Attempting to link in too many shared libraries */
+ UAELIBEXEC = 0x2f6df51, /* Cannot exec a shared library directly */
+ UAEILSEQ = 0x2f6df52, /* Illegal byte sequence */
+ UAERESTART = 0x2f6df53, /* Interrupted system call should be restarted */
+ UAESTRPIPE = 0x2f6df54, /* Streams pipe error */
+ UAEUSERS = 0x2f6df55, /* Too many users */
+ UAENOTSOCK = 0x2f6df56, /* Socket operation on non-socket */
+ UAEDESTADDRREQ = 0x2f6df57, /* Destination address required */
+ UAEMSGSIZE = 0x2f6df58, /* Message too long */
+ UAEPROTOTYPE = 0x2f6df59, /* Protocol wrong type for socket */
+ UAENOPROTOOPT = 0x2f6df5a, /* Protocol not available */
+ UAEPROTONOSUPPORT = 0x2f6df5b, /* Protocol not supported */
+ UAESOCKTNOSUPPORT = 0x2f6df5c, /* Socket type not supported */
+ UAEOPNOTSUPP = 0x2f6df5d, /* Operation not supported on transport endpoint */
+ UAEPFNOSUPPORT = 0x2f6df5e, /* Protocol family not supported */
+ UAEAFNOSUPPORT = 0x2f6df5f, /* Address family not supported by protocol */
+ UAEADDRINUSE = 0x2f6df60, /* Address already in use */
+ UAEADDRNOTAVAIL = 0x2f6df61, /* Cannot assign requested address */
+ UAENETDOWN = 0x2f6df62, /* Network is down */
+ UAENETUNREACH = 0x2f6df63, /* Network is unreachable */
+ UAENETRESET = 0x2f6df64, /* Network dropped connection because of reset */
+ UAECONNABORTED = 0x2f6df65, /* Software caused connection abort */
+ UAECONNRESET = 0x2f6df66, /* Connection reset by peer */
+ UAENOBUFS = 0x2f6df67, /* No buffer space available */
+ UAEISCONN = 0x2f6df68, /* Transport endpoint is already connected */
+ UAENOTCONN = 0x2f6df69, /* Transport endpoint is not connected */
+ UAESHUTDOWN = 0x2f6df6a, /* Cannot send after transport endpoint shutdown */
+ UAETOOMANYREFS = 0x2f6df6b, /* Too many references: cannot splice */
+ UAETIMEDOUT = 0x2f6df6c, /* Connection timed out */
+ UAECONNREFUSED = 0x2f6df6d, /* Connection refused */
+ UAEHOSTDOWN = 0x2f6df6e, /* Host is down */
+ UAEHOSTUNREACH = 0x2f6df6f, /* No route to host */
+ UAEALREADY = 0x2f6df70, /* Operation already in progress */
+ UAEINPROGRESS = 0x2f6df71, /* Operation now in progress */
+ UAESTALE = 0x2f6df72, /* Stale NFS file handle */
+ UAEUCLEAN = 0x2f6df73, /* Structure needs cleaning */
+ UAENOTNAM = 0x2f6df74, /* Not a XENIX named type file */
+ UAENAVAIL = 0x2f6df75, /* No XENIX semaphores available */
+ UAEISNAM = 0x2f6df76, /* Is a named type file */
+ UAEREMOTEIO = 0x2f6df77, /* Remote I/O error */
+ UAEDQUOT = 0x2f6df78, /* Quota exceeded */
+ UAENOMEDIUM = 0x2f6df79, /* No medium found */
+ UAEMEDIUMTYPE = 0x2f6df7a, /* Wrong medium type */
+};
diff --git a/fs/afs/protocol_yfs.h b/fs/afs/protocol_yfs.h
new file mode 100644
index 000000000..e4cd89c44
--- /dev/null
+++ b/fs/afs/protocol_yfs.h
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* YFS protocol bits
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+/* Service IDs for the YFS fileserver (FS) and cache manager (CM) services */
+#define YFS_FS_SERVICE 2500
+#define YFS_CM_SERVICE 2501
+
+/* NOTE(review): presumably the maximum number of callbacks per request - confirm against users */
+#define YFSCBMAX 1024
+
+/* Operation IDs for the YFS cache manager (callback) service */
+enum YFS_CM_Operations {
+ YFSCBProbe = 206, /* probe client */
+ YFSCBGetLock = 207, /* get contents of CM lock table */
+ YFSCBXStatsVersion = 209, /* get version of extended statistics */
+ YFSCBGetXStats = 210, /* get contents of extended statistics data */
+ YFSCBInitCallBackState3 = 213, /* initialise callback state, version 3 */
+ YFSCBProbeUuid = 214, /* check the client hasn't rebooted */
+ YFSCBGetServerPrefs = 215,
+ YFSCBGetCellServDV = 216,
+ YFSCBGetLocalCell = 217,
+ YFSCBGetCacheConfig = 218,
+ YFSCBGetCellByNum = 65537,
+ YFSCBTellMeAboutYourself = 65538, /* get client capabilities */
+ YFSCBCallBack = 64204,
+};
+
+/* Operation IDs for the YFS fileserver service */
+enum YFS_FS_Operations {
+ YFSFETCHACL = 64131, /* YFS Fetch file AFS3 ACL */
+ YFSFETCHSTATUS = 64132, /* YFS Fetch file status */
+ YFSSTOREACL = 64134, /* YFS Store file AFS3 ACL */
+ YFSSTORESTATUS = 64135, /* YFS Store file status */
+ YFSREMOVEFILE = 64136, /* YFS Remove a file */
+ YFSCREATEFILE = 64137, /* YFS Create a file */
+ YFSRENAME = 64138, /* YFS Rename or move a file or directory */
+ YFSSYMLINK = 64139, /* YFS Create a symbolic link */
+ YFSLINK = 64140, /* YFS Create a hard link */
+ YFSMAKEDIR = 64141, /* YFS Create a directory */
+ YFSREMOVEDIR = 64142, /* YFS Remove a directory */
+ YFSGETVOLUMESTATUS = 64149, /* YFS Get volume status information */
+ YFSSETVOLUMESTATUS = 64150, /* YFS Set volume status information */
+ YFSSETLOCK = 64156, /* YFS Request a file lock */
+ YFSEXTENDLOCK = 64157, /* YFS Extend a file lock */
+ YFSRELEASELOCK = 64158, /* YFS Release a file lock */
+ YFSLOOKUP = 64161, /* YFS lookup file in directory */
+ YFSFLUSHCPS = 64165,
+ YFSFETCHOPAQUEACL = 64168, /* YFS Fetch file YFS ACL */
+ YFSWHOAMI = 64170,
+ YFSREMOVEACL = 64171,
+ YFSREMOVEFILE2 = 64173,
+ YFSSTOREOPAQUEACL2 = 64174,
+ YFSINLINEBULKSTATUS = 64536, /* YFS Fetch multiple file statuses with errors */
+ YFSFETCHDATA64 = 64537, /* YFS Fetch file data */
+ YFSSTOREDATA64 = 64538, /* YFS Store file data */
+ YFSUPDATESYMLINK = 64540,
+};
+
+/*
+ * A 64-bit value carried as two big-endian 32-bit words, most significant
+ * word first.
+ */
+struct yfs_xdr_u64 {
+ __be32 msw;
+ __be32 lsw;
+} __packed;
+
+/* Convert a two-word big-endian XDR value to a host-order u64. */
+static inline u64 xdr_to_u64(const struct yfs_xdr_u64 x)
+{
+	u64 hi = ntohl(x.msw);
+	u64 lo = ntohl(x.lsw);
+
+	return (hi << 32) | lo;
+}
+
+/* Convert a host-order u64 to its two-word big-endian XDR form. */
+static inline struct yfs_xdr_u64 u64_to_xdr(const u64 x)
+{
+	struct yfs_xdr_u64 out;
+
+	out.msw = htonl(x >> 32);
+	out.lsw = htonl(x);
+	return out;
+}
+
+/* Vnode identifier: a 64-bit low part, a 32-bit high part and a uniquifier */
+struct yfs_xdr_vnode {
+ struct yfs_xdr_u64 lo;
+ __be32 hi;
+ __be32 unique;
+} __packed;
+
+/* File identifier: a 64-bit volume ID plus the vnode identifier within it */
+struct yfs_xdr_YFSFid {
+ struct yfs_xdr_u64 volume;
+ struct yfs_xdr_vnode vnode;
+} __packed;
+
+
+/* Packed layout of a YFSFetchStatus record; all fields are big-endian */
+struct yfs_xdr_YFSFetchStatus {
+ __be32 type;
+ __be32 nlink;
+ struct yfs_xdr_u64 size;
+ struct yfs_xdr_u64 data_version;
+ struct yfs_xdr_u64 author;
+ struct yfs_xdr_u64 owner;
+ struct yfs_xdr_u64 group;
+ __be32 mode;
+ __be32 caller_access;
+ __be32 anon_access;
+ struct yfs_xdr_vnode parent;
+ __be32 data_access_protocol;
+ struct yfs_xdr_u64 mtime_client;
+ struct yfs_xdr_u64 mtime_server;
+ __be32 lock_count;
+ __be32 abort_code;
+} __packed;
+
+/* Packed layout of a YFSCallBack record */
+struct yfs_xdr_YFSCallBack {
+ __be32 version;
+ struct yfs_xdr_u64 expiration_time;
+ __be32 type;
+} __packed;
+
+/*
+ * Packed layout of a YFSStoreStatus record.
+ * NOTE(review): mask presumably selects which of the other fields are to be
+ * applied - confirm against the marshalling code.
+ */
+struct yfs_xdr_YFSStoreStatus {
+ __be32 mask;
+ __be32 mode;
+ struct yfs_xdr_u64 mtime_client;
+ struct yfs_xdr_u64 owner;
+ struct yfs_xdr_u64 group;
+} __packed;
+
+/* Packed layout of an RPCFlags word */
+struct yfs_xdr_RPCFlags {
+ __be32 rpc_flags;
+} __packed;
+
+/* Packed layout of a YFSVolSync record: volume dates and quota/usage figures */
+struct yfs_xdr_YFSVolSync {
+ struct yfs_xdr_u64 vol_creation_date;
+ struct yfs_xdr_u64 vol_update_date;
+ struct yfs_xdr_u64 max_quota;
+ struct yfs_xdr_u64 blocks_in_use;
+ struct yfs_xdr_u64 blocks_avail;
+} __packed;
+
+/* YFS volume type values: read-only (0) and read-write (1) */
+enum yfs_volume_type {
+ yfs_volume_type_ro = 0,
+ yfs_volume_type_rw = 1,
+};
+
+/*
+ * Single-bit volume state flags.
+ * NOTE(review): presumably the values of yfs_xdr_YFSFetchVolumeStatus::flags
+ * - confirm against users.
+ */
+#define yfs_FVSOnline 0x1
+#define yfs_FVSInservice 0x2
+#define yfs_FVSBlessed 0x4
+#define yfs_FVSNeedsSalvage 0x8
+
+/* Packed layout of a YFSFetchVolumeStatus record */
+struct yfs_xdr_YFSFetchVolumeStatus {
+ struct yfs_xdr_u64 vid;
+ struct yfs_xdr_u64 parent_id;
+ __be32 flags;
+ __be32 type;
+ struct yfs_xdr_u64 max_quota;
+ struct yfs_xdr_u64 blocks_in_use;
+ struct yfs_xdr_u64 part_blocks_avail;
+ struct yfs_xdr_u64 part_max_blocks;
+ struct yfs_xdr_u64 vol_copy_date;
+ struct yfs_xdr_u64 vol_backup_date;
+} __packed;
+
+/* Packed layout of a YFSStoreVolumeStatus record */
+struct yfs_xdr_YFSStoreVolumeStatus {
+ __be32 mask;
+ struct yfs_xdr_u64 min_quota;
+ struct yfs_xdr_u64 max_quota;
+ struct yfs_xdr_u64 file_quota;
+} __packed;
+
+/*
+ * YFS file lock types.  The mandatory variants are the corresponding basic
+ * values with 0x100 added.
+ */
+enum yfs_lock_type {
+ yfs_LockNone = -1,
+ yfs_LockRead = 0,
+ yfs_LockWrite = 1,
+ yfs_LockExtend = 2,
+ yfs_LockRelease = 3,
+ yfs_LockMandatoryRead = 0x100,
+ yfs_LockMandatoryWrite = 0x101,
+ yfs_LockMandatoryExtend = 0x102,
+};
+
+/* RXYFS Viced Capability Flags (single-bit flags) */
+#define YFS_VICED_CAPABILITY_ERRORTRANS 0x0001 /* Deprecated v0.195 */
+#define YFS_VICED_CAPABILITY_64BITFILES 0x0002 /* Deprecated v0.195 */
+#define YFS_VICED_CAPABILITY_WRITELOCKACL 0x0004 /* Can lock a file even without lock perm */
+#define YFS_VICED_CAPABILITY_SANEACLS 0x0008 /* Deprecated v0.195 */
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
new file mode 100644
index 000000000..a840c3588
--- /dev/null
+++ b/fs/afs/rotate.c
@@ -0,0 +1,518 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Handle fileserver selection and rotation.
+ *
+ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/sched/signal.h>
+#include "internal.h"
+#include "afs_fs.h"
+
+/*
+ * Prepare an operation for walking its volume's server list.
+ *
+ * A ref is taken on the volume's current server list and every server in it
+ * is marked as untried.  The starting index is the list's preferred server,
+ * unless the vnode already has a callback server that is still in the list,
+ * in which case that one is used.
+ *
+ * If the vnode's callback server has dropped out of the list, the callback
+ * promise is treated as broken - except when the operation is pinned to its
+ * current server (AFS_OPERATION_CUR_ONLY), where -ESTALE is set in op->error
+ * and false is returned.  Returns true otherwise.
+ */
+static bool afs_start_fs_iteration(struct afs_operation *op,
+				   struct afs_vnode *vnode)
+{
+	struct afs_server_list *slist;
+	void *cb_server;
+	int ix;
+
+	read_lock(&op->volume->servers_lock);
+	op->server_list = afs_get_serverlist(
+		rcu_dereference_protected(op->volume->servers,
+					  lockdep_is_held(&op->volume->servers_lock)));
+	read_unlock(&op->volume->servers_lock);
+
+	slist = op->server_list;
+	op->untried = (1UL << slist->nr_servers) - 1;
+	op->index = READ_ONCE(slist->preferred);
+
+	cb_server = vnode->cb_server;
+	if (!cb_server)
+		return true;
+
+	/* Prefer the server holding the vnode's callback if it still serves
+	 * this volume.
+	 */
+	for (ix = 0; ix < slist->nr_servers; ix++) {
+		if (slist->servers[ix].server == cb_server) {
+			op->index = ix;
+			return true;
+		}
+	}
+
+	/* An operation that must stay on its current server (e.g. because a
+	 * lock is outstanding there) cannot be moved elsewhere.
+	 */
+	if (op->flags & AFS_OPERATION_CUR_ONLY) {
+		op->error = -ESTALE;
+		return false;
+	}
+
+	/* The old server is gone, so the callback promise is as good as
+	 * broken.
+	 */
+	write_seqlock(&vnode->cb_lock);
+	ASSERTCMP(cb_server, ==, vnode->cb_server);
+	vnode->cb_server = NULL;
+	if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
+		vnode->cb_break++;
+	write_sequnlock(&vnode->cb_lock);
+	return true;
+}
+
+/*
+ * Log a notice describing why a volume is currently unavailable.  The abort
+ * code picks the wording; anything unrecognised is reported as "busy".
+ */
+static void afs_busy(struct afs_volume *volume, u32 abort_code)
+{
+	const char *state = "busy";
+
+	if (abort_code == VOFFLINE)
+		state = "offline";
+	else if (abort_code == VRESTARTING)
+		state = "restarting";
+	else if (abort_code == VSALVAGING)
+		state = "being salvaged";
+
+	pr_notice("kAFS: Volume %llu '%s' is %s\n", volume->vid, volume->name, state);
+}
+
+/*
+ * Pause for a second before the operation is retried on the same fileserver.
+ *
+ * An uninterruptible operation (AFS_OPERATION_UNINTR) always sleeps the full
+ * time and returns true.  Otherwise the sleep is interruptible and, if a
+ * signal is pending afterwards, op->error is set to -ERESTARTSYS and false is
+ * returned.
+ */
+static bool afs_sleep_and_retry(struct afs_operation *op)
+{
+	if (op->flags & AFS_OPERATION_UNINTR) {
+		msleep(1000);
+		return true;
+	}
+
+	msleep_interruptible(1000);
+	if (!signal_pending(current))
+		return true;
+
+	op->error = -ERESTARTSYS;
+	return false;
+}
+
+/*
+ * Select the fileserver to use.  May be called multiple times to rotate
+ * through the fileservers.
+ *
+ * Each call first evaluates the result that the previous attempt left in
+ * op->ac (an error of SHRT_MAX means there has been no attempt yet) and then
+ * decides whether to stop, to retry the same server, to move on to the next
+ * address of the current server, or to move on to another server.
+ *
+ * Returns true if the caller should issue the operation to op->server using
+ * the address selected in op->ac.  Returns false when iteration has stopped;
+ * AFS_OPERATION_STOP is then set and the outcome is in op->error.
+ */
+bool afs_select_fileserver(struct afs_operation *op)
+{
+	struct afs_addr_list *alist;
+	struct afs_server *server;
+	struct afs_vnode *vnode = op->file[0].vnode;
+	struct afs_error e;
+	u32 rtt;
+	int error = op->ac.error, i;
+
+	_enter("%lx[%d],%lx[%d],%d,%d",
+	       op->untried, op->index,
+	       op->ac.tried, op->ac.index,
+	       error, op->ac.abort_code);
+
+	if (op->flags & AFS_OPERATION_STOP) {
+		_leave(" = f [stopped]");
+		return false;
+	}
+
+	op->nr_iterations++;
+
+	/* Evaluate the result of the previous operation, if there was one. */
+	switch (error) {
+	case SHRT_MAX:
+		goto start;
+
+	case 0:
+	default:
+		/* Success or local failure.  Stop. */
+		op->error = error;
+		op->flags |= AFS_OPERATION_STOP;
+		_leave(" = f [okay/local %d]", error);
+		return false;
+
+	case -ECONNABORTED:
+		/* The far side rejected the operation on some grounds.  This
+		 * might involve the server being busy or the volume having been moved.
+		 */
+		switch (op->ac.abort_code) {
+		case VNOVOL:
+			/* This fileserver doesn't know about the volume.
+			 * - May indicate that the VL is wrong - retry once and compare
+			 *   the results.
+			 * - May indicate that the fileserver couldn't attach to the vol.
+			 */
+			if (op->flags & AFS_OPERATION_VNOVOL) {
+				op->error = -EREMOTEIO;
+				goto next_server;
+			}
+
+			write_lock(&op->volume->servers_lock);
+			op->server_list->vnovol_mask |= 1 << op->index;
+			write_unlock(&op->volume->servers_lock);
+
+			set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
+			error = afs_check_volume_status(op->volume, op);
+			if (error < 0)
+				goto failed_set_error;
+
+			if (test_bit(AFS_VOLUME_DELETED, &op->volume->flags)) {
+				op->error = -ENOMEDIUM;
+				goto failed;
+			}
+
+			/* If the server list didn't change, then assume that
+			 * it's the fileserver having trouble.
+			 */
+			if (rcu_access_pointer(op->volume->servers) == op->server_list) {
+				op->error = -EREMOTEIO;
+				goto next_server;
+			}
+
+			/* Try again */
+			op->flags |= AFS_OPERATION_VNOVOL;
+			_leave(" = t [vnovol]");
+			return true;
+
+		case VSALVAGE: /* TODO: Should this return an error or iterate? */
+		case VVOLEXISTS:
+		case VNOSERVICE:
+		case VONLINE:
+		case VDISKFULL:
+		case VOVERQUOTA:
+			op->error = afs_abort_to_error(op->ac.abort_code);
+			goto next_server;
+
+		case VOFFLINE:
+			if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &op->volume->flags)) {
+				afs_busy(op->volume, op->ac.abort_code);
+				clear_bit(AFS_VOLUME_BUSY, &op->volume->flags);
+			}
+			if (op->flags & AFS_OPERATION_NO_VSLEEP) {
+				op->error = -EADV;
+				goto failed;
+			}
+			if (op->flags & AFS_OPERATION_CUR_ONLY) {
+				op->error = -ESTALE;
+				goto failed;
+			}
+			goto busy;
+
+		case VSALVAGING:
+		case VRESTARTING:
+		case VBUSY:
+			/* Retry after going round all the servers unless we
+			 * have a file lock we need to maintain.
+			 */
+			if (op->flags & AFS_OPERATION_NO_VSLEEP) {
+				op->error = -EBUSY;
+				goto failed;
+			}
+			if (!test_and_set_bit(AFS_VOLUME_BUSY, &op->volume->flags)) {
+				afs_busy(op->volume, op->ac.abort_code);
+				clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags);
+			}
+		busy:
+			if (op->flags & AFS_OPERATION_CUR_ONLY) {
+				if (!afs_sleep_and_retry(op))
+					goto failed;
+
+				/* Retry with same server & address */
+				_leave(" = t [vbusy]");
+				return true;
+			}
+
+			op->flags |= AFS_OPERATION_VBUSY;
+			goto next_server;
+
+		case VMOVED:
+			/* The volume migrated to another server.  We
+			 * consider all locks and callbacks broken and request
+			 * an update from the VLDB.
+			 *
+			 * We also limit the number of VMOVED hops we will
+			 * honour, just in case someone sets up a loop.
+			 */
+			if (op->flags & AFS_OPERATION_VMOVED) {
+				op->error = -EREMOTEIO;
+				goto failed;
+			}
+			op->flags |= AFS_OPERATION_VMOVED;
+
+			set_bit(AFS_VOLUME_WAIT, &op->volume->flags);
+			set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
+			error = afs_check_volume_status(op->volume, op);
+			if (error < 0)
+				goto failed_set_error;
+
+			/* If the server list didn't change, then the VLDB is
+			 * out of sync with the fileservers.  This is hopefully
+			 * a temporary condition, however, so we don't want to
+			 * permanently block access to the file.
+			 *
+			 * TODO: Try other fileservers if we can.
+			 *
+			 * TODO: Retry a few times with sleeps.
+			 */
+			if (rcu_access_pointer(op->volume->servers) == op->server_list) {
+				op->error = -ENOMEDIUM;
+				goto failed;
+			}
+
+			goto restart_from_beginning;
+
+		default:
+			clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags);
+			clear_bit(AFS_VOLUME_BUSY, &op->volume->flags);
+			op->error = afs_abort_to_error(op->ac.abort_code);
+			goto failed;
+		}
+
+	case -ETIMEDOUT:
+	case -ETIME:
+		if (op->error != -EDESTADDRREQ)
+			goto iterate_address;
+		fallthrough;
+	case -ERFKILL:
+	case -EADDRNOTAVAIL:
+	case -ENETUNREACH:
+	case -EHOSTUNREACH:
+	case -EHOSTDOWN:
+	case -ECONNREFUSED:
+		_debug("no conn");
+		op->error = error;
+		goto iterate_address;
+
+	case -ENETRESET:
+		pr_warn("kAFS: Peer reset %s (op=%x)\n",
+			op->type ? op->type->name : "???", op->debug_id);
+		fallthrough;
+	case -ECONNRESET:
+		_debug("call reset");
+		op->error = error;
+		goto failed;
+	}
+
+restart_from_beginning:
+	_debug("restart");
+	afs_end_cursor(&op->ac);
+	op->server = NULL;
+	afs_put_serverlist(op->net, op->server_list);
+	op->server_list = NULL;
+start:
+	_debug("start");
+	/* See if we need to do an update of the volume record.  Note that the
+	 * volume may have moved or even have been deleted.
+	 */
+	error = afs_check_volume_status(op->volume, op);
+	if (error < 0)
+		goto failed_set_error;
+
+	if (!afs_start_fs_iteration(op, vnode))
+		goto failed;
+
+	_debug("__ VOL %llx __", op->volume->vid);
+
+pick_server:
+	_debug("pick [%lx]", op->untried);
+
+	error = afs_wait_for_fs_probes(op->server_list, op->untried);
+	if (error < 0)
+		goto failed_set_error;
+
+	/* Pick the untried server with the lowest RTT.  If we have outstanding
+	 * callbacks, we stick with the server we're already using if we can.
+	 */
+	if (op->server) {
+		_debug("server %u", op->index);
+		if (test_bit(op->index, &op->untried))
+			goto selected_server;
+		op->server = NULL;
+		_debug("no server");
+	}
+
+	op->index = -1;
+	rtt = U32_MAX;
+	for (i = 0; i < op->server_list->nr_servers; i++) {
+		struct afs_server *s = op->server_list->servers[i].server;
+
+		if (!test_bit(i, &op->untried) ||
+		    !test_bit(AFS_SERVER_FL_RESPONDING, &s->flags))
+			continue;
+		if (s->probe.rtt < rtt) {
+			op->index = i;
+			rtt = s->probe.rtt;
+		}
+	}
+
+	if (op->index == -1)
+		goto no_more_servers;
+
+selected_server:
+	_debug("use %d", op->index);
+	__clear_bit(op->index, &op->untried);
+
+	/* We're starting on a different fileserver from the list.  We need to
+	 * check it, create a callback intercept, find its address list and
+	 * probe its capabilities before we use it.
+	 */
+	ASSERTCMP(op->ac.alist, ==, NULL);
+	server = op->server_list->servers[op->index].server;
+
+	if (!afs_check_server_record(op, server))
+		goto failed;
+
+	_debug("USING SERVER: %pU", &server->uuid);
+
+	op->flags |= AFS_OPERATION_RETRY_SERVER;
+	op->server = server;
+	if (vnode->cb_server != server) {
+		vnode->cb_server = server;
+		vnode->cb_s_break = server->cb_s_break;
+		vnode->cb_fs_s_break = atomic_read(&server->cell->fs_s_break);
+		vnode->cb_v_break = vnode->volume->cb_v_break;
+		clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+	}
+
+	read_lock(&server->fs_lock);
+	alist = rcu_dereference_protected(server->addresses,
+					  lockdep_is_held(&server->fs_lock));
+	afs_get_addrlist(alist);
+	read_unlock(&server->fs_lock);
+
+retry_server:
+	/* Reset the address cursor and hand it the ref we hold on alist: either
+	 * the one just taken from server->addresses or, on the retry path, the
+	 * one carried over from the cursor being reset.  The cursor has just
+	 * been zeroed, so it cannot already hold a list.
+	 */
+	memset(&op->ac, 0, sizeof(op->ac));
+	op->ac.alist = alist;
+	op->ac.index = -1;
+
+iterate_address:
+	ASSERT(op->ac.alist);
+	/* Iterate over the current server's address list to try and find an
+	 * address on which it will respond to us.
+	 */
+	if (!afs_iterate_addresses(&op->ac))
+		goto out_of_addresses;
+
+	_debug("address [%u] %u/%u %pISp",
+	       op->index, op->ac.index, op->ac.alist->nr_addrs,
+	       &op->ac.alist->addrs[op->ac.index].transport);
+
+	_leave(" = t");
+	return true;
+
+out_of_addresses:
+	/* We've now had a failure to respond on all of a server's addresses -
+	 * immediately probe them again and consider retrying the server.
+	 */
+	afs_probe_fileserver(op->net, op->server);
+	if (op->flags & AFS_OPERATION_RETRY_SERVER) {
+		alist = op->ac.alist;
+		error = afs_wait_for_one_fs_probe(
+			op->server, !(op->flags & AFS_OPERATION_UNINTR));
+		switch (error) {
+		case 0:
+			op->flags &= ~AFS_OPERATION_RETRY_SERVER;
+			goto retry_server;
+		case -ERESTARTSYS:
+			goto failed_set_error;
+		case -ETIME:
+		case -EDESTADDRREQ:
+			goto next_server;
+		}
+	}
+
+next_server:
+	_debug("next");
+	afs_end_cursor(&op->ac);
+	goto pick_server;
+
+no_more_servers:
+	/* That's all the servers poked to no good effect.  Try again if some
+	 * of them were busy.
+	 */
+	if (op->flags & AFS_OPERATION_VBUSY)
+		goto restart_from_beginning;
+
+	e.error = -EDESTADDRREQ;
+	e.responded = false;
+	for (i = 0; i < op->server_list->nr_servers; i++) {
+		struct afs_server *s = op->server_list->servers[i].server;
+
+		afs_prioritise_error(&e, READ_ONCE(s->probe.error),
+				     s->probe.abort_code);
+	}
+
+	error = e.error;
+
+failed_set_error:
+	op->error = error;
+failed:
+	op->flags |= AFS_OPERATION_STOP;
+	afs_end_cursor(&op->ac);
+	_leave(" = f [failed %d]", op->error);
+	return false;
+}
+
+/*
+ * Dump the state of an operation's server and address cursors to the kernel
+ * log, for diagnosing an EDESTADDRREQ result.
+ *
+ * Does nothing unless CONFIG_AFS_DEBUG_CURSOR is enabled, and stops dumping
+ * once the static counter has passed 3.
+ */
+void afs_dump_edestaddrreq(const struct afs_operation *op)
+{
+	static int count;
+	const struct afs_server_list *sl;
+	int i;
+
+	if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR))
+		return;
+	if (count > 3)
+		return;
+	count++;
+
+	rcu_read_lock();
+
+	pr_notice("EDESTADDR occurred\n");
+	pr_notice("FC: cbb=%x cbb2=%x fl=%x err=%hd\n",
+		  op->file[0].cb_break_before,
+		  op->file[1].cb_break_before, op->flags, op->error);
+	pr_notice("FC: ut=%lx ix=%d ni=%u\n",
+		  op->untried, op->index, op->nr_iterations);
+
+	sl = op->server_list;
+	if (sl) {
+		pr_notice("FC: SL nr=%u pr=%u vnov=%hx\n",
+			  sl->nr_servers, sl->preferred, sl->vnovol_mask);
+
+		for (i = 0; i < sl->nr_servers; i++) {
+			const struct afs_server *s = sl->servers[i].server;
+			const struct afs_addr_list *a;
+
+			pr_notice("FC: server fl=%lx av=%u %pU\n",
+				  s->flags, s->addr_version, &s->uuid);
+
+			/* Servers without an address list have nothing
+			 * further to report.
+			 */
+			if (!s->addresses)
+				continue;
+
+			a = rcu_dereference(s->addresses);
+			pr_notice("FC: - av=%u nr=%u/%u/%u pr=%u\n",
+				  a->version,
+				  a->nr_ipv4, a->nr_addrs, a->max_addrs,
+				  a->preferred);
+			pr_notice("FC: - R=%lx F=%lx\n",
+				  a->responded, a->failed);
+			if (a == op->ac.alist)
+				pr_notice("FC: - current\n");
+		}
+	}
+
+	pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
+		  op->ac.tried, op->ac.index, op->ac.abort_code, op->ac.error,
+		  op->ac.responded, op->ac.nr_iterations);
+	rcu_read_unlock();
+}
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
new file mode 100644
index 000000000..37036db63
--- /dev/null
+++ b/fs/afs/rxrpc.c
@@ -0,0 +1,938 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Maintain an RxRPC server socket to do AFS communications through
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/slab.h>
+#include <linux/sched/signal.h>
+
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "internal.h"
+#include "afs_cm.h"
+#include "protocol_yfs.h"
+
+struct workqueue_struct *afs_async_calls; /* async call work items (call->async_work) are queued here */
+
+static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long);
+static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long);
+static void afs_process_async_call(struct work_struct *);
+static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long);
+static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long);
+static int afs_deliver_cm_op_id(struct afs_call *);
+
+/* asynchronous incoming call initial processing
+ *
+ * This is the call type given to preallocated incoming calls by
+ * afs_charge_preallocation().  Its deliver routine reads the operation ID
+ * and asks the cache manager to route the call, which replaces the call
+ * type on success.
+ */
+static const struct afs_call_type afs_RXCMxxxx = {
+ .name = "CB.xxxx",
+ .deliver = afs_deliver_cm_op_id,
+};
+
+/*
+ * open an RxRPC socket and bind it to be a server for callback notifications
+ * - the socket is left in blocking mode and non-blocking ops use MSG_DONTWAIT
+ *
+ * The socket is bound for CM_SERVICE and then again for YFS_CM_SERVICE, the
+ * new-call notification hooks are installed and listening is enabled.
+ *
+ * On success the socket is stored in net->socket, the incoming call
+ * preallocation is charged and 0 is returned.  On failure the socket is
+ * released if it had been created and the negative error is returned.
+ */
+int afs_open_socket(struct afs_net *net)
+{
+ struct sockaddr_rxrpc srx;
+ struct socket *socket;
+ int ret;
+
+ _enter("");
+
+ ret = sock_create_kern(net->net, AF_RXRPC, SOCK_DGRAM, PF_INET6, &socket);
+ if (ret < 0)
+ goto error_1;
+
+ socket->sk->sk_allocation = GFP_NOFS;
+
+ /* bind the callback manager's address to make this a server socket */
+ memset(&srx, 0, sizeof(srx));
+ srx.srx_family = AF_RXRPC;
+ srx.srx_service = CM_SERVICE;
+ srx.transport_type = SOCK_DGRAM;
+ srx.transport_len = sizeof(srx.transport.sin6);
+ srx.transport.sin6.sin6_family = AF_INET6;
+ srx.transport.sin6.sin6_port = htons(AFS_CM_PORT);
+
+ ret = rxrpc_sock_set_min_security_level(socket->sk,
+ RXRPC_SECURITY_ENCRYPT);
+ if (ret < 0)
+ goto error_2;
+
+ ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
+ if (ret == -EADDRINUSE) { /* AFS_CM_PORT is taken: retry the bind with the port set to 0 */
+ srx.transport.sin6.sin6_port = 0;
+ ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
+ }
+ if (ret < 0)
+ goto error_2;
+
+ srx.srx_service = YFS_CM_SERVICE; /* second bind: same address, YFS service ID */
+ ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
+ if (ret < 0)
+ goto error_2;
+
+ /* Ideally, we'd turn on service upgrade here, but we can't because
+ * OpenAFS is buggy and leaks the userStatus field from packet to
+ * packet and between FS packets and CB packets - so if we try to do an
+ * upgrade on an FS packet, OpenAFS will leak that into the CB packet
+ * it sends back to us.
+ */
+
+ rxrpc_kernel_new_call_notification(socket, afs_rx_new_call,
+ afs_rx_discard_new_call);
+
+ ret = kernel_listen(socket, INT_MAX);
+ if (ret < 0)
+ goto error_2;
+
+ net->socket = socket;
+ afs_charge_preallocation(&net->charge_preallocation_work);
+ _leave(" = 0");
+ return 0;
+
+error_2:
+ sock_release(socket);
+error_1:
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * close the RxRPC socket AFS was using
+ *
+ * The listen backlog is set to 0 and the async call workqueue flushed, the
+ * spare preallocated incoming call (if any) is dropped, and then we wait for
+ * net->nr_outstanding_calls to reach zero before shutting the socket down,
+ * flushing the workqueue once more and releasing the socket.
+ */
+void afs_close_socket(struct afs_net *net)
+{
+ _enter("");
+
+ kernel_listen(net->socket, 0);
+ flush_workqueue(afs_async_calls);
+
+ if (net->spare_incoming_call) {
+ afs_put_call(net->spare_incoming_call);
+ net->spare_incoming_call = NULL;
+ }
+
+ _debug("outstanding %u", atomic_read(&net->nr_outstanding_calls));
+ wait_var_event(&net->nr_outstanding_calls,
+ !atomic_read(&net->nr_outstanding_calls)); /* woken by afs_put_call() when the count hits zero */
+ _debug("no outstanding calls");
+
+ kernel_sock_shutdown(net->socket, SHUT_RDWR);
+ flush_workqueue(afs_async_calls);
+ sock_release(net->socket);
+
+ _debug("dework");
+ _leave("");
+}
+
+/*
+ * Allocate and initialise a call record of the given type.
+ *
+ * The new call starts with one reference, is given a fresh debug ID and is
+ * counted in net->nr_outstanding_calls.  Returns NULL if the allocation
+ * fails.
+ */
+static struct afs_call *afs_alloc_call(struct afs_net *net,
+				       const struct afs_call_type *type,
+				       gfp_t gfp)
+{
+	struct afs_call *call = kzalloc(sizeof(*call), gfp);
+	int outstanding;
+
+	if (!call)
+		return NULL;
+
+	call->net = net;
+	call->type = type;
+	call->iter = &call->def_iter;
+	call->debug_id = atomic_inc_return(&rxrpc_debug_id);
+	refcount_set(&call->ref, 1);
+	spin_lock_init(&call->state_lock);
+	init_waitqueue_head(&call->waitq);
+	INIT_WORK(&call->async_work, afs_process_async_call);
+
+	outstanding = atomic_inc_return(&net->nr_outstanding_calls);
+	trace_afs_call(call->debug_id, afs_call_trace_alloc, 1, outstanding,
+		       __builtin_return_address(0));
+	return call;
+}
+
+/*
+ * Drop a reference on a call.
+ *
+ * When the last reference goes, the rxrpc call (if still attached) is ended,
+ * the type's destructor is run, the server and address list are released and
+ * the call is freed.  The net's outstanding call count is then decremented
+ * and anyone waiting for it to reach zero is woken.
+ */
+void afs_put_call(struct afs_call *call)
+{
+	struct afs_net *net = call->net;
+	unsigned int debug_id = call->debug_id;
+	bool last;
+	int old_ref, outstanding;
+
+	last = __refcount_dec_and_test(&call->ref, &old_ref);
+	outstanding = atomic_read(&net->nr_outstanding_calls);
+	trace_afs_call(debug_id, afs_call_trace_put, old_ref - 1, outstanding,
+		       __builtin_return_address(0));
+	if (!last)
+		return;
+
+	ASSERT(!work_pending(&call->async_work));
+	ASSERT(call->type->name != NULL);
+
+	if (call->rxcall) {
+		rxrpc_kernel_end_call(net->socket, call->rxcall);
+		call->rxcall = NULL;
+	}
+	if (call->type->destructor)
+		call->type->destructor(call);
+
+	afs_unuse_server_notime(net, call->server, afs_server_trace_put_call);
+	afs_put_addrlist(call->alist);
+	kfree(call->request);
+
+	trace_afs_call(call->debug_id, afs_call_trace_free, 0, outstanding,
+		       __builtin_return_address(0));
+	kfree(call);
+
+	/* Only the local copy of net may be used from here on. */
+	if (atomic_dec_return(&net->nr_outstanding_calls) == 0)
+		wake_up_var(&net->nr_outstanding_calls);
+}
+
+/*
+ * Take an extra reference on a call, recording the reason in the trace.
+ * Returns the call for convenience.
+ */
+static struct afs_call *afs_get_call(struct afs_call *call,
+				     enum afs_call_trace why)
+{
+	int old_ref;
+
+	__refcount_inc(&call->ref, &old_ref);
+	trace_afs_call(call->debug_id, why, old_ref + 1,
+		       atomic_read(&call->net->nr_outstanding_calls),
+		       __builtin_return_address(0));
+	return call;
+}
+
+/*
+ * Queue the call type's work function, if it has one, on afs_wq.  The queued
+ * work item holds its own ref on the call; that ref is dropped again here if
+ * the item turned out to be queued already.
+ */
+static void afs_queue_call_work(struct afs_call *call)
+{
+	if (!call->type->work)
+		return;
+
+	INIT_WORK(&call->work, call->type->work);
+
+	afs_get_call(call, afs_call_trace_work);
+	if (queue_work(afs_wq, &call->work))
+		return;
+	afs_put_call(call);
+}
+
+/*
+ * allocate a call with flat request and reply buffers
+ *
+ * A request buffer of @request_size bytes and a reply buffer of @reply_max
+ * bytes are allocated with GFP_NOFS; either may be zero, in which case that
+ * buffer is not allocated.  The call is set up to extract the reply into the
+ * reply buffer and its operation ID is taken from @type.
+ *
+ * Returns the new call, or NULL if any allocation fails (a partially set up
+ * call is released through afs_put_call()).
+ */
+struct afs_call *afs_alloc_flat_call(struct afs_net *net,
+				     const struct afs_call_type *type,
+				     size_t request_size, size_t reply_max)
+{
+	struct afs_call *call;
+
+	call = afs_alloc_call(net, type, GFP_NOFS);
+	if (!call)
+		goto nomem_call;
+
+	if (request_size) {
+		call->request_size = request_size;
+		call->request = kmalloc(request_size, GFP_NOFS);
+		if (!call->request)
+			goto nomem_free;
+	}
+
+	if (reply_max) {
+		call->reply_max = reply_max;
+		call->buffer = kmalloc(reply_max, GFP_NOFS);
+		if (!call->buffer)
+			goto nomem_free;
+	}
+
+	afs_extract_to_buf(call, call->reply_max);
+	call->operation_ID = type->op;
+	/* call->waitq has already been initialised by afs_alloc_call(). */
+	return call;
+
+nomem_free:
+	afs_put_call(call);
+nomem_call:
+	return NULL;
+}
+
+/*
+ * Destructor for calls using flat buffers: free the reply and request
+ * buffers and clear the pointers so that they cannot be freed twice.
+ */
+void afs_flat_call_destructor(struct afs_call *call)
+{
+	_enter("");
+
+	kfree(call->buffer);
+	kfree(call->request);
+	call->buffer = NULL;
+	call->request = NULL;
+}
+
+/*
+ * Callback for when the RxRPC call finishes transmitting the request: move
+ * the AFS call from the requesting state to awaiting the reply.  The AFS
+ * call is recovered from the user call ID.
+ */
+static void afs_notify_end_request_tx(struct sock *sock,
+				      struct rxrpc_call *rxcall,
+				      unsigned long call_user_ID)
+{
+	afs_set_call_state((struct afs_call *)call_user_ID,
+			   AFS_CALL_CL_REQUESTING, AFS_CALL_CL_AWAIT_REPLY);
+}
+
+/*
+ * Initiate a call and synchronously queue up the parameters for dispatch. Any
+ * error is stored into the call struct, which the caller must check for.
+ *
+ * @ac selects the destination: the address at ac->index in ac->alist is used
+ * and a ref on that list is kept in call->alist.  @gfp is passed on to
+ * rxrpc_kernel_begin_call().  The fixed request buffer is sent first,
+ * followed by the contents of call->write_iter if that is set.
+ *
+ * On failure the error is recorded in both call->error and ac->error, the
+ * rxrpc call (if one was begun) is ended and the call is marked complete.
+ */
+void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
+{
+ struct sockaddr_rxrpc *srx = &ac->alist->addrs[ac->index];
+ struct rxrpc_call *rxcall;
+ struct msghdr msg;
+ struct kvec iov[1];
+ size_t len;
+ s64 tx_total_len;
+ int ret;
+
+ _enter(",{%pISp},", &srx->transport);
+
+ ASSERT(call->type != NULL);
+ ASSERT(call->type->name != NULL);
+
+ _debug("____MAKE %p{%s,%x} [%d]____",
+ call, call->type->name, key_serial(call->key),
+ atomic_read(&call->net->nr_outstanding_calls));
+
+ call->addr_ix = ac->index;
+ call->alist = afs_get_addrlist(ac->alist);
+
+ /* Work out the length we're going to transmit. This is awkward for
+ * calls such as FS.StoreData where there's an extra injection of data
+ * after the initial fixed part.
+ */
+ tx_total_len = call->request_size;
+ if (call->write_iter)
+ tx_total_len += iov_iter_count(call->write_iter);
+
+ /* If the call is going to be asynchronous, we need an extra ref for
+ * the call to hold itself so the caller need not hang on to its ref.
+ */
+ if (call->async) {
+ afs_get_call(call, afs_call_trace_get);
+ call->drop_ref = true;
+ }
+
+ /* create a call */
+ rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key,
+ (unsigned long)call,
+ tx_total_len, gfp,
+ (call->async ?
+ afs_wake_up_async_call :
+ afs_wake_up_call_waiter),
+ call->upgrade,
+ (call->intr ? RXRPC_PREINTERRUPTIBLE :
+ RXRPC_UNINTERRUPTIBLE),
+ call->debug_id);
+ if (IS_ERR(rxcall)) {
+ ret = PTR_ERR(rxcall);
+ call->error = ret;
+ goto error_kill_call;
+ }
+
+ call->rxcall = rxcall;
+
+ if (call->max_lifespan)
+ rxrpc_kernel_set_max_life(call->net->socket, rxcall,
+ call->max_lifespan);
+ call->issue_time = ktime_get_real();
+
+ /* send the request */
+ iov[0].iov_base = call->request;
+ iov[0].iov_len = call->request_size;
+
+ msg.msg_name = NULL;
+ msg.msg_namelen = 0;
+ iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, iov, 1, call->request_size);
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = MSG_WAITALL | (call->write_iter ? MSG_MORE : 0); /* MSG_MORE: extra data from write_iter follows */
+
+ ret = rxrpc_kernel_send_data(call->net->socket, rxcall,
+ &msg, call->request_size,
+ afs_notify_end_request_tx);
+ if (ret < 0)
+ goto error_do_abort;
+
+ if (call->write_iter) {
+ msg.msg_iter = *call->write_iter;
+ msg.msg_flags &= ~MSG_MORE;
+ trace_afs_send_data(call, &msg);
+
+ ret = rxrpc_kernel_send_data(call->net->socket,
+ call->rxcall, &msg,
+ iov_iter_count(&msg.msg_iter),
+ afs_notify_end_request_tx);
+ *call->write_iter = msg.msg_iter; /* copy the iterator's post-send state back to the caller's iterator */
+
+ trace_afs_sent_data(call, &msg, ret);
+ if (ret < 0)
+ goto error_do_abort;
+ }
+
+ /* Note that at this point, we may have received the reply or an abort
+ * - and an asynchronous call may already have completed.
+ *
+ * afs_wait_for_call_to_complete(call, ac)
+ * must be called to synchronously clean up.
+ */
+ return;
+
+error_do_abort:
+ if (ret != -ECONNABORTED) {
+ rxrpc_kernel_abort_call(call->net->socket, rxcall,
+ RX_USER_ABORT, ret, "KSD");
+ } else { /* aborted already: do a zero-length receive to pick up the abort code */
+ len = 0;
+ iov_iter_kvec(&msg.msg_iter, ITER_DEST, NULL, 0, 0);
+ rxrpc_kernel_recv_data(call->net->socket, rxcall,
+ &msg.msg_iter, &len, false,
+ &call->abort_code, &call->service_id);
+ ac->abort_code = call->abort_code;
+ ac->responded = true;
+ }
+ call->error = ret;
+ trace_afs_call_done(call);
+error_kill_call:
+ if (call->type->done)
+ call->type->done(call);
+
+ /* We need to dispose of the extra ref we grabbed for an async call.
+ * The call, however, might be queued on afs_async_calls and we need to
+ * make sure we don't get any more notifications that might requeue it.
+ */
+ if (call->rxcall) {
+ rxrpc_kernel_end_call(call->net->socket, call->rxcall);
+ call->rxcall = NULL;
+ }
+ if (call->async) {
+ if (cancel_work_sync(&call->async_work))
+ afs_put_call(call);
+ afs_set_call_complete(call, ret, 0);
+ }
+
+ ac->error = ret;
+ call->state = AFS_CALL_COMPLETE;
+ _leave(" = %d", ret);
+}
+
+/*
+ * Report a remote abort code that points at a protocol disagreement with the
+ * server.  Abort codes not in the list below are ignored, and no more than
+ * three reports are emitted in total (tracked by a static counter).
+ */
+static void afs_log_error(struct afs_call *call, s32 remote_abort)
+{
+	static int max = 0;
+	const char *msg;
+	int m;
+
+	switch (remote_abort) {
+	case RX_EOF:
+		msg = "unexpected EOF";
+		break;
+	case RXGEN_CC_MARSHAL:
+		msg = "client marshalling";
+		break;
+	case RXGEN_CC_UNMARSHAL:
+		msg = "client unmarshalling";
+		break;
+	case RXGEN_SS_MARSHAL:
+		msg = "server marshalling";
+		break;
+	case RXGEN_SS_UNMARSHAL:
+		msg = "server unmarshalling";
+		break;
+	case RXGEN_DECODE:
+		msg = "opcode decode";
+		break;
+	case RXGEN_SS_XDRFREE:
+		msg = "server XDR cleanup";
+		break;
+	case RXGEN_CC_XDRFREE:
+		msg = "client XDR cleanup";
+		break;
+	case -32:
+		msg = "insufficient data";
+		break;
+	default:
+		return;
+	}
+
+	m = max;
+	if (m >= 3)
+		return;
+
+	max = m + 1;
+	pr_notice("kAFS: Peer reported %s failure on %s [%pISp]\n",
+		  msg, call->type->name,
+		  &call->alist->addrs[call->addr_ix].transport);
+}
+
+/*
+ * deliver messages to a call
+ *
+ * Loops for as long as the call is in a state that awaits data (a client
+ * call awaiting its reply; a service call awaiting the op ID, the request or
+ * the final ACK).  In the awaiting-ACK state a zero-length receive is done
+ * to collect the outcome.  Otherwise the call type's deliver routine is run
+ * and its result acted upon:
+ *  - 0: queue any type-specific work; a client call that has reached
+ *    AFS_CALL_CL_PROC_REPLY is marked complete;
+ *  - -EINPROGRESS/-EAGAIN: return and wait for more data;
+ *  - -ECONNABORTED: the call is already complete; the abort code may be
+ *    logged;
+ *  - anything else: the rxrpc call is aborted locally with an abort code
+ *    chosen from the error and the call is completed with that error.
+ *
+ * The call type's done routine, if any, is invoked on every exit other than
+ * the wait-for-more-data ones.
+ */
+static void afs_deliver_to_call(struct afs_call *call)
+{
+ enum afs_call_state state;
+ size_t len;
+ u32 abort_code, remote_abort = 0;
+ int ret;
+
+ _enter("%s", call->type->name);
+
+ while (state = READ_ONCE(call->state),
+ state == AFS_CALL_CL_AWAIT_REPLY ||
+ state == AFS_CALL_SV_AWAIT_OP_ID ||
+ state == AFS_CALL_SV_AWAIT_REQUEST ||
+ state == AFS_CALL_SV_AWAIT_ACK
+ ) {
+ if (state == AFS_CALL_SV_AWAIT_ACK) {
+ len = 0;
+ iov_iter_kvec(&call->def_iter, ITER_DEST, NULL, 0, 0);
+ ret = rxrpc_kernel_recv_data(call->net->socket,
+ call->rxcall, &call->def_iter,
+ &len, false, &remote_abort,
+ &call->service_id);
+ trace_afs_receive_data(call, &call->def_iter, false, ret);
+
+ if (ret == -EINPROGRESS || ret == -EAGAIN)
+ return;
+ if (ret < 0 || ret == 1) {
+ if (ret == 1) /* a return of 1 is treated as success */
+ ret = 0;
+ goto call_complete;
+ }
+ return;
+ }
+
+ ret = call->type->deliver(call);
+ state = READ_ONCE(call->state); /* deliver may have advanced the state */
+ if (ret == 0 && call->unmarshalling_error)
+ ret = -EBADMSG;
+ switch (ret) {
+ case 0:
+ afs_queue_call_work(call);
+ if (state == AFS_CALL_CL_PROC_REPLY) {
+ if (call->op)
+ set_bit(AFS_SERVER_FL_MAY_HAVE_CB,
+ &call->op->server->flags);
+ goto call_complete;
+ }
+ ASSERTCMP(state, >, AFS_CALL_CL_PROC_REPLY);
+ goto done;
+ case -EINPROGRESS:
+ case -EAGAIN:
+ goto out;
+ case -ECONNABORTED:
+ ASSERTCMP(state, ==, AFS_CALL_COMPLETE);
+ afs_log_error(call, call->abort_code);
+ goto done;
+ case -ENOTSUPP:
+ abort_code = RXGEN_OPCODE;
+ rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
+ abort_code, ret, "KIV");
+ goto local_abort;
+ case -EIO:
+ pr_err("kAFS: Call %u in bad state %u\n",
+ call->debug_id, state);
+ fallthrough;
+ case -ENODATA:
+ case -EBADMSG:
+ case -EMSGSIZE:
+ case -ENOMEM:
+ case -EFAULT:
+ abort_code = RXGEN_CC_UNMARSHAL;
+ if (state != AFS_CALL_CL_AWAIT_REPLY) /* not a client call awaiting a reply: report as a server-side unmarshal failure */
+ abort_code = RXGEN_SS_UNMARSHAL;
+ rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
+ abort_code, ret, "KUM");
+ goto local_abort;
+ default:
+ abort_code = RX_CALL_DEAD;
+ rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
+ abort_code, ret, "KER");
+ goto local_abort;
+ }
+ }
+
+done:
+ if (call->type->done)
+ call->type->done(call);
+out:
+ _leave("");
+ return;
+
+local_abort:
+ abort_code = 0; /* NOTE(review): abort_code is not read after this point in this function - looks like a dead store */
+call_complete:
+ afs_set_call_complete(call, ret, remote_abort);
+ state = AFS_CALL_COMPLETE; /* NOTE(review): state is likewise not read again before returning */
+ goto done;
+}
+
+/*
+ * Wait synchronously for a call to complete and clean up the call struct.
+ *
+ * If call->error is already negative on entry, the call is simply put and
+ * that error returned without touching @ac.  Otherwise this sleeps
+ * uninterruptibly on call->waitq, delivering received data whenever
+ * need_attention is set, until the call reaches AFS_CALL_COMPLETE or rxrpc
+ * reports that the call is no longer alive.
+ *
+ * The call's abort code and error are then copied into @ac, ac->responded
+ * is set if the result was success or a remote abort, and the caller's ref
+ * on the call is dropped.
+ *
+ * Returns call->ret0 if the call succeeded, otherwise the negative error.
+ */
+long afs_wait_for_call_to_complete(struct afs_call *call,
+ struct afs_addr_cursor *ac)
+{
+ long ret;
+ bool rxrpc_complete = false;
+
+ DECLARE_WAITQUEUE(myself, current);
+
+ _enter("");
+
+ ret = call->error;
+ if (ret < 0)
+ goto out;
+
+ add_wait_queue(&call->waitq, &myself);
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE); /* set before the checks so a wakeup in between is not lost */
+
+ /* deliver any messages that are in the queue */
+ if (!afs_check_call_state(call, AFS_CALL_COMPLETE) &&
+ call->need_attention) {
+ call->need_attention = false;
+ __set_current_state(TASK_RUNNING);
+ afs_deliver_to_call(call);
+ continue;
+ }
+
+ if (afs_check_call_state(call, AFS_CALL_COMPLETE))
+ break;
+
+ if (!rxrpc_kernel_check_life(call->net->socket, call->rxcall)) {
+ /* rxrpc terminated the call. */
+ rxrpc_complete = true;
+ break;
+ }
+
+ schedule();
+ }
+
+ remove_wait_queue(&call->waitq, &myself);
+ __set_current_state(TASK_RUNNING);
+
+ if (!afs_check_call_state(call, AFS_CALL_COMPLETE)) {
+ if (rxrpc_complete) {
+ afs_set_call_complete(call, call->error, call->abort_code);
+ } else { /* NOTE(review): the loop above only exits on completion or with rxrpc_complete set, so this arm looks unreachable unless the state check can flip back - confirm */
+ /* Kill off the call if it's still live. */
+ _debug("call interrupted");
+ if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
+ RX_USER_ABORT, -EINTR, "KWI"))
+ afs_set_call_complete(call, -EINTR, 0);
+ }
+ }
+
+ spin_lock_bh(&call->state_lock);
+ ac->abort_code = call->abort_code;
+ ac->error = call->error;
+ spin_unlock_bh(&call->state_lock);
+
+ ret = ac->error;
+ switch (ret) {
+ case 0:
+ ret = call->ret0;
+ call->ret0 = 0;
+
+ fallthrough;
+ case -ECONNABORTED:
+ ac->responded = true;
+ break;
+ }
+
+out:
+ _debug("call complete");
+ afs_put_call(call);
+ _leave(" = %p", (void *)ret);
+ return ret;
+}
+
+/*
+ * Notification for a synchronous call: flag that the call has something to
+ * process and wake whoever is sleeping on its wait queue.  The AFS call is
+ * recovered from the user call ID.
+ */
+static void afs_wake_up_call_waiter(struct sock *sk, struct rxrpc_call *rxcall,
+				    unsigned long call_user_ID)
+{
+	struct afs_call *waiting = (struct afs_call *)call_user_ID;
+
+	waiting->need_attention = true;
+	wake_up(&waiting->waitq);
+}
+
+/*
+ * Notification for an asynchronous call: flag that the call has something to
+ * process and queue its async work item on afs_async_calls.
+ *
+ * The work item needs a ref of its own, so nothing is queued if the call's
+ * refcount has already reached zero, and the ref is dropped again if the
+ * item was already queued.
+ */
+static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
+				   unsigned long call_user_ID)
+{
+	struct afs_call *call = (struct afs_call *)call_user_ID;
+	int old_ref;
+
+	trace_afs_notify_call(rxcall, call);
+	call->need_attention = true;
+
+	if (!__refcount_inc_not_zero(&call->ref, &old_ref))
+		return;
+
+	trace_afs_call(call->debug_id, afs_call_trace_wake, old_ref + 1,
+		       atomic_read(&call->net->nr_outstanding_calls),
+		       __builtin_return_address(0));
+
+	if (queue_work(afs_async_calls, &call->async_work))
+		return;
+	afs_put_call(call);
+}
+
+/*
+ * Work function for an asynchronous call.  If the call has not completed and
+ * has been flagged as needing attention, deliver whatever has arrived.  The
+ * ref that the work item carried is always dropped before returning.
+ */
+static void afs_process_async_call(struct work_struct *work)
+{
+	struct afs_call *call = container_of(work, struct afs_call, async_work);
+	bool live;
+
+	_enter("");
+
+	live = call->state < AFS_CALL_COMPLETE;
+	if (live && call->need_attention) {
+		call->need_attention = false;
+		afs_deliver_to_call(call);
+	}
+
+	afs_put_call(call);
+	_leave("");
+}
+
+/*
+ * Record the rxrpc call on the AFS call identified by the user call ID.
+ * Passed to rxrpc_kernel_charge_accept() when preallocating incoming calls.
+ */
+static void afs_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID)
+{
+	((struct afs_call *)user_call_ID)->rxcall = rxcall;
+}
+
+/*
+ * Charge the incoming call preallocation.
+ *
+ * Starting with the spare call left over from last time (if any), calls of
+ * type afs_RXCMxxxx are allocated and handed to rxrpc until either an
+ * allocation or rxrpc_kernel_charge_accept() fails.  A call that was
+ * allocated but not accepted is kept in net->spare_incoming_call for the
+ * next run.
+ */
+void afs_charge_preallocation(struct work_struct *work)
+{
+	struct afs_net *net =
+		container_of(work, struct afs_net, charge_preallocation_work);
+	struct afs_call *call = net->spare_incoming_call;
+
+	for (;;) {
+		if (!call) {
+			call = afs_alloc_call(net, &afs_RXCMxxxx, GFP_KERNEL);
+			if (!call)
+				break;
+
+			/* call->waitq has already been initialised by
+			 * afs_alloc_call().
+			 */
+			call->drop_ref = true;
+			call->async = true;
+			call->state = AFS_CALL_SV_AWAIT_OP_ID;
+			afs_extract_to_tmp(call);
+		}
+
+		if (rxrpc_kernel_charge_accept(net->socket,
+					       afs_wake_up_async_call,
+					       afs_rx_attach,
+					       (unsigned long)call,
+					       GFP_KERNEL,
+					       call->debug_id) < 0)
+			break;
+		call = NULL;
+	}
+	net->spare_incoming_call = call;
+}
+
+/*
+ * Throw away a preallocated call when the socket is being shut down: detach
+ * it from the rxrpc call and drop a ref on it.
+ */
+static void afs_rx_discard_new_call(struct rxrpc_call *rxcall,
+				    unsigned long user_call_ID)
+{
+	struct afs_call *unused = (struct afs_call *)user_call_ID;
+
+	unused->rxcall = NULL;
+	afs_put_call(unused);
+}
+
+/*
+ * An incoming call has arrived: kick the work item that recharges the
+ * preallocation.
+ */
+static void afs_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall,
+			    unsigned long user_call_ID)
+{
+	queue_work(afs_wq, &afs_sock2net(sk)->charge_preallocation_work);
+}
+
+/*
+ * Pull the operation ID out of an incoming cache manager call and hand the
+ * rest of the request over to the handler for that operation.
+ *
+ * Returns a negative error from afs_extract_data(), -ENOTSUPP if the cache
+ * manager does not accept the call, or otherwise whatever the selected
+ * operation's deliver routine returns.
+ */
+static int afs_deliver_cm_op_id(struct afs_call *call)
+{
+	int rc;
+
+	_enter("{%zu}", iov_iter_count(call->iter));
+
+	/* The operation ID occupies the first four bytes of the request. */
+	rc = afs_extract_data(call, true);
+	if (rc < 0)
+		return rc;
+
+	call->operation_ID = ntohl(call->tmp);
+	afs_set_call_state(call, AFS_CALL_SV_AWAIT_OP_ID, AFS_CALL_SV_AWAIT_REQUEST);
+
+	/* Let the cache manager route the call; it changes the call type if
+	 * it recognises the operation.
+	 */
+	if (!afs_cm_incoming_call(call))
+		return -ENOTSUPP;
+
+	trace_afs_cb_call(call);
+
+	/* The remainder of the message belongs to the cache manager op. */
+	return call->type->deliver(call);
+}
+
+/*
+ * Called when an RxRPC service call finishes its transmit phase: move the
+ * AFS call from the replying state to awaiting the final ACK.
+ */
+static void afs_notify_end_reply_tx(struct sock *sock,
+				    struct rxrpc_call *rxcall,
+				    unsigned long call_user_ID)
+{
+	struct afs_call *replier = (struct afs_call *)call_user_ID;
+
+	afs_set_call_state(replier, AFS_CALL_SV_REPLYING, AFS_CALL_SV_AWAIT_ACK);
+}
+
+/*
+ * Send a reply that carries no data.  If the send fails with -ENOMEM the call
+ * is aborted; any other failure is just logged through the debug trace.
+ */
+void afs_send_empty_reply(struct afs_call *call)
+{
+	struct afs_net *net = call->net;
+	struct msghdr msg;
+	int ret;
+
+	_enter("");
+
+	rxrpc_kernel_set_tx_length(net->socket, call->rxcall, 0);
+
+	msg.msg_name		= NULL;
+	msg.msg_namelen		= 0;
+	iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, NULL, 0, 0);
+	msg.msg_control		= NULL;
+	msg.msg_controllen	= 0;
+	msg.msg_flags		= 0;
+
+	ret = rxrpc_kernel_send_data(net->socket, call->rxcall, &msg, 0,
+				     afs_notify_end_reply_tx);
+	if (ret == 0) {
+		_leave(" [replied]");
+		return;
+	}
+
+	if (ret == -ENOMEM) {
+		_debug("oom");
+		rxrpc_kernel_abort_call(net->socket, call->rxcall,
+					RXGEN_SS_MARSHAL, -ENOMEM, "KOO");
+	}
+	_leave(" [error]");
+}
+
+/*
+ * Send a reply made up of a single contiguous buffer of @len bytes at @buf.
+ * If the send fails with -ENOMEM the call is aborted; any other failure is
+ * just logged through the debug trace.
+ */
+void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
+{
+	struct afs_net *net = call->net;
+	struct msghdr msg;
+	struct kvec vec;
+	int ret;
+
+	_enter("");
+
+	rxrpc_kernel_set_tx_length(net->socket, call->rxcall, len);
+
+	vec.iov_base		= (void *) buf;
+	vec.iov_len		= len;
+	msg.msg_name		= NULL;
+	msg.msg_namelen		= 0;
+	iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &vec, 1, len);
+	msg.msg_control		= NULL;
+	msg.msg_controllen	= 0;
+	msg.msg_flags		= 0;
+
+	ret = rxrpc_kernel_send_data(net->socket, call->rxcall, &msg, len,
+				     afs_notify_end_reply_tx);
+	if (ret < 0) {
+		if (ret == -ENOMEM) {
+			_debug("oom");
+			rxrpc_kernel_abort_call(net->socket, call->rxcall,
+						RXGEN_SS_MARSHAL, -ENOMEM, "KOO");
+		}
+		_leave(" [error]");
+		return;
+	}
+
+	/* Success */
+	_leave(" [replied]");
+}
+
+/*
+ * Pull the next piece of data for a call out of rxrpc into call->iter.
+ *
+ * Returns 0 or -EAGAIN straight from rxrpc_kernel_recv_data().  When rxrpc
+ * returns 1, the call state is advanced where appropriate and 0 is returned,
+ * except that a call found already complete yields the result of
+ * afs_io_error().  Any other result marks the call complete with that error
+ * and is passed back to the caller.
+ */
+int afs_extract_data(struct afs_call *call, bool want_more)
+{
+	struct afs_net *net = call->net;
+	struct iov_iter *iter = call->iter;
+	enum afs_call_state state;
+	u32 remote_abort = 0;
+	int ret;
+
+	_enter("{%s,%zu,%zu},%d",
+	       call->type->name, call->iov_len, iov_iter_count(iter), want_more);
+
+	ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, iter,
+				     &call->iov_len, want_more, &remote_abort,
+				     &call->service_id);
+	if (ret == 0 || ret == -EAGAIN)
+		return ret;
+
+	state = READ_ONCE(call->state);
+	if (ret != 1) {
+		afs_set_call_complete(call, ret, remote_abort);
+		return ret;
+	}
+
+	if (state == AFS_CALL_CL_AWAIT_REPLY) {
+		afs_set_call_state(call, state, AFS_CALL_CL_PROC_REPLY);
+	} else if (state == AFS_CALL_SV_AWAIT_REQUEST) {
+		afs_set_call_state(call, state, AFS_CALL_SV_REPLYING);
+	} else if (state == AFS_CALL_COMPLETE) {
+		kdebug("prem complete %d", call->error);
+		return afs_io_error(call, afs_io_error_extract);
+	}
+
+	return 0;
+}
+
+/*
+ * Note that a protocol error was produced: emit the tracepoint, mark the
+ * call (if one was given) as having hit an unmarshalling error, and return
+ * -EBADMSG for the caller to pass on.
+ */
+noinline int afs_protocol_error(struct afs_call *call,
+				enum afs_eproto_cause cause)
+{
+	trace_afs_protocol_error(call, cause);
+
+	if (call != NULL)
+		call->unmarshalling_error = true;
+
+	return -EBADMSG;
+}
diff --git a/fs/afs/security.c b/fs/afs/security.c
new file mode 100644
index 000000000..7c6a63a30
--- /dev/null
+++ b/fs/afs/security.c
@@ -0,0 +1,487 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AFS security handling
+ *
+ * Copyright (C) 2007, 2017 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/ctype.h>
+#include <linux/sched.h>
+#include <linux/hashtable.h>
+#include <keys/rxrpc-type.h>
+#include "internal.h"
+
+static DEFINE_HASHTABLE(afs_permits_cache, 10);
+static DEFINE_SPINLOCK(afs_permits_lock);
+
+/*
+ * Get a key to use with a cell.  The rxrpc key named after the cell's
+ * anonymous key description is requested; if there is no such key (-ENOKEY)
+ * a ref on the cell's anonymous key is returned instead.  Any other error is
+ * passed back as an error pointer.
+ */
+struct key *afs_request_key(struct afs_cell *cell)
+{
+	struct key *key;
+
+	_enter("{%x}", key_serial(cell->anonymous_key));
+
+	_debug("key %s", cell->anonymous_key->description);
+	key = request_key_net(&key_type_rxrpc, cell->anonymous_key->description,
+			      cell->net->net, NULL);
+	if (!IS_ERR(key)) {
+		/* act as authorised user */
+		_leave(" = {%x} [auth]", key_serial(key));
+		return key;
+	}
+
+	if (PTR_ERR(key) == -ENOKEY) {
+		/* act as anonymous user */
+		_leave(" = {%x} [anon]", key_serial(cell->anonymous_key));
+		return key_get(cell->anonymous_key);
+	}
+
+	_leave(" = %ld", PTR_ERR(key));
+	return key;
+}
+
+/*
+ * Variant of afs_request_key() for use when pathwalk is in rcuwalk mode; the
+ * lookup goes through request_key_net_rcu().  The fallback to the cell's
+ * anonymous key on -ENOKEY is the same.
+ */
+struct key *afs_request_key_rcu(struct afs_cell *cell)
+{
+	struct key *key;
+
+	_enter("{%x}", key_serial(cell->anonymous_key));
+
+	_debug("key %s", cell->anonymous_key->description);
+	key = request_key_net_rcu(&key_type_rxrpc,
+				  cell->anonymous_key->description,
+				  cell->net->net);
+	if (!IS_ERR(key)) {
+		/* act as authorised user */
+		_leave(" = {%x} [auth]", key_serial(key));
+		return key;
+	}
+
+	if (PTR_ERR(key) == -ENOKEY) {
+		/* act as anonymous user */
+		_leave(" = {%x} [anon]", key_serial(cell->anonymous_key));
+		return key_get(cell->anonymous_key);
+	}
+
+	_leave(" = %ld", PTR_ERR(key));
+	return key;
+}
+
+/*
+ * RCU callback that finally disposes of a permits list: drop the ref held on
+ * each key in it and free the list.
+ */
+static void afs_permits_rcu(struct rcu_head *rcu)
+{
+	struct afs_permits *dead = container_of(rcu, struct afs_permits, rcu);
+	int n;
+
+	for (n = 0; n < dead->nr_permits; n++)
+		key_put(dead->permits[n].key);
+
+	kfree(dead);
+}
+
+/*
+ * Drop a ref on a permits list (NULL is tolerated).  When the last ref goes,
+ * the list is unhashed under afs_permits_lock and queued for freeing via RCU.
+ */
+void afs_put_permits(struct afs_permits *permits)
+{
+	if (!permits || !refcount_dec_and_test(&permits->usage))
+		return;
+
+	spin_lock(&afs_permits_lock);
+	hash_del_rcu(&permits->hash_node);
+	spin_unlock(&afs_permits_lock);
+
+	call_rcu(&permits->rcu, afs_permits_rcu);
+}
+
+/*
+ * Detach a vnode's permit cache on callback break.  The pointer is cleared
+ * under vnode->lock and the ref the vnode held is dropped afterwards.
+ */
+void afs_clear_permits(struct afs_vnode *vnode)
+{
+	struct afs_permits *old;
+
+	spin_lock(&vnode->lock);
+	old = rcu_dereference_protected(vnode->permit_cache,
+					lockdep_is_held(&vnode->lock));
+	RCU_INIT_POINTER(vnode->permit_cache, NULL);
+	spin_unlock(&vnode->lock);
+
+	afs_put_permits(old);
+}
+
+/*
+ * Compute the hash of a permits list and store it in permits->h.  The hash
+ * is a plain sum (seeded with the entry count) of each key pointer scaled
+ * down by the pointer size plus each access mask, so that it does not depend
+ * on where in the list an extra entry ends up.
+ */
+static void afs_hash_permits(struct afs_permits *permits)
+{
+	unsigned long sum = permits->nr_permits;
+	int n;
+
+	for (n = 0; n < permits->nr_permits; n++) {
+		const struct afs_permit *p = &permits->permits[n];
+
+		sum += (unsigned long)p->key / sizeof(void *);
+		sum += p->access;
+	}
+
+	permits->h = sum;
+}
+
+/*
+ * Cache the CallerAccess result obtained from doing a fileserver operation
+ * that returned a vnode status for a particular key. If a callback break
+ * occurs whilst the operation was in progress then we have to ditch the cache
+ * as the ACL *may* have changed.
+ *
+ * Outline of what happens here:
+ *
+ * (1) Under the RCU read lock, the vnode's current permits list is scanned
+ *     for @key (the scan relies on the entries being in ascending order of
+ *     key pointer). If an entry with the same access is found and no
+ *     callback break has happened, there is nothing to do.
+ *
+ * (2) If the entry disagrees, a callback break happened or the list is marked
+ *     invalidated, vnode->permit_cache is cleared under vnode->lock and the
+ *     ref on the old list is dropped.
+ *
+ * (3) A new list is built containing the surviving entries plus @key, with
+ *     @key inserted at its sorted position, and then hashed.
+ *
+ * (4) Under afs_permits_lock, afs_permits_cache is searched for an identical
+ *     list that can be shared; failing that, refs are taken on the keys in
+ *     the new list and it is added to the hash.
+ *
+ * (5) Under vnode->lock, the resulting list is installed in
+ *     vnode->permit_cache provided no callback break occurred and the cache
+ *     pointer is still what we started from; otherwise the result is
+ *     discarded.
+ */
+void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
+ unsigned int cb_break, struct afs_status_cb *scb)
+{
+ struct afs_permits *permits, *xpermits, *replacement, *zap, *new = NULL;
+ afs_access_t caller_access = scb->status.caller_access;
+ size_t size = 0;
+ bool changed = false;
+ int i, j;
+
+ _enter("{%llx:%llu},%x,%x",
+ vnode->fid.vid, vnode->fid.vnode, key_serial(key), caller_access);
+
+ rcu_read_lock();
+
+ /* Check for the common case first: We got back the same access as last
+ * time we tried and already have it recorded.
+ */
+ permits = rcu_dereference(vnode->permit_cache);
+ if (permits) {
+ if (!permits->invalidated) {
+ for (i = 0; i < permits->nr_permits; i++) {
+ if (permits->permits[i].key < key)
+ continue;
+ if (permits->permits[i].key > key)
+ break;
+ if (permits->permits[i].access != caller_access) {
+ changed = true;
+ break;
+ }
+
+ if (afs_cb_is_broken(cb_break, vnode)) {
+ changed = true;
+ break;
+ }
+
+ /* The cache is still good. */
+ rcu_read_unlock();
+ return;
+ }
+ }
+
+ changed |= permits->invalidated;
+ size = permits->nr_permits;
+
+ /* If this set of permits is now wrong, clear the permits
+ * pointer so that no one tries to use the stale information.
+ */
+ if (changed) {
+ spin_lock(&vnode->lock);
+ if (permits != rcu_access_pointer(vnode->permit_cache))
+ goto someone_else_changed_it_unlock;
+ RCU_INIT_POINTER(vnode->permit_cache, NULL);
+ spin_unlock(&vnode->lock);
+
+ afs_put_permits(permits);
+ permits = NULL;
+ size = 0;
+ }
+ }
+
+ if (afs_cb_is_broken(cb_break, vnode))
+ goto someone_else_changed_it;
+
+ /* We need a ref on any permits list we want to copy as we'll have to
+ * drop the lock to do memory allocation.
+ */
+ if (permits && !refcount_inc_not_zero(&permits->usage))
+ goto someone_else_changed_it;
+
+ rcu_read_unlock();
+
+ /* Speculatively create a new list with the revised permission set. We
+ * discard this if we find an extant match already in the hash, but
+ * it's easier to compare with memcmp this way.
+ *
+ * We fill in the key pointers at this time, but we don't get the refs
+ * yet.
+ */
+ size++;
+ new = kzalloc(struct_size(new, permits, size), GFP_NOFS);
+ if (!new)
+ goto out_put;
+
+ refcount_set(&new->usage, 1);
+ new->nr_permits = size;
+ i = j = 0;
+ if (permits) {
+ for (i = 0; i < permits->nr_permits; i++) {
+ if (j == i && permits->permits[i].key > key) {
+ new->permits[j].key = key;
+ new->permits[j].access = caller_access;
+ j++;
+ }
+ new->permits[j].key = permits->permits[i].key;
+ new->permits[j].access = permits->permits[i].access;
+ j++;
+ }
+ }
+
+ if (j == i) {
+ new->permits[j].key = key;
+ new->permits[j].access = caller_access;
+ }
+
+ afs_hash_permits(new);
+
+ /* Now see if the permit list we want is actually already available */
+ spin_lock(&afs_permits_lock);
+
+ hash_for_each_possible(afs_permits_cache, xpermits, hash_node, new->h) {
+ if (xpermits->h != new->h ||
+ xpermits->invalidated ||
+ xpermits->nr_permits != new->nr_permits ||
+ memcmp(xpermits->permits, new->permits,
+ new->nr_permits * sizeof(struct afs_permit)) != 0)
+ continue;
+
+ if (refcount_inc_not_zero(&xpermits->usage)) {
+ replacement = xpermits;
+ goto found;
+ }
+
+ break;
+ }
+
+ for (i = 0; i < new->nr_permits; i++)
+ key_get(new->permits[i].key);
+ hash_add_rcu(afs_permits_cache, &new->hash_node, new->h);
+ replacement = new;
+ new = NULL;
+
+found:
+ spin_unlock(&afs_permits_lock);
+
+ kfree(new);
+
+ rcu_read_lock();
+ spin_lock(&vnode->lock);
+ zap = rcu_access_pointer(vnode->permit_cache);
+ if (!afs_cb_is_broken(cb_break, vnode) && zap == permits)
+ rcu_assign_pointer(vnode->permit_cache, replacement);
+ else
+ zap = replacement;
+ spin_unlock(&vnode->lock);
+ rcu_read_unlock();
+ afs_put_permits(zap);
+out_put:
+ afs_put_permits(permits);
+ return;
+
+someone_else_changed_it_unlock:
+ spin_unlock(&vnode->lock);
+someone_else_changed_it:
+ /* Someone else changed the cache under us - don't recheck at this
+ * time.
+ */
+ rcu_read_unlock();
+ return;
+}
+
+/*
+ * Look up the access granted to a key on a vnode using only cached
+ * information.  The anonymous key is answered from the vnode status;
+ * otherwise the vnode's permits list is searched.
+ *
+ * Returns true with *_access filled in if a usable answer was found.  If the
+ * key is found in a list marked invalidated, *_access is still written but
+ * false is returned.
+ */
+static bool afs_check_permit_rcu(struct afs_vnode *vnode, struct key *key,
+				 afs_access_t *_access)
+{
+	const struct afs_permits *permits;
+	int n;
+
+	_enter("{%llx:%llu},%x",
+	       vnode->fid.vid, vnode->fid.vnode, key_serial(key));
+
+	/* check the permits to see if we've got one yet */
+	if (key == vnode->volume->cell->anonymous_key) {
+		*_access = vnode->status.anon_access;
+		_leave(" = t [anon %x]", *_access);
+		return true;
+	}
+
+	permits = rcu_dereference(vnode->permit_cache);
+	if (!permits)
+		goto not_found;
+
+	for (n = 0; n < permits->nr_permits; n++) {
+		const struct afs_permit *p = &permits->permits[n];
+
+		/* Entries are ordered by key pointer; stop once past ours. */
+		if (p->key > key)
+			break;
+		if (p->key == key) {
+			*_access = p->access;
+			_leave(" = %u [perm %x]", !permits->invalidated, *_access);
+			return !permits->invalidated;
+		}
+	}
+
+not_found:
+	_leave(" = f");
+	return false;
+}
+
+/*
+ * Work out what access a key is granted on a vnode.  The cached permits are
+ * consulted first (the anonymous key is answered from the vnode status); if
+ * no valid cached answer exists, the status is fetched from the fileserver,
+ * which fills in *_access.
+ *
+ * Returns 0 on success, or the error from afs_fetch_status(), in which case
+ * *_access is zeroed.
+ */
+int afs_check_permit(struct afs_vnode *vnode, struct key *key,
+		     afs_access_t *_access)
+{
+	struct afs_permits *permits;
+	bool valid = false;
+	int n, ret;
+
+	_enter("{%llx:%llu},%x",
+	       vnode->fid.vid, vnode->fid.vnode, key_serial(key));
+
+	/* check the permits to see if we've got one yet */
+	if (key != vnode->volume->cell->anonymous_key) {
+		rcu_read_lock();
+		permits = rcu_dereference(vnode->permit_cache);
+		for (n = 0; permits && n < permits->nr_permits; n++) {
+			if (permits->permits[n].key < key)
+				continue;
+			if (permits->permits[n].key == key) {
+				*_access = permits->permits[n].access;
+				valid = !permits->invalidated;
+			}
+			break;
+		}
+		rcu_read_unlock();
+	} else {
+		_debug("anon");
+		*_access = vnode->status.anon_access;
+		valid = true;
+	}
+
+	if (valid)
+		goto done;
+
+	/* Check the status on the file we're actually interested in
+	 * (the post-processing will cache the result).
+	 */
+	_debug("no valid permit");
+
+	ret = afs_fetch_status(vnode, key, false, _access);
+	if (ret < 0) {
+		*_access = 0;
+		_leave(" = %d", ret);
+		return ret;
+	}
+
+done:
+	_leave(" = 0 [access %x]", *_access);
+	return 0;
+}
+
+/*
+ * Check the permissions on an AFS file.
+ * - AFS ACLs are attached to directories only, and a file is controlled by its
+ *   parent directory's ACL
+ *
+ * With MAY_NOT_BLOCK set, only cached state is used and -ECHILD is returned
+ * whenever that is insufficient.  Otherwise the vnode is validated and the
+ * access looked up, going to the server if need be.
+ *
+ * Returns 0 if the request is permitted, -EACCES if it is denied, or another
+ * negative error from obtaining the key, validation or the permit lookup.
+ */
+int afs_permission(struct user_namespace *mnt_userns, struct inode *inode,
+		   int mask)
+{
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	afs_access_t access;
+	struct key *key;
+	int ret = 0;
+
+	_enter("{{%llx:%llu},%lx},%x,",
+	       vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask);
+
+	if (mask & MAY_NOT_BLOCK) {
+		key = afs_request_key_rcu(vnode->volume->cell);
+		if (IS_ERR(key))
+			return -ECHILD;
+
+		ret = -ECHILD;
+		if (!afs_check_validity(vnode) ||
+		    !afs_check_permit_rcu(vnode, key, &access))
+			goto out;
+	} else {
+		key = afs_request_key(vnode->volume->cell);
+		if (IS_ERR(key)) {
+			_leave(" = %ld [key]", PTR_ERR(key));
+			return PTR_ERR(key);
+		}
+
+		ret = afs_validate(vnode, key);
+		if (ret < 0)
+			goto out;
+
+		/* check the permits to see if we've got one yet */
+		ret = afs_check_permit(vnode, key, &access);
+		if (ret < 0)
+			goto out;
+	}
+
+	/* interpret the access mask */
+	_debug("REQ %x ACC %x on %s",
+	       mask, access, S_ISDIR(inode->i_mode) ? "dir" : "file");
+
+	/* Assume denial until every applicable test has been passed. */
+	ret = -EACCES;
+	if (S_ISDIR(inode->i_mode)) {
+		if ((mask & (MAY_EXEC | MAY_READ | MAY_CHDIR)) &&
+		    !(access & AFS_ACE_LOOKUP))
+			goto out;
+		/* DELETE: rmdir, unlink, rename from
+		 * INSERT: create, mkdir, symlink, rename to
+		 */
+		if ((mask & MAY_WRITE) &&
+		    !(access & (AFS_ACE_DELETE | AFS_ACE_INSERT)))
+			goto out;
+	} else {
+		if (!(access & AFS_ACE_LOOKUP))
+			goto out;
+		if ((mask & MAY_EXEC) && !(inode->i_mode & S_IXUSR))
+			goto out;
+		if (mask & (MAY_EXEC | MAY_READ)) {
+			if (!(access & AFS_ACE_READ) ||
+			    !(inode->i_mode & S_IRUSR))
+				goto out;
+		} else if (mask & MAY_WRITE) {
+			if (!(access & AFS_ACE_WRITE) ||
+			    !(inode->i_mode & S_IWUSR))
+				goto out;
+		}
+	}
+	ret = 0;
+
+out:
+	key_put(key);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * Exit-time check on the permits hash table: warn (once) if any bucket still
+ * has entries in it.
+ */
+void __exit afs_clean_up_permit_cache(void)
+{
+	int bkt;
+
+	for (bkt = 0; bkt < HASH_SIZE(afs_permits_cache); bkt++)
+		WARN_ON_ONCE(!hlist_empty(&afs_permits_cache[bkt]));
+}
diff --git a/fs/afs/server.c b/fs/afs/server.c
new file mode 100644
index 000000000..b5237206e
--- /dev/null
+++ b/fs/afs/server.c
@@ -0,0 +1,725 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AFS server record management
+ *
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include "afs_fs.h"
+#include "internal.h"
+#include "protocol_yfs.h"
+
+static unsigned afs_server_gc_delay = 10; /* Server record timeout in seconds */
+static atomic_t afs_server_debug_id;
+
+static struct afs_server *afs_maybe_use_server(struct afs_server *,
+ enum afs_server_trace);
+static void __afs_put_server(struct afs_net *, struct afs_server *);
+
+/*
+ * Find a server by one of its addresses.
+ *
+ * The address family in @srx selects which reverse-map list is walked:
+ * net->fs_addresses6 for AF_INET6, net->fs_addresses4 otherwise. In each
+ * server's address list the IPv4 entries occupy indices [0, nr_ipv4) and the
+ * IPv6 entries [nr_ipv4, nr_addrs). The port is compared first and then the
+ * address.
+ *
+ * On a match, afs_maybe_use_server() tries to take a ref and an active count
+ * on the record; it yields NULL if the refcount had already reached zero.
+ *
+ * The walk runs under the RCU read lock and net->fs_addr_lock. If
+ * need_seqretry() asks for another pass, whatever server the previous pass
+ * obtained is released with afs_unuse_server_notime() before searching again.
+ *
+ * Returns the server, or NULL if no address matched or the match could not
+ * be pinned.
+ */
+struct afs_server *afs_find_server(struct afs_net *net,
+ const struct sockaddr_rxrpc *srx)
+{
+ const struct afs_addr_list *alist;
+ struct afs_server *server = NULL;
+ unsigned int i;
+ int seq = 0, diff;
+
+ rcu_read_lock();
+
+ do {
+ if (server)
+ afs_unuse_server_notime(net, server, afs_server_trace_put_find_rsq);
+ server = NULL;
+ read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
+
+ if (srx->transport.family == AF_INET6) {
+ const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
+ hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
+ alist = rcu_dereference(server->addresses);
+ for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
+ b = &alist->addrs[i].transport.sin6;
+ diff = ((u16 __force)a->sin6_port -
+ (u16 __force)b->sin6_port);
+ if (diff == 0)
+ diff = memcmp(&a->sin6_addr,
+ &b->sin6_addr,
+ sizeof(struct in6_addr));
+ if (diff == 0)
+ goto found;
+ }
+ }
+ } else {
+ const struct sockaddr_in *a = &srx->transport.sin, *b;
+ hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
+ alist = rcu_dereference(server->addresses);
+ for (i = 0; i < alist->nr_ipv4; i++) {
+ b = &alist->addrs[i].transport.sin;
+ diff = ((u16 __force)a->sin_port -
+ (u16 __force)b->sin_port);
+ if (diff == 0)
+ diff = ((u32 __force)a->sin_addr.s_addr -
+ (u32 __force)b->sin_addr.s_addr);
+ if (diff == 0)
+ goto found;
+ }
+ }
+ }
+
+ server = NULL;
+ continue;
+ found:
+ server = afs_maybe_use_server(server, afs_server_trace_get_by_addr);
+
+ } while (need_seqretry(&net->fs_addr_lock, seq));
+
+ done_seqretry(&net->fs_addr_lock, seq);
+
+ rcu_read_unlock();
+ return server;
+}
+
+/*
+ * Look up a server by its UUID and mark it active.
+ *
+ * The net->fs_servers rbtree is searched using memcmp() on the UUID as the
+ * ordering. A matching record has both its ref and active counts bumped via
+ * afs_use_server().
+ *
+ * The search runs under net->fs_lock. If need_seqretry() asks for another
+ * pass, any server obtained on the previous pass is released with
+ * afs_unuse_server() first.
+ *
+ * Returns the server, or NULL if the UUID is not in the tree.
+ */
+struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
+{
+ struct afs_server *server = NULL;
+ struct rb_node *p;
+ int diff, seq = 0;
+
+ _enter("%pU", uuid);
+
+ do {
+ /* Unfortunately, rbtree walking doesn't give reliable results
+ * under just the RCU read lock, so we have to check for
+ * changes.
+ */
+ if (server)
+ afs_unuse_server(net, server, afs_server_trace_put_uuid_rsq);
+ server = NULL;
+
+ read_seqbegin_or_lock(&net->fs_lock, &seq);
+
+ p = net->fs_servers.rb_node;
+ while (p) {
+ server = rb_entry(p, struct afs_server, uuid_rb);
+
+ diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
+ if (diff < 0) {
+ p = p->rb_left;
+ } else if (diff > 0) {
+ p = p->rb_right;
+ } else {
+ afs_use_server(server, afs_server_trace_get_by_uuid);
+ break;
+ }
+
+ server = NULL;
+ }
+ } while (need_seqretry(&net->fs_lock, seq));
+
+ done_seqretry(&net->fs_lock, seq);
+
+ _leave(" = %p", server);
+ return server;
+}
+
+/*
+ * Install a server record in the namespace tree. If there's a clash, we stick
+ * it into a list anchored on whichever afs_server struct is actually in the
+ * tree.
+ *
+ * Three outcomes are possible, all decided under net->fs_lock:
+ *
+ * (1) No record with the candidate's UUID exists: the candidate is linked
+ *     into the net->fs_servers rbtree and onto net->fs_proc.
+ *
+ * (2) The record in the tree has the same UUID and the same cell: that
+ *     record is used and the candidate is not installed anywhere.
+ *
+ * (3) The record in the tree has the same UUID but a different cell: the
+ *     candidate is appended to the end of that record's uuid_next chain.
+ *
+ * In cases (1) and (3) the candidate is also added, under net->fs_addr_lock,
+ * to the IPv4 and/or IPv6 reverse-map lists according to which kinds of
+ * address its address list holds.
+ *
+ * Returns the record now representing the server, with an extra ref taken
+ * via afs_get_server(). The caller can tell whether its candidate was used
+ * by comparing the return value with @candidate.
+ */
+static struct afs_server *afs_install_server(struct afs_cell *cell,
+ struct afs_server *candidate)
+{
+ const struct afs_addr_list *alist;
+ struct afs_server *server, *next;
+ struct afs_net *net = cell->net;
+ struct rb_node **pp, *p;
+ int diff;
+
+ _enter("%p", candidate);
+
+ write_seqlock(&net->fs_lock);
+
+ /* Firstly install the server in the UUID lookup tree */
+ pp = &net->fs_servers.rb_node;
+ p = NULL;
+ while (*pp) {
+ p = *pp;
+ _debug("- consider %p", p);
+ server = rb_entry(p, struct afs_server, uuid_rb);
+ diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
+ if (diff < 0) {
+ pp = &(*pp)->rb_left;
+ } else if (diff > 0) {
+ pp = &(*pp)->rb_right;
+ } else {
+ if (server->cell == cell)
+ goto exists;
+
+ /* We have the same UUID representing servers in
+ * different cells. Append the new server to the list.
+ */
+ for (;;) {
+ next = rcu_dereference_protected(
+ server->uuid_next,
+ lockdep_is_held(&net->fs_lock.lock));
+ if (!next)
+ break;
+ server = next;
+ }
+ rcu_assign_pointer(server->uuid_next, candidate);
+ candidate->uuid_prev = server;
+ server = candidate;
+ goto added_dup;
+ }
+ }
+
+ server = candidate;
+ rb_link_node(&server->uuid_rb, p, pp);
+ rb_insert_color(&server->uuid_rb, &net->fs_servers);
+ hlist_add_head_rcu(&server->proc_link, &net->fs_proc);
+
+added_dup:
+ write_seqlock(&net->fs_addr_lock);
+ alist = rcu_dereference_protected(server->addresses,
+ lockdep_is_held(&net->fs_addr_lock.lock));
+
+ /* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
+ * it in the IPv4 and/or IPv6 reverse-map lists.
+ *
+ * TODO: For speed we want to use something other than a flat list
+ * here; even sorting the list in terms of lowest address would help a
+ * bit, but anything we might want to do gets messy and memory
+ * intensive.
+ */
+ if (alist->nr_ipv4 > 0)
+ hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
+ if (alist->nr_addrs > alist->nr_ipv4)
+ hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);
+
+ write_sequnlock(&net->fs_addr_lock);
+
+exists:
+ afs_get_server(server, afs_server_trace_get_install);
+ write_sequnlock(&net->fs_lock);
+ return server;
+}
+
+/*
+ * Allocate and initialise a new server record for @cell with the given UUID
+ * and address list.  The record starts out with a ref count and an active
+ * count of one each and is counted in the namespace's outstanding servers.
+ *
+ * Returns the new record, or NULL if the allocation failed.
+ */
+static struct afs_server *afs_alloc_server(struct afs_cell *cell,
+					   const uuid_t *uuid,
+					   struct afs_addr_list *alist)
+{
+	struct afs_server *new;
+	struct afs_net *net = cell->net;
+
+	_enter("");
+
+	new = kzalloc(sizeof(*new), GFP_KERNEL);
+	if (!new) {
+		_leave(" = NULL [nomem]");
+		return NULL;
+	}
+
+	refcount_set(&new->ref, 1);
+	atomic_set(&new->active, 1);
+	new->debug_id = atomic_inc_return(&afs_server_debug_id);
+	RCU_INIT_POINTER(new->addresses, alist);
+	new->addr_version = alist->version;
+	new->uuid = *uuid;
+	rwlock_init(&new->fs_lock);
+	INIT_WORK(&new->initcb_work, afs_server_init_callback_work);
+	init_waitqueue_head(&new->probe_wq);
+	INIT_LIST_HEAD(&new->probe_link);
+	spin_lock_init(&new->probe_lock);
+	new->cell = cell;
+	new->rtt = UINT_MAX;
+
+	afs_inc_servers_outstanding(net);
+	trace_afs_server(new->debug_id, 1, 1, afs_server_trace_alloc);
+	_leave(" = %p", new);
+	return new;
+}
+
+/*
+ * Ask the VL servers for the address list of the fileserver with the given
+ * UUID, using the YFS call on servers flagged as YFS and the AFS call
+ * otherwise.
+ *
+ * Returns the address list, or an error pointer: -ERESTARTSYS if the
+ * operation could not be begun, else any negative result from
+ * afs_end_vlserver_operation().
+ */
+static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
+						 struct key *key, const uuid_t *uuid)
+{
+	struct afs_vl_cursor vc;
+	struct afs_addr_list *alist = NULL;
+	int ret;
+
+	if (!afs_begin_vlserver_operation(&vc, cell, key))
+		return ERR_PTR(-ERESTARTSYS);
+
+	while (afs_select_vlserver(&vc)) {
+		alist = test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags) ?
+			afs_yfsvl_get_endpoints(&vc, uuid) :
+			afs_vl_get_addrs_u(&vc, uuid);
+	}
+
+	ret = afs_end_vlserver_operation(&vc);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	return alist;
+}
+
+/*
+ * Get or create a fileserver record.
+ *
+ * If a record with the given UUID is already known it is returned directly,
+ * flagged as needing an address update if its addr_version differs from the
+ * one supplied.  Otherwise the address list is fetched from the VL servers, a
+ * candidate record is allocated and an attempt is made to install it.  If
+ * afs_install_server() hands back a different record, the candidate is
+ * discarded; if the candidate itself was installed, probing of the fileserver
+ * is started.
+ *
+ * Returns the server record or an ERR_PTR()-encoded error.
+ */
+struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
+				     const uuid_t *uuid, u32 addr_version)
+{
+	struct afs_addr_list *alist;
+	struct afs_server *server, *candidate;
+
+	_enter("%p,%pU", cell->net, uuid);
+
+	server = afs_find_server_by_uuid(cell->net, uuid);
+	if (server) {
+		if (server->addr_version != addr_version)
+			set_bit(AFS_SERVER_FL_NEEDS_UPDATE, &server->flags);
+		return server;
+	}
+
+	alist = afs_vl_lookup_addrs(cell, key, uuid);
+	if (IS_ERR(alist))
+		return ERR_CAST(alist);
+
+	candidate = afs_alloc_server(cell, uuid, alist);
+	if (!candidate) {
+		afs_put_addrlist(alist);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	server = afs_install_server(cell, candidate);
+	if (server != candidate) {
+		/* The candidate was never installed, so it can be freed
+		 * directly - but afs_alloc_server() counted it in
+		 * servers_outstanding, and that count has to be handed back
+		 * here or afs_purge_servers() would wait forever for it to
+		 * reach zero.
+		 *
+		 * NOTE(review): on this path the returned record carries the
+		 * ref taken by afs_install_server() but no extra active
+		 * count, unlike the afs_find_server_by_uuid() path above -
+		 * confirm that callers cope with both.
+		 */
+		afs_put_addrlist(alist);
+		kfree(candidate);
+		afs_dec_servers_outstanding(cell->net);
+	} else {
+		/* Immediately dispatch an asynchronous probe to each interface
+		 * on the fileserver.  This will make sure the repeat-probing
+		 * service is started.
+		 */
+		afs_fs_probe_fileserver(cell->net, server, key, true);
+	}
+
+	return server;
+}
+
+/*
+ * Arm the server management timer to go off @delay seconds from now unless
+ * it is already due sooner.  Nothing is done once the namespace is no longer
+ * live.  An outstanding count is taken for the timer and given back if
+ * timer_reduce() reports that the timer was already pending.
+ */
+static void afs_set_server_timer(struct afs_net *net, time64_t delay)
+{
+	if (!net->live)
+		return;
+
+	afs_inc_servers_outstanding(net);
+	if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
+		afs_dec_servers_outstanding(net);
+}
+
+/*
+ * Server management timer expiry.  The outstanding count held for the timer
+ * is handed to the manager work item; if the work item could not be queued,
+ * the count is dropped here instead.
+ */
+void afs_servers_timer(struct timer_list *timer)
+{
+	struct afs_net *net = container_of(timer, struct afs_net, fs_timer);
+
+	_enter("");
+
+	if (queue_work(afs_wq, &net->fs_manager))
+		return;
+
+	afs_dec_servers_outstanding(net);
+}
+
+/*
+ * Take a plain reference on a server record (the active count is only read,
+ * for tracing).  Returns the server for the caller's convenience.
+ */
+struct afs_server *afs_get_server(struct afs_server *server,
+				  enum afs_server_trace reason)
+{
+	unsigned int active;
+	int old_ref;
+
+	__refcount_inc(&server->ref, &old_ref);
+	active = atomic_read(&server->active);
+	trace_afs_server(server->debug_id, old_ref + 1, active, reason);
+	return server;
+}
+
+/*
+ * Try to take both a reference and an active count on a server record.
+ * Returns NULL, leaving the record untouched, if its refcount has already
+ * reached zero.
+ */
+static struct afs_server *afs_maybe_use_server(struct afs_server *server,
+					       enum afs_server_trace reason)
+{
+	unsigned int active;
+	int old_ref;
+
+	if (!__refcount_inc_not_zero(&server->ref, &old_ref))
+		return NULL;
+
+	active = atomic_inc_return(&server->active);
+	trace_afs_server(server->debug_id, old_ref + 1, active, reason);
+	return server;
+}
+
+/*
+ * Take a reference and an active count on a server record.  Returns the
+ * server for the caller's convenience.
+ */
+struct afs_server *afs_use_server(struct afs_server *server, enum afs_server_trace reason)
+{
+	unsigned int active;
+	int old_ref;
+
+	__refcount_inc(&server->ref, &old_ref);
+	active = atomic_inc_return(&server->active);
+
+	trace_afs_server(server->debug_id, old_ref + 1, active, reason);
+	return server;
+}
+
+/*
+ * Release a reference on a server record.  A NULL @server is accepted and
+ * ignored.  When the last reference is dropped the record is passed to
+ * __afs_put_server() for disposal.
+ */
+void afs_put_server(struct afs_net *net, struct afs_server *server,
+		    enum afs_server_trace reason)
+{
+	unsigned int a, debug_id;
+	bool zero;
+	int r;
+
+	/* This check must come before anything dereferences the pointer. */
+	if (!server)
+		return;
+
+	/* Sample what the tracepoint needs before dropping our ref; the
+	 * record must not be touched afterwards unless we dropped the last
+	 * ref ourselves.
+	 */
+	debug_id = server->debug_id;
+	a = atomic_read(&server->active);
+	zero = __refcount_dec_and_test(&server->ref, &r);
+	trace_afs_server(debug_id, r - 1, a, reason);
+	if (unlikely(zero))
+		__afs_put_server(net, server);
+}
+
+/*
+ * Drop an active count and a reference on a server record, leaving its
+ * last-unused time alone.  If the active count reaches zero, the management
+ * timer is set to run after the GC delay.  A NULL @server is ignored.
+ */
+void afs_unuse_server_notime(struct afs_net *net, struct afs_server *server,
+			     enum afs_server_trace reason)
+{
+	if (!server)
+		return;
+
+	if (atomic_dec_return(&server->active) == 0)
+		afs_set_server_timer(net, afs_server_gc_delay);
+
+	afs_put_server(net, server, reason);
+}
+
+/*
+ * Drop an active count and a reference on a server record, first stamping it
+ * with the current time as its last-unused time.  A NULL @server is ignored.
+ */
+void afs_unuse_server(struct afs_net *net, struct afs_server *server,
+		      enum afs_server_trace reason)
+{
+	if (!server)
+		return;
+
+	server->unuse_time = ktime_get_real_seconds();
+	afs_unuse_server_notime(net, server, reason);
+}
+
+/*
+ * RCU callback that frees a server record: trace the event, release the
+ * record's address list and free the record itself.
+ */
+static void afs_server_rcu(struct rcu_head *rcu)
+{
+	struct afs_server *dead = container_of(rcu, struct afs_server, rcu);
+
+	trace_afs_server(dead->debug_id, refcount_read(&dead->ref),
+			 atomic_read(&dead->active), afs_server_trace_free);
+	afs_put_addrlist(rcu_access_pointer(dead->addresses));
+	kfree(dead);
+}
+
+/*
+ * Dispose of a server record whose last reference has gone: queue it for
+ * freeing via RCU and give back its count on the namespace's outstanding
+ * servers.
+ */
+static void __afs_put_server(struct afs_net *net, struct afs_server *server)
+{
+	call_rcu(&server->rcu, afs_server_rcu);
+
+	afs_dec_servers_outstanding(net);
+}
+
+/*
+ * Issue a give-up-all-callbacks request for a server, using an address
+ * cursor that starts at the preferred entry of the server's address list.
+ */
+static void afs_give_up_callbacks(struct afs_net *net, struct afs_server *server)
+{
+	struct afs_addr_list *addrs = rcu_access_pointer(server->addresses);
+	struct afs_addr_cursor cursor = {
+		.alist	= addrs,
+		.index	= addrs->preferred,
+	};
+
+	afs_fs_give_up_all_callbacks(net, server, &cursor, NULL);
+}
+
+/*
+ * Tear down a dead server record: give up its callbacks if the
+ * AFS_SERVER_FL_MAY_HAVE_CB flag is set, flush its initcb work item and drop
+ * a reference.
+ */
+static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
+{
+	bool may_have_cb = test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags);
+
+	if (may_have_cb)
+		afs_give_up_callbacks(net, server);
+
+	flush_work(&server->initcb_work);
+	afs_put_server(net, server, afs_server_trace_destroy);
+}
+
+/*
+ * Garbage collect any expired servers.
+ *
+ * @gc_list is a singly-linked list chained through server->gc_next. Each
+ * entry has its active count rechecked under net->fs_lock and is skipped if
+ * the count is no longer zero.
+ *
+ * A server being collected is unlinked from the UUID structures: if it is
+ * the rbtree entry (no uuid_prev), it is either erased or replaced in the
+ * tree by the next record in its uuid_next chain; otherwise it is cut out of
+ * the middle or end of that chain. It is then removed from the probe list,
+ * the proc list and whichever address reverse-map lists it is on, and
+ * finally handed to afs_destroy_server() after the lock has been dropped.
+ */
+static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
+{
+ struct afs_server *server, *next, *prev;
+ int active;
+
+ while ((server = gc_list)) {
+ gc_list = server->gc_next;
+
+ write_seqlock(&net->fs_lock);
+
+ active = atomic_read(&server->active);
+ if (active == 0) {
+ trace_afs_server(server->debug_id, refcount_read(&server->ref),
+ active, afs_server_trace_gc);
+ next = rcu_dereference_protected(
+ server->uuid_next, lockdep_is_held(&net->fs_lock.lock));
+ prev = server->uuid_prev;
+ if (!prev) {
+ /* The one at the front is in the tree */
+ if (!next) {
+ rb_erase(&server->uuid_rb, &net->fs_servers);
+ } else {
+ rb_replace_node_rcu(&server->uuid_rb,
+ &next->uuid_rb,
+ &net->fs_servers);
+ next->uuid_prev = NULL;
+ }
+ } else {
+ /* This server is not at the front */
+ rcu_assign_pointer(prev->uuid_next, next);
+ if (next)
+ next->uuid_prev = prev;
+ }
+
+ list_del(&server->probe_link);
+ hlist_del_rcu(&server->proc_link);
+ if (!hlist_unhashed(&server->addr4_link))
+ hlist_del_rcu(&server->addr4_link);
+ if (!hlist_unhashed(&server->addr6_link))
+ hlist_del_rcu(&server->addr6_link);
+ }
+ write_sequnlock(&net->fs_lock);
+
+ if (active == 0)
+ afs_destroy_server(net, server);
+ }
+}
+
+/*
+ * Manage the records of servers known to be within a network namespace. This
+ * includes garbage collecting unused servers.
+ *
+ * Note also that we were given an increment on net->servers_outstanding by
+ * whoever queued us that we need to deal with before returning.
+ *
+ * The net->fs_servers tree is walked with net->fs_lock held exclusively.
+ * Each server with a zero active count is either put on a local GC list (if
+ * the namespace is being purged or the server's expiry time has passed) or
+ * has its expiry time considered for the next wake-up. The expiry time is
+ * the last-unused time plus afs_server_gc_delay, except that servers flagged
+ * VL_FAIL or NOT_FOUND get no delay added.
+ *
+ * Unless purging, the manager is then rescheduled for the earliest pending
+ * expiry - by requeuing the work item directly if that time has already
+ * arrived, else through the timer. Lastly the GC list is processed by
+ * afs_gc_servers() and the outstanding count we were given is dropped.
+ */
+void afs_manage_servers(struct work_struct *work)
+{
+ struct afs_net *net = container_of(work, struct afs_net, fs_manager);
+ struct afs_server *gc_list = NULL;
+ struct rb_node *cursor;
+ time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
+ bool purging = !net->live;
+
+ _enter("");
+
+ /* Trawl the server list looking for servers that have expired from
+ * lack of use.
+ */
+ read_seqlock_excl(&net->fs_lock);
+
+ for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
+ struct afs_server *server =
+ rb_entry(cursor, struct afs_server, uuid_rb);
+ int active = atomic_read(&server->active);
+
+ _debug("manage %pU %u", &server->uuid, active);
+
+ if (purging) {
+ trace_afs_server(server->debug_id, refcount_read(&server->ref),
+ active, afs_server_trace_purging);
+ if (active != 0)
+ pr_notice("Can't purge s=%08x\n", server->debug_id);
+ }
+
+ if (active == 0) {
+ time64_t expire_at = server->unuse_time;
+
+ if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
+ !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
+ expire_at += afs_server_gc_delay;
+ if (purging || expire_at <= now) {
+ server->gc_next = gc_list;
+ gc_list = server;
+ } else if (expire_at < next_manage) {
+ next_manage = expire_at;
+ }
+ }
+ }
+
+ read_sequnlock_excl(&net->fs_lock);
+
+ /* Update the timer on the way out. We have to pass an increment on
+ * servers_outstanding in the namespace that we are in to the timer or
+ * the work scheduler.
+ */
+ if (!purging && next_manage < TIME64_MAX) {
+ now = ktime_get_real_seconds();
+
+ if (next_manage - now <= 0) {
+ if (queue_work(afs_wq, &net->fs_manager))
+ afs_inc_servers_outstanding(net);
+ } else {
+ afs_set_server_timer(net, next_manage - now);
+ }
+ }
+
+ afs_gc_servers(net, gc_list);
+
+ afs_dec_servers_outstanding(net);
+ _leave(" [%d]", atomic_read(&net->servers_outstanding));
+}
+
+static void afs_queue_server_manager(struct afs_net *net)
+{
+	/* Take the increment up front on behalf of the work item ... */
+	afs_inc_servers_outstanding(net);
+	if (queue_work(afs_wq, &net->fs_manager))
+		return;
+
+	/* ... and give it back if queue_work() declined to queue it. */
+	afs_dec_servers_outstanding(net);
+}
+
+/*
+ * Discard the server records of a namespace: cancel the management timer,
+ * kick the manager work item and wait for net->servers_outstanding to reach
+ * zero.
+ */
+void afs_purge_servers(struct afs_net *net)
+{
+	int timer_was_pending;
+
+	_enter("");
+
+	/* If the timer was still pending, release the increment on its
+	 * behalf.
+	 */
+	timer_was_pending = del_timer_sync(&net->fs_timer);
+	if (timer_was_pending)
+		afs_dec_servers_outstanding(net);
+
+	afs_queue_server_manager(net);
+
+	_debug("wait");
+	atomic_dec(&net->servers_outstanding);
+	wait_var_event(&net->servers_outstanding,
+		       !atomic_read(&net->servers_outstanding));
+	_leave("");
+}
+
+/*
+ * Fetch a fresh address list for a server and install it if its version
+ * differs from the one the server record currently carries.
+ *
+ * Returns true on success (or when an uninterruptible operation was
+ * interrupted but the server still has an address list); returns false with
+ * op->error set otherwise.
+ */
+static noinline bool afs_update_server_record(struct afs_operation *op,
+					      struct afs_server *server)
+{
+	struct afs_addr_list *fresh, *stale;
+
+	_enter("");
+
+	trace_afs_server(server->debug_id, refcount_read(&server->ref),
+			 atomic_read(&server->active),
+			 afs_server_trace_update);
+
+	fresh = afs_vl_lookup_addrs(op->volume->cell, op->key, &server->uuid);
+	if (IS_ERR(fresh)) {
+		long err = PTR_ERR(fresh);
+		bool interrupted = (err == -ERESTARTSYS || err == -EINTR);
+
+		/* Carry on with the existing list if we may not fail. */
+		if (interrupted &&
+		    (op->flags & AFS_OPERATION_UNINTR) &&
+		    server->addresses) {
+			_leave(" = t [intr]");
+			return true;
+		}
+		op->error = err;
+		_leave(" = f [%d]", op->error);
+		return false;
+	}
+
+	if (server->addr_version == fresh->version) {
+		/* Nothing changed: the list just fetched is the one to drop. */
+		stale = fresh;
+	} else {
+		/* Swap the new list in and drop the one it displaced. */
+		write_lock(&server->fs_lock);
+		stale = rcu_dereference_protected(server->addresses,
+						  lockdep_is_held(&server->fs_lock));
+		rcu_assign_pointer(server->addresses, fresh);
+		server->addr_version = fresh->version;
+		write_unlock(&server->fs_lock);
+	}
+
+	afs_put_addrlist(stale);
+	_leave(" = t");
+	return true;
+}
+
+/*
+ * See if a server's address list needs updating and, if so, either perform
+ * the update or wait for whoever is already performing it.
+ *
+ * Returns true if the record is usable.  Returns false with op->error set if
+ * the update failed, the wait was interrupted, or the record was still being
+ * updated after four waits (-ESTALE).
+ */
+bool afs_check_server_record(struct afs_operation *op, struct afs_server *server)
+{
+	bool success;
+	int ret, retries = 0;
+
+	_enter("");
+
+	ASSERT(server);
+
+retry:
+	if (test_bit(AFS_SERVER_FL_UPDATING, &server->flags))
+		goto wait;
+	if (test_bit(AFS_SERVER_FL_NEEDS_UPDATE, &server->flags))
+		goto update;
+	_leave(" = t [good]");
+	return true;
+
+update:
+	/* The UPDATING bit acts as a lock: only the task that sets it does
+	 * the update; everyone else falls through to wait on it.
+	 */
+	if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
+		clear_bit(AFS_SERVER_FL_NEEDS_UPDATE, &server->flags);
+		success = afs_update_server_record(op, server);
+		clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
+		wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
+		_leave(" = %d", success);
+		return success;
+	}
+
+wait:
+	ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
+			  (op->flags & AFS_OPERATION_UNINTR) ?
+			  TASK_UNINTERRUPTIBLE : TASK_INTERRUPTIBLE);
+	/* wait_on_bit() returns non-zero when the sleep was cut short by a
+	 * signal, so treat any negative value as an interruption rather than
+	 * testing for one particular errno.
+	 */
+	if (ret < 0) {
+		op->error = ret;
+		_leave(" = f [intr]");
+		return false;
+	}
+
+	retries++;
+	if (retries == 4) {
+		/* Report the failure through op->error, as the other failure
+		 * paths do, instead of losing it in a local variable.
+		 */
+		op->error = -ESTALE;
+		_leave(" = f [stale]");
+		return false;
+	}
+	goto retry;
+}
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
new file mode 100644
index 000000000..b59896b1d
--- /dev/null
+++ b/fs/afs/server_list.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AFS fileserver list management.
+ *
+ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+/*
+ * Drop a reference on a server list.  When the last reference goes, every
+ * server in the list is unused and the list itself is freed after an RCU
+ * grace period.  A NULL list is ignored.
+ */
+void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist)
+{
+	int idx;
+
+	if (!slist)
+		return;
+	if (!refcount_dec_and_test(&slist->usage))
+		return;
+
+	for (idx = 0; idx < slist->nr_servers; idx++)
+		afs_unuse_server(net, slist->servers[idx].server,
+				 afs_server_trace_put_slist);
+	kfree_rcu(slist, rcu);
+}
+
+/*
+ * Build a server list from a VLDB record.
+ *
+ * Only the servers whose fs_mask entry intersects @type_mask are considered.
+ * Each one is looked up with afs_lookup_server() and inserted into the list
+ * in ascending UUID order; a server that appears more than once in the record
+ * is stored only once.  Servers whose lookup fails with -ENOENT or -ENOMEDIUM
+ * are skipped.
+ *
+ * Returns the new list, or an ERR_PTR: -ENOMEM if allocation fails, the
+ * lookup error for any other lookup failure, or -EDESTADDRREQ if no server
+ * ended up in the list.
+ */
+struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
+					      struct key *key,
+					      struct afs_vldb_entry *vldb,
+					      u8 type_mask)
+{
+	struct afs_server_list *slist;
+	struct afs_server *server;
+	int ret = -ENOMEM, nr_servers = 0, i, j;
+
+	/* Size the list for every candidate, duplicates included. */
+	for (i = 0; i < vldb->nr_servers; i++)
+		if (vldb->fs_mask[i] & type_mask)
+			nr_servers++;
+
+	slist = kzalloc(struct_size(slist, servers, nr_servers), GFP_KERNEL);
+	if (!slist)
+		goto error;
+
+	refcount_set(&slist->usage, 1);
+	rwlock_init(&slist->lock);
+
+	for (i = 0; i < AFS_MAXTYPES; i++)
+		slist->vids[i] = vldb->vid[i];
+
+	/* Make sure a record exists for each server in the list. */
+	for (i = 0; i < vldb->nr_servers; i++) {
+		if (!(vldb->fs_mask[i] & type_mask))
+			continue;
+
+		server = afs_lookup_server(cell, key, &vldb->fs_server[i],
+					   vldb->addr_version[i]);
+		if (IS_ERR(server)) {
+			ret = PTR_ERR(server);
+			if (ret == -ENOENT ||
+			    ret == -ENOMEDIUM)
+				continue;
+			goto error_2;
+		}
+
+		/* Insertion-sort by UUID */
+		for (j = 0; j < slist->nr_servers; j++)
+			if (memcmp(&slist->servers[j].server->uuid,
+				   &server->uuid,
+				   sizeof(server->uuid)) >= 0)
+				break;
+		if (j < slist->nr_servers) {
+			if (slist->servers[j].server == server) {
+				/* Duplicate entry.  Entries in the list are
+				 * released with afs_unuse_server() (see
+				 * afs_put_serverlist()), so the surplus use
+				 * obtained from the lookup must be dropped
+				 * the same way rather than with a bare put.
+				 */
+				afs_unuse_server(cell->net, server,
+						 afs_server_trace_put_slist_isort);
+				continue;
+			}
+
+			/* Open a gap at slot j for the new entry. */
+			memmove(slist->servers + j + 1,
+				slist->servers + j,
+				(slist->nr_servers - j) * sizeof(struct afs_server_entry));
+		}
+
+		slist->servers[j].server = server;
+		slist->nr_servers++;
+	}
+
+	if (slist->nr_servers == 0) {
+		ret = -EDESTADDRREQ;
+		goto error_2;
+	}
+
+	return slist;
+
+error_2:
+	afs_put_serverlist(cell->net, slist);
+error:
+	return ERR_PTR(ret);
+}
+
+/*
+ * Compare an old server list with its potential replacement.  Returns false
+ * if both hold the same servers in the same slots.  Otherwise the preferred
+ * server of the old list is carried over to the new list, if it is present
+ * there, and true is returned.
+ */
+bool afs_annotate_server_list(struct afs_server_list *new,
+			      struct afs_server_list *old)
+{
+	struct afs_server *preferred;
+	bool changed = old->nr_servers != new->nr_servers;
+	int i;
+
+	for (i = 0; !changed && i < old->nr_servers; i++)
+		changed = old->servers[i].server != new->servers[i].server;
+
+	if (!changed)
+		return false;
+
+	/* Keep pointing at the same preferred server where possible. */
+	preferred = old->servers[old->preferred].server;
+	for (i = 0; i < new->nr_servers; i++) {
+		if (new->servers[i].server != preferred)
+			continue;
+		new->preferred = i;
+		break;
+	}
+
+	return true;
+}
diff --git a/fs/afs/super.c b/fs/afs/super.c
new file mode 100644
index 000000000..e95fb4cb4
--- /dev/null
+++ b/fs/afs/super.c
@@ -0,0 +1,778 @@
+/* AFS superblock handling
+ *
+ * Copyright (c) 2002, 2007, 2018 Red Hat, Inc. All rights reserved.
+ *
+ * This software may be freely redistributed under the terms of the
+ * GNU General Public License.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Authors: David Howells <dhowells@redhat.com>
+ * David Woodhouse <dwmw2@infradead.org>
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/fs_parser.h>
+#include <linux/statfs.h>
+#include <linux/sched.h>
+#include <linux/nsproxy.h>
+#include <linux/magic.h>
+#include <net/net_namespace.h>
+#include "internal.h"
+
+static void afs_i_init_once(void *foo);
+static void afs_kill_super(struct super_block *sb);
+static struct inode *afs_alloc_inode(struct super_block *sb);
+static void afs_destroy_inode(struct inode *inode);
+static void afs_free_inode(struct inode *inode);
+static int afs_statfs(struct dentry *dentry, struct kstatfs *buf);
+static int afs_show_devname(struct seq_file *m, struct dentry *root);
+static int afs_show_options(struct seq_file *m, struct dentry *root);
+static int afs_init_fs_context(struct fs_context *fc);
+static const struct fs_parameter_spec afs_fs_parameters[];
+
+/* Filesystem type record passed to register_filesystem() in afs_fs_init(). */
+struct file_system_type afs_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "afs",
+ .init_fs_context = afs_init_fs_context,
+ .parameters = afs_fs_parameters,
+ .kill_sb = afs_kill_super,
+ .fs_flags = FS_RENAME_DOES_D_MOVE,
+};
+MODULE_ALIAS_FS("afs");
+
+int afs_net_id;
+
+/* Superblock operations installed as sb->s_op by afs_fill_super(). */
+static const struct super_operations afs_super_ops = {
+ .statfs = afs_statfs,
+ .alloc_inode = afs_alloc_inode,
+ .write_inode = afs_write_inode,
+ .drop_inode = afs_drop_inode,
+ .destroy_inode = afs_destroy_inode,
+ .free_inode = afs_free_inode,
+ .evict_inode = afs_evict_inode,
+ .show_devname = afs_show_devname,
+ .show_options = afs_show_options,
+};
+
+/* Slab cache for struct afs_vnode and a count of the objects handed out. */
+static struct kmem_cache *afs_inode_cachep;
+static atomic_t afs_count_active_inodes;
+
+/* Mount parameter identifiers, as dispatched on in afs_parse_param(). */
+enum afs_param {
+ Opt_autocell,
+ Opt_dyn,
+ Opt_flock,
+ Opt_source,
+};
+
+/* Values accepted by the "flock" mount parameter. */
+static const struct constant_table afs_param_flock[] = {
+ {"local", afs_flock_mode_local },
+ {"openafs", afs_flock_mode_openafs },
+ {"strict", afs_flock_mode_strict },
+ {"write", afs_flock_mode_write },
+ {}
+};
+
+/* Mount parameter specification handed to fs_parse(). */
+static const struct fs_parameter_spec afs_fs_parameters[] = {
+ fsparam_flag ("autocell", Opt_autocell),
+ fsparam_flag ("dyn", Opt_dyn),
+ fsparam_enum ("flock", Opt_flock, afs_param_flock),
+ fsparam_string("source", Opt_source),
+ {}
+};
+
+/*
+ * Set up the filesystem: create the inode slab cache and register the "afs"
+ * filesystem type.  Returns 0 on success or a negative error code.
+ */
+int __init afs_fs_init(void)
+{
+	int ret;
+
+	_enter("");
+
+	/* The inode cache comes first; registration depends on it. */
+	atomic_set(&afs_count_active_inodes, 0);
+
+	afs_inode_cachep = kmem_cache_create("afs_inode_cache",
+					     sizeof(struct afs_vnode),
+					     0,
+					     SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT,
+					     afs_i_init_once);
+	if (!afs_inode_cachep) {
+		printk(KERN_NOTICE "kAFS: Failed to allocate inode cache\n");
+		return -ENOMEM;
+	}
+
+	/* Make the filesystem type available for mounting. */
+	ret = register_filesystem(&afs_fs_type);
+	if (ret >= 0) {
+		_leave(" = 0");
+		return 0;
+	}
+
+	/* Registration failed: undo the cache creation. */
+	kmem_cache_destroy(afs_inode_cachep);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * Tear the filesystem down again: unregister the filesystem type and destroy
+ * the inode cache.  It is a BUG() for inode objects to be outstanding still.
+ */
+void afs_fs_exit(void)
+{
+	_enter("");
+
+	afs_mntpt_kill_timer();
+	unregister_filesystem(&afs_fs_type);
+
+	if (atomic_read(&afs_count_active_inodes) != 0) {
+		printk("kAFS: %d active inode objects still present\n",
+		       atomic_read(&afs_count_active_inodes));
+		BUG();
+	}
+
+	/* Inodes are freed through RCU, so wait for all pending callbacks to
+	 * run before the cache they return objects to disappears.
+	 */
+	rcu_barrier();
+	kmem_cache_destroy(afs_inode_cachep);
+	_leave("");
+}
+
+/*
+ * Emit the mount device name for /proc/mounts.  A dynamic root shows as
+ * "none"; anything else as <sigil><cell>:<volume><suffix>, where the sigil
+ * and the suffix are derived from the volume type.
+ */
+static int afs_show_devname(struct seq_file *m, struct dentry *root)
+{
+	struct afs_super_info *as = AFS_FS_S(root->d_sb);
+	struct afs_volume *volume;
+	const char *suffix = "";
+	char sigil = '%';
+
+	if (as->dyn_root) {
+		seq_puts(m, "none");
+		return 0;
+	}
+
+	volume = as->volume;
+	if (volume->type == AFSVL_ROVOL) {
+		sigil = '#';
+		/* The suffix is shown only when the type was forced. */
+		if (volume->type_force)
+			suffix = ".readonly";
+	} else if (volume->type == AFSVL_BACKVOL) {
+		sigil = '#';
+		suffix = ".backup";
+	}
+
+	seq_printf(m, "%c%s:%s%s", sigil, as->cell->name, volume->name, suffix);
+	return 0;
+}
+
+/*
+ * Emit the mount options for /proc/mounts: ",dyn", ",autocell" and
+ * ",flock=<mode>" as applicable.
+ */
+static int afs_show_options(struct seq_file *m, struct dentry *root)
+{
+	struct afs_super_info *as = AFS_FS_S(root->d_sb);
+	const char *mode_name;
+
+	if (as->dyn_root)
+		seq_puts(m, ",dyn");
+	if (test_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(d_inode(root))->flags))
+		seq_puts(m, ",autocell");
+
+	switch (as->flock_mode) {
+	case afs_flock_mode_local:
+		mode_name = "local";
+		break;
+	case afs_flock_mode_openafs:
+		mode_name = "openafs";
+		break;
+	case afs_flock_mode_strict:
+		mode_name = "strict";
+		break;
+	case afs_flock_mode_write:
+		mode_name = "write";
+		break;
+	case afs_flock_mode_unset:
+	default:
+		/* Nothing to show. */
+		mode_name = NULL;
+		break;
+	}
+
+	if (mode_name)
+		seq_printf(m, ",flock=%s", mode_name);
+
+	return 0;
+}
+
+/*
+ * Parse the source name to get cell name, volume name, volume type and R/W
+ * selector.
+ *
+ * This can be one of the following:
+ *	"%[cell:]volume[.]"		R/W volume
+ *	"#[cell:]volume[.]"		R/O or R/W volume (R/O parent),
+ *					 or R/W (R/W parent) volume
+ *	"%[cell:]volume.readonly"	R/O volume
+ *	"#[cell:]volume.readonly"	R/O volume
+ *	"%[cell:]volume.backup"		Backup volume
+ *	"#[cell:]volume.backup"		Backup volume
+ *
+ * The literal source "none" is also accepted and merely sets ctx->no_cell.
+ * On success the source string is moved from the parameter into fc->source.
+ */
+static int afs_parse_source(struct fs_context *fc, struct fs_parameter *param)
+{
+	struct afs_fs_context *ctx = fc->fs_private;
+	struct afs_cell *cell;
+	const char *cellname = NULL, *suffix, *colon, *name = param->string;
+	int cellnamesz = 0;
+	bool has_sigil;
+
+	_enter(",%s", name);
+
+	if (fc->source)
+		return invalf(fc, "kAFS: Multiple sources not supported");
+
+	if (!name) {
+		printk(KERN_ERR "kAFS: no volume name specified\n");
+		return -EINVAL;
+	}
+
+	has_sigil = (name[0] == '%' || name[0] == '#');
+	if (!has_sigil || !name[1]) {
+		/* A dynamic root has no real source, so allow "none". */
+		if (strcmp(name, "none") == 0) {
+			ctx->no_cell = true;
+			return 0;
+		}
+		printk(KERN_ERR "kAFS: unparsable volume name\n");
+		return -EINVAL;
+	}
+
+	/* A '%' sigil pins the volume type to R/W. */
+	if (name[0] == '%') {
+		ctx->type = AFSVL_RWVOL;
+		ctx->force = true;
+	}
+	name++;
+
+	/* Anything before a ':' is the cell name. */
+	colon = strchr(name, ':');
+	if (colon) {
+		cellname = name;
+		cellnamesz = colon - name;
+		ctx->volname = colon + 1;
+	} else {
+		ctx->volname = name;
+	}
+
+	/* A recognised suffix overrides the type; an unrecognised one is
+	 * treated as part of the volume name.  A bare trailing '.' is
+	 * stripped without changing the type.
+	 */
+	suffix = strrchr(ctx->volname, '.');
+	if (suffix) {
+		if (strcmp(suffix, ".readonly") == 0) {
+			ctx->type = AFSVL_ROVOL;
+			ctx->force = true;
+		} else if (strcmp(suffix, ".backup") == 0) {
+			ctx->type = AFSVL_BACKVOL;
+			ctx->force = true;
+		} else if (suffix[1] != 0) {
+			suffix = NULL;
+		}
+	}
+
+	if (suffix)
+		ctx->volnamesz = suffix - ctx->volname;
+	else
+		ctx->volnamesz = strlen(ctx->volname);
+
+	_debug("cell %*.*s [%p]",
+	       cellnamesz, cellnamesz, cellname ?: "", ctx->cell);
+
+	/* An explicit cell name replaces whatever cell the context had. */
+	if (cellname) {
+		cell = afs_lookup_cell(ctx->net, cellname, cellnamesz,
+				       NULL, false);
+		if (IS_ERR(cell)) {
+			pr_err("kAFS: unable to lookup cell '%*.*s'\n",
+			       cellnamesz, cellnamesz, cellname ?: "");
+			return PTR_ERR(cell);
+		}
+		afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_parse);
+		afs_see_cell(cell, afs_cell_trace_see_source);
+		ctx->cell = cell;
+	}
+
+	_debug("CELL:%s [%p] VOLUME:%*.*s SUFFIX:%s TYPE:%d%s",
+	       ctx->cell->name, ctx->cell,
+	       ctx->volnamesz, ctx->volnamesz, ctx->volname,
+	       suffix ?: "-", ctx->type, ctx->force ? " FORCE" : "");
+
+	/* ctx->volname points into this string, so take ownership of it. */
+	fc->source = param->string;
+	param->string = NULL;
+	return 0;
+}
+
+/*
+ * Handle one mount parameter: classify it with fs_parse() and record the
+ * result in the AFS mount context.  Returns 0 or a negative error code.
+ */
+static int afs_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+	struct afs_fs_context *ctx = fc->fs_private;
+	struct fs_parse_result result;
+	int opt = fs_parse(fc, afs_fs_parameters, param, &result);
+
+	if (opt < 0)
+		return opt;
+
+	switch (opt) {
+	case Opt_autocell:
+		ctx->autocell = true;
+		break;
+
+	case Opt_dyn:
+		ctx->dyn_root = true;
+		break;
+
+	case Opt_flock:
+		ctx->flock_mode = result.uint_32;
+		break;
+
+	case Opt_source:
+		/* The source string has a parser of its own. */
+		return afs_parse_source(fc, param);
+
+	default:
+		return -EINVAL;
+	}
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * Validate the options, get the cell key and look up the volume.
+ *
+ * For a dynamic root there is nothing to do and 0 is returned.  Otherwise,
+ * on success ctx->key and ctx->volume are set, and ctx->cell may have been
+ * switched to the cell it was found to be an alias of.  Returns 0 or a
+ * negative error code.
+ */
+static int afs_validate_fc(struct fs_context *fc)
+{
+ struct afs_fs_context *ctx = fc->fs_private;
+ struct afs_volume *volume;
+ struct afs_cell *cell;
+ struct key *key;
+ int ret;
+
+ if (!ctx->dyn_root) {
+ /* Source "none" set no_cell; it is only valid together with dyn. */
+ if (ctx->no_cell) {
+ pr_warn("kAFS: Can only specify source 'none' with -o dyn\n");
+ return -EINVAL;
+ }
+
+ if (!ctx->cell) {
+ pr_warn("kAFS: No cell specified\n");
+ return -EDESTADDRREQ;
+ }
+
+ /* Jumped back to after ctx->cell has been switched to an alias target,
+ * so that the key and the volume are obtained for the new cell.
+ */
+ reget_key:
+ /* We try to do the mount securely. */
+ key = afs_request_key(ctx->cell);
+ if (IS_ERR(key))
+ return PTR_ERR(key);
+
+ ctx->key = key;
+
+ /* Drop any volume still held in the context before looking one up. */
+ if (ctx->volume) {
+ afs_put_volume(ctx->net, ctx->volume,
+ afs_volume_trace_put_validate_fc);
+ ctx->volume = NULL;
+ }
+
+ if (test_bit(AFS_CELL_FL_CHECK_ALIAS, &ctx->cell->flags)) {
+ ret = afs_cell_detect_alias(ctx->cell, key);
+ if (ret < 0)
+ return ret;
+ if (ret == 1) {
+ /* Alias found: release the key, swap the context over to the
+ * cell named by alias_of and start again.
+ */
+ _debug("switch to alias");
+ key_put(ctx->key);
+ ctx->key = NULL;
+ cell = afs_use_cell(ctx->cell->alias_of,
+ afs_cell_trace_use_fc_alias);
+ afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_fc);
+ ctx->cell = cell;
+ goto reget_key;
+ }
+ }
+
+ volume = afs_create_volume(ctx);
+ if (IS_ERR(volume))
+ return PTR_ERR(volume);
+
+ ctx->volume = volume;
+ /* Any volume type other than R/W is forced to local flock mode. */
+ if (volume->type != AFSVL_RWVOL)
+ ctx->flock_mode = afs_flock_mode_local;
+ }
+
+ return 0;
+}
+
+/*
+ * sget_fc() test callback for ordinary mounts: an existing superblock matches
+ * if it is in the same network namespace, has a volume with the same volume
+ * ID in the same cell and is not a dynamic root.
+ */
+static int afs_test_super(struct super_block *sb, struct fs_context *fc)
+{
+	struct afs_fs_context *ctx = fc->fs_private;
+	struct afs_super_info *as = AFS_FS_S(sb);
+
+	if (as->net_ns != fc->net_ns)
+		return 0;
+	if (!as->volume)
+		return 0;
+	if (as->volume->vid != ctx->volume->vid)
+		return 0;
+	if (as->cell != ctx->cell)
+		return 0;
+	return !as->dyn_root;
+}
+
+/*
+ * sget_fc() test callback for dynamic-root mounts: any dynamic-root
+ * superblock in the same network namespace matches.
+ */
+static int afs_dynroot_test_super(struct super_block *sb, struct fs_context *fc)
+{
+	struct afs_super_info *as = AFS_FS_S(sb);
+
+	if (as->net_ns != fc->net_ns)
+		return 0;
+	return as->dyn_root ? 1 : 0;
+}
+
+/*
+ * sget_fc() set callback: give the new superblock an anonymous device.  The
+ * fs_context argument is not used.
+ */
+static int afs_set_super(struct super_block *sb, struct fs_context *fc)
+{
+	int ret = set_anon_super(sb, NULL);
+
+	return ret;
+}
+
+/*
+ * Fill in a freshly allocated superblock: set the generic parameters and
+ * operations, obtain the root inode (a pseudo directory for a dynamic root,
+ * the volume's root otherwise) and create the root dentry.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx)
+{
+	struct afs_super_info *as = AFS_FS_S(sb);
+	struct inode *inode = NULL;
+	int ret;
+
+	_enter("");
+
+	/* fill in the superblock */
+	sb->s_blocksize		= PAGE_SIZE;
+	sb->s_blocksize_bits	= PAGE_SHIFT;
+	sb->s_maxbytes		= MAX_LFS_FILESIZE;
+	sb->s_magic		= AFS_FS_MAGIC;
+	sb->s_op		= &afs_super_ops;
+	if (!as->dyn_root)
+		sb->s_xattr	= afs_xattr_handlers;
+	ret = super_setup_bdi(sb);
+	if (ret)
+		return ret;
+
+	/* allocate the root inode and dentry */
+	if (as->dyn_root) {
+		inode = afs_iget_pseudo_dir(sb, true);
+	} else {
+		/* Bound the write to the size of the s_id buffer rather than
+		 * relying on the formatted volume ID happening to fit.
+		 */
+		snprintf(sb->s_id, sizeof(sb->s_id), "%llu", as->volume->vid);
+		afs_activate_volume(as->volume);
+		inode = afs_root_iget(sb, ctx->key);
+	}
+
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	/* A dynamic root always behaves as if autocell had been given. */
+	if (ctx->autocell || as->dyn_root)
+		set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags);
+
+	ret = -ENOMEM;
+	sb->s_root = d_make_root(inode);
+	if (!sb->s_root)
+		goto error;
+
+	if (as->dyn_root) {
+		sb->s_d_op = &afs_dynroot_dentry_operations;
+		ret = afs_dynroot_populate(sb);
+		if (ret < 0)
+			goto error;
+	} else {
+		sb->s_d_op = &afs_fs_dentry_operations;
+		/* Publish the superblock through the volume record. */
+		rcu_assign_pointer(as->volume->sb, sb);
+	}
+
+	_leave(" = 0");
+	return 0;
+
+error:
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * Allocate a superblock info record and fill it in from the mount context,
+ * taking a reference on the network namespace and, unless this is a dynamic
+ * root, on the cell and the volume.  Returns NULL if allocation fails.
+ */
+static struct afs_super_info *afs_alloc_sbi(struct fs_context *fc)
+{
+	struct afs_fs_context *ctx = fc->fs_private;
+	struct afs_super_info *as;
+
+	as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
+	if (!as)
+		return as;
+
+	as->net_ns = get_net(fc->net_ns);
+	as->flock_mode = ctx->flock_mode;
+
+	if (ctx->dyn_root) {
+		as->dyn_root = true;
+		return as;
+	}
+
+	as->cell = afs_use_cell(ctx->cell, afs_cell_trace_use_sbi);
+	as->volume = afs_get_volume(ctx->volume,
+				    afs_volume_trace_get_alloc_sbi);
+	return as;
+}
+
+/*
+ * Release everything a superblock info record holds (volume, cell, network
+ * namespace) and free it.  A NULL record is ignored.
+ */
+static void afs_destroy_sbi(struct afs_super_info *as)
+{
+	struct afs_net *net;
+
+	if (!as)
+		return;
+
+	net = afs_net(as->net_ns);
+	afs_put_volume(net, as->volume, afs_volume_trace_put_destroy_sbi);
+	afs_unuse_cell(net, as->cell, afs_cell_trace_unuse_sbi);
+	put_net(as->net_ns);
+	kfree(as);
+}
+
+/*
+ * Tear down a superblock (the ->kill_sb hook of afs_fs_type), then release
+ * its info record.
+ */
+static void afs_kill_super(struct super_block *sb)
+{
+ struct afs_super_info *as = AFS_FS_S(sb);
+
+ if (as->dyn_root)
+ afs_dynroot_depopulate(sb);
+
+ /* Clear the callback interests (which will do ilookup5) before
+ * deactivating the superblock.
+ */
+ if (as->volume)
+ rcu_assign_pointer(as->volume->sb, NULL);
+ kill_anon_super(sb);
+ /* The volume is deactivated only after the superblock has been killed. */
+ if (as->volume)
+ afs_deactivate_volume(as->volume);
+ afs_destroy_sbi(as);
+}
+
+/*
+ * Get an AFS superblock and root directory for a mount context: validate the
+ * context, find or create a matching superblock and hand its root dentry
+ * back in fc->root.  Returns 0 or a negative error code.
+ */
+static int afs_get_tree(struct fs_context *fc)
+{
+	struct afs_fs_context *ctx = fc->fs_private;
+	struct afs_super_info *as;
+	struct super_block *sb;
+	int (*test)(struct super_block *, struct fs_context *);
+	int ret;
+
+	ret = afs_validate_fc(fc);
+	if (ret)
+		goto error;
+
+	_enter("");
+
+	/* Build the info record that a new superblock would carry. */
+	as = afs_alloc_sbi(fc);
+	if (!as) {
+		ret = -ENOMEM;
+		goto error;
+	}
+	fc->s_fs_info = as;
+
+	/* Find a matching deviceless superblock or allocate a new one. */
+	test = as->dyn_root ? afs_dynroot_test_super : afs_test_super;
+	sb = sget_fc(fc, test, afs_set_super);
+	if (IS_ERR(sb)) {
+		ret = PTR_ERR(sb);
+		goto error;
+	}
+
+	if (sb->s_root) {
+		_debug("reuse");
+		ASSERTCMP(sb->s_flags, &, SB_ACTIVE);
+	} else {
+		/* No root yet, so this superblock is newly created. */
+		_debug("create");
+		ret = afs_fill_super(sb, ctx);
+		if (ret < 0)
+			goto error_sb;
+		sb->s_flags |= SB_ACTIVE;
+	}
+
+	fc->root = dget(sb->s_root);
+	trace_afs_get_tree(as->cell, as->volume);
+	_leave(" = 0 [%p]", sb);
+	return 0;
+
+error_sb:
+	deactivate_locked_super(sb);
+error:
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * Dispose of a mount context: release the superblock info record still
+ * attached to it (if any), the volume, the cell and the key, then free the
+ * context itself.
+ */
+static void afs_free_fc(struct fs_context *fc)
+{
+	struct afs_fs_context *ctx = fc->fs_private;
+	struct afs_net *net = ctx->net;
+
+	afs_destroy_sbi(fc->s_fs_info);
+	afs_put_volume(net, ctx->volume, afs_volume_trace_put_free_fc);
+	afs_unuse_cell(net, ctx->cell, afs_cell_trace_unuse_fc);
+	key_put(ctx->key);
+	kfree(ctx);
+}
+
+/* Mount context operations installed as fc->ops by afs_init_fs_context(). */
+static const struct fs_context_operations afs_context_ops = {
+ .free = afs_free_fc,
+ .parse_param = afs_parse_param,
+ .get_tree = afs_get_tree,
+};
+
+/*
+ * Set up the filesystem mount context: allocate the AFS-specific part,
+ * default the volume type to R/O and the cell to whatever afs_find_cell()
+ * returns for an empty name (or none, if that fails).
+ */
+static int afs_init_fs_context(struct fs_context *fc)
+{
+	struct afs_fs_context *ctx;
+	struct afs_cell *ws_cell;
+
+	ctx = kzalloc(sizeof(struct afs_fs_context), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->type = AFSVL_ROVOL;
+	ctx->net = afs_net(fc->net_ns);
+
+	/* Default to the workstation cell. */
+	ws_cell = afs_find_cell(ctx->net, NULL, 0, afs_cell_trace_use_fc);
+	ctx->cell = IS_ERR(ws_cell) ? NULL : ws_cell;
+
+	fc->fs_private = ctx;
+	fc->ops = &afs_context_ops;
+	return 0;
+}
+
+/*
+ * Slab constructor for the inode cache: performs the initialisation that is
+ * done once per slab object rather than once per allocation.  Anything that
+ * could incorrectly leak from one inode to the next *must* be reset by
+ * afs_alloc_inode() instead.
+ */
+static void afs_i_init_once(void *_vnode)
+{
+	struct afs_vnode *vnode = _vnode;
+
+	memset(vnode, 0, sizeof(*vnode));
+	inode_init_once(&vnode->netfs.inode);
+
+	/* Locks */
+	mutex_init(&vnode->io_lock);
+	init_rwsem(&vnode->validate_lock);
+	spin_lock_init(&vnode->wb_lock);
+	spin_lock_init(&vnode->lock);
+	seqlock_init(&vnode->cb_lock);
+
+	/* List heads */
+	INIT_LIST_HEAD(&vnode->wb_keys);
+	INIT_LIST_HEAD(&vnode->pending_locks);
+	INIT_LIST_HEAD(&vnode->granted_locks);
+	INIT_LIST_HEAD(&vnode->cb_mmap_link);
+
+	/* Deferred work */
+	INIT_DELAYED_WORK(&vnode->lock_work, afs_lock_work);
+}
+
+/*
+ * Allocate an AFS inode from the slab cache and reset the per-use state.
+ * Returns the embedded VFS inode, or NULL if allocation fails.
+ */
+static struct inode *afs_alloc_inode(struct super_block *sb)
+{
+	struct afs_vnode *vnode = alloc_inode_sb(sb, afs_inode_cachep, GFP_KERNEL);
+
+	if (!vnode)
+		return NULL;
+
+	atomic_inc(&afs_count_active_inodes);
+
+	/* Reset anything that shouldn't leak from one inode to the next. */
+	memset(&vnode->fid, 0, sizeof(vnode->fid));
+	memset(&vnode->status, 0, sizeof(vnode->status));
+	afs_vnode_set_cache(vnode, NULL);
+
+	vnode->volume		= NULL;
+	vnode->lock_key		= NULL;
+	vnode->permit_cache	= NULL;
+
+	vnode->flags		= 1 << AFS_VNODE_UNSET;
+	vnode->lock_state	= AFS_VNODE_LOCK_NONE;
+
+	init_rwsem(&vnode->rmdir_lock);
+	INIT_WORK(&vnode->cb_work, afs_invalidate_mmap_work);
+
+	_leave(" = %p", &vnode->netfs.inode);
+	return &vnode->netfs.inode;
+}
+
+/*
+ * Return an inode's containing afs_vnode to the slab cache.
+ */
+static void afs_free_inode(struct inode *inode)
+{
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+
+	kmem_cache_free(afs_inode_cachep, vnode);
+}
+
+/*
+ * Note the destruction of an AFS inode by dropping the active-inode count.
+ * The memory itself is released by afs_free_inode().
+ */
+static void afs_destroy_inode(struct inode *inode)
+{
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+
+	_enter("%p{%llx:%llu}", inode, vnode->fid.vid, vnode->fid.vnode);
+	_debug("DESTROY INODE %p", inode);
+
+	atomic_dec(&afs_count_active_inodes);
+}
+
+/*
+ * Success handler for the volume status operation: translate the returned
+ * volume status into the block counts of the caller's kstatfs buffer.
+ */
+static void afs_get_volume_status_success(struct afs_operation *op)
+{
+	struct afs_volume_status *vs = &op->volstatus.vs;
+	struct kstatfs *buf = op->volstatus.buf;
+
+	/* With no quota set, the partition size is the limit. */
+	if (vs->max_quota != 0)
+		buf->f_blocks = vs->max_quota;
+	else
+		buf->f_blocks = vs->part_max_blocks;
+
+	/* Free space is only filled in when usage is below the limit. */
+	if (buf->f_blocks > vs->blocks_in_use) {
+		buf->f_bfree = buf->f_blocks - vs->blocks_in_use;
+		buf->f_bavail = buf->f_bfree;
+	}
+}
+
+/* Operation table used by afs_statfs() to fetch the volume status. */
+static const struct afs_operation_ops afs_get_volume_status_operation = {
+ .issue_afs_rpc = afs_fs_get_volume_status,
+ .issue_yfs_rpc = yfs_fs_get_volume_status,
+ .success = afs_get_volume_status_success,
+};
+
+/*
+ * Report filesystem statistics.  A dynamic root reports fixed values; for a
+ * real volume the block counts are obtained through a synchronous volume
+ * status operation.
+ */
+static int afs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	struct super_block *sb = dentry->d_sb;
+	struct afs_super_info *as = AFS_FS_S(sb);
+	struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
+	struct afs_operation *op;
+
+	buf->f_type	= sb->s_magic;
+	buf->f_bsize	= AFS_BLOCK_SIZE;
+	buf->f_namelen	= AFSNAMEMAX - 1;
+
+	if (as->dyn_root) {
+		buf->f_blocks	= 1;
+		buf->f_bavail	= 0;
+		buf->f_bfree	= 0;
+		return 0;
+	}
+
+	op = afs_alloc_operation(NULL, as->volume);
+	if (IS_ERR(op))
+		return PTR_ERR(op);
+
+	afs_op_set_vnode(op, 0, vnode);
+	op->nr_files		= 1;
+	op->volstatus.buf	= buf;
+	op->ops			= &afs_get_volume_status_operation;
+	return afs_do_sync_operation(op);
+}
diff --git a/fs/afs/vl_alias.c b/fs/afs/vl_alias.c
new file mode 100644
index 000000000..f04a80e4f
--- /dev/null
+++ b/fs/afs/vl_alias.c
@@ -0,0 +1,383 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AFS cell alias detection
+ *
+ * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/namei.h>
+#include <keys/rxrpc-type.h>
+#include "internal.h"
+
+/*
+ * Sample a volume: look the named volume up in the given cell by way of a
+ * temporary on-stack mount context.  Returns whatever afs_create_volume()
+ * returns.
+ */
+static struct afs_volume *afs_sample_volume(struct afs_cell *cell, struct key *key,
+					    const char *name, unsigned int namelen)
+{
+	struct afs_fs_context fc = {
+		.net		= cell->net,
+		.cell		= cell,
+		.key		= key, /* This might need to be something */
+		.volname	= name,
+		.volnamesz	= namelen,
+		.type		= 0, /* Explicitly leave it to the VLDB */
+	};
+	struct afs_volume *volume = afs_create_volume(&fc);
+
+	_leave(" = %p", volume);
+	return volume;
+}
+
+/*
+ * Compare two addresses.
+ *
+ * Returns a negative value, zero or a positive value according to whether
+ * @srx_a sorts before, equal to or after @srx_b.  Addresses are ordered by
+ * transport type, then by address, then by port.  Only the sign of the result
+ * is meaningful.
+ *
+ * IPv4 addresses and ports are compared as unsigned quantities.  Subtracting
+ * them as signed int/short wraps for addresses at or above 128.0.0.0 and for
+ * ports at or above 32768, which gives an inconsistent ordering and can make
+ * the merge-style walk in afs_compare_fs_alists() step past matching entries.
+ * NOTE(review): this assumes the address lists themselves are kept in
+ * unsigned ascending order - confirm against the list-building code.
+ */
+static int afs_compare_addrs(const struct sockaddr_rxrpc *srx_a,
+			     const struct sockaddr_rxrpc *srx_b)
+{
+	u16 port_a, port_b;
+	int diff;
+
+	diff = (short)srx_a->transport_type - (short)srx_b->transport_type;
+	if (diff)
+		return diff;
+
+	switch (srx_a->transport_type) {
+	case AF_INET: {
+		const struct sockaddr_in *a = &srx_a->transport.sin;
+		const struct sockaddr_in *b = &srx_b->transport.sin;
+		u32 addr_a = ntohl(a->sin_addr.s_addr);
+		u32 addr_b = ntohl(b->sin_addr.s_addr);
+
+		if (addr_a != addr_b)
+			return addr_a < addr_b ? -1 : 1;
+		port_a = ntohs(a->sin_port);
+		port_b = ntohs(b->sin_port);
+		break;
+	}
+
+	case AF_INET6: {
+		const struct sockaddr_in6 *a = &srx_a->transport.sin6;
+		const struct sockaddr_in6 *b = &srx_b->transport.sin6;
+
+		diff = memcmp(&a->sin6_addr, &b->sin6_addr, 16);
+		if (diff)
+			return diff;
+		port_a = ntohs(a->sin6_port);
+		port_b = ntohs(b->sin6_port);
+		break;
+	}
+
+	default:
+		WARN_ON(1);
+		return 1;
+	}
+
+	/* Both ports promote to int, so the difference cannot overflow. */
+	return (int)port_a - (int)port_b;
+}
+
+/*
+ * Count the addresses that the address lists of two fileservers have in
+ * common, stepping through both lists in parallel under the guidance of
+ * afs_compare_addrs().
+ */
+static int afs_compare_fs_alists(const struct afs_server *server_a,
+				 const struct afs_server *server_b)
+{
+	const struct afs_addr_list *list_a = rcu_dereference(server_a->addresses);
+	const struct afs_addr_list *list_b = rcu_dereference(server_b->addresses);
+	int ia = 0, ib = 0, nr_common = 0;
+
+	while (ia < list_a->nr_addrs && ib < list_b->nr_addrs) {
+		int diff = afs_compare_addrs(&list_a->addrs[ia],
+					     &list_b->addrs[ib]);
+
+		/* Advance whichever side is behind; both on a match. */
+		if (diff == 0)
+			nr_common++;
+		if (diff <= 0)
+			ia++;
+		if (diff >= 0)
+			ib++;
+	}
+
+	return nr_common;
+}
+
+/*
+ * Compare the fileserver lists of two volumes and return the number of
+ * addresses shared by servers with matching UUIDs.  Returns 0 straight away
+ * if the volume ID sets differ.  The server lists are sorted in order of
+ * ascending UUID, which is what lets both be walked in a single pass.
+ */
+static int afs_compare_volume_slists(const struct afs_volume *vol_a,
+				     const struct afs_volume *vol_b)
+{
+	const struct afs_server_list *list_a = rcu_dereference(vol_a->servers);
+	const struct afs_server_list *list_b = rcu_dereference(vol_b->servers);
+	int t, ia = 0, ib = 0, uuid_matches = 0, addr_matches = 0;
+
+	for (t = 0; t < AFS_MAXTYPES; t++)
+		if (list_a->vids[t] != list_b->vids[t])
+			return 0;
+
+	while (ia < list_a->nr_servers && ib < list_b->nr_servers) {
+		const struct afs_server *server_a = list_a->servers[ia].server;
+		const struct afs_server *server_b = list_b->servers[ib].server;
+		int diff = memcmp(&server_a->uuid, &server_b->uuid, sizeof(uuid_t));
+
+		if (diff == 0) {
+			uuid_matches++;
+			addr_matches += afs_compare_fs_alists(server_a, server_b);
+		}
+		/* Advance whichever side is behind; both on a match. */
+		if (diff <= 0)
+			ia++;
+		if (diff >= 0)
+			ib++;
+	}
+
+	_leave(" = %d [um %d]", addr_matches, uuid_matches);
+	return addr_matches;
+}
+
+/*
+ * Compare the root.cell volume of a cell against that of every other
+ * non-alias cell that has one.  On the first cell whose server list shares an
+ * address with ours, record it in cell->alias_of and return 1; return 0 if
+ * there is no such cell.
+ */
+static int afs_compare_cell_roots(struct afs_cell *cell)
+{
+	struct afs_cell *p, *master = NULL;
+
+	_enter("");
+
+	rcu_read_lock();
+
+	hlist_for_each_entry_rcu(p, &cell->net->proc_cells, proc_link) {
+		/* Skip ourselves, known aliases and cells that don't have a
+		 * root.cell volume.
+		 */
+		if (p == cell || p->alias_of || !p->root_volume)
+			continue;
+
+		if (afs_compare_volume_slists(cell->root_volume, p->root_volume) != 0) {
+			master = p;
+			break;
+		}
+	}
+
+	rcu_read_unlock();
+
+	if (!master) {
+		_leave(" = 0");
+		return 0;
+	}
+
+	cell->alias_of = afs_use_cell(master, afs_cell_trace_use_alias);
+	return 1;
+}
+
+/*
+ * Query the new cell for a volume from a cell we're already using.
+ *
+ * Returns 1 if the new cell has a like-named volume with the same volume ID
+ * and an overlapping server/address list, 0 if not (including when @p has no
+ * volumes or the new cell reports -ENOMEDIUM for the name), or a negative
+ * error code for any other sampling failure.
+ */
+static int afs_query_for_alias_one(struct afs_cell *cell, struct key *key,
+				   struct afs_cell *p)
+{
+	struct afs_volume *sample, *pvol = NULL;
+	int ret = 0;
+
+	/* Arbitrarily pick the volume at the root of the tree. */
+	read_seqlock_excl(&p->volume_lock);
+	if (!RB_EMPTY_ROOT(&p->volumes))
+		pvol = afs_get_volume(rb_entry(p->volumes.rb_node,
+					       struct afs_volume, cell_node),
+				      afs_volume_trace_get_query_alias);
+	read_sequnlock_excl(&p->volume_lock);
+	if (!pvol)
+		return 0;
+
+	_enter("%s:%s", cell->name, pvol->name);
+
+	/* And see if it's in the new cell. */
+	sample = afs_sample_volume(cell, key, pvol->name, pvol->name_len);
+	if (IS_ERR(sample)) {
+		ret = PTR_ERR(sample);
+		/* That volume is not in the new cell, so not an alias */
+		if (ret == -ENOMEDIUM)
+			ret = 0;
+		goto out_put_pvol;
+	}
+
+	/* The new cell has a like-named volume also - compare volume ID,
+	 * server and address lists.
+	 */
+	if (pvol->vid == sample->vid) {
+		rcu_read_lock();
+		if (afs_compare_volume_slists(sample, pvol))
+			ret = 1;
+		rcu_read_unlock();
+	}
+
+	afs_put_volume(cell->net, sample, afs_volume_trace_put_query_alias);
+out_put_pvol:
+	afs_put_volume(cell->net, pvol, afs_volume_trace_put_query_alias);
+	return ret;
+}
+
+/*
+ * Query the new cell for volumes we know exist in cells we're already using.
+ *
+ * Each non-alias cell that has volumes but no root.cell volume is tried in
+ * turn with afs_query_for_alias_one().  Returns 1 with cell->alias_of set
+ * (holding a use on the matching cell) if a match is found, 0 if none is
+ * found, -ERESTARTSYS if taking the cell list lock is interrupted, or the
+ * negative error from the query.
+ */
+static int afs_query_for_alias(struct afs_cell *cell, struct key *key)
+{
+	struct afs_cell *p;
+	int ret;
+
+	_enter("%s", cell->name);
+
+	if (mutex_lock_interruptible(&cell->net->proc_cells_lock) < 0)
+		return -ERESTARTSYS;
+
+	hlist_for_each_entry(p, &cell->net->proc_cells, proc_link) {
+		if (p == cell || p->alias_of)
+			continue;
+		if (RB_EMPTY_ROOT(&p->volumes))
+			continue;
+		if (p->root_volume)
+			continue; /* Ignore cells that have a root.cell volume. */
+
+		/* Hold a use on the candidate while the lock is dropped for
+		 * the duration of the query.
+		 */
+		afs_use_cell(p, afs_cell_trace_use_check_alias);
+		mutex_unlock(&cell->net->proc_cells_lock);
+
+		ret = afs_query_for_alias_one(cell, key, p);
+		if (ret > 0)
+			goto is_alias;
+		if (ret < 0) {
+			/* A failed query says nothing about aliasing, so
+			 * report the error instead of recording an alias.
+			 */
+			afs_unuse_cell(cell->net, p, afs_cell_trace_unuse_check_alias);
+			_leave(" = %d", ret);
+			return ret;
+		}
+
+		if (mutex_lock_interruptible(&cell->net->proc_cells_lock) < 0) {
+			afs_unuse_cell(cell->net, p, afs_cell_trace_unuse_check_alias);
+			return -ERESTARTSYS;
+		}
+
+		afs_unuse_cell(cell->net, p, afs_cell_trace_unuse_check_alias);
+	}
+
+	mutex_unlock(&cell->net->proc_cells_lock);
+	_leave(" = 0");
+	return 0;
+
+is_alias:
+	cell->alias_of = p; /* Transfer our ref */
+	return 1;
+}
+
+/*
+ * Ask the cell's VL servers for the cell name.  Only servers flagged as YFS
+ * are queried; the others are skipped with -EOPNOTSUPP, and if every server
+ * tried was skipped the overall result is -EOPNOTSUPP.
+ *
+ * Returns the value obtained from afs_yfsvl_get_cell_name() or an ERR_PTR.
+ */
+static char *afs_vl_get_cell_name(struct afs_cell *cell, struct key *key)
+{
+	struct afs_vl_cursor vc;
+	char *cell_name = ERR_PTR(-EDESTADDRREQ);
+	bool any_skipped = false, any_queried = false;
+	int ret;
+
+	if (!afs_begin_vlserver_operation(&vc, cell, key))
+		return ERR_PTR(-ERESTARTSYS);
+
+	while (afs_select_vlserver(&vc)) {
+		if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags)) {
+			any_queried = true;
+			cell_name = afs_yfsvl_get_cell_name(&vc);
+		} else {
+			vc.ac.error = -EOPNOTSUPP;
+			any_skipped = true;
+		}
+	}
+
+	ret = afs_end_vlserver_operation(&vc);
+	if (any_skipped && !any_queried)
+		ret = -EOPNOTSUPP;
+	if (ret < 0)
+		return ERR_PTR(ret);
+	return cell_name;
+}
+
+/*
+ * Compare the name a cell's VL servers report with the name we know the cell
+ * by.  Returns 0 if they are the same, 1 (with cell->alias_of set to the cell
+ * of the reported name) if they differ, or a negative error code.
+ */
+static int yfs_check_canonical_cell_name(struct afs_cell *cell, struct key *key)
+{
+	struct afs_cell *master = NULL;
+	char *reported;
+	bool same;
+
+	reported = afs_vl_get_cell_name(cell, key);
+	if (IS_ERR(reported))
+		return PTR_ERR(reported);
+
+	same = strcmp(reported, cell->name) == 0;
+	if (!same)
+		master = afs_lookup_cell(cell->net, reported, strlen(reported),
+					 NULL, false);
+	kfree(reported);
+
+	if (same)
+		return 0;
+	if (IS_ERR(master))
+		return PTR_ERR(master);
+
+	cell->alias_of = master; /* Transfer our ref */
+	return 1;
+}
+
+/*
+ * Run the alias checks in order: the canonical name reported by the VL
+ * servers, then a comparison of root.cell volumes, then - if the cell has no
+ * root.cell - a query using a volume from each other cell.  Returns 0, 1 or a
+ * negative error code.
+ */
+static int afs_do_cell_detect_alias(struct afs_cell *cell, struct key *key)
+{
+	struct afs_volume *root_volume;
+	int ret;
+
+	_enter("%s", cell->name);
+
+	/* Anything but "not supported" from the name check is final. */
+	ret = yfs_check_canonical_cell_name(cell, key);
+	if (ret != -EOPNOTSUPP)
+		return ret;
+
+	/* Try and get the root.cell volume for comparison with other cells */
+	root_volume = afs_sample_volume(cell, key, "root.cell", 9);
+	if (IS_ERR(root_volume)) {
+		if (PTR_ERR(root_volume) != -ENOMEDIUM)
+			return PTR_ERR(root_volume);
+
+		/* Okay, this cell doesn't have a root.cell volume.  We need
+		 * to locate some other random volume and use that to check.
+		 */
+		return afs_query_for_alias(cell, key);
+	}
+
+	cell->root_volume = root_volume;
+	return afs_compare_cell_roots(cell);
+}
+
+/*
+ * Check to see if a new cell is an alias of a cell we already have.  At this
+ * point we have the cell's volume server list.
+ *
+ * Returns 0 if we didn't detect an alias, 1 if we found an alias and an error
+ * if we had problems gathering the data required.  In the case that we did
+ * detect an alias, cell->alias_of is set to point to the assumed master.
+ */
+int afs_cell_detect_alias(struct afs_cell *cell, struct key *key)
+{
+	struct afs_net *net = cell->net;
+	int ret;
+
+	if (mutex_lock_interruptible(&net->cells_alias_lock) < 0)
+		return -ERESTARTSYS;
+
+	if (!test_bit(AFS_CELL_FL_CHECK_ALIAS, &cell->flags)) {
+		/* Already checked; just report what was found then. */
+		ret = cell->alias_of ? 1 : 0;
+	} else {
+		ret = afs_do_cell_detect_alias(cell, key);
+		/* Leave the flag set after an error, so the check is retried. */
+		if (ret >= 0)
+			clear_bit_unlock(AFS_CELL_FL_CHECK_ALIAS, &cell->flags);
+	}
+
+	mutex_unlock(&net->cells_alias_lock);
+
+	if (ret == 1)
+		pr_notice("kAFS: Cell %s is an alias of %s\n",
+			  cell->name, cell->alias_of->name);
+	return ret;
+}
diff --git a/fs/afs/vl_list.c b/fs/afs/vl_list.c
new file mode 100644
index 000000000..acc482161
--- /dev/null
+++ b/fs/afs/vl_list.c
@@ -0,0 +1,328 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AFS vlserver list management.
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+/*
+ * Allocate a VL server record holding a copy of the given name.  The record
+ * starts with a reference count of one and an RTT of UINT_MAX.  Returns NULL
+ * if the allocation fails.
+ */
+struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len,
+					unsigned short port)
+{
+	struct afs_vlserver *vls;
+
+	/* One byte more than the name so the zeroed tail terminates it. */
+	vls = kzalloc(struct_size(vls, name, name_len + 1), GFP_KERNEL);
+	if (!vls)
+		return NULL;
+
+	refcount_set(&vls->ref, 1);
+	rwlock_init(&vls->lock);
+	init_waitqueue_head(&vls->probe_wq);
+	spin_lock_init(&vls->probe_lock);
+	vls->rtt = UINT_MAX;
+	vls->name_len = name_len;
+	vls->port = port;
+	memcpy(vls->name, name, name_len);
+	return vls;
+}
+
+/*
+ * RCU callback for a VL server record: drop its address list and hand the
+ * record itself to kfree_rcu().
+ */
+static void afs_vlserver_rcu(struct rcu_head *rcu)
+{
+	struct afs_vlserver *dead = container_of(rcu, struct afs_vlserver, rcu);
+	struct afs_addr_list *alist = rcu_access_pointer(dead->addresses);
+
+	afs_put_addrlist(alist);
+	kfree_rcu(dead, rcu);
+}
+
+/*
+ * Drop a reference on a VL server record; a NULL pointer is ignored.  When the
+ * last reference goes, destruction is deferred through call_rcu().
+ */
+void afs_put_vlserver(struct afs_net *net, struct afs_vlserver *vlserver)
+{
+	if (!vlserver)
+		return;
+	if (!refcount_dec_and_test(&vlserver->ref))
+		return;
+
+	call_rcu(&vlserver->rcu, afs_vlserver_rcu);
+}
+
+/*
+ * Allocate a zeroed VL server list with room for nr_servers entries and a
+ * reference count of one.  Returns NULL if the allocation fails.
+ */
+struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int nr_servers)
+{
+	struct afs_vlserver_list *list;
+
+	list = kzalloc(struct_size(list, servers, nr_servers), GFP_KERNEL);
+	if (!list)
+		return NULL;
+
+	refcount_set(&list->ref, 1);
+	rwlock_init(&list->lock);
+	return list;
+}
+
+/*
+ * Drop a reference on a VL server list; a NULL pointer is ignored.  When the
+ * last reference goes, the reference held on each listed server is dropped
+ * and the list is freed with kfree_rcu().
+ */
+void afs_put_vlserverlist(struct afs_net *net, struct afs_vlserver_list *vllist)
+{
+	int ix;
+
+	if (!vllist || !refcount_dec_and_test(&vllist->ref))
+		return;
+
+	for (ix = 0; ix < vllist->nr_servers; ix++)
+		afs_put_vlserver(net, vllist->servers[ix].server);
+	kfree_rcu(vllist, rcu);
+}
+
+/*
+ * Read a little-endian 16-bit value from the byte stream at *_b and advance
+ * *_b past the two bytes consumed.
+ */
+static u16 afs_extract_le16(const u8 **_b)
+{
+	const u8 *p = *_b;
+	u16 lo = p[0];
+	u16 hi = p[1];
+
+	*_b = p + 2;
+	return lo | (hi << 8);
+}
+
+/*
+ * Build a VL server address list from a DNS queried server list.
+ *
+ * Parses up to nr_addrs address records from the bytes between *_b and end.
+ * Each record is a one-byte address type followed by 4 bytes (IPv4) or 16
+ * bytes (IPv6) of address. Once parsing has started, *_b is updated to the
+ * position reached, both on success and on error.
+ *
+ * Returns the address list, or ERR_PTR(-ENOMEM) if it could not be allocated,
+ * ERR_PTR(-EINVAL) if an address is truncated, or ERR_PTR(-EADDRNOTAVAIL) if
+ * an address type is not recognised.
+ */
+static struct afs_addr_list *afs_extract_vl_addrs(const u8 **_b, const u8 *end,
+ u8 nr_addrs, u16 port)
+{
+ struct afs_addr_list *alist;
+ const u8 *b = *_b;
+ int ret = -EINVAL;
+
+ alist = afs_alloc_addrlist(nr_addrs, VL_SERVICE, port);
+ if (!alist)
+ return ERR_PTR(-ENOMEM);
+ /* Nothing to parse: hand back the empty list and leave *_b untouched. */
+ if (nr_addrs == 0)
+ return alist;
+
+ /* The loop also stops, without raising an error, as soon as fewer
+ * bytes remain than there are addresses still expected.
+ */
+ for (; nr_addrs > 0 && end - b >= nr_addrs; nr_addrs--) {
+ struct dns_server_list_v1_address hdr;
+ __be32 x[4];
+
+ hdr.address_type = *b++;
+
+ switch (hdr.address_type) {
+ case DNS_ADDRESS_IS_IPV4:
+ if (end - b < 4) {
+ _leave(" = -EINVAL [short inet]");
+ goto error;
+ }
+ memcpy(x, b, 4);
+ afs_merge_fs_addr4(alist, x[0], port);
+ b += 4;
+ break;
+
+ case DNS_ADDRESS_IS_IPV6:
+ if (end - b < 16) {
+ _leave(" = -EINVAL [short inet6]");
+ goto error;
+ }
+ memcpy(x, b, 16);
+ afs_merge_fs_addr6(alist, x, port);
+ b += 16;
+ break;
+
+ default:
+ _leave(" = -EADDRNOTAVAIL [unknown af %u]",
+ hdr.address_type);
+ ret = -EADDRNOTAVAIL;
+ goto error;
+ }
+ }
+
+ /* Start with IPv6 if available. */
+ if (alist->nr_ipv4 < alist->nr_addrs)
+ alist->preferred = alist->nr_ipv4;
+
+ *_b = b;
+ return alist;
+
+error:
+ /* Report how far parsing got, then discard the partial list. */
+ *_b = b;
+ afs_put_addrlist(alist);
+ return ERR_PTR(ret);
+}
+
+/*
+ * Build a VL server list from a DNS queried server list.
+ *
+ * The buffer must begin with a version 1 server-list header; server records
+ * follow, each made up of a fixed part, the server name and its addresses.
+ * A server in the cell's current list that matches a record by name (case
+ * insensitively), name length and port is reused rather than reallocated.
+ * Entries are inserted in order of ascending priority and, within a
+ * priority, descending weight.
+ *
+ * Returns the new list or an ERR_PTR(). For errors other than -ENOMEM the
+ * parse offset and a hex dump of the buffer are logged.
+ */
+struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell,
+ const void *buffer,
+ size_t buffer_size)
+{
+ const struct dns_server_list_v1_header *hdr = buffer;
+ struct dns_server_list_v1_server bs;
+ struct afs_vlserver_list *vllist, *previous;
+ struct afs_addr_list *addrs;
+ struct afs_vlserver *server;
+ const u8 *b = buffer, *end = buffer + buffer_size;
+ int ret = -ENOMEM, nr_servers, i, j;
+
+ _enter("");
+
+ /* Check that it's a server list, v1 */
+ /* NOTE(review): when the buffer is shorter than the header, the notice
+ * below still reads hdr->hdr.content and hdr->hdr.version.
+ */
+ if (end - b < sizeof(*hdr) ||
+ hdr->hdr.content != DNS_PAYLOAD_IS_SERVER_LIST ||
+ hdr->hdr.version != 1) {
+ pr_notice("kAFS: Got DNS record [%u,%u] len %zu\n",
+ hdr->hdr.content, hdr->hdr.version, end - b);
+ ret = -EDESTADDRREQ;
+ goto dump;
+ }
+
+ nr_servers = hdr->nr_servers;
+
+ vllist = afs_alloc_vlserver_list(nr_servers);
+ if (!vllist)
+ return ERR_PTR(-ENOMEM);
+
+ /* Out-of-range source and status values are clamped to the NR__ value. */
+ vllist->source = (hdr->source < NR__dns_record_source) ?
+ hdr->source : NR__dns_record_source;
+ vllist->status = (hdr->status < NR__dns_lookup_status) ?
+ hdr->status : NR__dns_lookup_status;
+
+ /* Pin the cell's current list so its servers can be looked up below. */
+ read_lock(&cell->vl_servers_lock);
+ previous = afs_get_vlserverlist(
+ rcu_dereference_protected(cell->vl_servers,
+ lockdep_is_held(&cell->vl_servers_lock)));
+ read_unlock(&cell->vl_servers_lock);
+
+ b += sizeof(*hdr);
+ while (end - b >= sizeof(bs)) {
+ /* Fixed part of the record: four LE16 fields then four bytes. */
+ bs.name_len = afs_extract_le16(&b);
+ bs.priority = afs_extract_le16(&b);
+ bs.weight = afs_extract_le16(&b);
+ bs.port = afs_extract_le16(&b);
+ bs.source = *b++;
+ bs.status = *b++;
+ bs.protocol = *b++;
+ bs.nr_addrs = *b++;
+
+ _debug("extract %u %u %u %u %u %u %*.*s",
+ bs.name_len, bs.priority, bs.weight,
+ bs.port, bs.protocol, bs.nr_addrs,
+ bs.name_len, bs.name_len, b);
+
+ /* Truncated name: stop here; the b != end check below then fails. */
+ if (end - b < bs.name_len)
+ break;
+
+ ret = -EPROTONOSUPPORT;
+ if (bs.protocol == DNS_SERVER_PROTOCOL_UNSPECIFIED) {
+ bs.protocol = DNS_SERVER_PROTOCOL_UDP;
+ } else if (bs.protocol != DNS_SERVER_PROTOCOL_UDP) {
+ _leave(" = [proto %u]", bs.protocol);
+ goto error;
+ }
+
+ if (bs.port == 0)
+ bs.port = AFS_VL_PORT;
+ if (bs.source > NR__dns_record_source)
+ bs.source = NR__dns_record_source;
+ if (bs.status > NR__dns_lookup_status)
+ bs.status = NR__dns_lookup_status;
+
+ /* See if we can update an old server record */
+ server = NULL;
+ for (i = 0; i < previous->nr_servers; i++) {
+ struct afs_vlserver *p = previous->servers[i].server;
+
+ if (p->name_len == bs.name_len &&
+ p->port == bs.port &&
+ strncasecmp(b, p->name, bs.name_len) == 0) {
+ server = afs_get_vlserver(p);
+ break;
+ }
+ }
+
+ if (!server) {
+ ret = -ENOMEM;
+ server = afs_alloc_vlserver(b, bs.name_len, bs.port);
+ if (!server)
+ goto error;
+ }
+
+ b += bs.name_len;
+
+ /* Extract the addresses - note that we can't skip this as we
+ * have to advance the payload pointer.
+ */
+ addrs = afs_extract_vl_addrs(&b, end, bs.nr_addrs, bs.port);
+ if (IS_ERR(addrs)) {
+ ret = PTR_ERR(addrs);
+ goto error_2;
+ }
+
+ /* More records than the header announced: parse but discard. */
+ if (vllist->nr_servers >= nr_servers) {
+ _debug("skip %u >= %u", vllist->nr_servers, nr_servers);
+ afs_put_addrlist(addrs);
+ afs_put_vlserver(cell->net, server);
+ continue;
+ }
+
+ addrs->source = bs.source;
+ addrs->status = bs.status;
+
+ if (addrs->nr_addrs == 0) {
+ /* No new addresses: keep the server only if it already
+ * has an address list.
+ */
+ afs_put_addrlist(addrs);
+ if (!rcu_access_pointer(server->addresses)) {
+ afs_put_vlserver(cell->net, server);
+ continue;
+ }
+ } else {
+ struct afs_addr_list *old = addrs;
+
+ /* Swap the new address list in and drop the old one. */
+ write_lock(&server->lock);
+ old = rcu_replace_pointer(server->addresses, old,
+ lockdep_is_held(&server->lock));
+ write_unlock(&server->lock);
+ afs_put_addrlist(old);
+ }
+
+
+ /* TODO: Might want to check for duplicates */
+
+ /* Insertion-sort by priority and weight */
+ for (j = 0; j < vllist->nr_servers; j++) {
+ if (bs.priority < vllist->servers[j].priority)
+ break; /* Lower preferable */
+ if (bs.priority == vllist->servers[j].priority &&
+ bs.weight > vllist->servers[j].weight)
+ break; /* Higher preferable */
+ }
+
+ /* Open a gap at slot j unless we are appending. */
+ if (j < vllist->nr_servers) {
+ memmove(vllist->servers + j + 1,
+ vllist->servers + j,
+ (vllist->nr_servers - j) * sizeof(struct afs_vlserver_entry));
+ }
+
+ clear_bit(AFS_VLSERVER_FL_PROBED, &server->flags);
+
+ vllist->servers[j].priority = bs.priority;
+ vllist->servers[j].weight = bs.weight;
+ vllist->servers[j].server = server;
+ vllist->nr_servers++;
+ }
+
+ /* NOTE(review): ret is not assigned on this path, so the error returned
+ * for leftover or truncated data is whatever ret last held (-ENOMEM if
+ * no record got past the name-length check).
+ */
+ if (b != end) {
+ _debug("parse error %zd", b - end);
+ goto error;
+ }
+
+ afs_put_vlserverlist(cell->net, previous);
+ _leave(" = ok [%u]", vllist->nr_servers);
+ return vllist;
+
+error_2:
+ /* The current server has not been added to vllist yet. */
+ afs_put_vlserver(cell->net, server);
+error:
+ afs_put_vlserverlist(cell->net, vllist);
+ afs_put_vlserverlist(cell->net, previous);
+dump:
+ if (ret != -ENOMEM) {
+ printk(KERN_DEBUG "DNS: at %zu\n", (const void *)b - buffer);
+ print_hex_dump_bytes("DNS: ", DUMP_PREFIX_NONE, buffer, buffer_size);
+ }
+ return ERR_PTR(ret);
+}
diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c
new file mode 100644
index 000000000..58452b86e
--- /dev/null
+++ b/fs/afs/vl_probe.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AFS vlserver probing
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include "afs_fs.h"
+#include "internal.h"
+#include "protocol_yfs.h"
+
+
+/*
+ * Handle the completion of a set of probes.  If nothing responded, the
+ * server's RTT is reset and it is marked as not responding; either way the
+ * probing bit is released and anyone waiting on it is woken.
+ */
+static void afs_finished_vl_probe(struct afs_vlserver *server)
+{
+	bool responded = server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED;
+
+	if (!responded) {
+		server->rtt = UINT_MAX;
+		clear_bit(AFS_VLSERVER_FL_RESPONDING, &server->flags);
+	}
+
+	clear_bit_unlock(AFS_VLSERVER_FL_PROBING, &server->flags);
+	wake_up_bit(&server->flags, AFS_VLSERVER_FL_PROBING);
+}
+
+/*
+ * Handle the completion of a probe RPC call.  Waiters on the server's probe
+ * queue are woken if the caller asks for it or if this was the last
+ * outstanding probe, in which case the probe set is finished off first.
+ */
+static void afs_done_one_vl_probe(struct afs_vlserver *server, bool wake_up)
+{
+	if (atomic_dec_and_test(&server->probe_outstanding)) {
+		afs_finished_vl_probe(server);
+		wake_up_all(&server->probe_wq);
+		return;
+	}
+
+	if (wake_up)
+		wake_up_all(&server->probe_wq);
+}
+
+/*
+ * Process the result of probing a vlserver. This is called after successful
+ * or failed delivery of a VL.GetCapabilities operation.
+ *
+ * The call's error decides how the probed address is classified: success and
+ * -ECONNABORTED count as a response from the server; the local failures
+ * listed below only record the error; everything else marks the address as
+ * failed. The probe is then accounted as done.
+ */
+void afs_vlserver_probe_result(struct afs_call *call)
+{
+ struct afs_addr_list *alist = call->alist;
+ struct afs_vlserver *server = call->vlserver;
+ unsigned int server_index = call->server_index;
+ unsigned int rtt_us = 0;
+ unsigned int index = call->addr_ix;
+ bool have_result = false;
+ int ret = call->error;
+
+ _enter("%s,%u,%u,%d,%d", server->name, server_index, index, ret, call->abort_code);
+
+ spin_lock(&server->probe_lock);
+
+ switch (ret) {
+ case 0:
+ server->probe.error = 0;
+ goto responded;
+ case -ECONNABORTED:
+ /* Record the abort only if no probe has responded so far. */
+ if (!(server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED)) {
+ server->probe.abort_code = call->abort_code;
+ server->probe.error = ret;
+ }
+ goto responded;
+ case -ENOMEM:
+ case -ENONET:
+ case -EKEYEXPIRED:
+ case -EKEYREJECTED:
+ case -EKEYREVOKED:
+ /* Local failure: the address's responded/failed bits are left alone
+ * and the first error recorded is kept.
+ */
+ server->probe.flags |= AFS_VLSERVER_PROBE_LOCAL_FAILURE;
+ if (server->probe.error == 0)
+ server->probe.error = ret;
+ trace_afs_io_error(call->debug_id, ret, afs_io_error_vl_probe_fail);
+ goto out;
+ case -ECONNRESET: /* Responded, but call expired. */
+ case -ERFKILL:
+ case -EADDRNOTAVAIL:
+ case -ENETUNREACH:
+ case -EHOSTUNREACH:
+ case -EHOSTDOWN:
+ case -ECONNREFUSED:
+ case -ETIMEDOUT:
+ case -ETIME:
+ default:
+ /* Mark this address as failed. The error is recorded only while
+ * nothing has responded and nothing other than a timeout has been
+ * recorded.
+ */
+ clear_bit(index, &alist->responded);
+ set_bit(index, &alist->failed);
+ if (!(server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED) &&
+ (server->probe.error == 0 ||
+ server->probe.error == -ETIMEDOUT ||
+ server->probe.error == -ETIME))
+ server->probe.error = ret;
+ trace_afs_io_error(call->debug_id, ret, afs_io_error_vl_probe_fail);
+ goto out;
+ }
+
+responded:
+ set_bit(index, &alist->responded);
+ clear_bit(index, &alist->failed);
+
+ /* Note which service answered. A non-YFS answer only clears the
+ * server's YFS flag if no probe in this set has seen YFS.
+ */
+ if (call->service_id == YFS_VL_SERVICE) {
+ server->probe.flags |= AFS_VLSERVER_PROBE_IS_YFS;
+ set_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags);
+ alist->addrs[index].srx_service = call->service_id;
+ } else {
+ server->probe.flags |= AFS_VLSERVER_PROBE_NOT_YFS;
+ if (!(server->probe.flags & AFS_VLSERVER_PROBE_IS_YFS)) {
+ clear_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags);
+ alist->addrs[index].srx_service = call->service_id;
+ }
+ }
+
+ /* Prefer this address if it beats the best RTT seen in this probe set. */
+ rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us);
+ if (rtt_us < server->probe.rtt) {
+ server->probe.rtt = rtt_us;
+ server->rtt = rtt_us;
+ alist->preferred = index;
+ }
+
+ smp_wmb(); /* Set rtt before responded. */
+ server->probe.flags |= AFS_VLSERVER_PROBE_RESPONDED;
+ set_bit(AFS_VLSERVER_FL_PROBED, &server->flags);
+ set_bit(AFS_VLSERVER_FL_RESPONDING, &server->flags);
+ have_result = true;
+out:
+ spin_unlock(&server->probe_lock);
+
+ _debug("probe [%u][%u] %pISpc rtt=%u ret=%d",
+ server_index, index, &alist->addrs[index].transport, rtt_us, ret);
+
+ /* Waiters are woken straight away if this probe got a response. */
+ afs_done_one_vl_probe(server, have_result);
+}
+
+/*
+ * Probe all of a vlserver's addresses to find out the best route and to
+ * query its capabilities.
+ *
+ * One VL.GetCapabilities call is issued per address.  Addresses for which the
+ * call could not be started are accounted as done immediately and their error
+ * is folded into *_e.  Returns true if at least one call was started.
+ */
+static bool afs_do_probe_vlserver(struct afs_net *net,
+				  struct afs_vlserver *server,
+				  struct key *key,
+				  unsigned int server_index,
+				  struct afs_error *_e)
+{
+	struct afs_addr_cursor ac = {
+		.index = 0,
+	};
+	struct afs_call *probe;
+	bool started = false;
+
+	_enter("%s", server->name);
+
+	read_lock(&server->lock);
+	ac.alist = rcu_dereference_protected(server->addresses,
+					     lockdep_is_held(&server->lock));
+	read_unlock(&server->lock);
+
+	/* Reset the probe state before any call gets the chance to complete. */
+	atomic_set(&server->probe_outstanding, ac.alist->nr_addrs);
+	memset(&server->probe, 0, sizeof(server->probe));
+	server->probe.rtt = UINT_MAX;
+
+	for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
+		probe = afs_vl_get_capabilities(net, &ac, key, server,
+						server_index);
+		if (IS_ERR(probe)) {
+			afs_prioritise_error(_e, PTR_ERR(probe), ac.abort_code);
+			afs_done_one_vl_probe(server, false);
+			continue;
+		}
+
+		afs_put_call(probe);
+		started = true;
+	}
+
+	return started;
+}
+
+/*
+ * Send off probes to all unprobed servers.
+ *
+ * Servers already flagged as probed, or that are currently being probed, are
+ * left alone.  Returns 0 if any probe was started here, otherwise the
+ * prioritised error gathered from the failed attempts (0 if there were none).
+ */
+int afs_send_vl_probes(struct afs_net *net, struct key *key,
+		       struct afs_vlserver_list *vllist)
+{
+	struct afs_vlserver *vls;
+	struct afs_error e;
+	bool started = false;
+	int ix;
+
+	e.error = 0;
+	e.responded = false;
+
+	for (ix = 0; ix < vllist->nr_servers; ix++) {
+		vls = vllist->servers[ix].server;
+
+		if (test_bit(AFS_VLSERVER_FL_PROBED, &vls->flags))
+			continue;
+		if (test_and_set_bit_lock(AFS_VLSERVER_FL_PROBING, &vls->flags))
+			continue;
+		if (afs_do_probe_vlserver(net, vls, key, ix, &e))
+			started = true;
+	}
+
+	if (started)
+		return 0;
+	return e.error;
+}
+
+/*
+ * Wait for the first as-yet untried server to respond.
+ *
+ * untried is a bitmask of server indices in vllist. Waiting stops when one of
+ * those servers has responded, when none of them is being probed any more, or
+ * when a signal is pending. The responding untried server with the lowest
+ * RTT, if any, is then recorded in vllist->preferred.
+ *
+ * Returns 0 normally, -ENOMEM if the wait entries could not be allocated, or
+ * -ERESTARTSYS if no server could be preferred and a signal is pending.
+ */
+int afs_wait_for_vl_probes(struct afs_vlserver_list *vllist,
+ unsigned long untried)
+{
+ struct wait_queue_entry *waits;
+ struct afs_vlserver *server;
+ unsigned int rtt = UINT_MAX, rtt_s;
+ bool have_responders = false;
+ int pref = -1, i;
+
+ _enter("%u,%lx", vllist->nr_servers, untried);
+
+ /* Only wait for servers that have a probe outstanding. */
+ for (i = 0; i < vllist->nr_servers; i++) {
+ if (test_bit(i, &untried)) {
+ server = vllist->servers[i].server;
+ if (!test_bit(AFS_VLSERVER_FL_PROBING, &server->flags))
+ __clear_bit(i, &untried);
+ if (server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED)
+ have_responders = true;
+ }
+ }
+ /* Nothing to wait for: a response is already in, or no probes remain. */
+ if (have_responders || !untried)
+ return 0;
+
+ /* One wait entry slot per server; only the untried slots get used. */
+ waits = kmalloc(array_size(vllist->nr_servers, sizeof(*waits)), GFP_KERNEL);
+ if (!waits)
+ return -ENOMEM;
+
+ for (i = 0; i < vllist->nr_servers; i++) {
+ if (test_bit(i, &untried)) {
+ server = vllist->servers[i].server;
+ init_waitqueue_entry(&waits[i], current);
+ add_wait_queue(&server->probe_wq, &waits[i]);
+ }
+ }
+
+ for (;;) {
+ bool still_probing = false;
+
+ /* Set the task state before checking the conditions so that a
+ * wake-up arriving in between is not lost.
+ */
+ set_current_state(TASK_INTERRUPTIBLE);
+ for (i = 0; i < vllist->nr_servers; i++) {
+ if (test_bit(i, &untried)) {
+ server = vllist->servers[i].server;
+ if (server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED)
+ goto stop;
+ if (test_bit(AFS_VLSERVER_FL_PROBING, &server->flags))
+ still_probing = true;
+ }
+ }
+
+ if (!still_probing || signal_pending(current))
+ goto stop;
+ schedule();
+ }
+
+stop:
+ set_current_state(TASK_RUNNING);
+
+ /* Pick the responding server with the lowest RTT and detach from every
+ * wait queue joined above.
+ */
+ for (i = 0; i < vllist->nr_servers; i++) {
+ if (test_bit(i, &untried)) {
+ server = vllist->servers[i].server;
+ rtt_s = READ_ONCE(server->rtt);
+ if (test_bit(AFS_VLSERVER_FL_RESPONDING, &server->flags) &&
+ rtt_s < rtt) {
+ pref = i;
+ rtt = rtt_s;
+ }
+
+ remove_wait_queue(&server->probe_wq, &waits[i]);
+ }
+ }
+
+ kfree(waits);
+
+ if (pref == -1 && signal_pending(current))
+ return -ERESTARTSYS;
+
+ if (pref >= 0)
+ vllist->preferred = pref;
+
+ _leave(" = 0 [%u]", pref);
+ return 0;
+}
diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c
new file mode 100644
index 000000000..eb415ce56
--- /dev/null
+++ b/fs/afs/vl_rotate.c
@@ -0,0 +1,358 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Handle vlserver selection and rotation.
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include "internal.h"
+#include "afs_vl.h"
+
+/*
+ * Begin an operation on a volume location server.
+ *
+ * The cursor is reset and bound to the cell and key.  Returns false, with the
+ * cursor stopped and its error set to -EINTR, if a signal is already pending;
+ * otherwise returns true with the error preset to -EDESTADDRREQ.
+ */
+bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell,
+				  struct key *key)
+{
+	bool interrupted;
+
+	memset(vc, 0, sizeof(*vc));
+	vc->cell = cell;
+	vc->key = key;
+	vc->ac.error = SHRT_MAX;
+
+	interrupted = signal_pending(current);
+	if (interrupted) {
+		vc->error = -EINTR;
+		vc->flags |= AFS_VL_CURSOR_STOP;
+	} else {
+		vc->error = -EDESTADDRREQ;
+	}
+
+	return !interrupted;
+}
+
+/*
+ * Begin iteration through a cell's VL server list.
+ *
+ * If the cell's DNS record is unavailable or has expired, a fresh lookup is
+ * queued; if it is unavailable we also wait (interruptibly) for the lookup
+ * counter to change.  A reference is then taken on the cell's current server
+ * list and every server in it is marked as untried.
+ *
+ * Returns true if there is a non-empty list to iterate over.  Returns false
+ * otherwise; vc->error is set to -ERESTARTSYS, -ENOENT or -EDESTADDRREQ for
+ * the DNS failures and is left as it was if the list is empty.
+ */
+static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
+{
+	struct afs_cell *cell = vc->cell;
+	unsigned int dns_lookup_count;
+	unsigned int nr_servers;
+
+	if (cell->dns_source == DNS_RECORD_UNAVAILABLE ||
+	    cell->dns_expiry <= ktime_get_real_seconds()) {
+		dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count);
+		set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags);
+		afs_queue_cell(cell, afs_cell_trace_get_queue_dns);
+
+		if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
+			if (wait_var_event_interruptible(
+				    &cell->dns_lookup_count,
+				    smp_load_acquire(&cell->dns_lookup_count)
+				    != dns_lookup_count) < 0) {
+				vc->error = -ERESTARTSYS;
+				return false;
+			}
+		}
+
+		/* Status load is ordered after lookup counter load */
+		if (cell->dns_status == DNS_LOOKUP_GOT_NOT_FOUND) {
+			pr_warn("No record of cell %s\n", cell->name);
+			vc->error = -ENOENT;
+			return false;
+		}
+
+		if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
+			vc->error = -EDESTADDRREQ;
+			return false;
+		}
+	}
+
+	read_lock(&cell->vl_servers_lock);
+	vc->server_list = afs_get_vlserverlist(
+		rcu_dereference_protected(cell->vl_servers,
+					  lockdep_is_held(&cell->vl_servers_lock)));
+	read_unlock(&cell->vl_servers_lock);
+
+	nr_servers = vc->server_list->nr_servers;
+	if (!nr_servers)
+		return false;
+
+	/* Shifting by the width of unsigned long or more is undefined, so
+	 * saturate the mask rather than shift if the list is that long.
+	 */
+	if (nr_servers >= BITS_PER_LONG)
+		vc->untried = ~0UL;
+	else
+		vc->untried = (1UL << nr_servers) - 1;
+	vc->index = -1;
+	return true;
+}
+
+/*
+ * Select the vlserver to use. May be called multiple times to rotate
+ * through the vlservers.
+ *
+ * On each call the outcome of the previous attempt (vc->ac.error, or SHRT_MAX
+ * if there has not been one) decides whether to stop, to try the next address
+ * of the current server or to move on to another server.
+ *
+ * Returns true if vc->ac has been pointed at an address to try. Returns false
+ * when the operation is over, with vc->error holding the result.
+ */
+bool afs_select_vlserver(struct afs_vl_cursor *vc)
+{
+ struct afs_addr_list *alist;
+ struct afs_vlserver *vlserver;
+ struct afs_error e;
+ u32 rtt;
+ int error = vc->ac.error, i;
+
+ _enter("%lx[%d],%lx[%d],%d,%d",
+ vc->untried, vc->index,
+ vc->ac.tried, vc->ac.index,
+ error, vc->ac.abort_code);
+
+ if (vc->flags & AFS_VL_CURSOR_STOP) {
+ _leave(" = f [stopped]");
+ return false;
+ }
+
+ vc->nr_iterations++;
+
+ /* Evaluate the result of the previous operation, if there was one. */
+ switch (error) {
+ case SHRT_MAX:
+ /* No previous attempt: this is the first call for the cursor. */
+ goto start;
+
+ default:
+ case 0:
+ /* Success or local failure. Stop. */
+ vc->error = error;
+ vc->flags |= AFS_VL_CURSOR_STOP;
+ _leave(" = f [okay/local %d]", vc->ac.error);
+ return false;
+
+ case -ECONNABORTED:
+ /* The far side rejected the operation on some grounds. This
+ * might involve the server being busy or the volume having been moved.
+ */
+ switch (vc->ac.abort_code) {
+ case AFSVL_IO:
+ case AFSVL_BADVOLOPER:
+ case AFSVL_NOMEM:
+ /* The server went weird. */
+ vc->error = -EREMOTEIO;
+ //write_lock(&vc->cell->vl_servers_lock);
+ //vc->server_list->weird_mask |= 1 << vc->index;
+ //write_unlock(&vc->cell->vl_servers_lock);
+ goto next_server;
+
+ default:
+ vc->error = afs_abort_to_error(vc->ac.abort_code);
+ goto failed;
+ }
+
+ case -ERFKILL:
+ case -EADDRNOTAVAIL:
+ case -ENETUNREACH:
+ case -EHOSTUNREACH:
+ case -EHOSTDOWN:
+ case -ECONNREFUSED:
+ case -ETIMEDOUT:
+ case -ETIME:
+ /* Could not reach this address: try the server's next one. */
+ _debug("no conn %d", error);
+ vc->error = error;
+ goto iterate_address;
+
+ case -ECONNRESET:
+ /* Move on, but flag that one more pass over the list is allowed. */
+ _debug("call reset");
+ vc->error = error;
+ vc->flags |= AFS_VL_CURSOR_RETRY;
+ goto next_server;
+
+ case -EOPNOTSUPP:
+ _debug("notsupp");
+ goto next_server;
+ }
+
+restart_from_beginning:
+ /* Drop the current list and start over, but only once per operation. */
+ _debug("restart");
+ afs_end_cursor(&vc->ac);
+ afs_put_vlserverlist(vc->cell->net, vc->server_list);
+ vc->server_list = NULL;
+ if (vc->flags & AFS_VL_CURSOR_RETRIED)
+ goto failed;
+ vc->flags |= AFS_VL_CURSOR_RETRIED;
+start:
+ _debug("start");
+
+ if (!afs_start_vl_iteration(vc))
+ goto failed;
+
+ error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list);
+ if (error < 0)
+ goto failed_set_error;
+
+pick_server:
+ _debug("pick [%lx]", vc->untried);
+
+ error = afs_wait_for_vl_probes(vc->server_list, vc->untried);
+ if (error < 0)
+ goto failed_set_error;
+
+ /* Pick the untried server with the lowest RTT. */
+ vc->index = vc->server_list->preferred;
+ if (test_bit(vc->index, &vc->untried))
+ goto selected_server;
+
+ /* The preferred server has been tried already: scan the untried,
+ * responding servers for the one with the lowest probe RTT.
+ */
+ vc->index = -1;
+ rtt = U32_MAX;
+ for (i = 0; i < vc->server_list->nr_servers; i++) {
+ struct afs_vlserver *s = vc->server_list->servers[i].server;
+
+ if (!test_bit(i, &vc->untried) ||
+ !test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
+ continue;
+ if (s->probe.rtt < rtt) {
+ vc->index = i;
+ rtt = s->probe.rtt;
+ }
+ }
+
+ if (vc->index == -1)
+ goto no_more_servers;
+
+selected_server:
+ _debug("use %d", vc->index);
+ __clear_bit(vc->index, &vc->untried);
+
+ /* We're starting on a different vlserver from the list. We need to
+ * check it, find its address list and probe its capabilities before we
+ * use it.
+ */
+ ASSERTCMP(vc->ac.alist, ==, NULL);
+ vlserver = vc->server_list->servers[vc->index].server;
+ vc->server = vlserver;
+
+ _debug("USING VLSERVER: %s", vlserver->name);
+
+ read_lock(&vlserver->lock);
+ alist = rcu_dereference_protected(vlserver->addresses,
+ lockdep_is_held(&vlserver->lock));
+ afs_get_addrlist(alist);
+ read_unlock(&vlserver->lock);
+
+ memset(&vc->ac, 0, sizeof(vc->ac));
+
+ /* vc->ac was zeroed just above, so vc->ac.alist is NULL here and the
+ * first branch is the one taken.
+ */
+ if (!vc->ac.alist)
+ vc->ac.alist = alist;
+ else
+ afs_put_addrlist(alist);
+
+ vc->ac.index = -1;
+
+iterate_address:
+ ASSERT(vc->ac.alist);
+ /* Iterate over the current server's address list to try and find an
+ * address on which it will respond to us.
+ */
+ if (!afs_iterate_addresses(&vc->ac))
+ goto next_server;
+
+ _debug("VL address %d/%d", vc->ac.index, vc->ac.alist->nr_addrs);
+
+ _leave(" = t %pISpc", &vc->ac.alist->addrs[vc->ac.index].transport);
+ return true;
+
+next_server:
+ _debug("next");
+ afs_end_cursor(&vc->ac);
+ goto pick_server;
+
+no_more_servers:
+ /* That's all the servers poked to no good effect. Try again if some
+ * of them were busy.
+ */
+ if (vc->flags & AFS_VL_CURSOR_RETRY)
+ goto restart_from_beginning;
+
+ /* Combine the servers' recorded probe errors into the one to report. */
+ e.error = -EDESTADDRREQ;
+ e.responded = false;
+ for (i = 0; i < vc->server_list->nr_servers; i++) {
+ struct afs_vlserver *s = vc->server_list->servers[i].server;
+
+ if (test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
+ e.responded = true;
+ afs_prioritise_error(&e, READ_ONCE(s->probe.error),
+ s->probe.abort_code);
+ }
+
+ error = e.error;
+
+failed_set_error:
+ vc->error = error;
+failed:
+ vc->flags |= AFS_VL_CURSOR_STOP;
+ afs_end_cursor(&vc->ac);
+ _leave(" = f [failed %d]", vc->error);
+ return false;
+}
+
+/*
+ * Dump cursor state in the case of the error being EDESTADDRREQ.
+ *
+ * Does nothing unless CONFIG_AFS_DEBUG_CURSOR is enabled, and stops producing
+ * output once the static counter has gone past 3.
+ */
+static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
+{
+	const struct afs_vlserver_list *sl;
+	struct afs_cell *cell = vc->cell;
+	static int count;
+	int i;
+
+	if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR))
+		return;
+	if (count > 3)
+		return;
+	count++;
+
+	rcu_read_lock();
+	pr_notice("EDESTADDR occurred\n");
+	pr_notice("CELL: %s err=%d\n", cell->name, cell->error);
+	pr_notice("DNS: src=%u st=%u lc=%x\n",
+		  cell->dns_source, cell->dns_status, cell->dns_lookup_count);
+	pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n",
+		  vc->untried, vc->index, vc->nr_iterations, vc->flags, vc->error);
+
+	sl = vc->server_list;
+	if (sl) {
+		pr_notice("VC: SL nr=%u ix=%u\n",
+			  sl->nr_servers, sl->index);
+
+		for (i = 0; i < sl->nr_servers; i++) {
+			const struct afs_vlserver *s = sl->servers[i].server;
+			const struct afs_addr_list *a;
+
+			pr_notice("VC: server %s+%hu fl=%lx E=%hd\n",
+				  s->name, s->port, s->flags, s->probe.error);
+			if (!s->addresses)
+				continue;
+
+			a = rcu_dereference(s->addresses);
+			pr_notice("VC: - nr=%u/%u/%u pf=%u\n",
+				  a->nr_ipv4, a->nr_addrs, a->max_addrs,
+				  a->preferred);
+			pr_notice("VC: - R=%lx F=%lx\n",
+				  a->responded, a->failed);
+			if (a == vc->ac.alist)
+				pr_notice("VC: - current\n");
+		}
+	}
+
+	pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
+		  vc->ac.tried, vc->ac.index, vc->ac.abort_code, vc->ac.error,
+		  vc->ac.responded, vc->ac.nr_iterations);
+	rcu_read_unlock();
+}
+
+/*
+ * Tidy up a volume location server cursor.
+ *
+ * The cursor state is dumped for the address and reachability errors listed
+ * below, then the address cursor and the server list are released.  An
+ * -ECONNABORTED result is converted using the recorded abort code.  Returns
+ * the final vc->error.
+ */
+int afs_end_vlserver_operation(struct afs_vl_cursor *vc)
+{
+	struct afs_net *net = vc->cell->net;
+
+	switch (vc->error) {
+	case -EDESTADDRREQ:
+	case -EADDRNOTAVAIL:
+	case -ENETUNREACH:
+	case -EHOSTUNREACH:
+		afs_vl_dump_edestaddrreq(vc);
+		break;
+	default:
+		break;
+	}
+
+	afs_end_cursor(&vc->ac);
+	afs_put_vlserverlist(net, vc->server_list);
+
+	if (vc->error == -ECONNABORTED)
+		vc->error = afs_abort_to_error(vc->ac.abort_code);
+
+	return vc->error;
+}
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
new file mode 100644
index 000000000..00fca3c66
--- /dev/null
+++ b/fs/afs/vlclient.c
@@ -0,0 +1,759 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AFS Volume Location Service client
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/gfp.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include "afs_fs.h"
+#include "internal.h"
+
+/*
+ * Deliver reply data to a VL.GetEntryByNameU call.
+ *
+ * Once the whole reply has been transferred, the XDR-encoded entry in
+ * call->buffer is unmarshalled into call->ret_vldb: the volume name, the
+ * usable servers (UUID, volume-type mask and address version), the volume IDs
+ * and the existence flags.
+ *
+ * Returns 0 on success or the negative value from afs_transfer_reply().
+ */
+static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
+{
+ struct afs_uvldbentry__xdr *uvldb;
+ struct afs_vldb_entry *entry;
+ bool new_only = false;
+ u32 tmp, nr_servers, vlflags;
+ int i, ret;
+
+ _enter("");
+
+ ret = afs_transfer_reply(call);
+ if (ret < 0)
+ return ret;
+
+ /* unmarshall the reply once we've received all of it */
+ uvldb = call->buffer;
+ entry = call->ret_vldb;
+
+ /* Clamp the server count claimed by the reply. */
+ nr_servers = ntohl(uvldb->nServers);
+ if (nr_servers > AFS_NMAXNSERVERS)
+ nr_servers = AFS_NMAXNSERVERS;
+
+ /* Each character of the name arrives as a separate 32-bit word. */
+ for (i = 0; i < ARRAY_SIZE(uvldb->name) - 1; i++)
+ entry->name[i] = (u8)ntohl(uvldb->name[i]);
+ entry->name[i] = 0;
+ entry->name_len = strlen(entry->name);
+
+ /* If there is a new replication site that we can use, ignore all the
+ * sites that aren't marked as new.
+ */
+ for (i = 0; i < nr_servers; i++) {
+ tmp = ntohl(uvldb->serverFlags[i]);
+ if (!(tmp & AFS_VLSF_DONTUSE) &&
+ (tmp & AFS_VLSF_NEWREPSITE))
+ new_only = true;
+ }
+
+ vlflags = ntohl(uvldb->flags);
+ for (i = 0; i < nr_servers; i++) {
+ struct afs_uuid__xdr *xdr;
+ struct afs_uuid *uuid;
+ int j;
+ /* n is the output slot: it only moves on when a server is accepted. */
+ int n = entry->nr_servers;
+
+ tmp = ntohl(uvldb->serverFlags[i]);
+ if (tmp & AFS_VLSF_DONTUSE ||
+ (new_only && !(tmp & AFS_VLSF_NEWREPSITE)))
+ continue;
+ if (tmp & AFS_VLSF_RWVOL) {
+ entry->fs_mask[n] |= AFS_VOL_VTM_RW;
+ if (vlflags & AFS_VLF_BACKEXISTS)
+ entry->fs_mask[n] |= AFS_VOL_VTM_BAK;
+ }
+ if (tmp & AFS_VLSF_ROVOL)
+ entry->fs_mask[n] |= AFS_VOL_VTM_RO;
+ /* Skip servers that hold neither an RW nor an RO volume. */
+ if (!entry->fs_mask[n])
+ continue;
+
+ /* Repack the UUID from one 32-bit word per field. */
+ xdr = &uvldb->serverNumber[i];
+ uuid = (struct afs_uuid *)&entry->fs_server[n];
+ uuid->time_low = xdr->time_low;
+ uuid->time_mid = htons(ntohl(xdr->time_mid));
+ uuid->time_hi_and_version = htons(ntohl(xdr->time_hi_and_version));
+ uuid->clock_seq_hi_and_reserved = (u8)ntohl(xdr->clock_seq_hi_and_reserved);
+ uuid->clock_seq_low = (u8)ntohl(xdr->clock_seq_low);
+ for (j = 0; j < 6; j++)
+ uuid->node[j] = (u8)ntohl(xdr->node[j]);
+
+ entry->addr_version[n] = ntohl(uvldb->serverUnique[i]);
+ entry->nr_servers++;
+ }
+
+ for (i = 0; i < AFS_MAXTYPES; i++)
+ entry->vid[i] = ntohl(uvldb->volumeId[i]);
+
+ if (vlflags & AFS_VLF_RWEXISTS)
+ __set_bit(AFS_VLDB_HAS_RW, &entry->flags);
+ if (vlflags & AFS_VLF_ROEXISTS)
+ __set_bit(AFS_VLDB_HAS_RO, &entry->flags);
+ if (vlflags & AFS_VLF_BACKEXISTS)
+ __set_bit(AFS_VLDB_HAS_BAK, &entry->flags);
+
+ /* An entry with no volume of any type is recorded as -ENOMEDIUM. */
+ if (!(vlflags & (AFS_VLF_RWEXISTS | AFS_VLF_ROEXISTS | AFS_VLF_BACKEXISTS))) {
+ entry->error = -ENOMEDIUM;
+ __set_bit(AFS_VLDB_QUERY_ERROR, &entry->flags);
+ }
+
+ __set_bit(AFS_VLDB_QUERY_VALID, &entry->flags);
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * Destroy a VL.GetEntryByNameU call: free the VLDB entry still attached to
+ * the call and then run the flat call destructor.
+ */
+static void afs_destroy_vl_get_entry_by_name_u(struct afs_call *call)
+{
+	struct afs_vldb_entry *entry = call->ret_vldb;
+
+	kfree(entry);
+	afs_flat_call_destructor(call);
+}
+
+/*
+ * VL.GetEntryByNameU operation type.
+ *
+ * Ties the operation's name and ID to the reply delivery routine and the
+ * destructor defined above.
+ */
+static const struct afs_call_type afs_RXVLGetEntryByNameU = {
+ .name = "VL.GetEntryByNameU",
+ .op = afs_VL_GetEntryByNameU,
+ .deliver = afs_deliver_vl_get_entry_by_name_u,
+ .destructor = afs_destroy_vl_get_entry_by_name_u,
+};
+
+/*
+ * Dispatch a get volume entry by name or ID operation (uuid variant). If the
+ * volname is a decimal number then it's a volume ID not a volume name.
+ *
+ * The request consists of the operation ID, the name length and the name
+ * itself, zero-padded to a multiple of four bytes.  Returns ERR_PTR(-ENOMEM)
+ * if the entry or the call cannot be allocated, otherwise the result of
+ * waiting for the call to complete.
+ */
+struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *vc,
+						  const char *volname,
+						  int volnamesz)
+{
+	struct afs_vldb_entry *vldb;
+	struct afs_call *call;
+	struct afs_net *net = vc->cell->net;
+	size_t req_len, pad_len;
+	__be32 *bp;
+
+	_enter("");
+
+	pad_len = (4 - (volnamesz & 3)) & 3;
+	req_len = 8 + volnamesz + pad_len;
+
+	vldb = kzalloc(sizeof(struct afs_vldb_entry), GFP_KERNEL);
+	if (!vldb)
+		return ERR_PTR(-ENOMEM);
+
+	call = afs_alloc_flat_call(net, &afs_RXVLGetEntryByNameU, req_len,
+				   sizeof(struct afs_uvldbentry__xdr));
+	if (!call) {
+		kfree(vldb);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	call->key = vc->key;
+	call->ret_vldb = vldb;
+	call->max_lifespan = AFS_VL_MAX_LIFESPAN;
+
+	/* Marshall the parameters */
+	bp = call->request;
+	bp[0] = htonl(VLGETENTRYBYNAMEU);
+	bp[1] = htonl(volnamesz);
+	memcpy(bp + 2, volname, volnamesz);
+	if (pad_len > 0)
+		memset((void *)(bp + 2) + volnamesz, 0, pad_len);
+
+	trace_afs_make_vl_call(call);
+	afs_make_call(&vc->ac, call, GFP_KERNEL);
+	return (struct afs_vldb_entry *)afs_wait_for_call_to_complete(call, &vc->ac);
+}
+
+/*
+ * Deliver reply data to a VL.GetAddrsU call.
+ *
+ * GetAddrsU(IN ListAddrByAttributes *inaddr,
+ * OUT afsUUID *uuidp1,
+ * OUT uint32_t *uniquifier,
+ * OUT uint32_t *nentries,
+ * OUT bulkaddrs *blkaddrs);
+ *
+ * call->unmarshall tracks the stage reached:
+ * 0 - set up extraction of the UUID, uniquifier, nentries and array size;
+ * 1 - parse those and allocate the address list;
+ * 2 - extract the addresses in batches of up to four 32-bit words.
+ *
+ * Returns 0 when done (the list is left in call->ret_alist), -ENOMEM if the
+ * list cannot be allocated, or a negative value from afs_extract_data().
+ */
+static int afs_deliver_vl_get_addrs_u(struct afs_call *call)
+{
+ struct afs_addr_list *alist;
+ __be32 *bp;
+ u32 uniquifier, nentries, count;
+ int i, ret;
+
+ _enter("{%u,%zu/%u}",
+ call->unmarshall, iov_iter_count(call->iter), call->count);
+
+ switch (call->unmarshall) {
+ case 0:
+ afs_extract_to_buf(call,
+ sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32));
+ call->unmarshall++;
+
+ /* Extract the returned uuid, uniquifier, nentries and
+ * blkaddrs size */
+ fallthrough;
+ case 1:
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ /* The three counters follow the UUID, which is skipped over. */
+ bp = call->buffer + sizeof(struct afs_uuid__xdr);
+ uniquifier = ntohl(*bp++);
+ nentries = ntohl(*bp++);
+ count = ntohl(*bp);
+
+ /* Never keep more entries than the array is said to contain. */
+ nentries = min(nentries, count);
+ alist = afs_alloc_addrlist(nentries, FS_SERVICE, AFS_FS_PORT);
+ if (!alist)
+ return -ENOMEM;
+ alist->version = uniquifier;
+ call->ret_alist = alist;
+ /* count: array words still to read; count2: entries to keep. */
+ call->count = count;
+ call->count2 = nentries;
+ call->unmarshall++;
+
+ more_entries:
+ count = min(call->count, 4U);
+ afs_extract_to_buf(call, count * sizeof(__be32));
+
+ fallthrough; /* and extract entries */
+ case 2:
+ ret = afs_extract_data(call, call->count > 4);
+ if (ret < 0)
+ return ret;
+
+ alist = call->ret_alist;
+ bp = call->buffer;
+ count = min(call->count, 4U);
+ /* Merge addresses until the list holds count2 entries. */
+ for (i = 0; i < count; i++)
+ if (alist->nr_addrs < call->count2)
+ afs_merge_fs_addr4(alist, *bp++, AFS_FS_PORT);
+
+ call->count -= count;
+ if (call->count > 0)
+ goto more_entries;
+ call->unmarshall++;
+ break;
+ }
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * Destroy a VL.GetAddrsU call: drop the address list still attached to the
+ * call and then run the flat call destructor.
+ */
+static void afs_vl_get_addrs_u_destructor(struct afs_call *call)
+{
+	afs_put_addrlist(call->ret_alist);
+	/* Plain call: a void function must not return an expression. */
+	afs_flat_call_destructor(call);
+}
+
+/*
+ * VL.GetAddrsU operation type.
+ */
+static const struct afs_call_type afs_RXVLGetAddrsU = {
+ .name = "VL.GetAddrsU",
+ .op = afs_VL_GetAddrsU,
+ .deliver = afs_deliver_vl_get_addrs_u,
+ .destructor = afs_vl_get_addrs_u_destructor,
+};
+
+/*
+ * Dispatch an operation to get the addresses for a server, where the server is
+ * nominated by UUID.
+ */
+struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *vc,
+ const uuid_t *uuid)
+{
+ struct afs_ListAddrByAttributes__xdr *r;
+ const struct afs_uuid *u = (const struct afs_uuid *)uuid;
+ struct afs_call *call;
+ struct afs_net *net = vc->cell->net;
+ __be32 *bp;
+ int i;
+
+ _enter("");
+
+ call = afs_alloc_flat_call(net, &afs_RXVLGetAddrsU,
+ sizeof(__be32) + sizeof(struct afs_ListAddrByAttributes__xdr),
+ sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32));
+ if (!call)
+ return ERR_PTR(-ENOMEM);
+
+ call->key = vc->key;
+ call->ret_alist = NULL;
+ call->max_lifespan = AFS_VL_MAX_LIFESPAN;
+
+ /* Marshall the parameters */
+ bp = call->request;
+ *bp++ = htonl(VLGETADDRSU);
+ r = (struct afs_ListAddrByAttributes__xdr *)bp;
+ r->Mask = htonl(AFS_VLADDR_UUID);
+ r->ipaddr = 0;
+ r->index = 0;
+ r->spare = 0;
+ r->uuid.time_low = u->time_low;
+ r->uuid.time_mid = htonl(ntohs(u->time_mid));
+ r->uuid.time_hi_and_version = htonl(ntohs(u->time_hi_and_version));
+ r->uuid.clock_seq_hi_and_reserved = htonl(u->clock_seq_hi_and_reserved);
+ r->uuid.clock_seq_low = htonl(u->clock_seq_low);
+ for (i = 0; i < 6; i++)
+ r->uuid.node[i] = htonl(u->node[i]);
+
+ trace_afs_make_vl_call(call);
+ afs_make_call(&vc->ac, call, GFP_KERNEL);
+ return (struct afs_addr_list *)afs_wait_for_call_to_complete(call, &vc->ac);
+}
+
+/*
+ * Deliver reply data to an VL.GetCapabilities operation.
+ */
+static int afs_deliver_vl_get_capabilities(struct afs_call *call)
+{
+ u32 count;
+ int ret;
+
+ _enter("{%u,%zu/%u}",
+ call->unmarshall, iov_iter_count(call->iter), call->count);
+
+ switch (call->unmarshall) {
+ case 0:
+ afs_extract_to_tmp(call);
+ call->unmarshall++;
+
+ fallthrough; /* and extract the capabilities word count */
+ case 1:
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ count = ntohl(call->tmp);
+ call->count = count;
+ call->count2 = count;
+
+ call->unmarshall++;
+ afs_extract_discard(call, count * sizeof(__be32));
+
+ fallthrough; /* and extract capabilities words */
+ case 2:
+ ret = afs_extract_data(call, false);
+ if (ret < 0)
+ return ret;
+
+ /* TODO: Examine capabilities */
+
+ call->unmarshall++;
+ break;
+ }
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+ /*
+ * Destructor for a VL.GetCapabilities call: put the VL server recorded in
+ * call->vlserver when the call was set up, then pass the call to
+ * afs_flat_call_destructor().
+ */
+ static void afs_destroy_vl_get_capabilities(struct afs_call *call)
+ {
+ afs_put_vlserver(call->net, call->vlserver);
+ afs_flat_call_destructor(call);
+ }
+
+/*
+ * VL.GetCapabilities operation type
+ */
+static const struct afs_call_type afs_RXVLGetCapabilities = {
+ .name = "VL.GetCapabilities",
+ .op = afs_VL_GetCapabilities,
+ .deliver = afs_deliver_vl_get_capabilities,
+ .done = afs_vlserver_probe_result,
+ .destructor = afs_destroy_vl_get_capabilities,
+};
+
+/*
+ * Probe a volume server for the capabilities that it supports. This can
+ * return up to 196 words.
+ *
+ * We use this to probe for service upgrade to determine what the server at the
+ * other end supports.
+ */
+struct afs_call *afs_vl_get_capabilities(struct afs_net *net,
+ struct afs_addr_cursor *ac,
+ struct key *key,
+ struct afs_vlserver *server,
+ unsigned int server_index)
+{
+ struct afs_call *call;
+ __be32 *bp;
+
+ _enter("");
+
+ call = afs_alloc_flat_call(net, &afs_RXVLGetCapabilities, 1 * 4, 16 * 4);
+ if (!call)
+ return ERR_PTR(-ENOMEM);
+
+ call->key = key;
+ call->vlserver = afs_get_vlserver(server);
+ call->server_index = server_index;
+ call->upgrade = true;
+ call->async = true;
+ call->max_lifespan = AFS_PROBE_MAX_LIFESPAN;
+
+ /* marshall the parameters */
+ bp = call->request;
+ *bp++ = htonl(VLGETCAPABILITIES);
+
+ /* Can't take a ref on server */
+ trace_afs_make_vl_call(call);
+ afs_make_call(ac, call, GFP_KERNEL);
+ return call;
+}
+
+/*
+ * Deliver reply data to a YFSVL.GetEndpoints call.
+ *
+ * GetEndpoints(IN yfsServerAttributes *attr,
+ * OUT opr_uuid *uuid,
+ * OUT afs_int32 *uniquifier,
+ * OUT endpoints *fsEndpoints,
+ * OUT endpoints *volEndpoints)
+ */
+static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
+{
+ struct afs_addr_list *alist;
+ __be32 *bp;
+ u32 uniquifier, size;
+ int ret;
+
+ _enter("{%u,%zu,%u}",
+ call->unmarshall, iov_iter_count(call->iter), call->count2);
+
+ switch (call->unmarshall) {
+ case 0:
+ afs_extract_to_buf(call, sizeof(uuid_t) + 3 * sizeof(__be32));
+ call->unmarshall = 1;
+
+ /* Extract the returned uuid, uniquifier, fsEndpoints count and
+ * either the first fsEndpoint type or the volEndpoints
+ * count if there are no fsEndpoints. */
+ fallthrough;
+ case 1:
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer + sizeof(uuid_t);
+ uniquifier = ntohl(*bp++);
+ call->count = ntohl(*bp++);
+ call->count2 = ntohl(*bp); /* Type or next count */
+
+ if (call->count > YFS_MAXENDPOINTS)
+ return afs_protocol_error(call, afs_eproto_yvl_fsendpt_num);
+
+ alist = afs_alloc_addrlist(call->count, FS_SERVICE, AFS_FS_PORT);
+ if (!alist)
+ return -ENOMEM;
+ alist->version = uniquifier;
+ call->ret_alist = alist;
+
+ if (call->count == 0)
+ goto extract_volendpoints;
+
+ next_fsendpoint:
+ switch (call->count2) {
+ case YFS_ENDPOINT_IPV4:
+ size = sizeof(__be32) * (1 + 1 + 1);
+ break;
+ case YFS_ENDPOINT_IPV6:
+ size = sizeof(__be32) * (1 + 4 + 1);
+ break;
+ default:
+ return afs_protocol_error(call, afs_eproto_yvl_fsendpt_type);
+ }
+
+ size += sizeof(__be32);
+ afs_extract_to_buf(call, size);
+ call->unmarshall = 2;
+
+ fallthrough; /* and extract fsEndpoints[] entries */
+ case 2:
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ alist = call->ret_alist;
+ bp = call->buffer;
+ switch (call->count2) {
+ case YFS_ENDPOINT_IPV4:
+ if (ntohl(bp[0]) != sizeof(__be32) * 2)
+ return afs_protocol_error(
+ call, afs_eproto_yvl_fsendpt4_len);
+ afs_merge_fs_addr4(alist, bp[1], ntohl(bp[2]));
+ bp += 3;
+ break;
+ case YFS_ENDPOINT_IPV6:
+ if (ntohl(bp[0]) != sizeof(__be32) * 5)
+ return afs_protocol_error(
+ call, afs_eproto_yvl_fsendpt6_len);
+ afs_merge_fs_addr6(alist, bp + 1, ntohl(bp[5]));
+ bp += 6;
+ break;
+ default:
+ return afs_protocol_error(call, afs_eproto_yvl_fsendpt_type);
+ }
+
+ /* Got either the type of the next entry or the count of
+ * volEndpoints if no more fsEndpoints.
+ */
+ call->count2 = ntohl(*bp++);
+
+ call->count--;
+ if (call->count > 0)
+ goto next_fsendpoint;
+
+ extract_volendpoints:
+ /* Extract the list of volEndpoints. */
+ call->count = call->count2;
+ if (!call->count)
+ goto end;
+ if (call->count > YFS_MAXENDPOINTS)
+ return afs_protocol_error(call, afs_eproto_yvl_vlendpt_type);
+
+ afs_extract_to_buf(call, 1 * sizeof(__be32));
+ call->unmarshall = 3;
+
+ /* Extract the type of volEndpoints[0]. Normally we would
+ * extract the type of the next endpoint when we extract the
+ * data of the current one, but this is the first...
+ */
+ fallthrough;
+ case 3:
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer;
+
+ next_volendpoint:
+ call->count2 = ntohl(*bp++);
+ switch (call->count2) {
+ case YFS_ENDPOINT_IPV4:
+ size = sizeof(__be32) * (1 + 1 + 1);
+ break;
+ case YFS_ENDPOINT_IPV6:
+ size = sizeof(__be32) * (1 + 4 + 1);
+ break;
+ default:
+ return afs_protocol_error(call, afs_eproto_yvl_vlendpt_type);
+ }
+
+ if (call->count > 1)
+ size += sizeof(__be32); /* Get next type too */
+ afs_extract_to_buf(call, size);
+ call->unmarshall = 4;
+
+ fallthrough; /* and extract volEndpoints[] entries */
+ case 4:
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer;
+ switch (call->count2) {
+ case YFS_ENDPOINT_IPV4:
+ if (ntohl(bp[0]) != sizeof(__be32) * 2)
+ return afs_protocol_error(
+ call, afs_eproto_yvl_vlendpt4_len);
+ bp += 3;
+ break;
+ case YFS_ENDPOINT_IPV6:
+ if (ntohl(bp[0]) != sizeof(__be32) * 5)
+ return afs_protocol_error(
+ call, afs_eproto_yvl_vlendpt6_len);
+ bp += 6;
+ break;
+ default:
+ return afs_protocol_error(call, afs_eproto_yvl_vlendpt_type);
+ }
+
+ /* Got either the type of the next entry or the count of
+ * volEndpoints if no more fsEndpoints.
+ */
+ call->count--;
+ if (call->count > 0)
+ goto next_volendpoint;
+
+ end:
+ afs_extract_discard(call, 0);
+ call->unmarshall = 5;
+
+ fallthrough; /* Done */
+ case 5:
+ ret = afs_extract_data(call, false);
+ if (ret < 0)
+ return ret;
+ call->unmarshall = 6;
+ fallthrough;
+
+ case 6:
+ break;
+ }
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * YFSVL.GetEndpoints operation type.
+ */
+static const struct afs_call_type afs_YFSVLGetEndpoints = {
+ .name = "YFSVL.GetEndpoints",
+ .op = afs_YFSVL_GetEndpoints,
+ .deliver = afs_deliver_yfsvl_get_endpoints,
+ .destructor = afs_vl_get_addrs_u_destructor,
+};
+
+/*
+ * Dispatch an operation to get the addresses for a server, where the server is
+ * nominated by UUID.
+ */
+struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *vc,
+ const uuid_t *uuid)
+{
+ struct afs_call *call;
+ struct afs_net *net = vc->cell->net;
+ __be32 *bp;
+
+ _enter("");
+
+ call = afs_alloc_flat_call(net, &afs_YFSVLGetEndpoints,
+ sizeof(__be32) * 2 + sizeof(*uuid),
+ sizeof(struct in6_addr) + sizeof(__be32) * 3);
+ if (!call)
+ return ERR_PTR(-ENOMEM);
+
+ call->key = vc->key;
+ call->ret_alist = NULL;
+ call->max_lifespan = AFS_VL_MAX_LIFESPAN;
+
+ /* Marshall the parameters */
+ bp = call->request;
+ *bp++ = htonl(YVLGETENDPOINTS);
+ *bp++ = htonl(YFS_SERVER_UUID);
+ memcpy(bp, uuid, sizeof(*uuid)); /* Type opr_uuid */
+
+ trace_afs_make_vl_call(call);
+ afs_make_call(&vc->ac, call, GFP_KERNEL);
+ return (struct afs_addr_list *)afs_wait_for_call_to_complete(call, &vc->ac);
+}
+
+/*
+ * Deliver reply data to a YFSVL.GetCellName operation.
+ */
+static int afs_deliver_yfsvl_get_cell_name(struct afs_call *call)
+{
+ char *cell_name;
+ u32 namesz, paddedsz;
+ int ret;
+
+ _enter("{%u,%zu/%u}",
+ call->unmarshall, iov_iter_count(call->iter), call->count);
+
+ switch (call->unmarshall) {
+ case 0:
+ afs_extract_to_tmp(call);
+ call->unmarshall++;
+
+ fallthrough; /* and extract the cell name length */
+ case 1:
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ namesz = ntohl(call->tmp);
+ if (namesz > AFS_MAXCELLNAME)
+ return afs_protocol_error(call, afs_eproto_cellname_len);
+ paddedsz = (namesz + 3) & ~3;
+ call->count = namesz;
+ call->count2 = paddedsz - namesz;
+
+ cell_name = kmalloc(namesz + 1, GFP_KERNEL);
+ if (!cell_name)
+ return -ENOMEM;
+ cell_name[namesz] = 0;
+ call->ret_str = cell_name;
+
+ afs_extract_begin(call, cell_name, namesz);
+ call->unmarshall++;
+
+ fallthrough; /* and extract cell name */
+ case 2:
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ afs_extract_discard(call, call->count2);
+ call->unmarshall++;
+
+ fallthrough; /* and extract padding */
+ case 3:
+ ret = afs_extract_data(call, false);
+ if (ret < 0)
+ return ret;
+
+ call->unmarshall++;
+ break;
+ }
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+ /*
+ * Destructor for a YFSVL.GetCellName call: free the name buffer still
+ * attached to the call (kfree() accepts NULL), then pass the call to
+ * afs_flat_call_destructor().
+ */
+ static void afs_destroy_yfsvl_get_cell_name(struct afs_call *call)
+ {
+ kfree(call->ret_str);
+ afs_flat_call_destructor(call);
+ }
+
+/*
+ * VL.GetCapabilities operation type
+ */
+static const struct afs_call_type afs_YFSVLGetCellName = {
+ .name = "YFSVL.GetCellName",
+ .op = afs_YFSVL_GetCellName,
+ .deliver = afs_deliver_yfsvl_get_cell_name,
+ .destructor = afs_destroy_yfsvl_get_cell_name,
+};
+
+/*
+ * Probe a volume server for the capabilities that it supports. This can
+ * return up to 196 words.
+ *
+ * We use this to probe for service upgrade to determine what the server at the
+ * other end supports.
+ */
+char *afs_yfsvl_get_cell_name(struct afs_vl_cursor *vc)
+{
+ struct afs_call *call;
+ struct afs_net *net = vc->cell->net;
+ __be32 *bp;
+
+ _enter("");
+
+ call = afs_alloc_flat_call(net, &afs_YFSVLGetCellName, 1 * 4, 0);
+ if (!call)
+ return ERR_PTR(-ENOMEM);
+
+ call->key = vc->key;
+ call->ret_str = NULL;
+ call->max_lifespan = AFS_VL_MAX_LIFESPAN;
+
+ /* marshall the parameters */
+ bp = call->request;
+ *bp++ = htonl(YVLGETCELLNAME);
+
+ /* Can't take a ref on server */
+ trace_afs_make_vl_call(call);
+ afs_make_call(&vc->ac, call, GFP_KERNEL);
+ return (char *)afs_wait_for_call_to_complete(call, &vc->ac);
+}
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
new file mode 100644
index 000000000..1c9144e3e
--- /dev/null
+++ b/fs/afs/volume.c
@@ -0,0 +1,452 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AFS volume management
+ *
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+ /* Added to the current real time in seconds to set a volume's update_at. */
+ static unsigned __read_mostly afs_volume_record_life = 60 * 60;
+
+/*
+ * Insert a volume into a cell. If there's an existing volume record, that is
+ * returned instead with a ref held.
+ */
+static struct afs_volume *afs_insert_volume_into_cell(struct afs_cell *cell,
+ struct afs_volume *volume)
+{
+ struct afs_volume *p;
+ struct rb_node *parent = NULL, **pp;
+
+ write_seqlock(&cell->volume_lock);
+
+ pp = &cell->volumes.rb_node;
+ while (*pp) {
+ parent = *pp;
+ p = rb_entry(parent, struct afs_volume, cell_node);
+ if (p->vid < volume->vid) {
+ pp = &(*pp)->rb_left;
+ } else if (p->vid > volume->vid) {
+ pp = &(*pp)->rb_right;
+ } else {
+ if (afs_try_get_volume(p, afs_volume_trace_get_cell_insert)) {
+ volume = p;
+ goto found;
+ }
+
+ set_bit(AFS_VOLUME_RM_TREE, &volume->flags);
+ rb_replace_node_rcu(&p->cell_node, &volume->cell_node, &cell->volumes);
+ }
+ }
+
+ rb_link_node_rcu(&volume->cell_node, parent, pp);
+ rb_insert_color(&volume->cell_node, &cell->volumes);
+ hlist_add_head_rcu(&volume->proc_link, &cell->proc_volumes);
+
+found:
+ write_sequnlock(&cell->volume_lock);
+ return volume;
+
+}
+
+ /*
+ * Unhook a volume from its cell.
+ *
+ * Nothing is done if the volume was never added to the cell's proc list.
+ * Otherwise, under cell->volume_lock, the volume is taken off that list and -
+ * unless AFS_VOLUME_RM_TREE shows it has already left the tree - erased from
+ * the cell's volume tree.
+ */
+ static void afs_remove_volume_from_cell(struct afs_volume *volume)
+ {
+ 	struct afs_cell *cell = volume->cell;
+
+ 	if (!hlist_unhashed(&volume->proc_link)) {
+ 		/* Trace the volume's own refcount, as the other volume
+ 		 * tracepoints do, not the cell's.
+ 		 */
+ 		trace_afs_volume(volume->vid, refcount_read(&volume->ref),
+ 				 afs_volume_trace_remove);
+ 		write_seqlock(&cell->volume_lock);
+ 		hlist_del_rcu(&volume->proc_link);
+ 		if (!test_and_set_bit(AFS_VOLUME_RM_TREE, &volume->flags))
+ 			rb_erase(&volume->cell_node, &cell->volumes);
+ 		write_sequnlock(&cell->volume_lock);
+ 	}
+ }
+
+/*
+ * Allocate a volume record and load it up from a vldb record.
+ */
+static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params,
+ struct afs_vldb_entry *vldb,
+ unsigned long type_mask)
+{
+ struct afs_server_list *slist;
+ struct afs_volume *volume;
+ int ret = -ENOMEM, nr_servers = 0, i;
+
+ for (i = 0; i < vldb->nr_servers; i++)
+ if (vldb->fs_mask[i] & type_mask)
+ nr_servers++;
+
+ volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
+ if (!volume)
+ goto error_0;
+
+ volume->vid = vldb->vid[params->type];
+ volume->update_at = ktime_get_real_seconds() + afs_volume_record_life;
+ volume->cell = afs_get_cell(params->cell, afs_cell_trace_get_vol);
+ volume->type = params->type;
+ volume->type_force = params->force;
+ volume->name_len = vldb->name_len;
+
+ refcount_set(&volume->ref, 1);
+ INIT_HLIST_NODE(&volume->proc_link);
+ rwlock_init(&volume->servers_lock);
+ rwlock_init(&volume->cb_v_break_lock);
+ memcpy(volume->name, vldb->name, vldb->name_len + 1);
+
+ slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask);
+ if (IS_ERR(slist)) {
+ ret = PTR_ERR(slist);
+ goto error_1;
+ }
+
+ refcount_set(&slist->usage, 1);
+ rcu_assign_pointer(volume->servers, slist);
+ trace_afs_volume(volume->vid, 1, afs_volume_trace_alloc);
+ return volume;
+
+error_1:
+ afs_put_cell(volume->cell, afs_cell_trace_put_vol);
+ kfree(volume);
+error_0:
+ return ERR_PTR(ret);
+}
+
+/*
+ * Look up or allocate a volume record.
+ */
+static struct afs_volume *afs_lookup_volume(struct afs_fs_context *params,
+ struct afs_vldb_entry *vldb,
+ unsigned long type_mask)
+{
+ struct afs_volume *candidate, *volume;
+
+ candidate = afs_alloc_volume(params, vldb, type_mask);
+ if (IS_ERR(candidate))
+ return candidate;
+
+ volume = afs_insert_volume_into_cell(params->cell, candidate);
+ if (volume != candidate)
+ afs_put_volume(params->net, candidate, afs_volume_trace_put_cell_dup);
+ return volume;
+}
+
+/*
+ * Look up a VLDB record for a volume.
+ */
+static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell,
+ struct key *key,
+ const char *volname,
+ size_t volnamesz)
+{
+ struct afs_vldb_entry *vldb = ERR_PTR(-EDESTADDRREQ);
+ struct afs_vl_cursor vc;
+ int ret;
+
+ if (!afs_begin_vlserver_operation(&vc, cell, key))
+ return ERR_PTR(-ERESTARTSYS);
+
+ while (afs_select_vlserver(&vc)) {
+ vldb = afs_vl_get_entry_by_name_u(&vc, volname, volnamesz);
+ }
+
+ ret = afs_end_vlserver_operation(&vc);
+ return ret < 0 ? ERR_PTR(ret) : vldb;
+}
+
+/*
+ * Look up a volume in the VL server and create a candidate volume record for
+ * it.
+ *
+ * The volume name can be one of the following:
+ * "%[cell:]volume[.]" R/W volume
+ * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0),
+ * or R/W (rwparent=1) volume
+ * "%[cell:]volume.readonly" R/O volume
+ * "#[cell:]volume.readonly" R/O volume
+ * "%[cell:]volume.backup" Backup volume
+ * "#[cell:]volume.backup" Backup volume
+ *
+ * The cell name is optional, and defaults to the current cell.
+ *
+ * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin
+ * Guide
+ * - Rule 1: Explicit type suffix forces access of that type or nothing
+ * (no suffix, then use Rule 2 & 3)
+ * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W
+ * if not available
+ * - Rule 3: If parent volume is R/W, then only mount R/W volume unless
+ * explicitly told otherwise
+ */
+struct afs_volume *afs_create_volume(struct afs_fs_context *params)
+{
+ struct afs_vldb_entry *vldb;
+ struct afs_volume *volume;
+ unsigned long type_mask = 1UL << params->type;
+
+ vldb = afs_vl_lookup_vldb(params->cell, params->key,
+ params->volname, params->volnamesz);
+ if (IS_ERR(vldb))
+ return ERR_CAST(vldb);
+
+ if (test_bit(AFS_VLDB_QUERY_ERROR, &vldb->flags)) {
+ volume = ERR_PTR(vldb->error);
+ goto error;
+ }
+
+ /* Make the final decision on the type we want */
+ volume = ERR_PTR(-ENOMEDIUM);
+ if (params->force) {
+ if (!(vldb->flags & type_mask))
+ goto error;
+ } else if (test_bit(AFS_VLDB_HAS_RO, &vldb->flags)) {
+ params->type = AFSVL_ROVOL;
+ } else if (test_bit(AFS_VLDB_HAS_RW, &vldb->flags)) {
+ params->type = AFSVL_RWVOL;
+ } else {
+ goto error;
+ }
+
+ type_mask = 1UL << params->type;
+ volume = afs_lookup_volume(params, vldb, type_mask);
+
+error:
+ kfree(vldb);
+ return volume;
+}
+
+/*
+ * Destroy a volume record
+ */
+static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume)
+{
+ _enter("%p", volume);
+
+#ifdef CONFIG_AFS_FSCACHE
+ ASSERTCMP(volume->cache, ==, NULL);
+#endif
+
+ afs_remove_volume_from_cell(volume);
+ afs_put_serverlist(net, rcu_access_pointer(volume->servers));
+ afs_put_cell(volume->cell, afs_cell_trace_put_vol);
+ trace_afs_volume(volume->vid, refcount_read(&volume->ref),
+ afs_volume_trace_free);
+ kfree_rcu(volume, rcu);
+
+ _leave(" [destroyed]");
+}
+
+/*
+ * Try to get a reference on a volume record.
+ */
+bool afs_try_get_volume(struct afs_volume *volume, enum afs_volume_trace reason)
+{
+ int r;
+
+ if (__refcount_inc_not_zero(&volume->ref, &r)) {
+ trace_afs_volume(volume->vid, r + 1, reason);
+ return true;
+ }
+ return false;
+}
+
+/*
+ * Get a reference on a volume record.
+ */
+struct afs_volume *afs_get_volume(struct afs_volume *volume,
+ enum afs_volume_trace reason)
+{
+ if (volume) {
+ int r;
+
+ __refcount_inc(&volume->ref, &r);
+ trace_afs_volume(volume->vid, r + 1, reason);
+ }
+ return volume;
+}
+
+
+/*
+ * Drop a reference on a volume record.
+ */
+void afs_put_volume(struct afs_net *net, struct afs_volume *volume,
+ enum afs_volume_trace reason)
+{
+ if (volume) {
+ afs_volid_t vid = volume->vid;
+ bool zero;
+ int r;
+
+ zero = __refcount_dec_and_test(&volume->ref, &r);
+ trace_afs_volume(vid, r - 1, reason);
+ if (zero)
+ afs_destroy_volume(net, volume);
+ }
+}
+
+/*
+ * Activate a volume.
+ */
+int afs_activate_volume(struct afs_volume *volume)
+{
+#ifdef CONFIG_AFS_FSCACHE
+ struct fscache_volume *vcookie;
+ char *name;
+
+ name = kasprintf(GFP_KERNEL, "afs,%s,%llx",
+ volume->cell->name, volume->vid);
+ if (!name)
+ return -ENOMEM;
+
+ vcookie = fscache_acquire_volume(name, NULL, NULL, 0);
+ if (IS_ERR(vcookie)) {
+ if (vcookie != ERR_PTR(-EBUSY)) {
+ kfree(name);
+ return PTR_ERR(vcookie);
+ }
+ pr_err("AFS: Cache volume key already in use (%s)\n", name);
+ vcookie = NULL;
+ }
+ volume->cache = vcookie;
+ kfree(name);
+#endif
+ return 0;
+}
+
+/*
+ * Deactivate a volume.
+ */
+void afs_deactivate_volume(struct afs_volume *volume)
+{
+ _enter("%s", volume->name);
+
+#ifdef CONFIG_AFS_FSCACHE
+ fscache_relinquish_volume(volume->cache, NULL,
+ test_bit(AFS_VOLUME_DELETED, &volume->flags));
+ volume->cache = NULL;
+#endif
+
+ _leave("");
+}
+
+/*
+ * Query the VL service to update the volume status.
+ */
+static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
+{
+ struct afs_server_list *new, *old, *discard;
+ struct afs_vldb_entry *vldb;
+ char idbuf[16];
+ int ret, idsz;
+
+ _enter("");
+
+ /* We look up an ID by passing it as a decimal string in the
+ * operation's name parameter.
+ */
+ idsz = sprintf(idbuf, "%llu", volume->vid);
+
+ vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz);
+ if (IS_ERR(vldb)) {
+ ret = PTR_ERR(vldb);
+ goto error;
+ }
+
+ /* See if the volume got renamed. */
+ if (vldb->name_len != volume->name_len ||
+ memcmp(vldb->name, volume->name, vldb->name_len) != 0) {
+ /* TODO: Use RCU'd string. */
+ memcpy(volume->name, vldb->name, AFS_MAXVOLNAME);
+ volume->name_len = vldb->name_len;
+ }
+
+ /* See if the volume's server list got updated. */
+ new = afs_alloc_server_list(volume->cell, key,
+ vldb, (1 << volume->type));
+ if (IS_ERR(new)) {
+ ret = PTR_ERR(new);
+ goto error_vldb;
+ }
+
+ write_lock(&volume->servers_lock);
+
+ discard = new;
+ old = rcu_dereference_protected(volume->servers,
+ lockdep_is_held(&volume->servers_lock));
+ if (afs_annotate_server_list(new, old)) {
+ new->seq = volume->servers_seq + 1;
+ rcu_assign_pointer(volume->servers, new);
+ smp_wmb();
+ volume->servers_seq++;
+ discard = old;
+ }
+
+ volume->update_at = ktime_get_real_seconds() + afs_volume_record_life;
+ write_unlock(&volume->servers_lock);
+ ret = 0;
+
+ afs_put_serverlist(volume->cell->net, discard);
+error_vldb:
+ kfree(vldb);
+error:
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Make sure the volume record is up to date.
+ */
+int afs_check_volume_status(struct afs_volume *volume, struct afs_operation *op)
+{
+ int ret, retries = 0;
+
+ _enter("");
+
+retry:
+ if (test_bit(AFS_VOLUME_WAIT, &volume->flags))
+ goto wait;
+ if (volume->update_at <= ktime_get_real_seconds() ||
+ test_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags))
+ goto update;
+ _leave(" = 0");
+ return 0;
+
+update:
+ if (!test_and_set_bit_lock(AFS_VOLUME_UPDATING, &volume->flags)) {
+ clear_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags);
+ ret = afs_update_volume_status(volume, op->key);
+ if (ret < 0)
+ set_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags);
+ clear_bit_unlock(AFS_VOLUME_WAIT, &volume->flags);
+ clear_bit_unlock(AFS_VOLUME_UPDATING, &volume->flags);
+ wake_up_bit(&volume->flags, AFS_VOLUME_WAIT);
+ _leave(" = %d", ret);
+ return ret;
+ }
+
+wait:
+ if (!test_bit(AFS_VOLUME_WAIT, &volume->flags)) {
+ _leave(" = 0 [no wait]");
+ return 0;
+ }
+
+ ret = wait_on_bit(&volume->flags, AFS_VOLUME_WAIT,
+ (op->flags & AFS_OPERATION_UNINTR) ?
+ TASK_UNINTERRUPTIBLE : TASK_INTERRUPTIBLE);
+ if (ret == -ERESTARTSYS) {
+ _leave(" = %d", ret);
+ return ret;
+ }
+
+ retries++;
+ if (retries == 4) {
+ _leave(" = -ESTALE");
+ return -ESTALE;
+ }
+ goto retry;
+}
diff --git a/fs/afs/write.c b/fs/afs/write.c
new file mode 100644
index 000000000..3ecc212b6
--- /dev/null
+++ b/fs/afs/write.c
@@ -0,0 +1,1040 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* handling of writes to regular files and writing back to the server
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/backing-dev.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/writeback.h>
+#include <linux/pagevec.h>
+#include <linux/netfs.h>
+#include "internal.h"
+
+static void afs_write_to_cache(struct afs_vnode *vnode, loff_t start, size_t len,
+ loff_t i_size, bool caching);
+
+ #ifdef CONFIG_AFS_FSCACHE
+ /*
+ * Mark a page as having been made dirty and thus needing writeback. We also
+ * need to pin the cache object to write back to.
+ *
+ * Passes the mapping, the folio and the owning vnode's cache cookie to
+ * fscache_dirty_folio() and returns its result.
+ */
+ bool afs_dirty_folio(struct address_space *mapping, struct folio *folio)
+ {
+ return fscache_dirty_folio(mapping, folio,
+ afs_vnode_cache(AFS_FS_I(mapping->host)));
+ }
+ /* Call folio_start_fscache() on the folio, but only if @caching is true. */
+ static void afs_folio_start_fscache(bool caching, struct folio *folio)
+ {
+ if (caching)
+ folio_start_fscache(folio);
+ }
+ #else
+ /* No-op variant used when CONFIG_AFS_FSCACHE is not set. */
+ static void afs_folio_start_fscache(bool caching, struct folio *folio)
+ {
+ }
+ #endif
+
+/*
+ * prepare to perform part of a write to a page
+ */
+int afs_write_begin(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len,
+ struct page **_page, void **fsdata)
+{
+ struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
+ struct folio *folio;
+ unsigned long priv;
+ unsigned f, from;
+ unsigned t, to;
+ pgoff_t index;
+ int ret;
+
+ _enter("{%llx:%llu},%llx,%x",
+ vnode->fid.vid, vnode->fid.vnode, pos, len);
+
+ /* Prefetch area to be written into the cache if we're caching this
+ * file. We need to do this before we get a lock on the page in case
+ * there's more than one writer competing for the same cache block.
+ */
+ ret = netfs_write_begin(&vnode->netfs, file, mapping, pos, len, &folio, fsdata);
+ if (ret < 0)
+ return ret;
+
+ index = folio_index(folio);
+ from = pos - index * PAGE_SIZE;
+ to = from + len;
+
+try_again:
+ /* See if this page is already partially written in a way that we can
+ * merge the new write with.
+ */
+ if (folio_test_private(folio)) {
+ priv = (unsigned long)folio_get_private(folio);
+ f = afs_folio_dirty_from(folio, priv);
+ t = afs_folio_dirty_to(folio, priv);
+ ASSERTCMP(f, <=, t);
+
+ if (folio_test_writeback(folio)) {
+ trace_afs_folio_dirty(vnode, tracepoint_string("alrdy"), folio);
+ goto flush_conflicting_write;
+ }
+ /* If the file is being filled locally, allow inter-write
+ * spaces to be merged into writes. If it's not, only write
+ * back what the user gives us.
+ */
+ if (!test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags) &&
+ (to < f || from > t))
+ goto flush_conflicting_write;
+ }
+
+ *_page = folio_file_page(folio, pos / PAGE_SIZE);
+ _leave(" = 0");
+ return 0;
+
+ /* The previous write and this write aren't adjacent or overlapping, so
+ * flush the page out.
+ */
+flush_conflicting_write:
+ _debug("flush conflict");
+ ret = folio_write_one(folio);
+ if (ret < 0)
+ goto error;
+
+ ret = folio_lock_killable(folio);
+ if (ret < 0)
+ goto error;
+ goto try_again;
+
+error:
+ folio_put(folio);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * finalise part of a write to a page
+ */
+int afs_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *subpage, void *fsdata)
+{
+ struct folio *folio = page_folio(subpage);
+ struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
+ unsigned long priv;
+ unsigned int f, from = offset_in_folio(folio, pos);
+ unsigned int t, to = from + copied;
+ loff_t i_size, write_end_pos;
+
+ _enter("{%llx:%llu},{%lx}",
+ vnode->fid.vid, vnode->fid.vnode, folio_index(folio));
+
+ if (!folio_test_uptodate(folio)) {
+ if (copied < len) {
+ copied = 0;
+ goto out;
+ }
+
+ folio_mark_uptodate(folio);
+ }
+
+ if (copied == 0)
+ goto out;
+
+ write_end_pos = pos + copied;
+
+ i_size = i_size_read(&vnode->netfs.inode);
+ if (write_end_pos > i_size) {
+ write_seqlock(&vnode->cb_lock);
+ i_size = i_size_read(&vnode->netfs.inode);
+ if (write_end_pos > i_size)
+ afs_set_i_size(vnode, write_end_pos);
+ write_sequnlock(&vnode->cb_lock);
+ fscache_update_cookie(afs_vnode_cache(vnode), NULL, &write_end_pos);
+ }
+
+ if (folio_test_private(folio)) {
+ priv = (unsigned long)folio_get_private(folio);
+ f = afs_folio_dirty_from(folio, priv);
+ t = afs_folio_dirty_to(folio, priv);
+ if (from < f)
+ f = from;
+ if (to > t)
+ t = to;
+ priv = afs_folio_dirty(folio, f, t);
+ folio_change_private(folio, (void *)priv);
+ trace_afs_folio_dirty(vnode, tracepoint_string("dirty+"), folio);
+ } else {
+ priv = afs_folio_dirty(folio, from, to);
+ folio_attach_private(folio, (void *)priv);
+ trace_afs_folio_dirty(vnode, tracepoint_string("dirty"), folio);
+ }
+
+ if (folio_mark_dirty(folio))
+ _debug("dirtied %lx", folio_index(folio));
+
+out:
+ folio_unlock(folio);
+ folio_put(folio);
+ return copied;
+}
+
+/*
+ * kill all the pages in the given range
+ */
+static void afs_kill_pages(struct address_space *mapping,
+ loff_t start, loff_t len)
+{
+ struct afs_vnode *vnode = AFS_FS_I(mapping->host);
+ struct folio *folio;
+ pgoff_t index = start / PAGE_SIZE;
+ pgoff_t last = (start + len - 1) / PAGE_SIZE, next;
+
+ _enter("{%llx:%llu},%llx @%llx",
+ vnode->fid.vid, vnode->fid.vnode, len, start);
+
+ do {
+ _debug("kill %lx (to %lx)", index, last);
+
+ folio = filemap_get_folio(mapping, index);
+ if (!folio) {
+ next = index + 1;
+ continue;
+ }
+
+ next = folio_next_index(folio);
+
+ folio_clear_uptodate(folio);
+ folio_end_writeback(folio);
+ folio_lock(folio);
+ generic_error_remove_page(mapping, &folio->page);
+ folio_unlock(folio);
+ folio_put(folio);
+
+ } while (index = next, index <= last);
+
+ _leave("");
+}
+
+/*
+ * Redirty all the pages in a given range.
+ */
+static void afs_redirty_pages(struct writeback_control *wbc,
+ struct address_space *mapping,
+ loff_t start, loff_t len)
+{
+ struct afs_vnode *vnode = AFS_FS_I(mapping->host);
+ struct folio *folio;
+ pgoff_t index = start / PAGE_SIZE;
+ pgoff_t last = (start + len - 1) / PAGE_SIZE, next;
+
+ _enter("{%llx:%llu},%llx @%llx",
+ vnode->fid.vid, vnode->fid.vnode, len, start);
+
+ do {
+ _debug("redirty %llx @%llx", len, start);
+
+ folio = filemap_get_folio(mapping, index);
+ if (!folio) {
+ next = index + 1;
+ continue;
+ }
+
+ next = index + folio_nr_pages(folio);
+ folio_redirty_for_writepage(wbc, folio);
+ folio_end_writeback(folio);
+ folio_put(folio);
+ } while (index = next, index <= last);
+
+ _leave("");
+}
+
+/*
+ * completion of write to server
+ */
+static void afs_pages_written_back(struct afs_vnode *vnode, loff_t start, unsigned int len)
+{
+ struct address_space *mapping = vnode->netfs.inode.i_mapping;
+ struct folio *folio;
+ pgoff_t end;
+
+ XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
+
+ _enter("{%llx:%llu},{%x @%llx}",
+ vnode->fid.vid, vnode->fid.vnode, len, start);
+
+ rcu_read_lock();
+
+ end = (start + len - 1) / PAGE_SIZE;
+ xas_for_each(&xas, folio, end) {
+ if (!folio_test_writeback(folio)) {
+ kdebug("bad %x @%llx page %lx %lx",
+ len, start, folio_index(folio), end);
+ ASSERT(folio_test_writeback(folio));
+ }
+
+ trace_afs_folio_dirty(vnode, tracepoint_string("clear"), folio);
+ folio_detach_private(folio);
+ folio_end_writeback(folio);
+ }
+
+ rcu_read_unlock();
+
+ afs_prune_wb_keys(vnode);
+ _leave("");
+}
+
+/*
+ * Find a key to use for the writeback. We cached the keys used to author the
+ * writes on the vnode. *_wbk will contain the last writeback key used or NULL
+ * and we need to start from there if it's set.
+ */
+static int afs_get_writeback_key(struct afs_vnode *vnode,
+ struct afs_wb_key **_wbk)
+{
+ struct afs_wb_key *wbk = NULL;
+ struct list_head *p;
+ int ret = -ENOKEY, ret2;
+
+ spin_lock(&vnode->wb_lock);
+ if (*_wbk)
+ p = (*_wbk)->vnode_link.next;
+ else
+ p = vnode->wb_keys.next;
+
+ while (p != &vnode->wb_keys) {
+ wbk = list_entry(p, struct afs_wb_key, vnode_link);
+ _debug("wbk %u", key_serial(wbk->key));
+ ret2 = key_validate(wbk->key);
+ if (ret2 == 0) {
+ refcount_inc(&wbk->usage);
+ _debug("USE WB KEY %u", key_serial(wbk->key));
+ break;
+ }
+
+ wbk = NULL;
+ if (ret == -ENOKEY)
+ ret = ret2;
+ p = p->next;
+ }
+
+ spin_unlock(&vnode->wb_lock);
+ if (*_wbk)
+ afs_put_wb_key(*_wbk);
+ *_wbk = wbk;
+ return 0;
+}
+
+/*
+ * Handle successful completion of a store-data operation: commit the returned
+ * status to the vnode and, unless an error was recorded, end writeback on the
+ * stored pages (skipped when laundering) and update the store statistics.
+ */
+static void afs_store_data_success(struct afs_operation *op)
+{
+	struct afs_vnode *vnode = op->file[0].vnode;
+
+	op->ctime = op->file[0].scb.status.mtime_client;
+	afs_vnode_commit_status(op, &op->file[0]);
+
+	/* Nothing more to do if the operation ended up with an error. */
+	if (op->error != 0)
+		return;
+
+	if (!op->store.laundering)
+		afs_pages_written_back(vnode, op->store.pos, op->store.size);
+
+	afs_stat_v(vnode, n_stores);
+	atomic_long_add(op->store.size, &afs_v2net(vnode)->n_store_bytes);
+}
+
+static const struct afs_operation_ops afs_store_data_operation = { /* ops table installed on the op by afs_store_data() */
+ .issue_afs_rpc = afs_fs_store_data, /* presumably the AFS-protocol RPC issuer -- confirm */
+ .issue_yfs_rpc = yfs_fs_store_data, /* presumably the YFS-protocol RPC issuer -- confirm */
+ .success = afs_store_data_success, /* commits status and ends page writeback */
+};
+
+/*
+ * write to a file
+ *
+ * Store the data described by @iter at file position @pos using a store-data
+ * operation.  The operation is first issued with a key obtained from
+ * afs_get_writeback_key(); if it fails with a permission or key error, the
+ * next writeback key is fetched and the operation is reissued.
+ *
+ * @laundering is recorded in the operation; afs_store_data_success() does not
+ * end page writeback when it is set.
+ *
+ * Returns the result of afs_put_operation(), or an error from key lookup or
+ * -ENOMEM if the operation could not be allocated.
+ */
+static int afs_store_data(struct afs_vnode *vnode, struct iov_iter *iter, loff_t pos,
+ bool laundering)
+{
+ struct afs_operation *op;
+ struct afs_wb_key *wbk = NULL;
+ loff_t size = iov_iter_count(iter);
+ int ret = -ENOKEY;
+
+ _enter("%s{%llx:%llu.%u},%llx,%llx",
+ vnode->volume->name,
+ vnode->fid.vid,
+ vnode->fid.vnode,
+ vnode->fid.unique,
+ size, pos);
+
+ ret = afs_get_writeback_key(vnode, &wbk); /* wbk is NULL here, so this starts at the head of the key list */
+ if (ret) {
+ _leave(" = %d [no keys]", ret);
+ return ret;
+ }
+
+ op = afs_alloc_operation(wbk->key, vnode->volume);
+ if (IS_ERR(op)) {
+ afs_put_wb_key(wbk);
+ return -ENOMEM;
+ }
+
+ afs_op_set_vnode(op, 0, vnode);
+ op->file[0].dv_delta = 1;
+ op->file[0].modification = true;
+ op->store.pos = pos;
+ op->store.size = size;
+ op->store.laundering = laundering;
+ op->flags |= AFS_OPERATION_UNINTR;
+ op->ops = &afs_store_data_operation;
+
+try_next_key:
+ afs_begin_vnode_operation(op);
+
+ op->store.write_iter = iter;
+ op->store.i_size = max(pos + size, vnode->netfs.remote_i_size); /* never report a size smaller than the recorded remote size */
+ op->mtime = vnode->netfs.inode.i_mtime;
+
+ afs_wait_for_operation(op);
+
+ switch (op->error) { /* only permission/key failures trigger a retry with another key */
+ case -EACCES:
+ case -EPERM:
+ case -ENOKEY:
+ case -EKEYEXPIRED:
+ case -EKEYREJECTED:
+ case -EKEYREVOKED:
+ _debug("next");
+
+ ret = afs_get_writeback_key(vnode, &wbk); /* continues after the key just tried */
+ if (ret == 0) {
+ key_put(op->key); /* swap the op over to the new key */
+ op->key = key_get(wbk->key);
+ goto try_next_key;
+ }
+ break;
+ }
+
+ afs_put_wb_key(wbk);
+ _leave(" = %d", op->error);
+ return afs_put_operation(op);
+}
+
+/*
+ * Extend the region to be written back to include subsequent contiguously
+ * dirty pages if possible, but don't sleep while doing so.
+ *
+ * If this page holds new content, then we can include filler zeros in the
+ * writeback.
+ *
+ * The scan starts at the page index of @start + *@_len.  Folios are collected
+ * in batches under the RCU read lock using trylock only; each batch is then
+ * switched from dirty to writeback and unlocked outside the RCU section.
+ *
+ * @_count is decremented by the number of pages in each folio added.
+ * @max_len is the length at which the extension stops.
+ * @_len is read as the current length and updated with the extended length.
+ */
+static void afs_extend_writeback(struct address_space *mapping,
+ struct afs_vnode *vnode,
+ long *_count,
+ loff_t start,
+ loff_t max_len,
+ bool new_content,
+ bool caching,
+ unsigned int *_len)
+{
+ struct pagevec pvec;
+ struct folio *folio;
+ unsigned long priv;
+ unsigned int psize, filler = 0;
+ unsigned int f, t;
+ loff_t len = *_len;
+ pgoff_t index = (start + len) / PAGE_SIZE; /* next page index expected in the contiguous run */
+ bool stop = true;
+ unsigned int i;
+
+ XA_STATE(xas, &mapping->i_pages, index);
+ pagevec_init(&pvec);
+
+ do {
+ /* Firstly, we gather up a batch of contiguous dirty pages
+ * under the RCU read lock - but we can't clear the dirty flags
+ * there if any of those pages are mapped.
+ */
+ rcu_read_lock();
+
+ xas_for_each(&xas, folio, ULONG_MAX) {
+ stop = true; /* assume this is the last folio unless shown otherwise below */
+ if (xas_retry(&xas, folio))
+ continue;
+ if (xa_is_value(folio))
+ break;
+ if (folio_index(folio) != index) /* gap in the run: stop extending */
+ break;
+
+ if (!folio_try_get_rcu(folio)) {
+ xas_reset(&xas);
+ continue;
+ }
+
+ /* Has the page moved or been split? */
+ if (unlikely(folio != xas_reload(&xas))) {
+ folio_put(folio);
+ break;
+ }
+
+ if (!folio_trylock(folio)) { /* never sleep for a folio lock here */
+ folio_put(folio);
+ break;
+ }
+ if (!folio_test_dirty(folio) ||
+ folio_test_writeback(folio) ||
+ folio_test_fscache(folio)) {
+ folio_unlock(folio);
+ folio_put(folio);
+ break;
+ }
+
+ psize = folio_size(folio);
+ priv = (unsigned long)folio_get_private(folio);
+ f = afs_folio_dirty_from(folio, priv);
+ t = afs_folio_dirty_to(folio, priv);
+ if (f != 0 && !new_content) { /* dirty region doesn't start at the folio start */
+ folio_unlock(folio);
+ folio_put(folio);
+ break;
+ }
+
+ len += filler + t; /* include the previous folio's unwritten tail plus this folio's dirty part */
+ filler = psize - t;
+ if (len >= max_len || *_count <= 0)
+ stop = true;
+ else if (t == psize || new_content)
+ stop = false;
+
+ index += folio_nr_pages(folio);
+ if (!pagevec_add(&pvec, &folio->page)) /* batch is full: process it before continuing */
+ break;
+ if (stop)
+ break;
+ }
+
+ if (!stop)
+ xas_pause(&xas);
+ rcu_read_unlock();
+
+ /* Now, if we obtained any pages, we can shift them to being
+ * writable and mark them for caching.
+ */
+ if (!pagevec_count(&pvec))
+ break;
+
+ for (i = 0; i < pagevec_count(&pvec); i++) {
+ folio = page_folio(pvec.pages[i]);
+ trace_afs_folio_dirty(vnode, tracepoint_string("store+"), folio);
+
+ if (!folio_clear_dirty_for_io(folio))
+ BUG();
+ if (folio_start_writeback(folio))
+ BUG();
+ afs_folio_start_fscache(caching, folio);
+
+ *_count -= folio_nr_pages(folio);
+ folio_unlock(folio);
+ }
+
+ pagevec_release(&pvec);
+ cond_resched();
+ } while (!stop);
+
+ *_len = len;
+}
+
+/*
+ * Synchronously write back the locked page and any subsequent non-locked dirty
+ * pages.
+ *
+ * @folio is marked for writeback here and unlocked before the store is
+ * issued.  @start is the file position of @folio and @end bounds how far the
+ * write may be extended.
+ *
+ * Returns the number of bytes covered on success (and updates
+ * wbc->nr_to_write), or a negative error code.  On error the affected pages
+ * are either redirtied or killed, depending on the error, and the error is
+ * recorded on the mapping.
+ */
+static ssize_t afs_write_back_from_locked_folio(struct address_space *mapping,
+ struct writeback_control *wbc,
+ struct folio *folio,
+ loff_t start, loff_t end)
+{
+ struct afs_vnode *vnode = AFS_FS_I(mapping->host);
+ struct iov_iter iter;
+ unsigned long priv;
+ unsigned int offset, to, len, max_len;
+ loff_t i_size = i_size_read(&vnode->netfs.inode);
+ bool new_content = test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
+ bool caching = fscache_cookie_enabled(afs_vnode_cache(vnode));
+ long count = wbc->nr_to_write;
+ int ret;
+
+ _enter(",%lx,%llx-%llx", folio_index(folio), start, end);
+
+ if (folio_start_writeback(folio))
+ BUG();
+ afs_folio_start_fscache(caching, folio);
+
+ count -= folio_nr_pages(folio);
+
+ /* Find all consecutive lockable dirty pages that have contiguous
+ * written regions, stopping when we find a page that is not
+ * immediately lockable, is not dirty or is missing, or we reach the
+ * end of the range.
+ */
+ priv = (unsigned long)folio_get_private(folio);
+ offset = afs_folio_dirty_from(folio, priv);
+ to = afs_folio_dirty_to(folio, priv);
+ trace_afs_folio_dirty(vnode, tracepoint_string("store"), folio);
+
+ len = to - offset;
+ start += offset; /* from here on, start is the first dirty byte rather than the folio position */
+ if (start < i_size) {
+ /* Trim the write to the EOF; the extra data is ignored. Also
+ * put an upper limit on the size of a single storedata op.
+ */
+ max_len = 65536 * 4096;
+ max_len = min_t(unsigned long long, max_len, end - start + 1);
+ max_len = min_t(unsigned long long, max_len, i_size - start);
+
+ if (len < max_len &&
+ (to == folio_size(folio) || new_content))
+ afs_extend_writeback(mapping, vnode, &count,
+ start, max_len, new_content,
+ caching, &len);
+ len = min_t(loff_t, len, max_len);
+ }
+
+ /* We now have a contiguous set of dirty pages, each with writeback
+ * set; the first page is still locked at this point, but all the rest
+ * have been unlocked.
+ */
+ folio_unlock(folio);
+
+ if (start < i_size) {
+ _debug("write back %x @%llx [%llx]", len, start, i_size);
+
+ /* Speculatively write to the cache. We have to fix this up
+ * later if the store fails.
+ */
+ afs_write_to_cache(vnode, start, len, i_size, caching);
+
+ iov_iter_xarray(&iter, ITER_SOURCE, &mapping->i_pages, start, len);
+ ret = afs_store_data(vnode, &iter, start, false);
+ } else {
+ _debug("write discard %x @%llx [%llx]", len, start, i_size);
+
+ /* The dirty region was entirely beyond the EOF. */
+ fscache_clear_page_bits(mapping, start, len, caching);
+ afs_pages_written_back(vnode, start, len);
+ ret = 0;
+ }
+
+ switch (ret) {
+ case 0:
+ wbc->nr_to_write = count; /* commit the page budget only on success */
+ ret = len;
+ break;
+
+ default: /* errors not listed below: log, then treat like the redirty group */
+ pr_notice("kAFS: Unexpected error from FS.StoreData %d\n", ret);
+ fallthrough;
+ case -EACCES:
+ case -EPERM:
+ case -ENOKEY:
+ case -EKEYEXPIRED:
+ case -EKEYREJECTED:
+ case -EKEYREVOKED:
+ case -ENETRESET:
+ afs_redirty_pages(wbc, mapping, start, len);
+ mapping_set_error(mapping, ret);
+ break;
+
+ case -EDQUOT:
+ case -ENOSPC:
+ afs_redirty_pages(wbc, mapping, start, len);
+ mapping_set_error(mapping, -ENOSPC); /* both errors are recorded on the mapping as -ENOSPC */
+ break;
+
+ case -EROFS:
+ case -EIO:
+ case -EREMOTEIO:
+ case -EFBIG:
+ case -ENOENT:
+ case -ENOMEDIUM:
+ case -ENXIO:
+ trace_afs_file_error(vnode, ret, afs_file_error_writeback_fail);
+ afs_kill_pages(mapping, start, len); /* these errors kill the pages instead of redirtying them */
+ mapping_set_error(mapping, ret);
+ break;
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Write a single page back to the server.
+ * - The caller hands us the page already locked.
+ * - Returns 0 on success or the negative error from the writeback helper.
+ */
+int afs_writepage(struct page *subpage, struct writeback_control *wbc)
+{
+	struct folio *folio = page_folio(subpage);
+	loff_t pos;
+	ssize_t written;
+
+	_enter("{%lx},", folio_index(folio));
+
+#ifdef CONFIG_AFS_FSCACHE
+	folio_wait_fscache(folio);
+#endif
+
+	pos = folio_index(folio) * PAGE_SIZE;
+	written = afs_write_back_from_locked_folio(folio_mapping(folio), wbc,
+						   folio, pos, LLONG_MAX - pos);
+	if (written >= 0) {
+		/* The byte count is of no interest to the caller. */
+		_leave(" = 0");
+		return 0;
+	}
+
+	_leave(" = %zd", written);
+	return written;
+}
+
+/*
+ * write a region of pages back to the server
+ *
+ * Repeatedly look up the next folio tagged dirty in [start, end], lock it and
+ * hand it to afs_write_back_from_locked_folio(), until no tagged folio is left
+ * or wbc->nr_to_write is used up.
+ *
+ * Returns 0 on success, including when writeback stops early because a folio
+ * could not be locked in WB_SYNC_NONE mode; in that case *_next holds the file
+ * position at which to resume.  Returns a negative error code on failure, in
+ * which case *_next is not set.
+ */
+static int afs_writepages_region(struct address_space *mapping,
+				 struct writeback_control *wbc,
+				 loff_t start, loff_t end, loff_t *_next)
+{
+	struct folio *folio;
+	struct page *head_page;
+	ssize_t ret;
+	int n, skips = 0;
+
+	_enter("%llx,%llx,", start, end);
+
+	do {
+		pgoff_t index = start / PAGE_SIZE;
+
+		n = find_get_pages_range_tag(mapping, &index, end / PAGE_SIZE,
+					     PAGECACHE_TAG_DIRTY, 1, &head_page);
+		if (!n)
+			break;
+
+		folio = page_folio(head_page);
+		start = folio_pos(folio); /* May regress with THPs */
+
+		_debug("wback %lx", folio_index(folio));
+
+		/* At this point we hold neither the i_pages lock nor the
+		 * page lock: the page may be truncated or invalidated
+		 * (changing page->mapping to NULL), or even swizzled
+		 * back from swapper_space to tmpfs file mapping
+		 */
+		if (wbc->sync_mode != WB_SYNC_NONE) {
+			ret = folio_lock_killable(folio);
+			if (ret < 0) {
+				folio_put(folio);
+				return ret;
+			}
+		} else {
+			if (!folio_trylock(folio)) {
+				/* The caller reads *_next whenever we return
+				 * 0, so report where we stopped rather than
+				 * leaving it uninitialised.
+				 */
+				folio_put(folio);
+				*_next = start;
+				return 0;
+			}
+		}
+
+		/* Skip folios that were truncated or cleaned while unlocked. */
+		if (folio_mapping(folio) != mapping ||
+		    !folio_test_dirty(folio)) {
+			start += folio_size(folio);
+			folio_unlock(folio);
+			folio_put(folio);
+			continue;
+		}
+
+		/* Already being written: wait and retry the same position for
+		 * data-integrity syncs, otherwise skip it (a bounded number of
+		 * times).
+		 */
+		if (folio_test_writeback(folio) ||
+		    folio_test_fscache(folio)) {
+			folio_unlock(folio);
+			if (wbc->sync_mode != WB_SYNC_NONE) {
+				folio_wait_writeback(folio);
+#ifdef CONFIG_AFS_FSCACHE
+				folio_wait_fscache(folio);
+#endif
+			} else {
+				start += folio_size(folio);
+			}
+			folio_put(folio);
+			if (wbc->sync_mode == WB_SYNC_NONE) {
+				if (skips >= 5 || need_resched())
+					break;
+				skips++;
+			}
+			continue;
+		}
+
+		if (!folio_clear_dirty_for_io(folio))
+			BUG();
+		ret = afs_write_back_from_locked_folio(mapping, wbc, folio, start, end);
+		folio_put(folio);
+		if (ret < 0) {
+			_leave(" = %zd", ret);
+			return ret;
+		}
+
+		/* ret is the number of bytes that were written back. */
+		start += ret;
+
+		cond_resched();
+	} while (wbc->nr_to_write > 0);
+
+	*_next = start;
+	_leave(" = 0 [%llx]", *_next);
+	return 0;
+}
+
+/*
+ * write some of the pending data back to the server
+ *
+ * Takes vnode->validate_lock for reading around the writeback: blocking for
+ * WB_SYNC_ALL, otherwise by trylock, returning 0 without writing anything if
+ * the lock is not immediately available.
+ *
+ * Three cases are handled: cyclic writeback resuming from
+ * mapping->writeback_index (wrapping round to the start of the file if there
+ * is still budget left), whole-file writeback, and an explicit byte range
+ * taken from @wbc.
+ *
+ * Returns the result of the last afs_writepages_region() call.
+ */
+int afs_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ struct afs_vnode *vnode = AFS_FS_I(mapping->host);
+ loff_t start, next;
+ int ret;
+
+ _enter("");
+
+ /* We have to be careful as we can end up racing with setattr()
+ * truncating the pagecache since the caller doesn't take a lock here
+ * to prevent it.
+ */
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ down_read(&vnode->validate_lock);
+ else if (!down_read_trylock(&vnode->validate_lock))
+ return 0;
+
+ if (wbc->range_cyclic) {
+ start = mapping->writeback_index * PAGE_SIZE;
+ ret = afs_writepages_region(mapping, wbc, start, LLONG_MAX, &next);
+ if (ret == 0) {
+ mapping->writeback_index = next / PAGE_SIZE;
+ if (start > 0 && wbc->nr_to_write > 0) { /* budget left: wrap round and cover [0, start] */
+ ret = afs_writepages_region(mapping, wbc, 0,
+ start, &next);
+ if (ret == 0)
+ mapping->writeback_index =
+ next / PAGE_SIZE;
+ }
+ }
+ } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
+ ret = afs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next);
+ if (wbc->nr_to_write > 0 && ret == 0)
+ mapping->writeback_index = next / PAGE_SIZE;
+ } else {
+ ret = afs_writepages_region(mapping, wbc,
+ wbc->range_start, wbc->range_end, &next); /* explicit range: writeback_index is left alone */
+ }
+
+ up_read(&vnode->validate_lock);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Write to an AFS file.
+ *
+ * Refuses writes to an active swap file, treats a zero-length write as a
+ * no-op, revalidates the vnode with the file's key and then passes the write
+ * on to generic_file_write_iter().
+ */
+ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct file *file = iocb->ki_filp;
+	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
+	struct afs_file *af = file->private_data;
+	size_t count = iov_iter_count(from);
+	ssize_t result;
+
+	_enter("{%llx:%llu},{%zu},",
+	       vnode->fid.vid, vnode->fid.vnode, count);
+
+	if (IS_SWAPFILE(&vnode->netfs.inode)) {
+		printk(KERN_INFO
+		       "AFS: Attempt to write to active swap file!\n");
+		return -EBUSY;
+	}
+
+	if (count == 0)
+		return 0;
+
+	result = afs_validate(vnode, af->key);
+	if (result < 0)
+		return result;
+
+	result = generic_file_write_iter(iocb, from);
+	_leave(" = %zd", result);
+	return result;
+}
+
+/*
+ * Flush any dirty pages for this process and check for write errors.
+ * - The status returned gives a reliable indication of whether any write
+ *   errors occurred for this process.
+ * - The vnode is revalidated first; a validation failure is returned as is.
+ */
+int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
+	struct afs_file *af = file->private_data;
+	int ret;
+
+	_enter("{%llx:%llu},{n=%pD},%d",
+	       vnode->fid.vid, vnode->fid.vnode, file,
+	       datasync);
+
+	ret = afs_validate(vnode, af->key);
+
+	return ret < 0 ? ret : file_write_and_wait_range(file, start, end);
+}
+
+/*
+ * notification that a previously read-only page is about to become writable
+ * - if it returns an error, the caller will deliver a bus error signal
+ *
+ * Waits for any cache write and server writeback on the folio to finish,
+ * locks it and records the whole folio as dirty and mmapped in its private
+ * data.
+ *
+ * Returns VM_FAULT_LOCKED with the folio still locked on success, or
+ * VM_FAULT_RETRY if any of the killable waits or the lock was interrupted.
+ */
+vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
+{
+ struct folio *folio = page_folio(vmf->page);
+ struct file *file = vmf->vma->vm_file;
+ struct inode *inode = file_inode(file);
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+ struct afs_file *af = file->private_data;
+ unsigned long priv;
+ vm_fault_t ret = VM_FAULT_RETRY;
+
+ _enter("{{%llx:%llu}},{%lx}", vnode->fid.vid, vnode->fid.vnode, folio_index(folio));
+
+ afs_validate(vnode, af->key); /* NOTE(review): the result is ignored here -- confirm this is intentional */
+
+ sb_start_pagefault(inode->i_sb);
+
+ /* Wait for the page to be written to the cache before we allow it to
+ * be modified. We then assume the entire page will need writing back.
+ */
+#ifdef CONFIG_AFS_FSCACHE
+ if (folio_test_fscache(folio) &&
+ folio_wait_fscache_killable(folio) < 0)
+ goto out;
+#endif
+
+ if (folio_wait_writeback_killable(folio))
+ goto out;
+
+ if (folio_lock_killable(folio) < 0)
+ goto out;
+
+ /* We mustn't change folio->private until writeback is complete as that
+ * details the portion of the page we need to write back and we might
+ * need to redirty the page if there's a problem.
+ */
+ if (folio_wait_writeback_killable(folio) < 0) { /* waited again now that the folio lock is held */
+ folio_unlock(folio);
+ goto out;
+ }
+
+ priv = afs_folio_dirty(folio, 0, folio_size(folio)); /* dirty region spans the whole folio */
+ priv = afs_folio_dirty_mmapped(priv);
+ if (folio_test_private(folio)) {
+ folio_change_private(folio, (void *)priv);
+ trace_afs_folio_dirty(vnode, tracepoint_string("mkwrite+"), folio);
+ } else {
+ folio_attach_private(folio, (void *)priv);
+ trace_afs_folio_dirty(vnode, tracepoint_string("mkwrite"), folio);
+ }
+ file_update_time(file);
+
+ ret = VM_FAULT_LOCKED; /* the folio is deliberately left locked */
+out:
+ sb_end_pagefault(inode->i_sb);
+ return ret;
+}
+
+/*
+ * Prune the keys cached for writeback. This function takes vnode->wb_lock
+ * itself, so the caller must not already hold it.
+ *
+ * Keys are only discarded when the mapping has no pages tagged as dirty or
+ * under writeback, and then only those keys whose usage count is exactly 1.
+ */
+void afs_prune_wb_keys(struct afs_vnode *vnode)
+{
+ LIST_HEAD(graveyard);
+ struct afs_wb_key *wbk, *tmp;
+
+ /* Discard unused keys */
+ spin_lock(&vnode->wb_lock);
+
+ if (!mapping_tagged(&vnode->netfs.inode.i_data, PAGECACHE_TAG_WRITEBACK) &&
+ !mapping_tagged(&vnode->netfs.inode.i_data, PAGECACHE_TAG_DIRTY)) {
+ list_for_each_entry_safe(wbk, tmp, &vnode->wb_keys, vnode_link) {
+ if (refcount_read(&wbk->usage) == 1) /* only the list's own reference remains */
+ list_move(&wbk->vnode_link, &graveyard);
+ }
+ }
+
+ spin_unlock(&vnode->wb_lock);
+
+ while (!list_empty(&graveyard)) { /* the final puts are done outside the spinlock */
+ wbk = list_entry(graveyard.next, struct afs_wb_key, vnode_link);
+ list_del(&wbk->vnode_link);
+ afs_put_wb_key(wbk);
+ }
+}
+
+/*
+ * Clean up a page during invalidation.
+ *
+ * If the folio is dirty, its dirty region (the whole folio when no private
+ * data is attached) is stored to the server synchronously with the
+ * laundering flag set.  The private data is then detached and any
+ * outstanding cache write is waited for.
+ *
+ * Returns 0 if nothing needed storing, otherwise the result of
+ * afs_store_data().
+ */
+int afs_launder_folio(struct folio *folio)
+{
+ struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
+ struct iov_iter iter;
+ struct bio_vec bv[1];
+ unsigned long priv;
+ unsigned int f, t;
+ int ret = 0;
+
+ _enter("{%lx}", folio->index);
+
+ priv = (unsigned long)folio_get_private(folio);
+ if (folio_clear_dirty_for_io(folio)) {
+ f = 0; /* default to the whole folio */
+ t = folio_size(folio);
+ if (folio_test_private(folio)) {
+ f = afs_folio_dirty_from(folio, priv);
+ t = afs_folio_dirty_to(folio, priv);
+ }
+
+ bv[0].bv_page = &folio->page; /* single-segment bvec covering bytes [f, t) of the folio */
+ bv[0].bv_offset = f;
+ bv[0].bv_len = t - f;
+ iov_iter_bvec(&iter, ITER_SOURCE, bv, 1, bv[0].bv_len);
+
+ trace_afs_folio_dirty(vnode, tracepoint_string("launder"), folio);
+ ret = afs_store_data(vnode, &iter, folio_pos(folio) + f, true);
+ }
+
+ trace_afs_folio_dirty(vnode, tracepoint_string("laundered"), folio);
+ folio_detach_private(folio);
+ folio_wait_fscache(folio);
+ return ret;
+}
+
+/*
+ * Completion handler for a write to the cache.  Any error other than
+ * -ENOBUFS causes the vnode's cache to be invalidated.
+ */
+static void afs_write_to_cache_done(void *priv, ssize_t transferred_or_error,
+				    bool was_async)
+{
+	struct afs_vnode *vnode = priv;
+
+	/* A byte count means the write went through. */
+	if (!IS_ERR_VALUE(transferred_or_error))
+		return;
+
+	/* -ENOBUFS does not warrant invalidating the cache. */
+	if (transferred_or_error == -ENOBUFS)
+		return;
+
+	afs_invalidate_cache(vnode, 0);
+}
+
+/*
+ * Save the write to the cache also.
+ *
+ * Thin wrapper around fscache_write_to_cache() for the vnode's cache cookie
+ * and page cache mapping; afs_write_to_cache_done() is passed as the
+ * completion callback with the vnode as its private data.
+ */
+static void afs_write_to_cache(struct afs_vnode *vnode,
+ loff_t start, size_t len, loff_t i_size,
+ bool caching)
+{
+ fscache_write_to_cache(afs_vnode_cache(vnode),
+ vnode->netfs.inode.i_mapping, start, len, i_size,
+ afs_write_to_cache_done, vnode, caching);
+}
diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c
new file mode 100644
index 000000000..7751b0b3f
--- /dev/null
+++ b/fs/afs/xattr.c
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Extended attribute handling for AFS. We use xattrs to get and set metadata
+ * instead of providing pioctl().
+ *
+ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/xattr.h>
+#include "internal.h"
+
+/*
+ * Deal with the result of a successful fetch ACL operation.
+ *
+ * Commits the status returned for the first file of the operation.  This is
+ * also the .success handler of the ACL store and YFS opaque ACL operation
+ * tables in this file.
+ */
+static void afs_acl_success(struct afs_operation *op)
+{
+ afs_vnode_commit_status(op, &op->file[0]);
+}
+
+static void afs_acl_put(struct afs_operation *op) /* .put handler: frees whatever ACL buffer is still attached to the op */
+{
+ kfree(op->acl);
+}
+
+static const struct afs_operation_ops afs_fetch_acl_operation = { /* ops table used by afs_xattr_get_acl(); no .issue_yfs_rpc is set */
+ .issue_afs_rpc = afs_fs_fetch_acl,
+ .success = afs_acl_success, /* commits the returned file status */
+ .put = afs_acl_put, /* frees op->acl if the caller has not taken it */
+};
+
+/*
+ * Get a file's ACL.
+ *
+ * Fetches the ACL from the server and copies it into @buffer.  If @size is 0,
+ * only the ACL's size is returned; if the ACL does not fit in a non-empty
+ * buffer, -ERANGE is returned.
+ */
+static int afs_xattr_get_acl(const struct xattr_handler *handler,
+			     struct dentry *dentry,
+			     struct inode *inode, const char *name,
+			     void *buffer, size_t size)
+{
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct afs_operation *op;
+	struct afs_acl *acl = NULL;
+	int ret;
+
+	op = afs_alloc_operation(NULL, vnode->volume);
+	if (IS_ERR(op))
+		return -ENOMEM;
+
+	afs_op_set_vnode(op, 0, vnode);
+	op->ops = &afs_fetch_acl_operation;
+
+	afs_begin_vnode_operation(op);
+	afs_wait_for_operation(op);
+
+	/* Take ownership of the ACL before the op is put so that the .put
+	 * handler does not free it under us.
+	 */
+	acl = op->acl;
+	op->acl = NULL;
+	ret = afs_put_operation(op);
+
+	if (ret == 0) {
+		if (size > 0 && acl->size > size) {
+			ret = -ERANGE;
+		} else {
+			if (size > 0)
+				memcpy(buffer, acl->data, acl->size);
+			ret = acl->size;
+		}
+	}
+
+	kfree(acl);
+	return ret;
+}
+
+/*
+ * Copy @size bytes of ACL data from @buffer into a newly allocated ACL and
+ * attach it to @op.  Returns true on success; on allocation failure the op is
+ * marked as out of memory and false is returned.
+ */
+static bool afs_make_acl(struct afs_operation *op,
+			 const void *buffer, size_t size)
+{
+	struct afs_acl *acl = kmalloc(sizeof(*acl) + size, GFP_KERNEL);
+
+	if (acl) {
+		acl->size = size;
+		memcpy(acl->data, buffer, size);
+		op->acl = acl;
+		return true;
+	}
+
+	afs_op_nomem(op);
+	return false;
+}
+
+static const struct afs_operation_ops afs_store_acl_operation = { /* ops table used by afs_xattr_set_acl(); no .issue_yfs_rpc is set */
+ .issue_afs_rpc = afs_fs_store_acl,
+ .success = afs_acl_success, /* commits the returned file status */
+ .put = afs_acl_put, /* frees the ACL built by afs_make_acl() */
+};
+
+/*
+ * Set a file's AFS3 ACL.
+ *
+ * XATTR_CREATE is rejected with -EINVAL.  Otherwise the supplied data is
+ * copied into the operation and stored synchronously.
+ */
+static int afs_xattr_set_acl(const struct xattr_handler *handler,
+			     struct user_namespace *mnt_userns,
+			     struct dentry *dentry,
+			     struct inode *inode, const char *name,
+			     const void *buffer, size_t size, int flags)
+{
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct afs_operation *op;
+
+	if (flags == XATTR_CREATE)
+		return -EINVAL;
+
+	op = afs_alloc_operation(NULL, vnode->volume);
+	if (IS_ERR(op))
+		return -ENOMEM;
+
+	afs_op_set_vnode(op, 0, vnode);
+	if (afs_make_acl(op, buffer, size)) {
+		op->ops = &afs_store_acl_operation;
+		return afs_do_sync_operation(op);
+	}
+
+	/* The ACL could not be built; just dispose of the operation. */
+	return afs_put_operation(op);
+}
+
+static const struct xattr_handler afs_xattr_afs_acl_handler = { /* handler for the "afs.acl" xattr, readable and settable */
+ .name = "afs.acl",
+ .get = afs_xattr_get_acl,
+ .set = afs_xattr_set_acl,
+};
+
+static const struct afs_operation_ops yfs_fetch_opaque_acl_operation = { /* ops table used by afs_xattr_get_yfs(); no .issue_afs_rpc is set */
+ .issue_yfs_rpc = yfs_fs_fetch_opaque_acl,
+ .success = afs_acl_success, /* commits the returned file status */
+ /* Don't free op->yacl in .put here */
+};
+
+/*
+ * Get a file's YFS ACL.
+ *
+ * @name selects what is returned: "acl", "acl_inherited", "acl_num_cleaned"
+ * or "vol_acl"; any other name gives -EOPNOTSUPP.  The two numeric
+ * attributes are returned formatted as unsigned decimal text.
+ *
+ * If @size is 0, only the length of the value is returned; if the value does
+ * not fit in a non-empty buffer, -ERANGE is returned.  -ENOTSUPP from the
+ * operation is reported as -ENODATA.
+ */
+static int afs_xattr_get_yfs(const struct xattr_handler *handler,
+ struct dentry *dentry,
+ struct inode *inode, const char *name,
+ void *buffer, size_t size)
+{
+ struct afs_operation *op;
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+ struct yfs_acl *yacl = NULL;
+ char buf[16], *data;
+ int which = 0, dsize, ret = -ENOMEM;
+
+ if (strcmp(name, "acl") == 0)
+ which = 0;
+ else if (strcmp(name, "acl_inherited") == 0)
+ which = 1;
+ else if (strcmp(name, "acl_num_cleaned") == 0)
+ which = 2;
+ else if (strcmp(name, "vol_acl") == 0)
+ which = 3;
+ else
+ return -EOPNOTSUPP;
+
+ yacl = kzalloc(sizeof(struct yfs_acl), GFP_KERNEL);
+ if (!yacl)
+ goto error;
+
+ if (which == 0) /* request flags are only set for the two ACL blobs */
+ yacl->flags |= YFS_ACL_WANT_ACL;
+ else if (which == 3)
+ yacl->flags |= YFS_ACL_WANT_VOL_ACL;
+
+ op = afs_alloc_operation(NULL, vnode->volume);
+ if (IS_ERR(op))
+ goto error_yacl; /* ret is still -ENOMEM here */
+
+ afs_op_set_vnode(op, 0, vnode);
+ op->yacl = yacl; /* this function keeps ownership of yacl and frees it at error_yacl */
+ op->ops = &yfs_fetch_opaque_acl_operation;
+
+ afs_begin_vnode_operation(op);
+ afs_wait_for_operation(op);
+ ret = afs_put_operation(op);
+
+ if (ret == 0) {
+ switch (which) {
+ case 0:
+ data = yacl->acl->data;
+ dsize = yacl->acl->size;
+ break;
+ case 1:
+ data = buf;
+ dsize = scnprintf(buf, sizeof(buf), "%u", yacl->inherit_flag);
+ break;
+ case 2:
+ data = buf;
+ dsize = scnprintf(buf, sizeof(buf), "%u", yacl->num_cleaned);
+ break;
+ case 3:
+ data = yacl->vol_acl->data;
+ dsize = yacl->vol_acl->size;
+ break;
+ default: /* not reachable: which is always 0-3 at this point */
+ ret = -EOPNOTSUPP;
+ goto error_yacl;
+ }
+
+ ret = dsize;
+ if (size > 0) {
+ if (dsize <= size)
+ memcpy(buffer, data, dsize);
+ else
+ ret = -ERANGE;
+ }
+ } else if (ret == -ENOTSUPP) {
+ ret = -ENODATA;
+ }
+
+error_yacl:
+ yfs_free_opaque_acl(yacl);
+error:
+ return ret;
+}
+
+static const struct afs_operation_ops yfs_store_opaque_acl2_operation = { /* ops table used by afs_xattr_set_yfs(); no .issue_afs_rpc is set */
+ .issue_yfs_rpc = yfs_fs_store_opaque_acl2,
+ .success = afs_acl_success, /* commits the returned file status */
+ .put = afs_acl_put, /* frees the ACL built by afs_make_acl() */
+};
+
+/*
+ * Set a file's YFS ACL.
+ *
+ * Only the "acl" attribute can be set and XATTR_CREATE is not accepted; both
+ * cases give -EINVAL.  -ENOTSUPP from the operation is reported as -ENODATA.
+ */
+static int afs_xattr_set_yfs(const struct xattr_handler *handler,
+			     struct user_namespace *mnt_userns,
+			     struct dentry *dentry,
+			     struct inode *inode, const char *name,
+			     const void *buffer, size_t size, int flags)
+{
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct afs_operation *op;
+	int ret;
+
+	if (flags == XATTR_CREATE)
+		return -EINVAL;
+	if (strcmp(name, "acl") != 0)
+		return -EINVAL;
+
+	op = afs_alloc_operation(NULL, vnode->volume);
+	if (IS_ERR(op))
+		return -ENOMEM;
+
+	afs_op_set_vnode(op, 0, vnode);
+	if (!afs_make_acl(op, buffer, size))
+		return afs_put_operation(op);
+
+	op->ops = &yfs_store_opaque_acl2_operation;
+	ret = afs_do_sync_operation(op);
+
+	return ret == -ENOTSUPP ? -ENODATA : ret;
+}
+
+static const struct xattr_handler afs_xattr_yfs_handler = { /* handler for xattrs under "afs.yfs."; the callbacks compare their name argument against the bare suffix (e.g. "acl") */
+ .prefix = "afs.yfs.",
+ .get = afs_xattr_get_yfs,
+ .set = afs_xattr_set_yfs,
+};
+
+/*
+ * Get the name of the cell on which a file resides.
+ *
+ * Returns the length of the name, which is copied to @buffer without a
+ * terminating NUL unless @size is 0.  -ERANGE is returned if a non-empty
+ * buffer is too small.
+ */
+static int afs_xattr_get_cell(const struct xattr_handler *handler,
+			      struct dentry *dentry,
+			      struct inode *inode, const char *name,
+			      void *buffer, size_t size)
+{
+	struct afs_cell *cell = AFS_FS_I(inode)->volume->cell;
+	size_t namelen = cell->name_len;
+
+	if (size != 0) {
+		if (size < namelen)
+			return -ERANGE;
+		memcpy(buffer, cell->name, namelen);
+	}
+
+	return namelen;
+}
+
+static const struct xattr_handler afs_xattr_afs_cell_handler = { /* handler for the "afs.cell" xattr; only .get is provided */
+ .name = "afs.cell",
+ .get = afs_xattr_get_cell,
+};
+
+/*
+ * Get the volume ID, vnode ID and vnode uniquifier of a file as a sequence of
+ * hex numbers separated by colons.
+ *
+ * Returns the length of the text, which is copied to @buffer without a
+ * terminating NUL unless @size is 0.  -ERANGE is returned if a non-empty
+ * buffer is too small.
+ */
+static int afs_xattr_get_fid(const struct xattr_handler *handler,
+			     struct dentry *dentry,
+			     struct inode *inode, const char *name,
+			     void *buffer, size_t size)
+{
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	char text[16 + 1 + 24 + 1 + 8 + 1];
+	size_t len;
+
+	/* The volume ID is 64-bit, the vnode ID is 96-bit and the
+	 * uniquifier is 32-bit.
+	 */
+	len = scnprintf(text, sizeof(text), "%llx:", vnode->fid.vid);
+
+	/* The high part of the vnode ID is only printed when it is non-zero. */
+	len += vnode->fid.vnode_hi ?
+		scnprintf(text + len, sizeof(text) - len, "%x%016llx",
+			  vnode->fid.vnode_hi, vnode->fid.vnode) :
+		scnprintf(text + len, sizeof(text) - len, "%llx",
+			  vnode->fid.vnode);
+
+	len += scnprintf(text + len, sizeof(text) - len, ":%x",
+			 vnode->fid.unique);
+
+	if (size != 0) {
+		if (size < len)
+			return -ERANGE;
+		memcpy(buffer, text, len);
+	}
+
+	return len;
+}
+
+static const struct xattr_handler afs_xattr_afs_fid_handler = { /* handler for the "afs.fid" xattr; only .get is provided */
+ .name = "afs.fid",
+ .get = afs_xattr_get_fid,
+};
+
+/*
+ * Get the name of the volume on which a file resides.
+ *
+ * Returns the length of the name, which is copied to @buffer without a
+ * terminating NUL unless @size is 0.  -ERANGE is returned if a non-empty
+ * buffer is too small.
+ */
+static int afs_xattr_get_volume(const struct xattr_handler *handler,
+				struct dentry *dentry,
+				struct inode *inode, const char *name,
+				void *buffer, size_t size)
+{
+	const char *volname = AFS_FS_I(inode)->volume->name;
+	size_t namelen = strlen(volname);
+
+	if (size != 0) {
+		if (size < namelen)
+			return -ERANGE;
+		memcpy(buffer, volname, namelen);
+	}
+
+	return namelen;
+}
+
+static const struct xattr_handler afs_xattr_afs_volume_handler = { /* handler for the "afs.volume" xattr; only .get is provided */
+ .name = "afs.volume",
+ .get = afs_xattr_get_volume,
+};
+
+const struct xattr_handler *afs_xattr_handlers[] = { /* NULL-terminated table of every xattr handler defined in this file */
+ &afs_xattr_afs_acl_handler,
+ &afs_xattr_afs_cell_handler,
+ &afs_xattr_afs_fid_handler,
+ &afs_xattr_afs_volume_handler,
+ &afs_xattr_yfs_handler, /* afs.yfs. prefix */
+ NULL
+};
diff --git a/fs/afs/xdr_fs.h b/fs/afs/xdr_fs.h
new file mode 100644
index 000000000..8ca868164
--- /dev/null
+++ b/fs/afs/xdr_fs.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* AFS fileserver XDR types
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#ifndef XDR_FS_H
+#define XDR_FS_H
+
+struct afs_xdr_AFSFetchStatus { /* packed record of 21 big-endian 32-bit words (84 bytes) */
+ __be32 if_version;
+#define AFS_FSTATUS_VERSION 1
+ __be32 type;
+ __be32 nlink;
+ __be32 size_lo; /* presumably the low 32 bits of the size; see size_hi */
+ __be32 data_version_lo; /* presumably the low 32 bits of the data version; see data_version_hi */
+ __be32 author;
+ __be32 owner;
+ __be32 caller_access;
+ __be32 anon_access;
+ __be32 mode;
+ __be32 parent_vnode;
+ __be32 parent_unique;
+ __be32 seg_size;
+ __be32 mtime_client;
+ __be32 mtime_server;
+ __be32 group;
+ __be32 sync_counter;
+ __be32 data_version_hi;
+ __be32 lock_count;
+ __be32 size_hi;
+ __be32 abort_code;
+} __packed;
+
+#define AFS_DIR_HASHTBL_SIZE 128 /* number of entries in the meta block's hashtable[] */
+#define AFS_DIR_DIRENT_SIZE 32 /* bytes per dirent slot; matches extended_name[32] */
+#define AFS_DIR_SLOTS_PER_BLOCK 64 /* dirent slots in one directory block */
+#define AFS_DIR_BLOCK_SIZE 2048 /* = AFS_DIR_SLOTS_PER_BLOCK * AFS_DIR_DIRENT_SIZE */
+#define AFS_DIR_BLOCKS_PER_PAGE (PAGE_SIZE / AFS_DIR_BLOCK_SIZE) /* directory blocks that fit in one page */
+#define AFS_DIR_MAX_SLOTS 65536
+#define AFS_DIR_BLOCKS_WITH_CTR 128
+#define AFS_DIR_MAX_BLOCKS 1023 /* also sizes the meta block's alloc_ctrs[] */
+#define AFS_DIR_RESV_BLOCKS 1
+#define AFS_DIR_RESV_BLOCKS0 13
+
+/*
+ * Directory entry structure.
+ *
+ * A slot is 32 bytes and is viewed either as the head of an entry (u), whose
+ * fixed fields take 12 bytes and are followed by the start of the name, or
+ * as 32 bytes of name continuation (extended_name).
+ */
+union afs_xdr_dirent {
+ struct {
+ u8 valid;
+ u8 unused[1];
+ __be16 hash_next;
+ __be32 vnode;
+ __be32 unique;
+ u8 name[];
+ /* When determining the number of dirent slots needed to
+ * represent a directory entry, name should be assumed to be 16
+ * bytes, due to a now-standardised (mis)calculation, but it is
+ * in fact 20 bytes in size. afs_dir_calc_slots() should be
+ * used for this.
+ *
+ * For names longer than (16 or) 20 bytes, extra slots should
+ * be annexed to this one using the extended_name format.
+ */
+ } u;
+ u8 extended_name[32];
+} __packed;
+
+/*
+ * Directory block header (one at the beginning of every 2048-byte block).
+ *
+ * The fields add up to 32 bytes, i.e. the header takes up exactly one dirent
+ * slot.
+ */
+struct afs_xdr_dir_hdr {
+ __be16 npages;
+ __be16 magic;
+#define AFS_DIR_MAGIC htons(1234)
+ u8 reserved;
+ u8 bitmap[8]; /* 64 bits; presumably one per slot in the block -- confirm */
+ u8 pad[19]; /* pads the header out to 32 bytes */
+} __packed;
+
+/*
+ * Directory block layout
+ *
+ * Three overlapping views of the same block: just the header, the header
+ * followed by the allocation counters and hash table (meta), or an array of
+ * dirent slots.
+ */
+union afs_xdr_dir_block {
+ struct afs_xdr_dir_hdr hdr;
+
+ struct {
+ struct afs_xdr_dir_hdr hdr;
+ u8 alloc_ctrs[AFS_DIR_MAX_BLOCKS];
+ __be16 hashtable[AFS_DIR_HASHTBL_SIZE];
+ } meta; /* NOTE(review): presumably only meaningful for the first block of a directory -- confirm */
+
+ union afs_xdr_dirent dirents[AFS_DIR_SLOTS_PER_BLOCK];
+} __packed;
+
+/*
+ * Directory layout on a linux VM page.
+ *
+ * A page holds AFS_DIR_BLOCKS_PER_PAGE directory blocks back to back.
+ */
+struct afs_xdr_dir_page {
+ union afs_xdr_dir_block blocks[AFS_DIR_BLOCKS_PER_PAGE];
+};
+
+/*
+ * Work out how many dirent slots a name of the given length occupies.  The
+ * first slot is treated as holding 16 bytes of name rather than the 20 it
+ * really has room for; this miscalculation is now standardised.
+ */
+static inline unsigned int afs_dir_calc_slots(size_t name_len)
+{
+	/* The stored name includes its terminating NUL. */
+	size_t stored_len = name_len + 1;
+
+	return 1 + ((stored_len + 15) / AFS_DIR_DIRENT_SIZE);
+}
+
+#endif /* XDR_FS_H */
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
new file mode 100644
index 000000000..11571cca8
--- /dev/null
+++ b/fs/afs/yfsclient.c
@@ -0,0 +1,1953 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* YFS File Server client stubs
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/circ_buf.h>
+#include <linux/iversion.h>
+#include "internal.h"
+#include "afs_fs.h"
+#include "xdr_fs.h"
+#include "protocol_yfs.h"
+
+/*
+ * Size of the XDR object that x points to, counted in 32-bit XDR words.  The
+ * argument is parenthesised so that any pointer expression may be passed.
+ */
+#define xdr_size(x) (sizeof(*(x)) / sizeof(__be32))
+
+/*
+ * Decode the YFSFid found at *_bp into @fid and step *_bp past it.
+ */
+static void xdr_decode_YFSFid(const __be32 **_bp, struct afs_fid *fid)
+{
+	const struct yfs_xdr_YFSFid *wire = (const void *)*_bp;
+
+	/* The vnode part of the record is split into lo, hi and unique. */
+	fid->vid	= xdr_to_u64(wire->volume);
+	fid->vnode	= xdr_to_u64(wire->vnode.lo);
+	fid->vnode_hi	= ntohl(wire->vnode.hi);
+	fid->unique	= ntohl(wire->vnode.unique);
+
+	*_bp += xdr_size(wire);
+}
+
+/*
+ * Store @n at @bp in network byte order; returns the next free XDR word.
+ */
+static __be32 *xdr_encode_u32(__be32 *bp, u32 n)
+{
+	bp[0] = htonl(n);
+	return bp + 1;
+}
+
+/*
+ * Store @n at @bp as a yfs_xdr_u64; returns the position just after it.
+ */
+static __be32 *xdr_encode_u64(__be32 *bp, u64 n)
+{
+	struct yfs_xdr_u64 *slot = (void *)bp;
+
+	*slot = u64_to_xdr(n);
+	bp += xdr_size(slot);
+	return bp;
+}
+
+/*
+ * Marshal @fid at @bp as a YFSFid; returns the position just after it.
+ */
+static __be32 *xdr_encode_YFSFid(__be32 *bp, struct afs_fid *fid)
+{
+	struct yfs_xdr_YFSFid *wire = (void *)bp;
+
+	wire->volume		= u64_to_xdr(fid->vid);
+	wire->vnode.lo		= u64_to_xdr(fid->vnode);
+	wire->vnode.hi		= htonl(fid->vnode_hi);
+	wire->vnode.unique	= htonl(fid->unique);
+
+	bp += xdr_size(wire);
+	return bp;
+}
+
+/*
+ * Number of bytes an XDR string of @len bytes occupies: one length word plus
+ * the data rounded up to a whole number of words.
+ */
+static size_t xdr_strlen(unsigned int len)
+{
+	size_t padded = round_up(len, sizeof(__be32));
+
+	return sizeof(__be32) + padded;
+}
+
+/*
+ * Marshal @len bytes from @p as an XDR string: a length word, the bytes
+ * themselves and then zero padding up to the next 4-byte boundary.  Returns
+ * the position just after the padded string.
+ */
+static __be32 *xdr_encode_string(__be32 *bp, const char *p, unsigned int len)
+{
+	unsigned int tail = len & 3;
+	u8 *data;
+
+	bp = xdr_encode_u32(bp, len);
+	data = (u8 *)bp;
+	memcpy(data, p, len);
+
+	if (tail != 0) {
+		unsigned int pad = 4 - tail;
+
+		/* Clear the padding so no stale bytes are sent. */
+		memset(data + len, 0, pad);
+		len += pad;
+	}
+
+	return bp + len / sizeof(__be32);
+}
+
+/*
+ * Marshal a qstr name as an XDR string.
+ */
+static __be32 *xdr_encode_name(__be32 *bp, const struct qstr *p)
+{
+	bp = xdr_encode_string(bp, p->name, p->len);
+	return bp;
+}
+
+/*
+ * Convert a timespec into a count of 100ns intervals.
+ */
+static s64 linux_to_yfs_time(const struct timespec64 *t)
+{
+	u64 ticks = (u64)t->tv_sec * 10000000;	/* whole seconds */
+
+	ticks += t->tv_nsec / 100;		/* sub-second part */
+	return ticks;
+}
+
+/*
+ * Marshal a YFSStoreStatus that always sets the mtime and, when @mode is not
+ * NULL, the permission bits as well.  Owner and group are written as zero and
+ * their mask bits are never set.  Returns the position just after the record.
+ */
+static __be32 *xdr_encode_YFSStoreStatus(__be32 *bp, mode_t *mode,
+					 const struct timespec64 *t)
+{
+	struct yfs_xdr_YFSStoreStatus *x = (void *)bp;
+	mode_t masked_mode = 0;
+	u32 mask = AFS_SET_MTIME;
+
+	if (mode) {
+		mask |= AFS_SET_MODE;
+		masked_mode = *mode & S_IALLUGO;
+	}
+
+	x->mask		= htonl(mask);
+	x->mode		= htonl(masked_mode);
+	x->mtime_client	= u64_to_xdr(linux_to_yfs_time(t));
+	x->owner	= u64_to_xdr(0);
+	x->group	= u64_to_xdr(0);
+
+	bp += xdr_size(x);
+	return bp;
+}
+
+/*
+ * Convert a signed 100ns-resolution 64-bit time into a timespec.
+ *
+ * The result is always normalised, i.e. 0 <= tv_nsec < NSEC_PER_SEC, including
+ * for times before the epoch.  That makes this the exact inverse of
+ * linux_to_yfs_time() for normalised input.
+ */
+static struct timespec64 yfs_time_to_linux(s64 t)
+{
+	struct timespec64 ts;
+	u64 abs_t;
+	u32 rem;
+
+	/*
+	 * Unfortunately can not use normal 64 bit division on 32 bit arch, but
+	 * the alternative, do_div, does not work with negative numbers so have
+	 * to special case them
+	 */
+	if (t < 0) {
+		/* Negate as unsigned so that S64_MIN does not overflow. */
+		abs_t = -(u64)t;
+		rem = do_div(abs_t, 10000000);
+		ts.tv_sec = -(time64_t)abs_t;
+		ts.tv_nsec = 0;
+		if (rem) {
+			/* Borrow a second so tv_nsec stays non-negative. */
+			ts.tv_sec--;
+			ts.tv_nsec = NSEC_PER_SEC - (long)rem * 100;
+		}
+	} else {
+		abs_t = t;
+		rem = do_div(abs_t, 10000000);
+		ts.tv_sec = abs_t;
+		ts.tv_nsec = (long)rem * 100;
+	}
+
+	return ts;
+}
+
+/*
+ * Decode an XDR 64-bit value as a signed 100ns time and convert it to a
+ * timespec.
+ */
+static struct timespec64 xdr_to_time(const struct yfs_xdr_u64 xdr)
+{
+	return yfs_time_to_linux((s64)xdr_to_u64(xdr));
+}
+
+/*
+ * Check that marshalling stopped exactly at the end of the request buffer.
+ * @bp is the position reached; a mismatch with call->request_size is logged
+ * but not otherwise acted upon.
+ */
+static void yfs_check_req(struct afs_call *call, __be32 *bp)
+{
+	size_t used = (void *)bp - call->request;
+
+	if (used == call->request_size)
+		return;
+
+	if (used > call->request_size)
+		pr_err("kAFS: %s: Request buffer overflow (%zu>%u)\n",
+		       call->type->name, used, call->request_size);
+	else
+		pr_warn("kAFS: %s: Request buffer underflow (%zu<%u)\n",
+			call->type->name, used, call->request_size);
+}
+
+/*
+ * Dump a bad file status record.
+ *
+ * Prints six rows of four 32-bit words followed by one row of two words, each
+ * row labelled with its byte offset from @bp in hex.  The label of the last
+ * row is derived from the running offset so that it is formatted like the
+ * others.
+ */
+static void xdr_dump_bad(const __be32 *bp)
+{
+	__be32 x[4];
+	int i;
+
+	pr_notice("YFS XDR: Bad status record\n");
+	for (i = 0; i < 6 * 4 * 4; i += 16) {
+		/* Copy out first; bp is only known to be __be32 aligned. */
+		memcpy(x, bp, 16);
+		bp += 4;
+		pr_notice("%03x: %08x %08x %08x %08x\n",
+			  i, ntohl(x[0]), ntohl(x[1]), ntohl(x[2]), ntohl(x[3]));
+	}
+
+	/* i is now the offset of the trailing two words. */
+	memcpy(x, bp, 8);
+	pr_notice("%03x: %08x %08x\n", i, ntohl(x[0]), ntohl(x[1]));
+}
+
+/*
+ * Decode a YFSFetchStatus block
+ *
+ * Three outcomes are possible, and *_bp is stepped past the record in all of
+ * them:
+ *  - the record carries an abort code: scb->have_error is set (and nlink is
+ *    zeroed for VNOVNODE) and nothing else is decoded;
+ *  - the file type is not one of file/dir/symlink: the record is dumped and a
+ *    protocol error is raised on the call;
+ *  - otherwise the status is filled in and scb->have_status is set.
+ */
+static void xdr_decode_YFSFetchStatus(const __be32 **_bp,
+				      struct afs_call *call,
+				      struct afs_status_cb *scb)
+{
+	const struct yfs_xdr_YFSFetchStatus *xdr = (const void *)*_bp;
+	struct afs_file_status *status = &scb->status;
+	u32 type;
+
+	status->abort_code = ntohl(xdr->abort_code);
+	if (status->abort_code != 0) {
+		if (status->abort_code == VNOVNODE)
+			status->nlink = 0;
+		scb->have_error = true;
+	} else {
+		type = ntohl(xdr->type);
+		if (type == AFS_FTYPE_FILE ||
+		    type == AFS_FTYPE_DIR ||
+		    type == AFS_FTYPE_SYMLINK) {
+			status->type		= type;
+			status->nlink		= ntohl(xdr->nlink);
+			status->author		= xdr_to_u64(xdr->author);
+			status->owner		= xdr_to_u64(xdr->owner);
+			/* Ticket dependent */
+			status->caller_access	= ntohl(xdr->caller_access);
+			status->anon_access	= ntohl(xdr->anon_access);
+			status->mode		= ntohl(xdr->mode) & S_IALLUGO;
+			status->group		= xdr_to_u64(xdr->group);
+			status->lock_count	= ntohl(xdr->lock_count);
+
+			status->mtime_client	= xdr_to_time(xdr->mtime_client);
+			status->mtime_server	= xdr_to_time(xdr->mtime_server);
+			status->size		= xdr_to_u64(xdr->size);
+			status->data_version	= xdr_to_u64(xdr->data_version);
+			scb->have_status = true;
+		} else {
+			xdr_dump_bad(*_bp);
+			afs_protocol_error(call, afs_eproto_bad_status);
+		}
+	}
+
+	*_bp += xdr_size(xdr);
+}
+
+/*
+ * Decode a YFSCallBack block
+ *
+ * The expiration time from the record is multiplied by 100 and added to the
+ * time the call was issued; the sum is then divided by NSEC_PER_SEC and stored
+ * as the callback expiry.
+ */
+static void xdr_decode_YFSCallBack(const __be32 **_bp,
+				   struct afs_call *call,
+				   struct afs_status_cb *scb)
+{
+	struct yfs_xdr_YFSCallBack *wire = (void *)*_bp;
+	struct afs_callback *cb = &scb->callback;
+	u64 lifetime = xdr_to_u64(wire->expiration_time) * 100;
+	ktime_t expiry = ktime_add(call->issue_time, lifetime);
+
+	cb->expires_at = ktime_divns(expiry, NSEC_PER_SEC);
+	scb->have_cb = true;
+
+	*_bp += xdr_size(wire);
+}
+
+/*
+ * Decode a YFSVolSync block
+ *
+ * @volsync may be NULL, in which case the record is just skipped.  Otherwise
+ * the volume creation date is divided by 10,000,000 before being stored.
+ */
+static void xdr_decode_YFSVolSync(const __be32 **_bp,
+				  struct afs_volsync *volsync)
+{
+	struct yfs_xdr_YFSVolSync *wire = (void *)*_bp;
+
+	if (volsync) {
+		u64 creation = xdr_to_u64(wire->vol_creation_date);
+
+		do_div(creation, 10 * 1000 * 1000);
+		volsync->creation = creation;
+	}
+
+	*_bp += xdr_size(wire);
+}
+
+/*
+ * Encode the requested attributes into a YFSStoreStatus block
+ *
+ * Only the attributes flagged in attr->ia_valid (mode, mtime, uid, gid) are
+ * marked in the mask; the fields of the others are sent as zero.  Returns the
+ * position just after the record.
+ */
+static __be32 *xdr_encode_YFS_StoreStatus(__be32 *bp, struct iattr *attr)
+{
+	struct yfs_xdr_YFSStoreStatus *x = (void *)bp;
+	const unsigned int valid = attr->ia_valid;
+	s64 mtime = 0, owner = 0, group = 0;
+	u32 mask = 0, mode = 0;
+
+	if (valid & ATTR_MODE) {
+		mode = attr->ia_mode & S_IALLUGO;
+		mask |= AFS_SET_MODE;
+	}
+
+	if (valid & ATTR_MTIME) {
+		mtime = linux_to_yfs_time(&attr->ia_mtime);
+		mask |= AFS_SET_MTIME;
+	}
+
+	if (valid & ATTR_UID) {
+		owner = from_kuid(&init_user_ns, attr->ia_uid);
+		mask |= AFS_SET_OWNER;
+	}
+
+	if (valid & ATTR_GID) {
+		group = from_kgid(&init_user_ns, attr->ia_gid);
+		mask |= AFS_SET_GROUP;
+	}
+
+	x->mask		= htonl(mask);
+	x->mode		= htonl(mode);
+	x->mtime_client	= u64_to_xdr(mtime);
+	x->owner	= u64_to_xdr(owner);
+	x->group	= u64_to_xdr(group);
+
+	bp += xdr_size(x);
+	return bp;
+}
+
+/*
+ * Decode a YFSFetchVolumeStatus block.
+ *
+ * The individual state flags are unpacked from the single flags word, and
+ * min_quota is always reported as 0 (nothing is read from the record for it).
+ * *_bp is stepped past the record.
+ */
+static void xdr_decode_YFSFetchVolumeStatus(const __be32 **_bp,
+					    struct afs_volume_status *vs)
+{
+	const struct yfs_xdr_YFSFetchVolumeStatus *wire = (const void *)*_bp;
+	u32 flags = ntohl(wire->flags);
+
+	vs->vid			= xdr_to_u64(wire->vid);
+	vs->parent_id		= xdr_to_u64(wire->parent_id);
+
+	vs->online		= flags & yfs_FVSOnline;
+	vs->in_service		= flags & yfs_FVSInservice;
+	vs->blessed		= flags & yfs_FVSBlessed;
+	vs->needs_salvage	= flags & yfs_FVSNeedsSalvage;
+
+	vs->type		= ntohl(wire->type);
+	vs->min_quota		= 0;
+	vs->max_quota		= xdr_to_u64(wire->max_quota);
+	vs->blocks_in_use	= xdr_to_u64(wire->blocks_in_use);
+	vs->part_blocks_avail	= xdr_to_u64(wire->part_blocks_avail);
+	vs->part_max_blocks	= xdr_to_u64(wire->part_max_blocks);
+	vs->vol_copy_date	= xdr_to_u64(wire->vol_copy_date);
+	vs->vol_backup_date	= xdr_to_u64(wire->vol_backup_date);
+
+	*_bp += xdr_size(wire);
+}
+
+/*
+ * Deliver reply data to operations that just return a file status and a volume
+ * sync record.  The status is decoded into op->file[0]; a negative result
+ * from afs_transfer_reply() is passed straight back without decoding.
+ */
+static int yfs_deliver_status_and_volsync(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	const __be32 *cursor;
+	int ret = afs_transfer_reply(call);
+
+	if (ret < 0)
+		return ret;
+
+	cursor = call->buffer;
+	xdr_decode_YFSFetchStatus(&cursor, call, &op->file[0].scb);
+	xdr_decode_YFSVolSync(&cursor, &op->volsync);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * Deliver reply data to a YFS.FetchData64.
+ *
+ * This is a resumable state machine keyed on call->unmarshall: whenever
+ * afs_extract_data() returns a negative value the function returns it, and
+ * the next invocation re-enters the switch at the stage that was in progress.
+ * The stages are:
+ *  0 - set up extraction of the 64-bit data length
+ *  1 - read the data length into req->actual_len
+ *  2 - read the file data into req->iter
+ *  3 - discard any data beyond what was asked for
+ *  4 - read and decode the status, callback and volsync records
+ *  5 - done
+ */
+static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
+{
+ struct afs_operation *op = call->op;
+ struct afs_vnode_param *vp = &op->file[0];
+ struct afs_read *req = op->fetch.req;
+ const __be32 *bp;
+ int ret;
+
+ _enter("{%u,%zu, %zu/%llu}",
+ call->unmarshall, call->iov_len, iov_iter_count(call->iter),
+ req->actual_len);
+
+ switch (call->unmarshall) {
+ case 0:
+ req->actual_len = 0;
+ afs_extract_to_tmp64(call);
+ call->unmarshall++;
+ fallthrough;
+
+ /* Extract the returned data length into ->actual_len. This
+ * may indicate more or less data than was requested will be
+ * returned.
+ */
+ case 1:
+ _debug("extract data length");
+ /* NOTE(review): the second argument is true at every stage but
+ * the last - presumably "more data will follow"; confirm
+ * against afs_extract_data().
+ */
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ req->actual_len = be64_to_cpu(call->tmp64);
+ _debug("DATA length: %llu", req->actual_len);
+
+ /* Nothing to read: go straight to the trailing metadata. */
+ if (req->actual_len == 0)
+ goto no_more_data;
+
+ /* Receive into the caller's iterator, but never more than was
+ * asked for.
+ */
+ call->iter = req->iter;
+ call->iov_len = min(req->actual_len, req->len);
+ call->unmarshall++;
+ fallthrough;
+
+ /* extract the returned data */
+ case 2:
+ _debug("extract data %zu/%llu",
+ iov_iter_count(call->iter), req->actual_len);
+
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ /* Switch back to the call's own iterator for what follows. */
+ call->iter = &call->def_iter;
+ if (req->actual_len <= req->len)
+ goto no_more_data;
+
+ /* Discard any excess data the server gave us */
+ afs_extract_discard(call, req->actual_len - req->len);
+ call->unmarshall = 3;
+ fallthrough;
+
+ case 3:
+ _debug("extract discard %zu/%llu",
+ iov_iter_count(call->iter), req->actual_len - req->len);
+
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ /* Also reached by goto from stages 1 and 2 when there is no (more)
+ * data to read or discard.
+ */
+ no_more_data:
+ call->unmarshall = 4;
+ afs_extract_to_buf(call,
+ sizeof(struct yfs_xdr_YFSFetchStatus) +
+ sizeof(struct yfs_xdr_YFSCallBack) +
+ sizeof(struct yfs_xdr_YFSVolSync));
+ fallthrough;
+
+ /* extract the metadata */
+ case 4:
+ ret = afs_extract_data(call, false);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer;
+ xdr_decode_YFSFetchStatus(&bp, call, &vp->scb);
+ xdr_decode_YFSCallBack(&bp, call, &vp->scb);
+ xdr_decode_YFSVolSync(&bp, &op->volsync);
+
+ /* Record the version and size the data belongs to in the request. */
+ req->data_version = vp->scb.status.data_version;
+ req->file_size = vp->scb.status.size;
+
+ call->unmarshall++;
+ fallthrough;
+
+ case 5:
+ break;
+ }
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * YFS.FetchData64 operation type
+ *
+ * Replies are parsed by yfs_deliver_fs_fetch_data64().
+ */
+static const struct afs_call_type yfs_RXYFSFetchData64 = {
+ .name = "YFS.FetchData64",
+ .op = yfs_FS_FetchData64,
+ .deliver = yfs_deliver_fs_fetch_data64,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Fetch data from a file.
+ *
+ * Marshals a YFS.FetchData64 request for the range described by
+ * op->fetch.req on op->file[0] and passes it to afs_make_op_call().  If the
+ * call cannot be allocated, afs_op_nomem() is invoked on the operation.
+ */
+void yfs_fs_fetch_data(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[0];
+	struct afs_read *req = op->fetch.req;
+	struct afs_call *call;
+	size_t reqsz, rplsz;
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},%llx,%llx",
+	       key_serial(op->key), vp->fid.vid, vp->fid.vnode,
+	       req->pos, req->len);
+
+	/* Request: opcode, RPC flags, fid, position, length */
+	reqsz = sizeof(__be32) * 2 +
+		sizeof(struct yfs_xdr_YFSFid) +
+		sizeof(struct yfs_xdr_u64) * 2;
+	/* Reply buffer: status, callback, volsync */
+	rplsz = sizeof(struct yfs_xdr_YFSFetchStatus) +
+		sizeof(struct yfs_xdr_YFSCallBack) +
+		sizeof(struct yfs_xdr_YFSVolSync);
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSFetchData64, reqsz, rplsz);
+	if (!call) {
+		afs_op_nomem(op);
+		return;
+	}
+
+	req->call_debug_id = call->debug_id;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSFETCHDATA64);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vp->fid);
+	bp = xdr_encode_u64(bp, req->pos);
+	bp = xdr_encode_u64(bp, req->len);
+	yfs_check_req(call, bp);
+
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data for YFS.CreateFile or YFS.MakeDir.
+ *
+ * The reply holds, in order: the new object's fid, its status, the parent
+ * directory's status, a callback for the new object and a volsync record.
+ * op->file[0] is the directory and op->file[1] the new object.
+ */
+static int yfs_deliver_fs_create_vnode(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_vnode_param *dir = &op->file[0];
+	struct afs_vnode_param *new_vp = &op->file[1];
+	const __be32 *cursor;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	/* unmarshall the reply once we've received all of it */
+	cursor = call->buffer;
+	xdr_decode_YFSFid(&cursor, &new_vp->fid);
+	xdr_decode_YFSFetchStatus(&cursor, call, &new_vp->scb);
+	xdr_decode_YFSFetchStatus(&cursor, call, &dir->scb);
+	xdr_decode_YFSCallBack(&cursor, call, &new_vp->scb);
+	xdr_decode_YFSVolSync(&cursor, &op->volsync);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * YFS.CreateFile operation type.
+ *
+ * YFS.MakeDir has its own table (yfs_RXFSMakeDir) that shares the same
+ * deliver routine.
+ *
+ * NOTE(review): the identifier carries an afs_RXFS prefix although this is
+ * the YFS variant; the other tables in this file use a yfs_ prefix.
+ */
+static const struct afs_call_type afs_RXFSCreateFile = {
+ .name = "YFS.CreateFile",
+ .op = yfs_FS_CreateFile,
+ .deliver = yfs_deliver_fs_create_vnode,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Create a file.
+ *
+ * Marshals a YFS.CreateFile request for the name in op->dentry inside the
+ * directory op->file[0], using op->create.mode and op->mtime for the initial
+ * status, and passes it to afs_make_op_call().
+ */
+void yfs_fs_create_file(struct afs_operation *op)
+{
+	const struct qstr *name = &op->dentry->d_name;
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_call *call;
+	size_t request_len, reply_len;
+	__be32 *bp;
+
+	_enter("");
+
+	request_len  = sizeof(__be32);				/* opcode */
+	request_len += sizeof(__be32);				/* RPC flags */
+	request_len += sizeof(struct yfs_xdr_YFSFid);		/* directory */
+	request_len += xdr_strlen(name->len);			/* name */
+	request_len += sizeof(struct yfs_xdr_YFSStoreStatus);	/* attributes */
+	request_len += sizeof(__be32);				/* ViceLockType */
+
+	reply_len  = sizeof(struct yfs_xdr_YFSFid);
+	reply_len += sizeof(struct yfs_xdr_YFSFetchStatus);
+	reply_len += sizeof(struct yfs_xdr_YFSFetchStatus);
+	reply_len += sizeof(struct yfs_xdr_YFSCallBack);
+	reply_len += sizeof(struct yfs_xdr_YFSVolSync);
+
+	call = afs_alloc_flat_call(op->net, &afs_RXFSCreateFile,
+				   request_len, reply_len);
+	if (!call) {
+		afs_op_nomem(op);
+		return;
+	}
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSCREATEFILE);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &dvp->fid);
+	bp = xdr_encode_name(bp, name);
+	bp = xdr_encode_YFSStoreStatus(bp, &op->create.mode, &op->mtime);
+	bp = xdr_encode_u32(bp, yfs_LockNone); /* ViceLockType */
+	yfs_check_req(call, bp);
+
+	trace_afs_make_fs_call1(call, &dvp->fid, name);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * YFS.MakeDir operation type.  Replies are parsed by the same routine as
+ * YFS.CreateFile, yfs_deliver_fs_create_vnode().
+ */
+static const struct afs_call_type yfs_RXFSMakeDir = {
+ .name = "YFS.MakeDir",
+ .op = yfs_FS_MakeDir,
+ .deliver = yfs_deliver_fs_create_vnode,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Make a directory.
+ *
+ * Marshals a YFS.MakeDir request for the name in op->dentry inside the
+ * directory op->file[0], using op->create.mode and op->mtime for the initial
+ * status, and passes it to afs_make_op_call().
+ */
+void yfs_fs_make_dir(struct afs_operation *op)
+{
+	const struct qstr *name = &op->dentry->d_name;
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_call *call;
+	size_t request_len, reply_len;
+	__be32 *bp;
+
+	_enter("");
+
+	request_len  = sizeof(__be32);				/* opcode */
+	request_len += sizeof(struct yfs_xdr_RPCFlags);		/* RPC flags */
+	request_len += sizeof(struct yfs_xdr_YFSFid);		/* parent */
+	request_len += xdr_strlen(name->len);			/* name */
+	request_len += sizeof(struct yfs_xdr_YFSStoreStatus);	/* attributes */
+
+	reply_len  = sizeof(struct yfs_xdr_YFSFid);
+	reply_len += sizeof(struct yfs_xdr_YFSFetchStatus);
+	reply_len += sizeof(struct yfs_xdr_YFSFetchStatus);
+	reply_len += sizeof(struct yfs_xdr_YFSCallBack);
+	reply_len += sizeof(struct yfs_xdr_YFSVolSync);
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXFSMakeDir,
+				   request_len, reply_len);
+	if (!call) {
+		afs_op_nomem(op);
+		return;
+	}
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSMAKEDIR);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &dvp->fid);
+	bp = xdr_encode_name(bp, name);
+	bp = xdr_encode_YFSStoreStatus(bp, &op->create.mode, &op->mtime);
+	yfs_check_req(call, bp);
+
+	trace_afs_make_fs_call1(call, &dvp->fid, name);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to a YFS.RemoveFile2 operation.
+ *
+ * The reply holds the directory status, the fid of the removed object (which
+ * is decoded into a scratch variable and not used further here), that
+ * object's status and a volsync record.
+ */
+static int yfs_deliver_fs_remove_file2(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_vnode_param *dir = &op->file[0];
+	struct afs_vnode_param *victim = &op->file[1];
+	struct afs_fid removed_fid;
+	const __be32 *cursor;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	cursor = call->buffer;
+	xdr_decode_YFSFetchStatus(&cursor, call, &dir->scb);
+	xdr_decode_YFSFid(&cursor, &removed_fid);
+	/* Was deleted if vnode->status.abort_code == VNOVNODE. */
+	xdr_decode_YFSFetchStatus(&cursor, call, &victim->scb);
+	xdr_decode_YFSVolSync(&cursor, &op->volsync);
+
+	return 0;
+}
+
+/*
+ * Completion hook for YFS.RemoveFile2.  If the call was aborted with
+ * RX_INVALID_OPERATION, flag the server as AFS_SERVER_FL_NO_RM2 and mark the
+ * operation with AFS_OPERATION_DOWNGRADE.
+ */
+static void yfs_done_fs_remove_file2(struct afs_call *call)
+{
+	if (call->error != -ECONNABORTED)
+		return;
+	if (call->abort_code != RX_INVALID_OPERATION)
+		return;
+
+	set_bit(AFS_SERVER_FL_NO_RM2, &call->server->flags);
+	call->op->flags |= AFS_OPERATION_DOWNGRADE;
+}
+
+/*
+ * YFS.RemoveFile2 operation type.
+ *
+ * Unlike the other tables here this one also installs a .done hook,
+ * yfs_done_fs_remove_file2().
+ */
+static const struct afs_call_type yfs_RXYFSRemoveFile2 = {
+ .name = "YFS.RemoveFile2",
+ .op = yfs_FS_RemoveFile2,
+ .deliver = yfs_deliver_fs_remove_file2,
+ .done = yfs_done_fs_remove_file2,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Remove a file and retrieve new file status.
+ *
+ * Marshals a YFS.RemoveFile2 request for the name in op->dentry inside the
+ * directory op->file[0] and passes it to afs_make_op_call().
+ */
+void yfs_fs_remove_file2(struct afs_operation *op)
+{
+	struct afs_vnode_param *dvp = &op->file[0];
+	const struct qstr *name = &op->dentry->d_name;
+	struct afs_call *call;
+	size_t reqsz, rplsz;
+	__be32 *bp;
+
+	_enter("");
+
+	/* Request: opcode, RPC flags, directory fid, name */
+	reqsz = sizeof(__be32) +
+		sizeof(struct yfs_xdr_RPCFlags) +
+		sizeof(struct yfs_xdr_YFSFid) +
+		xdr_strlen(name->len);
+	/* Reply: dir status, removed fid, removed status, volsync */
+	rplsz = sizeof(struct yfs_xdr_YFSFetchStatus) +
+		sizeof(struct yfs_xdr_YFSFid) +
+		sizeof(struct yfs_xdr_YFSFetchStatus) +
+		sizeof(struct yfs_xdr_YFSVolSync);
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSRemoveFile2, reqsz, rplsz);
+	if (!call) {
+		afs_op_nomem(op);
+		return;
+	}
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSREMOVEFILE2);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &dvp->fid);
+	bp = xdr_encode_name(bp, name);
+	yfs_check_req(call, bp);
+
+	trace_afs_make_fs_call1(call, &dvp->fid, name);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to a YFS.RemoveFile or YFS.RemoveDir operation.  The
+ * reply holds the directory status followed by a volsync record.
+ */
+static int yfs_deliver_fs_remove(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_vnode_param *dir = &op->file[0];
+	const __be32 *cursor;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	cursor = call->buffer;
+	xdr_decode_YFSFetchStatus(&cursor, call, &dir->scb);
+	xdr_decode_YFSVolSync(&cursor, &op->volsync);
+
+	return 0;
+}
+
+/*
+ * YFS.RemoveFile operation type.
+ *
+ * YFS.RemoveDir has its own table (yfs_RXYFSRemoveDir) that shares the same
+ * deliver routine.
+ */
+static const struct afs_call_type yfs_RXYFSRemoveFile = {
+ .name = "YFS.RemoveFile",
+ .op = yfs_FS_RemoveFile,
+ .deliver = yfs_deliver_fs_remove,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Remove a file.
+ *
+ * Unless the server has been flagged AFS_SERVER_FL_NO_RM2, the work is handed
+ * to yfs_fs_remove_file2(); otherwise a plain YFS.RemoveFile request is
+ * marshalled and passed to afs_make_op_call().
+ */
+void yfs_fs_remove_file(struct afs_operation *op)
+{
+	const struct qstr *name = &op->dentry->d_name;
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_call *call;
+	size_t reqsz, rplsz;
+	__be32 *bp;
+
+	_enter("");
+
+	if (!test_bit(AFS_SERVER_FL_NO_RM2, &op->server->flags)) {
+		yfs_fs_remove_file2(op);
+		return;
+	}
+
+	/* Request: opcode, RPC flags, directory fid, name */
+	reqsz = sizeof(__be32) +
+		sizeof(struct yfs_xdr_RPCFlags) +
+		sizeof(struct yfs_xdr_YFSFid) +
+		xdr_strlen(name->len);
+	/* Reply: dir status, volsync */
+	rplsz = sizeof(struct yfs_xdr_YFSFetchStatus) +
+		sizeof(struct yfs_xdr_YFSVolSync);
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSRemoveFile, reqsz, rplsz);
+	if (!call) {
+		afs_op_nomem(op);
+		return;
+	}
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSREMOVEFILE);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &dvp->fid);
+	bp = xdr_encode_name(bp, name);
+	yfs_check_req(call, bp);
+
+	trace_afs_make_fs_call1(call, &dvp->fid, name);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * YFS.RemoveDir operation type.  Replies are parsed by the same routine as
+ * YFS.RemoveFile, yfs_deliver_fs_remove().
+ */
+static const struct afs_call_type yfs_RXYFSRemoveDir = {
+ .name = "YFS.RemoveDir",
+ .op = yfs_FS_RemoveDir,
+ .deliver = yfs_deliver_fs_remove,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Remove a directory.
+ *
+ * Marshals a YFS.RemoveDir request for the name in op->dentry inside the
+ * directory op->file[0] and passes it to afs_make_op_call().
+ */
+void yfs_fs_remove_dir(struct afs_operation *op)
+{
+	const struct qstr *name = &op->dentry->d_name;
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_call *call;
+	size_t reqsz, rplsz;
+	__be32 *bp;
+
+	_enter("");
+
+	/* Request: opcode, RPC flags, parent fid, name */
+	reqsz = sizeof(__be32) +
+		sizeof(struct yfs_xdr_RPCFlags) +
+		sizeof(struct yfs_xdr_YFSFid) +
+		xdr_strlen(name->len);
+	/* Reply: dir status, volsync */
+	rplsz = sizeof(struct yfs_xdr_YFSFetchStatus) +
+		sizeof(struct yfs_xdr_YFSVolSync);
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSRemoveDir, reqsz, rplsz);
+	if (!call) {
+		afs_op_nomem(op);
+		return;
+	}
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSREMOVEDIR);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &dvp->fid);
+	bp = xdr_encode_name(bp, name);
+	yfs_check_req(call, bp);
+
+	trace_afs_make_fs_call1(call, &dvp->fid, name);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to a YFS.Link operation.  The reply holds the status of
+ * the linked object (op->file[1]), then the status of the directory
+ * (op->file[0]), then a volsync record.
+ */
+static int yfs_deliver_fs_link(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_vnode_param *dir = &op->file[0];
+	struct afs_vnode_param *target = &op->file[1];
+	const __be32 *cursor;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	cursor = call->buffer;
+	xdr_decode_YFSFetchStatus(&cursor, call, &target->scb);
+	xdr_decode_YFSFetchStatus(&cursor, call, &dir->scb);
+	xdr_decode_YFSVolSync(&cursor, &op->volsync);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * YFS.Link operation type.
+ *
+ * Replies are parsed by yfs_deliver_fs_link().
+ */
+static const struct afs_call_type yfs_RXYFSLink = {
+ .name = "YFS.Link",
+ .op = yfs_FS_Link,
+ .deliver = yfs_deliver_fs_link,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Make a hard link.
+ *
+ * Marshals a YFS.Link request that creates the name in op->dentry inside the
+ * directory op->file[0], referring to the object op->file[1], and passes it
+ * to afs_make_op_call().
+ */
+void yfs_fs_link(struct afs_operation *op)
+{
+	const struct qstr *name = &op->dentry->d_name;
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_vnode_param *vp = &op->file[1];
+	struct afs_call *call;
+	size_t reqsz, rplsz;
+	__be32 *bp;
+
+	_enter("");
+
+	/* Request: opcode, RPC flags, directory fid, name, target fid */
+	reqsz = sizeof(__be32) +
+		sizeof(struct yfs_xdr_RPCFlags) +
+		sizeof(struct yfs_xdr_YFSFid) +
+		xdr_strlen(name->len) +
+		sizeof(struct yfs_xdr_YFSFid);
+	/* Reply: target status, dir status, volsync */
+	rplsz = sizeof(struct yfs_xdr_YFSFetchStatus) +
+		sizeof(struct yfs_xdr_YFSFetchStatus) +
+		sizeof(struct yfs_xdr_YFSVolSync);
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSLink, reqsz, rplsz);
+	if (!call) {
+		afs_op_nomem(op);
+		return;
+	}
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSLINK);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &dvp->fid);
+	bp = xdr_encode_name(bp, name);
+	bp = xdr_encode_YFSFid(bp, &vp->fid);
+	yfs_check_req(call, bp);
+
+	/* Note that the trace is keyed on the target's fid, not the dir's. */
+	trace_afs_make_fs_call1(call, &vp->fid, name);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to a YFS.Symlink operation.  The reply holds the new
+ * symlink's fid and status (op->file[1]), then the directory's status
+ * (op->file[0]), then a volsync record.
+ */
+static int yfs_deliver_fs_symlink(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_vnode_param *dir = &op->file[0];
+	struct afs_vnode_param *link = &op->file[1];
+	const __be32 *cursor;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	/* unmarshall the reply once we've received all of it */
+	cursor = call->buffer;
+	xdr_decode_YFSFid(&cursor, &link->fid);
+	xdr_decode_YFSFetchStatus(&cursor, call, &link->scb);
+	xdr_decode_YFSFetchStatus(&cursor, call, &dir->scb);
+	xdr_decode_YFSVolSync(&cursor, &op->volsync);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * YFS.Symlink operation type
+ *
+ * Replies are parsed by yfs_deliver_fs_symlink().
+ */
+static const struct afs_call_type yfs_RXYFSSymlink = {
+ .name = "YFS.Symlink",
+ .op = yfs_FS_Symlink,
+ .deliver = yfs_deliver_fs_symlink,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Create a symbolic link.
+ *
+ * Marshals a YFS.Symlink request that creates the name in op->dentry inside
+ * the directory op->file[0] with op->create.symlink as its contents.  The
+ * mode sent is always 0777.
+ */
+void yfs_fs_symlink(struct afs_operation *op)
+{
+	const struct qstr *name = &op->dentry->d_name;
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_call *call;
+	size_t contents_sz = strlen(op->create.symlink);
+	size_t reqsz, rplsz;
+	mode_t mode = 0777;
+	__be32 *bp;
+
+	_enter("");
+
+	/* Request: opcode, RPC flags, dir fid, name, contents, attributes */
+	reqsz = sizeof(__be32) +
+		sizeof(struct yfs_xdr_RPCFlags) +
+		sizeof(struct yfs_xdr_YFSFid) +
+		xdr_strlen(name->len) +
+		xdr_strlen(contents_sz) +
+		sizeof(struct yfs_xdr_YFSStoreStatus);
+	/* Reply: new fid, new status, dir status, volsync */
+	rplsz = sizeof(struct yfs_xdr_YFSFid) +
+		sizeof(struct yfs_xdr_YFSFetchStatus) +
+		sizeof(struct yfs_xdr_YFSFetchStatus) +
+		sizeof(struct yfs_xdr_YFSVolSync);
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSSymlink, reqsz, rplsz);
+	if (!call) {
+		afs_op_nomem(op);
+		return;
+	}
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSSYMLINK);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &dvp->fid);
+	bp = xdr_encode_name(bp, name);
+	bp = xdr_encode_string(bp, op->create.symlink, contents_sz);
+	bp = xdr_encode_YFSStoreStatus(bp, &mode, &op->mtime);
+	yfs_check_req(call, bp);
+
+	trace_afs_make_fs_call1(call, &dvp->fid, name);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to a YFS.Rename operation.  The reply holds the status
+ * of the original directory (op->file[0]), the status of the new directory
+ * (op->file[1]) and a volsync record.
+ */
+static int yfs_deliver_fs_rename(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_vnode_param *from_dir = &op->file[0];
+	struct afs_vnode_param *to_dir = &op->file[1];
+	const __be32 *cursor;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	/* If the two dirs are the same, we have two copies of the same status
+	 * report, so we just decode it twice.
+	 */
+	cursor = call->buffer;
+	xdr_decode_YFSFetchStatus(&cursor, call, &from_dir->scb);
+	xdr_decode_YFSFetchStatus(&cursor, call, &to_dir->scb);
+	xdr_decode_YFSVolSync(&cursor, &op->volsync);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * YFS.Rename operation type
+ *
+ * Replies are parsed by yfs_deliver_fs_rename().  The name carries the "YFS."
+ * prefix like every other call type in this file, so that messages printed
+ * by yfs_check_req() identify this as the YFS variant of the call.
+ */
+static const struct afs_call_type yfs_RXYFSRename = {
+	.name		= "YFS.Rename",
+	.op		= yfs_FS_Rename,
+	.deliver	= yfs_deliver_fs_rename,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Rename a file or directory.
+ *
+ * Marshals a YFS.Rename request moving the name in op->dentry within
+ * directory op->file[0] to the name in op->dentry_2 within directory
+ * op->file[1], and passes it to afs_make_op_call().
+ */
+void yfs_fs_rename(struct afs_operation *op)
+{
+	struct afs_vnode_param *orig_dvp = &op->file[0];
+	struct afs_vnode_param *new_dvp = &op->file[1];
+	const struct qstr *orig_name = &op->dentry->d_name;
+	const struct qstr *new_name = &op->dentry_2->d_name;
+	struct afs_call *call;
+	size_t reqsz, rplsz;
+	__be32 *bp;
+
+	_enter("");
+
+	/* Request: opcode, RPC flags, then (dir fid, name) twice over */
+	reqsz = sizeof(__be32) +
+		sizeof(struct yfs_xdr_RPCFlags) +
+		sizeof(struct yfs_xdr_YFSFid) +
+		xdr_strlen(orig_name->len) +
+		sizeof(struct yfs_xdr_YFSFid) +
+		xdr_strlen(new_name->len);
+	/* Reply: original dir status, new dir status, volsync */
+	rplsz = sizeof(struct yfs_xdr_YFSFetchStatus) +
+		sizeof(struct yfs_xdr_YFSFetchStatus) +
+		sizeof(struct yfs_xdr_YFSVolSync);
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSRename, reqsz, rplsz);
+	if (!call) {
+		afs_op_nomem(op);
+		return;
+	}
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSRENAME);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &orig_dvp->fid);
+	bp = xdr_encode_name(bp, orig_name);
+	bp = xdr_encode_YFSFid(bp, &new_dvp->fid);
+	bp = xdr_encode_name(bp, new_name);
+	yfs_check_req(call, bp);
+
+	trace_afs_make_fs_call2(call, &orig_dvp->fid, orig_name, new_name);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * YFS.StoreData64 operation type.
+ *
+ * The reply is just a status plus volsync, so the generic
+ * yfs_deliver_status_and_volsync() routine is used.
+ */
+static const struct afs_call_type yfs_RXYFSStoreData64 = {
+ .name = "YFS.StoreData64",
+ .op = yfs_FS_StoreData64,
+ .deliver = yfs_deliver_status_and_volsync,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Store a set of pages to a large file.
+ *
+ * Marshals a YFS.StoreData64 request for op->file[0] covering op->store.size
+ * bytes at op->store.pos, with op->store.i_size as the new file length.  The
+ * data itself is supplied through op->store.write_iter, which is attached to
+ * the call.  Only the mtime is set in the accompanying store status.
+ */
+void yfs_fs_store_data(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[0];
+	struct afs_call *call;
+	size_t reqsz, rplsz;
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), vp->fid.vid, vp->fid.vnode);
+
+	_debug("size %llx, at %llx, i_size %llx",
+	       (unsigned long long)op->store.size,
+	       (unsigned long long)op->store.pos,
+	       (unsigned long long)op->store.i_size);
+
+	/* Request: opcode, RPC flags, fid, attributes, pos, size, i_size */
+	reqsz = sizeof(__be32) +
+		sizeof(__be32) +
+		sizeof(struct yfs_xdr_YFSFid) +
+		sizeof(struct yfs_xdr_YFSStoreStatus) +
+		sizeof(struct yfs_xdr_u64) * 3;
+	/* Reply: status, volsync */
+	rplsz = sizeof(struct yfs_xdr_YFSFetchStatus) +
+		sizeof(struct yfs_xdr_YFSVolSync);
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSStoreData64, reqsz, rplsz);
+	if (!call) {
+		afs_op_nomem(op);
+		return;
+	}
+
+	call->write_iter = op->store.write_iter;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSSTOREDATA64);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vp->fid);
+	bp = xdr_encode_YFSStoreStatus(bp, NULL, &op->mtime);
+	bp = xdr_encode_u64(bp, op->store.pos);
+	bp = xdr_encode_u64(bp, op->store.size);
+	bp = xdr_encode_u64(bp, op->store.i_size);
+	yfs_check_req(call, bp);
+
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * YFS.StoreStatus operation type
+ *
+ * The reply is just a status plus volsync, so the generic
+ * yfs_deliver_status_and_volsync() routine is used.
+ */
+static const struct afs_call_type yfs_RXYFSStoreStatus = {
+ .name = "YFS.StoreStatus",
+ .op = yfs_FS_StoreStatus,
+ .deliver = yfs_deliver_status_and_volsync,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * YFS.StoreData64 operation type as used by yfs_fs_setattr_size().  The
+ * fields are identical to those of yfs_RXYFSStoreData64; only the identity of
+ * the table differs.
+ */
+static const struct afs_call_type yfs_RXYFSStoreData64_as_Status = {
+ .name = "YFS.StoreData64",
+ .op = yfs_FS_StoreData64,
+ .deliver = yfs_deliver_status_and_volsync,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Set the attributes on a file, using YFS.StoreData64 rather than
+ * YFS.StoreStatus so as to alter the file size also.
+ *
+ * The request describes a zero-length write positioned at the new size, with
+ * the new size also given as the resulting file length.
+ */
+static void yfs_fs_setattr_size(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[0];
+	struct iattr *attr = op->setattr.attr;
+	struct afs_call *call;
+	size_t reqsz, rplsz;
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), vp->fid.vid, vp->fid.vnode);
+
+	/* Request: opcode, RPC flags, fid, attributes, pos, size, i_size */
+	reqsz = sizeof(__be32) * 2 +
+		sizeof(struct yfs_xdr_YFSFid) +
+		sizeof(struct yfs_xdr_YFSStoreStatus) +
+		sizeof(struct yfs_xdr_u64) * 3;
+	/* Reply: status, volsync */
+	rplsz = sizeof(struct yfs_xdr_YFSFetchStatus) +
+		sizeof(struct yfs_xdr_YFSVolSync);
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSStoreData64_as_Status,
+				   reqsz, rplsz);
+	if (!call) {
+		afs_op_nomem(op);
+		return;
+	}
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSSTOREDATA64);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vp->fid);
+	bp = xdr_encode_YFS_StoreStatus(bp, attr);
+	bp = xdr_encode_u64(bp, attr->ia_size);	/* position of start of write */
+	bp = xdr_encode_u64(bp, 0);		/* size of write */
+	bp = xdr_encode_u64(bp, attr->ia_size);	/* new file length */
+	yfs_check_req(call, bp);
+
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Set the attributes on a file, using YFS.StoreData64 if there's a change in
+ * file size, and YFS.StoreStatus otherwise.
+ *
+ * A size change is detected by ATTR_SIZE being set in the attribute mask, in
+ * which case the work is handed to yfs_fs_setattr_size().
+ */
+void yfs_fs_setattr(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[0];
+	struct iattr *attr = op->setattr.attr;
+	struct afs_call *call;
+	size_t reqsz, rplsz;
+	__be32 *bp;
+
+	if (attr->ia_valid & ATTR_SIZE) {
+		yfs_fs_setattr_size(op);
+		return;
+	}
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), vp->fid.vid, vp->fid.vnode);
+
+	/* Request: opcode, RPC flags, fid, attributes */
+	reqsz = sizeof(__be32) * 2 +
+		sizeof(struct yfs_xdr_YFSFid) +
+		sizeof(struct yfs_xdr_YFSStoreStatus);
+	/* Reply: status, volsync */
+	rplsz = sizeof(struct yfs_xdr_YFSFetchStatus) +
+		sizeof(struct yfs_xdr_YFSVolSync);
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSStoreStatus, reqsz, rplsz);
+	if (!call) {
+		afs_op_nomem(op);
+		return;
+	}
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSSTORESTATUS);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vp->fid);
+	bp = xdr_encode_YFS_StoreStatus(bp, attr);
+	yfs_check_req(call, bp);
+
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to a YFS.GetVolumeStatus operation.
+ *
+ * The reply is consumed in stages, with call->unmarshall recording the stage
+ * to resume at: the fixed-size status record, then three length-prefixed
+ * strings (volume name, offline message, message of the day).  Each stage
+ * returns early if afs_extract_data() yields a negative value and otherwise
+ * falls through into the next stage.
+ *
+ * The status record is decoded into op->volstatus.vs.  The three strings are
+ * NUL-terminated in call->buffer and handed to _debug(); this function does
+ * not copy them anywhere else.
+ *
+ * Returns 0 once every stage has completed, the negative value obtained from
+ * afs_extract_data(), or the result of afs_protocol_error() when a string
+ * length is AFSNAMEMAX or more.
+ */
+static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
+{
+ struct afs_operation *op = call->op;
+ const __be32 *bp;
+ char *p;
+ u32 size;
+ int ret;
+
+ _enter("{%u}", call->unmarshall);
+
+ switch (call->unmarshall) {
+ case 0:
+ /* First entry: ask for the fixed-size status record. */
+ call->unmarshall++;
+ afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchVolumeStatus));
+ fallthrough;
+
+ /* extract the returned status record */
+ case 1:
+ _debug("extract status");
+ /* presumably the bool tells the extractor that more data follows this
+  * piece; only the final stage passes false - TODO confirm
+  */
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer;
+ xdr_decode_YFSFetchVolumeStatus(&bp, &op->volstatus.vs);
+ call->unmarshall++;
+ /* Next comes a length word, read via call->tmp. */
+ afs_extract_to_tmp(call);
+ fallthrough;
+
+ /* extract the volume name length */
+ case 2:
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ call->count = ntohl(call->tmp);
+ _debug("volname length: %u", call->count);
+ /* Reject lengths of AFSNAMEMAX or more before sizing the extraction. */
+ if (call->count >= AFSNAMEMAX)
+ return afs_protocol_error(call, afs_eproto_volname_len);
+ size = (call->count + 3) & ~3; /* It's padded */
+ afs_extract_to_buf(call, size);
+ call->unmarshall++;
+ fallthrough;
+
+ /* extract the volume name */
+ case 3:
+ _debug("extract volname");
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ /* Terminate at the unpadded length so the name can be printed. */
+ p = call->buffer;
+ p[call->count] = 0;
+ _debug("volname '%s'", p);
+ afs_extract_to_tmp(call);
+ call->unmarshall++;
+ fallthrough;
+
+ /* extract the offline message length */
+ case 4:
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ call->count = ntohl(call->tmp);
+ _debug("offline msg length: %u", call->count);
+ /* NOTE(review): the cap here is AFSNAMEMAX, the same one applied to the
+  * volume name, while yfs_fs_get_volume_status() sizes the reply buffer
+  * for AFSOPAQUEMAX + 1 - confirm AFSNAMEMAX is the intended limit.
+  */
+ if (call->count >= AFSNAMEMAX)
+ return afs_protocol_error(call, afs_eproto_offline_msg_len);
+ size = (call->count + 3) & ~3; /* It's padded */
+ afs_extract_to_buf(call, size);
+ call->unmarshall++;
+ fallthrough;
+
+ /* extract the offline message */
+ case 5:
+ _debug("extract offline");
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ p = call->buffer;
+ p[call->count] = 0;
+ _debug("offline '%s'", p);
+
+ afs_extract_to_tmp(call);
+ call->unmarshall++;
+ fallthrough;
+
+ /* extract the message of the day length */
+ case 6:
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ call->count = ntohl(call->tmp);
+ _debug("motd length: %u", call->count);
+ /* NOTE(review): same AFSNAMEMAX cap as the two strings above - confirm. */
+ if (call->count >= AFSNAMEMAX)
+ return afs_protocol_error(call, afs_eproto_motd_len);
+ size = (call->count + 3) & ~3; /* It's padded */
+ afs_extract_to_buf(call, size);
+ call->unmarshall++;
+ fallthrough;
+
+ /* extract the message of the day */
+ case 7:
+ _debug("extract motd");
+ /* Last piece of the reply: the second argument is false here. */
+ ret = afs_extract_data(call, false);
+ if (ret < 0)
+ return ret;
+
+ p = call->buffer;
+ p[call->count] = 0;
+ _debug("motd '%s'", p);
+
+ call->unmarshall++;
+ fallthrough;
+
+ /* All stages done; nothing further to unmarshal. */
+ case 8:
+ break;
+ }
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * Call type for YFS.GetVolumeStatus: replies are unmarshalled by
+ * yfs_deliver_fs_get_volume_status() and the call is torn down with the
+ * flat-call destructor.
+ */
+static const struct afs_call_type yfs_RXYFSGetVolumeStatus = {
+	.name		= "YFS.GetVolumeStatus",
+	.op		= yfs_FS_GetVolumeStatus,
+	.deliver	= yfs_deliver_fs_get_volume_status,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Issue a YFS.GetVolumeStatus request for the volume that op->file[0]
+ * belongs to.
+ *
+ * The reply buffer is sized to the larger of the status record plus one
+ * 32-bit word and AFSOPAQUEMAX + 1 bytes.  Failure to allocate the call is
+ * reported through afs_op_nomem().
+ */
+void yfs_fs_get_volume_status(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[0];
+	struct afs_call *call;
+	size_t req_len, reply_len;
+	__be32 *xdr;
+
+	_enter("");
+
+	req_len = sizeof(__be32) * 2 +
+		  sizeof(struct yfs_xdr_u64);
+	reply_len = max_t(size_t,
+			  sizeof(struct yfs_xdr_YFSFetchVolumeStatus) +
+			  sizeof(__be32),
+			  AFSOPAQUEMAX + 1);
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSGetVolumeStatus,
+				   req_len, reply_len);
+	if (!call) {
+		afs_op_nomem(op);
+		return;
+	}
+
+	/* Marshal the parameters: opcode, RPC flags, 64-bit volume ID. */
+	xdr = call->request;
+	xdr = xdr_encode_u32(xdr, YFSGETVOLUMESTATUS);
+	xdr = xdr_encode_u32(xdr, 0); /* RPC flags */
+	xdr = xdr_encode_u64(xdr, vp->fid.vid);
+	yfs_check_req(call, xdr);
+
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Call type for YFS.SetLock: the reply is a status + volsync pair, and
+ * afs_lock_op_done() is installed as the .done hook.
+ */
+static const struct afs_call_type yfs_RXYFSSetLock = {
+	.name		= "YFS.SetLock",
+	.op		= yfs_FS_SetLock,
+	.deliver	= yfs_deliver_status_and_volsync,
+	.done		= afs_lock_op_done,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Call type for YFS.ExtendLock: the reply is a status + volsync pair, and
+ * afs_lock_op_done() is installed as the .done hook.
+ */
+static const struct afs_call_type yfs_RXYFSExtendLock = {
+	.name		= "YFS.ExtendLock",
+	.op		= yfs_FS_ExtendLock,
+	.deliver	= yfs_deliver_status_and_volsync,
+	.done		= afs_lock_op_done,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Call type for YFS.ReleaseLock: the reply is a status + volsync pair.
+ * Unlike the set/extend lock call types, no .done hook is installed.
+ */
+static const struct afs_call_type yfs_RXYFSReleaseLock = {
+	.name		= "YFS.ReleaseLock",
+	.op		= yfs_FS_ReleaseLock,
+	.deliver	= yfs_deliver_status_and_volsync,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Issue a YFS.SetLock request for the file in op->file[0], asking for a lock
+ * of type op->lock.type.  Failure to allocate the call is reported through
+ * afs_op_nomem().
+ */
+void yfs_fs_set_lock(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[0];
+	struct afs_call *call;
+	size_t req_len, reply_len;
+	__be32 *xdr;
+
+	_enter("");
+
+	/* Request: opcode + RPC flags, the fid, the lock type word. */
+	req_len = sizeof(__be32) * 2 +
+		  sizeof(struct yfs_xdr_YFSFid) +
+		  sizeof(__be32);
+	reply_len = sizeof(struct yfs_xdr_YFSFetchStatus) +
+		    sizeof(struct yfs_xdr_YFSVolSync);
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSSetLock,
+				   req_len, reply_len);
+	if (!call) {
+		afs_op_nomem(op);
+		return;
+	}
+
+	/* Marshal the parameters in wire order. */
+	xdr = call->request;
+	xdr = xdr_encode_u32(xdr, YFSSETLOCK);
+	xdr = xdr_encode_u32(xdr, 0); /* RPC flags */
+	xdr = xdr_encode_YFSFid(xdr, &vp->fid);
+	xdr = xdr_encode_u32(xdr, op->lock.type);
+	yfs_check_req(call, xdr);
+
+	trace_afs_make_fs_calli(call, &vp->fid, op->lock.type);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Issue a YFS.ExtendLock request for the file in op->file[0].  Failure to
+ * allocate the call is reported through afs_op_nomem().
+ */
+void yfs_fs_extend_lock(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[0];
+	struct afs_call *call;
+	size_t req_len, reply_len;
+	__be32 *xdr;
+
+	_enter("");
+
+	/* Request: opcode + RPC flags followed by the fid. */
+	req_len = sizeof(__be32) * 2 +
+		  sizeof(struct yfs_xdr_YFSFid);
+	reply_len = sizeof(struct yfs_xdr_YFSFetchStatus) +
+		    sizeof(struct yfs_xdr_YFSVolSync);
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSExtendLock,
+				   req_len, reply_len);
+	if (!call) {
+		afs_op_nomem(op);
+		return;
+	}
+
+	/* Marshal the parameters in wire order. */
+	xdr = call->request;
+	xdr = xdr_encode_u32(xdr, YFSEXTENDLOCK);
+	xdr = xdr_encode_u32(xdr, 0); /* RPC flags */
+	xdr = xdr_encode_YFSFid(xdr, &vp->fid);
+	yfs_check_req(call, xdr);
+
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Issue a YFS.ReleaseLock request for the file in op->file[0].  Failure to
+ * allocate the call is reported through afs_op_nomem().
+ */
+void yfs_fs_release_lock(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[0];
+	struct afs_call *call;
+	size_t req_len, reply_len;
+	__be32 *xdr;
+
+	_enter("");
+
+	/* Request: opcode + RPC flags followed by the fid. */
+	req_len = sizeof(__be32) * 2 +
+		  sizeof(struct yfs_xdr_YFSFid);
+	reply_len = sizeof(struct yfs_xdr_YFSFetchStatus) +
+		    sizeof(struct yfs_xdr_YFSVolSync);
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSReleaseLock,
+				   req_len, reply_len);
+	if (!call) {
+		afs_op_nomem(op);
+		return;
+	}
+
+	/* Marshal the parameters in wire order. */
+	xdr = call->request;
+	xdr = xdr_encode_u32(xdr, YFSRELEASELOCK);
+	xdr = xdr_encode_u32(xdr, 0); /* RPC flags */
+	xdr = xdr_encode_YFSFid(xdr, &vp->fid);
+	yfs_check_req(call, xdr);
+
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver a reply to YFS.FetchStatus.
+ *
+ * Nothing is decoded while afs_transfer_reply() returns a negative value
+ * (that value is passed straight back).  After that the buffer is unpacked as
+ * a YFSFetchStatus and a YFSCallBack, both into the scb of the file slot
+ * selected by op->fetch_status.which, followed by a YFSVolSync into
+ * op->volsync.  Returns 0 on completion.
+ */
+static int yfs_deliver_fs_fetch_status(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_status_cb *scb = &op->file[op->fetch_status.which].scb;
+	const __be32 *cursor;
+	int err;
+
+	err = afs_transfer_reply(call);
+	if (err < 0)
+		return err;
+
+	/* The complete reply is in hand, so decode it in wire order. */
+	cursor = call->buffer;
+	xdr_decode_YFSFetchStatus(&cursor, call, scb);
+	xdr_decode_YFSCallBack(&cursor, call, scb);
+	xdr_decode_YFSVolSync(&cursor, &op->volsync);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * Call type for YFS.FetchStatus: replies are unmarshalled by
+ * yfs_deliver_fs_fetch_status().
+ */
+static const struct afs_call_type yfs_RXYFSFetchStatus = {
+	.name		= "YFS.FetchStatus",
+	.op		= yfs_FS_FetchStatus,
+	.deliver	= yfs_deliver_fs_fetch_status,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Issue a YFS.FetchStatus request for the fid held in the file slot selected
+ * by op->fetch_status.which (no vnode handle is needed).  Failure to allocate
+ * the call is reported through afs_op_nomem().
+ */
+void yfs_fs_fetch_status(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[op->fetch_status.which];
+	struct afs_call *call;
+	size_t req_len, reply_len;
+	__be32 *xdr;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), vp->fid.vid, vp->fid.vnode);
+
+	/* Request: opcode + RPC flags followed by the fid.
+	 * Reply: file status, callback and volume sync records.
+	 */
+	req_len = sizeof(__be32) * 2 +
+		  sizeof(struct yfs_xdr_YFSFid);
+	reply_len = sizeof(struct yfs_xdr_YFSFetchStatus) +
+		    sizeof(struct yfs_xdr_YFSCallBack) +
+		    sizeof(struct yfs_xdr_YFSVolSync);
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSFetchStatus,
+				   req_len, reply_len);
+	if (!call) {
+		afs_op_nomem(op);
+		return;
+	}
+
+	/* Marshal the parameters in wire order. */
+	xdr = call->request;
+	xdr = xdr_encode_u32(xdr, YFSFETCHSTATUS);
+	xdr = xdr_encode_u32(xdr, 0); /* RPC flags */
+	xdr = xdr_encode_YFSFid(xdr, &vp->fid);
+	yfs_check_req(call, xdr);
+
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to an YFS.InlineBulkStatus call.
+ *
+ * call->unmarshall selects the stage to resume at.  The stages are: a status
+ * count, that many YFSFetchStatus records, a callback count, that many
+ * YFSCallBack records, and finally one YFSVolSync record.  Both counts must
+ * equal op->nr_files, otherwise the call is failed via afs_protocol_error().
+ *
+ * call->count indexes the record currently being read: index 0 and 1 map to
+ * op->file[0] and op->file[1], and index n >= 2 maps to op->more_files[n - 2].
+ *
+ * Returns 0 when the whole reply has been unmarshalled, otherwise the negative
+ * value from afs_extract_data() or the result of afs_protocol_error().
+ */
+static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call)
+{
+ struct afs_operation *op = call->op;
+ struct afs_status_cb *scb;
+ const __be32 *bp;
+ u32 tmp;
+ int ret;
+
+ _enter("{%u}", call->unmarshall);
+
+ switch (call->unmarshall) {
+ case 0:
+ /* First entry: the reply starts with a 32-bit count. */
+ afs_extract_to_tmp(call);
+ call->unmarshall++;
+ fallthrough;
+
+ /* Extract the file status count and array in two steps */
+ case 1:
+ _debug("extract status count");
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ tmp = ntohl(call->tmp);
+ _debug("status count: %u/%u", tmp, op->nr_files);
+ /* The server must return exactly one status per requested file. */
+ if (tmp != op->nr_files)
+ return afs_protocol_error(call, afs_eproto_ibulkst_count);
+
+ call->count = 0;
+ call->unmarshall++;
+ /* Jumped to from stage 2 to set up extraction of the next status record
+  * before falling back into that stage.
+  */
+ more_counts:
+ afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchStatus));
+ fallthrough;
+
+ case 2:
+ _debug("extract status array %u", call->count);
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ /* Pick the scb that record number call->count belongs to. */
+ switch (call->count) {
+ case 0:
+ scb = &op->file[0].scb;
+ break;
+ case 1:
+ scb = &op->file[1].scb;
+ break;
+ default:
+ scb = &op->more_files[call->count - 2].scb;
+ break;
+ }
+
+ bp = call->buffer;
+ xdr_decode_YFSFetchStatus(&bp, call, scb);
+
+ call->count++;
+ if (call->count < op->nr_files)
+ goto more_counts;
+
+ /* All statuses read; reset the index and read the callback count. */
+ call->count = 0;
+ call->unmarshall++;
+ afs_extract_to_tmp(call);
+ fallthrough;
+
+ /* Extract the callback count and array in two steps */
+ case 3:
+ _debug("extract CB count");
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ tmp = ntohl(call->tmp);
+ _debug("CB count: %u", tmp);
+ /* As with the statuses, one callback per requested file is required. */
+ if (tmp != op->nr_files)
+ return afs_protocol_error(call, afs_eproto_ibulkst_cb_count);
+ call->count = 0;
+ call->unmarshall++;
+ /* Jumped to from stage 4 to set up extraction of the next callback record
+  * before falling back into that stage.
+  */
+ more_cbs:
+ afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSCallBack));
+ fallthrough;
+
+ case 4:
+ _debug("extract CB array");
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ _debug("unmarshall CB array");
+ /* Same index-to-slot mapping as used for the status records. */
+ switch (call->count) {
+ case 0:
+ scb = &op->file[0].scb;
+ break;
+ case 1:
+ scb = &op->file[1].scb;
+ break;
+ default:
+ scb = &op->more_files[call->count - 2].scb;
+ break;
+ }
+
+ bp = call->buffer;
+ xdr_decode_YFSCallBack(&bp, call, scb);
+ call->count++;
+ if (call->count < op->nr_files)
+ goto more_cbs;
+
+ /* The reply ends with a single volume sync record. */
+ afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSVolSync));
+ call->unmarshall++;
+ fallthrough;
+
+ case 5:
+ /* Last piece of the reply: the second argument is false here. */
+ ret = afs_extract_data(call, false);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer;
+ xdr_decode_YFSVolSync(&bp, &op->volsync);
+
+ call->unmarshall++;
+ fallthrough;
+
+ /* All stages done; nothing further to unmarshal. */
+ case 6:
+ break;
+ }
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * Call type for YFS.InlineBulkStatus: replies are unmarshalled by
+ * yfs_deliver_fs_inline_bulk_status().
+ */
+static const struct afs_call_type yfs_RXYFSInlineBulkStatus = {
+	.name		= "YFS.InlineBulkStatus",
+	.op		= yfs_FS_InlineBulkStatus,
+	.deliver	= yfs_deliver_fs_inline_bulk_status,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Issue a YFS.InlineBulkStatus request covering op->nr_files fids in a single
+ * call (the original comment cites a ceiling of 1024 files; no limit is
+ * checked here).
+ *
+ * The fids are sent in the order op->file[0], op->file[1], then
+ * op->more_files[0 .. nr_files - 3].  Failure to allocate the call is
+ * reported through afs_op_nomem().
+ */
+void yfs_fs_inline_bulk_status(struct afs_operation *op)
+{
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_vnode_param *vp = &op->file[1];
+	struct afs_call *call;
+	size_t req_len;
+	__be32 *xdr;
+	int i;
+
+	_enter(",%x,{%llx:%llu},%u",
+	       key_serial(op->key), vp->fid.vid, vp->fid.vnode, op->nr_files);
+
+	/* Request: opcode, RPC flags and fid count, then one fid per file.
+	 * The reply buffer only has to hold one status record at a time.
+	 */
+	req_len = sizeof(__be32) * 3 +
+		  sizeof(struct yfs_xdr_YFSFid) * op->nr_files;
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSInlineBulkStatus,
+				   req_len,
+				   sizeof(struct yfs_xdr_YFSFetchStatus));
+	if (!call) {
+		afs_op_nomem(op);
+		return;
+	}
+
+	/* Marshal the parameters in wire order. */
+	xdr = call->request;
+	xdr = xdr_encode_u32(xdr, YFSINLINEBULKSTATUS);
+	xdr = xdr_encode_u32(xdr, 0); /* RPCFlags */
+	xdr = xdr_encode_u32(xdr, op->nr_files);
+	xdr = xdr_encode_YFSFid(xdr, &dvp->fid);
+	xdr = xdr_encode_YFSFid(xdr, &vp->fid);
+	for (i = 0; i < op->nr_files - 2; i++)
+		xdr = xdr_encode_YFSFid(xdr, &op->more_files[i].fid);
+	yfs_check_req(call, xdr);
+
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to an YFS.FetchOpaqueACL.
+ *
+ * call->unmarshall selects the stage to resume at.  The stages are: file ACL
+ * length, file ACL body, volume ACL length, volume ACL body, then a trailer
+ * made of two 32-bit words (inherit flag, num_cleaned), a YFSFetchStatus and a
+ * YFSVolSync.
+ *
+ * An ACL body is kept only if the matching YFS_ACL_WANT_ACL or
+ * YFS_ACL_WANT_VOL_ACL bit is set in op->yacl->flags; otherwise it is skipped
+ * with afs_extract_discard().  A kept ACL is allocated here and attached to
+ * yacl->acl or yacl->vol_acl; this function never frees it.
+ *
+ * Returns 0 when the whole reply has been unmarshalled, -ENOMEM if an ACL
+ * buffer cannot be allocated, or the negative value from afs_extract_data().
+ */
+static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call)
+{
+ struct afs_operation *op = call->op;
+ struct afs_vnode_param *vp = &op->file[0];
+ struct yfs_acl *yacl = op->yacl;
+ struct afs_acl *acl;
+ const __be32 *bp;
+ unsigned int size;
+ int ret;
+
+ _enter("{%u}", call->unmarshall);
+
+ switch (call->unmarshall) {
+ case 0:
+ /* First entry: the reply starts with a 32-bit length. */
+ afs_extract_to_tmp(call);
+ call->unmarshall++;
+ fallthrough;
+
+ /* Extract the file ACL length */
+ case 1:
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ /* call->count2 keeps the unpadded length; size is padded to 4 bytes.
+  * NOTE(review): the length comes from the server and is not range
+  * checked before round_up()/kmalloc() - confirm whether a cap is needed.
+  */
+ size = call->count2 = ntohl(call->tmp);
+ size = round_up(size, 4);
+
+ if (yacl->flags & YFS_ACL_WANT_ACL) {
+ acl = kmalloc(struct_size(acl, data, size), GFP_KERNEL);
+ if (!acl)
+ return -ENOMEM;
+ /* Attach before extracting, so the buffer is reachable from yacl. */
+ yacl->acl = acl;
+ acl->size = call->count2;
+ afs_extract_begin(call, acl->data, size);
+ } else {
+ /* Caller did not ask for the file ACL: skip over its bytes. */
+ afs_extract_discard(call, size);
+ }
+ call->unmarshall++;
+ fallthrough;
+
+ /* Extract the file ACL */
+ case 2:
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ /* Next comes the volume ACL length word. */
+ afs_extract_to_tmp(call);
+ call->unmarshall++;
+ fallthrough;
+
+ /* Extract the volume ACL length */
+ case 3:
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ /* Same handling as the file ACL length above, including the lack of a
+  * range check on the server-supplied value (see NOTE(review) there).
+  */
+ size = call->count2 = ntohl(call->tmp);
+ size = round_up(size, 4);
+
+ if (yacl->flags & YFS_ACL_WANT_VOL_ACL) {
+ acl = kmalloc(struct_size(acl, data, size), GFP_KERNEL);
+ if (!acl)
+ return -ENOMEM;
+ yacl->vol_acl = acl;
+ acl->size = call->count2;
+ afs_extract_begin(call, acl->data, size);
+ } else {
+ /* Caller did not ask for the volume ACL: skip over its bytes. */
+ afs_extract_discard(call, size);
+ }
+ call->unmarshall++;
+ fallthrough;
+
+ /* Extract the volume ACL */
+ case 4:
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ /* Trailer: two 32-bit words, then the file status and volume sync. */
+ afs_extract_to_buf(call,
+ sizeof(__be32) * 2 +
+ sizeof(struct yfs_xdr_YFSFetchStatus) +
+ sizeof(struct yfs_xdr_YFSVolSync));
+ call->unmarshall++;
+ fallthrough;
+
+ /* extract the metadata */
+ case 5:
+ /* Last piece of the reply: the second argument is false here. */
+ ret = afs_extract_data(call, false);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer;
+ yacl->inherit_flag = ntohl(*bp++);
+ yacl->num_cleaned = ntohl(*bp++);
+ xdr_decode_YFSFetchStatus(&bp, call, &vp->scb);
+ xdr_decode_YFSVolSync(&bp, &op->volsync);
+
+ call->unmarshall++;
+ fallthrough;
+
+ /* All stages done; nothing further to unmarshal. */
+ case 6:
+ break;
+ }
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * Release a yfs_acl together with the file and volume ACL buffers hanging off
+ * it.  A NULL yacl is accepted and ignored.
+ */
+void yfs_free_opaque_acl(struct yfs_acl *yacl)
+{
+	if (!yacl)
+		return;
+
+	kfree(yacl->acl);
+	kfree(yacl->vol_acl);
+	kfree(yacl);
+}
+
+/*
+ * Call type for YFS.FetchOpaqueACL: replies are unmarshalled by
+ * yfs_deliver_fs_fetch_opaque_acl().
+ */
+static const struct afs_call_type yfs_RXYFSFetchOpaqueACL = {
+	.name		= "YFS.FetchOpaqueACL",
+	.op		= yfs_FS_FetchOpaqueACL,
+	.deliver	= yfs_deliver_fs_fetch_opaque_acl,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Issue a YFS.FetchOpaqueACL request for the file in op->file[0].
+ *
+ * The flat reply buffer is sized for the fixed trailer only: two 32-bit
+ * words, a file status record and a volume sync record.  Failure to allocate
+ * the call is reported through afs_op_nomem().  The call is dispatched with
+ * GFP_KERNEL.
+ */
+void yfs_fs_fetch_opaque_acl(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[0];
+	struct afs_call *call;
+	size_t req_len, reply_len;
+	__be32 *xdr;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), vp->fid.vid, vp->fid.vnode);
+
+	req_len = sizeof(__be32) * 2 +
+		  sizeof(struct yfs_xdr_YFSFid);
+	reply_len = sizeof(__be32) * 2 +
+		    sizeof(struct yfs_xdr_YFSFetchStatus) +
+		    sizeof(struct yfs_xdr_YFSVolSync);
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSFetchOpaqueACL,
+				   req_len, reply_len);
+	if (!call) {
+		afs_op_nomem(op);
+		return;
+	}
+
+	/* Marshal the parameters in wire order. */
+	xdr = call->request;
+	xdr = xdr_encode_u32(xdr, YFSFETCHOPAQUEACL);
+	xdr = xdr_encode_u32(xdr, 0); /* RPC flags */
+	xdr = xdr_encode_YFSFid(xdr, &vp->fid);
+	yfs_check_req(call, xdr);
+
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_KERNEL);
+}
+
+/*
+ * Call type for YFS.StoreOpaqueACL2: the reply is a status + volsync pair.
+ */
+static const struct afs_call_type yfs_RXYFSStoreOpaqueACL2 = {
+	.name		= "YFS.StoreOpaqueACL2",
+	.op		= yfs_FS_StoreOpaqueACL2,
+	.deliver	= yfs_deliver_status_and_volsync,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Store the YFS ACL held in op->acl onto the file in op->file[0] using
+ * YFS.StoreOpaqueACL2.
+ *
+ * The ACL is sent as a 32-bit length followed by the data, zero-padded up to
+ * a multiple of four bytes.  Failure to allocate the call is reported through
+ * afs_op_nomem().  The call is dispatched with GFP_KERNEL.
+ */
+void yfs_fs_store_opaque_acl2(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[0];
+	struct afs_acl *acl = op->acl;
+	struct afs_call *call;
+	size_t padded, req_len, reply_len;
+	__be32 *xdr;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), vp->fid.vid, vp->fid.vnode);
+
+	padded = round_up(acl->size, 4);
+
+	/* Request: opcode + RPC flags, the fid, the ACL length, padded ACL. */
+	req_len = sizeof(__be32) * 2 +
+		  sizeof(struct yfs_xdr_YFSFid) +
+		  sizeof(__be32) + padded;
+	reply_len = sizeof(struct yfs_xdr_YFSFetchStatus) +
+		    sizeof(struct yfs_xdr_YFSVolSync);
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSStoreOpaqueACL2,
+				   req_len, reply_len);
+	if (!call) {
+		afs_op_nomem(op);
+		return;
+	}
+
+	/* Marshal the parameters in wire order. */
+	xdr = call->request;
+	xdr = xdr_encode_u32(xdr, YFSSTOREOPAQUEACL2);
+	xdr = xdr_encode_u32(xdr, 0); /* RPC flags */
+	xdr = xdr_encode_YFSFid(xdr, &vp->fid);
+	xdr = xdr_encode_u32(xdr, acl->size);
+
+	/* Copy the ACL and zero whatever padding follows it. */
+	memcpy(xdr, acl->data, acl->size);
+	if (padded != acl->size)
+		memset((void *)xdr + acl->size, 0, padded - acl->size);
+	xdr += padded / sizeof(__be32);
+	yfs_check_req(call, xdr);
+
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_KERNEL);
+}