diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 10:05:51 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 10:05:51 +0000 |
commit | 5d1646d90e1f2cceb9f0828f4b28318cd0ec7744 (patch) | |
tree | a94efe259b9009378be6d90eb30d2b019d95c194 /fs/afs | |
parent | Initial commit. (diff) | |
download | linux-upstream.tar.xz linux-upstream.zip |
Adding upstream version 5.10.209.upstream/5.10.209upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'fs/afs')
44 files changed, 24292 insertions, 0 deletions
diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig new file mode 100644 index 000000000..1ad211d72 --- /dev/null +++ b/fs/afs/Kconfig @@ -0,0 +1,42 @@ +# SPDX-License-Identifier: GPL-2.0-only +config AFS_FS + tristate "Andrew File System support (AFS)" + depends on INET + select AF_RXRPC + select DNS_RESOLVER + help + If you say Y here, you will get an experimental Andrew File System + driver. It currently only supports unsecured read-only AFS access. + + See <file:Documentation/filesystems/afs.rst> for more information. + + If unsure, say N. + +config AFS_DEBUG + bool "AFS dynamic debugging" + depends on AFS_FS + help + Say Y here to make runtime controllable debugging messages appear. + + See <file:Documentation/filesystems/afs.rst> for more information. + + If unsure, say N. + +config AFS_FSCACHE + bool "Provide AFS client caching support" + depends on AFS_FS=m && FSCACHE || AFS_FS=y && FSCACHE=y + help + Say Y here if you want AFS data to be cached locally on disk through + the generic filesystem cache manager + +config AFS_DEBUG_CURSOR + bool "AFS server cursor debugging" + depends on AFS_FS + help + Say Y here to cause the contents of a server cursor to be dumped to + the dmesg log if the server rotation algorithm fails to successfully + contact a server. + + See <file:Documentation/filesystems/afs.rst> for more information. + + If unsure, say N. diff --git a/fs/afs/Makefile b/fs/afs/Makefile new file mode 100644 index 000000000..75c4e4043 --- /dev/null +++ b/fs/afs/Makefile @@ -0,0 +1,44 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for Red Hat Linux AFS client. +# + +afs-cache-$(CONFIG_AFS_FSCACHE) := cache.o + +kafs-y := \ + $(afs-cache-y) \ + addr_list.o \ + callback.o \ + cell.o \ + cmservice.o \ + dir.o \ + dir_edit.o \ + dir_silly.o \ + dynroot.o \ + file.o \ + flock.o \ + fsclient.o \ + fs_operation.o \ + fs_probe.o \ + inode.o \ + main.o \ + misc.o \ + mntpt.o \ + rotate.o \ + rxrpc.o \ + security.o \ + server.o \ + server_list.o \ + super.o \ + vlclient.o \ + vl_alias.o \ + vl_list.o \ + vl_probe.o \ + vl_rotate.o \ + volume.o \ + write.o \ + xattr.o \ + yfsclient.o + +kafs-$(CONFIG_PROC_FS) += proc.o +obj-$(CONFIG_AFS_FS) := kafs.o diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c new file mode 100644 index 000000000..de1ae0bea --- /dev/null +++ b/fs/afs/addr_list.c @@ -0,0 +1,404 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Server address list management + * + * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/slab.h> +#include <linux/ctype.h> +#include <linux/dns_resolver.h> +#include <linux/inet.h> +#include <keys/rxrpc-type.h> +#include "internal.h" +#include "afs_fs.h" + +/* + * Release an address list. + */ +void afs_put_addrlist(struct afs_addr_list *alist) +{ + if (alist && refcount_dec_and_test(&alist->usage)) + kfree_rcu(alist, rcu); +} + +/* + * Allocate an address list. + */ +struct afs_addr_list *afs_alloc_addrlist(unsigned int nr, + unsigned short service, + unsigned short port) +{ + struct afs_addr_list *alist; + unsigned int i; + + _enter("%u,%u,%u", nr, service, port); + + if (nr > AFS_MAX_ADDRESSES) + nr = AFS_MAX_ADDRESSES; + + alist = kzalloc(struct_size(alist, addrs, nr), GFP_KERNEL); + if (!alist) + return NULL; + + refcount_set(&alist->usage, 1); + alist->max_addrs = nr; + + for (i = 0; i < nr; i++) { + struct sockaddr_rxrpc *srx = &alist->addrs[i]; + srx->srx_family = AF_RXRPC; + srx->srx_service = service; + srx->transport_type = SOCK_DGRAM; + srx->transport_len = sizeof(srx->transport.sin6); + srx->transport.sin6.sin6_family = AF_INET6; + srx->transport.sin6.sin6_port = htons(port); + } + + return alist; +} + +/* + * Parse a text string consisting of delimited addresses. + */ +struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *net, + const char *text, size_t len, + char delim, + unsigned short service, + unsigned short port) +{ + struct afs_vlserver_list *vllist; + struct afs_addr_list *alist; + const char *p, *end = text + len; + const char *problem; + unsigned int nr = 0; + int ret = -ENOMEM; + + _enter("%*.*s,%c", (int)len, (int)len, text, delim); + + if (!len) { + _leave(" = -EDESTADDRREQ [empty]"); + return ERR_PTR(-EDESTADDRREQ); + } + + if (delim == ':' && (memchr(text, ',', len) || !memchr(text, '.', len))) + delim = ','; + + /* Count the addresses */ + p = text; + do { + if (!*p) { + problem = "nul"; + goto inval; + } + if (*p == delim) + continue; + nr++; + if (*p == '[') { + p++; + if (p == end) { + problem = "brace1"; + goto inval; + } + p = memchr(p, ']', end - p); + if (!p) { + problem = "brace2"; + goto inval; + } + p++; + if (p >= end) + break; + } + + p = memchr(p, delim, end - p); + if (!p) + break; + p++; + } while (p < end); + + _debug("%u/%u addresses", nr, AFS_MAX_ADDRESSES); + + vllist = afs_alloc_vlserver_list(1); + if (!vllist) + return ERR_PTR(-ENOMEM); + + vllist->nr_servers = 1; + vllist->servers[0].server = afs_alloc_vlserver("<dummy>", 7, AFS_VL_PORT); + if (!vllist->servers[0].server) + goto error_vl; + + alist = afs_alloc_addrlist(nr, service, AFS_VL_PORT); + if (!alist) + goto error; + + /* Extract the addresses */ + p = text; + do { + const char *q, *stop; + unsigned int xport = port; + __be32 x[4]; + int family; + + if (*p == delim) { + p++; + continue; + } + + if (*p == '[') { + p++; + q = memchr(p, ']', end - p); + } else { + for (q = p; q < end; q++) + if (*q == '+' || *q == delim) + break; + } + + if (in4_pton(p, q - p, (u8 *)&x[0], -1, &stop)) { + family = AF_INET; + } else if (in6_pton(p, q - p, (u8 *)x, -1, &stop)) { + family = AF_INET6; + } else { + problem = "family"; + goto bad_address; + } + + p = q; + if (stop != p) { + problem = "nostop"; + goto bad_address; + } + + if (q < end && *q == ']') + p++; + + if (p < end) { + if (*p == '+') { + /* Port number specification "+1234" */ + xport = 0; + p++; + if (p >= end || !isdigit(*p)) { + problem = "port"; + goto bad_address; + } + do { + xport *= 10; + xport += *p - '0'; + if (xport > 65535) { + problem = "pval"; + goto bad_address; + } + p++; + } while (p < end && isdigit(*p)); + } else if (*p == delim) { + p++; + } else { + problem = "weird"; + goto bad_address; + } + } + + if (family == AF_INET) + afs_merge_fs_addr4(alist, x[0], xport); + else + afs_merge_fs_addr6(alist, x, xport); + + } while (p < end); + + rcu_assign_pointer(vllist->servers[0].server->addresses, alist); + _leave(" = [nr %u]", alist->nr_addrs); + return vllist; + +inval: + _leave(" = -EINVAL [%s %zu %*.*s]", + problem, p - text, (int)len, (int)len, text); + return ERR_PTR(-EINVAL); +bad_address: + _leave(" = -EINVAL [%s %zu %*.*s]", + problem, p - text, (int)len, (int)len, text); + ret = -EINVAL; +error: + afs_put_addrlist(alist); +error_vl: + afs_put_vlserverlist(net, vllist); + return ERR_PTR(ret); +} + +/* + * Compare old and new address lists to see if there's been any change. + * - How to do this in better than O(Nlog(N)) time? + * - We don't really want to sort the address list, but would rather take the + * list as we got it so as not to undo record rotation by the DNS server. + */ +#if 0 +static int afs_cmp_addr_list(const struct afs_addr_list *a1, + const struct afs_addr_list *a2) +{ +} +#endif + +/* + * Perform a DNS query for VL servers and build a up an address list. + */ +struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry) +{ + struct afs_vlserver_list *vllist; + char *result = NULL; + int ret; + + _enter("%s", cell->name); + + ret = dns_query(cell->net->net, "afsdb", cell->name, cell->name_len, + "srv=1", &result, _expiry, true); + if (ret < 0) { + _leave(" = %d [dns]", ret); + return ERR_PTR(ret); + } + + if (*_expiry == 0) + *_expiry = ktime_get_real_seconds() + 60; + + if (ret > 1 && result[0] == 0) + vllist = afs_extract_vlserver_list(cell, result, ret); + else + vllist = afs_parse_text_addrs(cell->net, result, ret, ',', + VL_SERVICE, AFS_VL_PORT); + kfree(result); + if (IS_ERR(vllist) && vllist != ERR_PTR(-ENOMEM)) + pr_err("Failed to parse DNS data %ld\n", PTR_ERR(vllist)); + + return vllist; +} + +/* + * Merge an IPv4 entry into a fileserver address list. + */ +void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port) +{ + struct sockaddr_rxrpc *srx; + u32 addr = ntohl(xdr); + int i; + + if (alist->nr_addrs >= alist->max_addrs) + return; + + for (i = 0; i < alist->nr_ipv4; i++) { + struct sockaddr_in *a = &alist->addrs[i].transport.sin; + u32 a_addr = ntohl(a->sin_addr.s_addr); + u16 a_port = ntohs(a->sin_port); + + if (addr == a_addr && port == a_port) + return; + if (addr == a_addr && port < a_port) + break; + if (addr < a_addr) + break; + } + + if (i < alist->nr_addrs) + memmove(alist->addrs + i + 1, + alist->addrs + i, + sizeof(alist->addrs[0]) * (alist->nr_addrs - i)); + + srx = &alist->addrs[i]; + srx->srx_family = AF_RXRPC; + srx->transport_type = SOCK_DGRAM; + srx->transport_len = sizeof(srx->transport.sin); + srx->transport.sin.sin_family = AF_INET; + srx->transport.sin.sin_port = htons(port); + srx->transport.sin.sin_addr.s_addr = xdr; + alist->nr_ipv4++; + alist->nr_addrs++; +} + +/* + * Merge an IPv6 entry into a fileserver address list. + */ +void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port) +{ + struct sockaddr_rxrpc *srx; + int i, diff; + + if (alist->nr_addrs >= alist->max_addrs) + return; + + for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) { + struct sockaddr_in6 *a = &alist->addrs[i].transport.sin6; + u16 a_port = ntohs(a->sin6_port); + + diff = memcmp(xdr, &a->sin6_addr, 16); + if (diff == 0 && port == a_port) + return; + if (diff == 0 && port < a_port) + break; + if (diff < 0) + break; + } + + if (i < alist->nr_addrs) + memmove(alist->addrs + i + 1, + alist->addrs + i, + sizeof(alist->addrs[0]) * (alist->nr_addrs - i)); + + srx = &alist->addrs[i]; + srx->srx_family = AF_RXRPC; + srx->transport_type = SOCK_DGRAM; + srx->transport_len = sizeof(srx->transport.sin6); + srx->transport.sin6.sin6_family = AF_INET6; + srx->transport.sin6.sin6_port = htons(port); + memcpy(&srx->transport.sin6.sin6_addr, xdr, 16); + alist->nr_addrs++; +} + +/* + * Get an address to try. + */ +bool afs_iterate_addresses(struct afs_addr_cursor *ac) +{ + unsigned long set, failed; + int index; + + if (!ac->alist) + return false; + + set = ac->alist->responded; + failed = ac->alist->failed; + _enter("%lx-%lx-%lx,%d", set, failed, ac->tried, ac->index); + + ac->nr_iterations++; + + set &= ~(failed | ac->tried); + + if (!set) + return false; + + index = READ_ONCE(ac->alist->preferred); + if (test_bit(index, &set)) + goto selected; + + index = __ffs(set); + +selected: + ac->index = index; + set_bit(index, &ac->tried); + ac->responded = false; + return true; +} + +/* + * Release an address list cursor. + */ +int afs_end_cursor(struct afs_addr_cursor *ac) +{ + struct afs_addr_list *alist; + + alist = ac->alist; + if (alist) { + if (ac->responded && + ac->index != alist->preferred && + test_bit(ac->alist->preferred, &ac->tried)) + WRITE_ONCE(alist->preferred, ac->index); + afs_put_addrlist(alist); + ac->alist = NULL; + } + + return ac->error; +} diff --git a/fs/afs/afs.h b/fs/afs/afs.h new file mode 100644 index 000000000..432cb4b23 --- /dev/null +++ b/fs/afs/afs.h @@ -0,0 +1,205 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* AFS common types + * + * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#ifndef AFS_H +#define AFS_H + +#include <linux/in.h> + +#define AFS_MAXCELLNAME 256 /* Maximum length of a cell name */ +#define AFS_MAXVOLNAME 64 /* Maximum length of a volume name */ +#define AFS_MAXNSERVERS 8 /* Maximum servers in a basic volume record */ +#define AFS_NMAXNSERVERS 13 /* Maximum servers in a N/U-class volume record */ +#define AFS_MAXTYPES 3 /* Maximum number of volume types */ +#define AFSNAMEMAX 256 /* Maximum length of a filename plus NUL */ +#define AFSPATHMAX 1024 /* Maximum length of a pathname plus NUL */ +#define AFSOPAQUEMAX 1024 /* Maximum length of an opaque field */ + +#define AFS_VL_MAX_LIFESPAN (120 * HZ) +#define AFS_PROBE_MAX_LIFESPAN (30 * HZ) + +typedef u64 afs_volid_t; +typedef u64 afs_vnodeid_t; +typedef u64 afs_dataversion_t; + +typedef enum { + AFSVL_RWVOL, /* read/write volume */ + AFSVL_ROVOL, /* read-only volume */ + AFSVL_BACKVOL, /* backup volume */ +} __attribute__((packed)) afs_voltype_t; + +typedef enum { + AFS_FTYPE_INVALID = 0, + AFS_FTYPE_FILE = 1, + AFS_FTYPE_DIR = 2, + AFS_FTYPE_SYMLINK = 3, +} afs_file_type_t; + +typedef enum { + AFS_LOCK_READ = 0, /* read lock request */ + AFS_LOCK_WRITE = 1, /* write lock request */ +} afs_lock_type_t; + +#define AFS_LOCKWAIT (5 * 60) /* time until a lock times out (seconds) */ + +/* + * AFS file identifier + */ +struct afs_fid { + afs_volid_t vid; /* volume ID */ + afs_vnodeid_t vnode; /* Lower 64-bits of file index within volume */ + u32 vnode_hi; /* Upper 32-bits of file index */ + u32 unique; /* unique ID number (file index version) */ +}; + +/* + * AFS callback notification + */ +typedef enum { + AFSCM_CB_UNTYPED = 0, /* no type set on CB break */ + AFSCM_CB_EXCLUSIVE = 1, /* CB exclusive to CM [not implemented] */ + AFSCM_CB_SHARED = 2, /* CB shared by other CM's */ + AFSCM_CB_DROPPED = 3, /* CB promise cancelled by file server */ +} afs_callback_type_t; + +struct afs_callback { + time64_t expires_at; /* Time at which expires */ + //unsigned version; /* Callback version */ + //afs_callback_type_t type; /* Type of callback */ +}; + +struct afs_callback_break { + struct afs_fid fid; /* File identifier */ + //struct afs_callback cb; /* Callback details */ +}; + +#define AFSCBMAX 50 /* maximum callbacks transferred per bulk op */ + +struct afs_uuid { + __be32 time_low; /* low part of timestamp */ + __be16 time_mid; /* mid part of timestamp */ + __be16 time_hi_and_version; /* high part of timestamp and version */ + __s8 clock_seq_hi_and_reserved; /* clock seq hi and variant */ + __s8 clock_seq_low; /* clock seq low */ + __s8 node[6]; /* spatially unique node ID (MAC addr) */ +}; + +/* + * AFS volume information + */ +struct afs_volume_info { + afs_volid_t vid; /* volume ID */ + afs_voltype_t type; /* type of this volume */ + afs_volid_t type_vids[5]; /* volume ID's for possible types for this vol */ + + /* list of fileservers serving this volume */ + size_t nservers; /* number of entries used in servers[] */ + struct { + struct in_addr addr; /* fileserver address */ + } servers[8]; +}; + +/* + * AFS security ACE access mask + */ +typedef u32 afs_access_t; +#define AFS_ACE_READ 0x00000001U /* - permission to read a file/dir */ +#define AFS_ACE_WRITE 0x00000002U /* - permission to write/chmod a file */ +#define AFS_ACE_INSERT 0x00000004U /* - permission to create dirent in a dir */ +#define AFS_ACE_LOOKUP 0x00000008U /* - permission to lookup a file/dir in a dir */ +#define AFS_ACE_DELETE 0x00000010U /* - permission to delete a dirent from a dir */ +#define AFS_ACE_LOCK 0x00000020U /* - permission to lock a file */ +#define AFS_ACE_ADMINISTER 0x00000040U /* - permission to change ACL */ +#define AFS_ACE_USER_A 0x01000000U /* - 'A' user-defined permission */ +#define AFS_ACE_USER_B 0x02000000U /* - 'B' user-defined permission */ +#define AFS_ACE_USER_C 0x04000000U /* - 'C' user-defined permission */ +#define AFS_ACE_USER_D 0x08000000U /* - 'D' user-defined permission */ +#define AFS_ACE_USER_E 0x10000000U /* - 'E' user-defined permission */ +#define AFS_ACE_USER_F 0x20000000U /* - 'F' user-defined permission */ +#define AFS_ACE_USER_G 0x40000000U /* - 'G' user-defined permission */ +#define AFS_ACE_USER_H 0x80000000U /* - 'H' user-defined permission */ + +/* + * AFS file status information + */ +struct afs_file_status { + u64 size; /* file size */ + afs_dataversion_t data_version; /* current data version */ + struct timespec64 mtime_client; /* Last time client changed data */ + struct timespec64 mtime_server; /* Last time server changed data */ + s64 author; /* author ID */ + s64 owner; /* owner ID */ + s64 group; /* group ID */ + afs_access_t caller_access; /* access rights for authenticated caller */ + afs_access_t anon_access; /* access rights for unauthenticated caller */ + umode_t mode; /* UNIX mode */ + afs_file_type_t type; /* file type */ + u32 nlink; /* link count */ + s32 lock_count; /* file lock count (0=UNLK -1=WRLCK +ve=#RDLCK */ + u32 abort_code; /* Abort if bulk-fetching this failed */ +}; + +struct afs_status_cb { + struct afs_file_status status; + struct afs_callback callback; + bool have_status; /* True if status record was retrieved */ + bool have_cb; /* True if cb record was retrieved */ + bool have_error; /* True if status.abort_code indicates an error */ +}; + +/* + * AFS file status change request + */ + +#define AFS_SET_MTIME 0x01 /* set the mtime */ +#define AFS_SET_OWNER 0x02 /* set the owner ID */ +#define AFS_SET_GROUP 0x04 /* set the group ID (unsupported?) */ +#define AFS_SET_MODE 0x08 /* set the UNIX mode */ +#define AFS_SET_SEG_SIZE 0x10 /* set the segment size (unsupported) */ + +/* + * AFS volume synchronisation information + */ +struct afs_volsync { + time64_t creation; /* volume creation time */ +}; + +/* + * AFS volume status record + */ +struct afs_volume_status { + afs_volid_t vid; /* volume ID */ + afs_volid_t parent_id; /* parent volume ID */ + u8 online; /* true if volume currently online and available */ + u8 in_service; /* true if volume currently in service */ + u8 blessed; /* same as in_service */ + u8 needs_salvage; /* true if consistency checking required */ + u32 type; /* volume type (afs_voltype_t) */ + u64 min_quota; /* minimum space set aside (blocks) */ + u64 max_quota; /* maximum space this volume may occupy (blocks) */ + u64 blocks_in_use; /* space this volume currently occupies (blocks) */ + u64 part_blocks_avail; /* space available in volume's partition */ + u64 part_max_blocks; /* size of volume's partition */ + s64 vol_copy_date; + s64 vol_backup_date; +}; + +#define AFS_BLOCK_SIZE 1024 + +/* + * XDR encoding of UUID in AFS. + */ +struct afs_uuid__xdr { + __be32 time_low; + __be32 time_mid; + __be32 time_hi_and_version; + __be32 clock_seq_hi_and_reserved; + __be32 clock_seq_low; + __be32 node[6]; +}; + +#endif /* AFS_H */ diff --git a/fs/afs/afs_cm.h b/fs/afs/afs_cm.h new file mode 100644 index 000000000..565cbe0a8 --- /dev/null +++ b/fs/afs/afs_cm.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* AFS Cache Manager definitions + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#ifndef AFS_CM_H +#define AFS_CM_H + +#define AFS_CM_PORT 7001 /* AFS file server port */ +#define CM_SERVICE 1 /* AFS File Service ID */ + +enum AFS_CM_Operations { + CBCallBack = 204, /* break callback promises */ + CBInitCallBackState = 205, /* initialise callback state */ + CBProbe = 206, /* probe client */ + CBGetLock = 207, /* get contents of CM lock table */ + CBGetCE = 208, /* get cache file description */ + CBGetXStatsVersion = 209, /* get version of extended statistics */ + CBGetXStats = 210, /* get contents of extended statistics data */ + CBInitCallBackState3 = 213, /* initialise callback state, version 3 */ + CBProbeUuid = 214, /* check the client hasn't rebooted */ + CBTellMeAboutYourself = 65538, /* get client capabilities */ +}; + +#define AFS_CAP_ERROR_TRANSLATION 0x1 + +#endif /* AFS_FS_H */ diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h new file mode 100644 index 000000000..20ab344ba --- /dev/null +++ b/fs/afs/afs_fs.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* AFS File Service definitions + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#ifndef AFS_FS_H +#define AFS_FS_H + +#define AFS_FS_PORT 7000 /* AFS file server port */ +#define FS_SERVICE 1 /* AFS File Service ID */ + +enum AFS_FS_Operations { + FSFETCHDATA = 130, /* AFS Fetch file data */ + FSFETCHACL = 131, /* AFS Fetch file ACL */ + FSFETCHSTATUS = 132, /* AFS Fetch file status */ + FSSTOREDATA = 133, /* AFS Store file data */ + FSSTOREACL = 134, /* AFS Store file ACL */ + FSSTORESTATUS = 135, /* AFS Store file status */ + FSREMOVEFILE = 136, /* AFS Remove a file */ + FSCREATEFILE = 137, /* AFS Create a file */ + FSRENAME = 138, /* AFS Rename or move a file or directory */ + FSSYMLINK = 139, /* AFS Create a symbolic link */ + FSLINK = 140, /* AFS Create a hard link */ + FSMAKEDIR = 141, /* AFS Create a directory */ + FSREMOVEDIR = 142, /* AFS Remove a directory */ + FSGIVEUPCALLBACKS = 147, /* AFS Discard callback promises */ + FSGETVOLUMEINFO = 148, /* AFS Get information about a volume */ + FSGETVOLUMESTATUS = 149, /* AFS Get volume status information */ + FSGETROOTVOLUME = 151, /* AFS Get root volume name */ + FSBULKSTATUS = 155, /* AFS Fetch multiple file statuses */ + FSSETLOCK = 156, /* AFS Request a file lock */ + FSEXTENDLOCK = 157, /* AFS Extend a file lock */ + FSRELEASELOCK = 158, /* AFS Release a file lock */ + FSLOOKUP = 161, /* AFS lookup file in directory */ + FSINLINEBULKSTATUS = 65536, /* AFS Fetch multiple file statuses with inline errors */ + FSFETCHDATA64 = 65537, /* AFS Fetch file data */ + FSSTOREDATA64 = 65538, /* AFS Store file data */ + FSGIVEUPALLCALLBACKS = 65539, /* AFS Give up all outstanding callbacks on a server */ + FSGETCAPABILITIES = 65540, /* Probe and get the capabilities of a fileserver */ +}; + +enum AFS_FS_Errors { + VRESTARTING = -100, /* Server is restarting */ + VSALVAGE = 101, /* volume needs salvaging */ + VNOVNODE = 102, /* no such file/dir (vnode) */ + VNOVOL = 103, /* no such volume or volume unavailable */ + VVOLEXISTS = 104, /* volume name already exists */ + VNOSERVICE = 105, /* volume not currently in service */ + VOFFLINE = 106, /* volume is currently offline (more info available [VVL-spec]) */ + VONLINE = 107, /* volume is already online */ + VDISKFULL = 108, /* disk partition is full */ + VOVERQUOTA = 109, /* volume's maximum quota exceeded */ + VBUSY = 110, /* volume is temporarily unavailable */ + VMOVED = 111, /* volume moved to new server - ask this FS where */ + VIO = 112, /* I/O error in volume */ + VSALVAGING = 113, /* Volume is being salvaged */ + VRESTRICTED = 120, /* Volume is restricted from using */ +}; + +#endif /* AFS_FS_H */ diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h new file mode 100644 index 000000000..9c65ffb8a --- /dev/null +++ b/fs/afs/afs_vl.h @@ -0,0 +1,146 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* AFS Volume Location Service client interface + * + * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#ifndef AFS_VL_H +#define AFS_VL_H + +#include "afs.h" + +#define AFS_VL_PORT 7003 /* volume location service port */ +#define VL_SERVICE 52 /* RxRPC service ID for the Volume Location service */ +#define YFS_VL_SERVICE 2503 /* Service ID for AuriStor upgraded VL service */ + +enum AFSVL_Operations { + VLGETENTRYBYID = 503, /* AFS Get VLDB entry by ID */ + VLGETENTRYBYNAME = 504, /* AFS Get VLDB entry by name */ + VLPROBE = 514, /* AFS probe VL service */ + VLGETENTRYBYIDU = 526, /* AFS Get VLDB entry by ID (UUID-variant) */ + VLGETENTRYBYNAMEU = 527, /* AFS Get VLDB entry by name (UUID-variant) */ + VLGETADDRSU = 533, /* AFS Get addrs for fileserver */ + YVLGETENDPOINTS = 64002, /* YFS Get endpoints for file/volume server */ + YVLGETCELLNAME = 64014, /* YFS Get actual cell name */ + VLGETCAPABILITIES = 65537, /* AFS Get server capabilities */ +}; + +enum AFSVL_Errors { + AFSVL_IDEXIST = 363520, /* Volume Id entry exists in vl database */ + AFSVL_IO = 363521, /* I/O related error */ + AFSVL_NAMEEXIST = 363522, /* Volume name entry exists in vl database */ + AFSVL_CREATEFAIL = 363523, /* Internal creation failure */ + AFSVL_NOENT = 363524, /* No such entry */ + AFSVL_EMPTY = 363525, /* Vl database is empty */ + AFSVL_ENTDELETED = 363526, /* Entry is deleted (soft delete) */ + AFSVL_BADNAME = 363527, /* Volume name is illegal */ + AFSVL_BADINDEX = 363528, /* Index is out of range */ + AFSVL_BADVOLTYPE = 363529, /* Bad volume type */ + AFSVL_BADSERVER = 363530, /* Illegal server number (out of range) */ + AFSVL_BADPARTITION = 363531, /* Bad partition number */ + AFSVL_REPSFULL = 363532, /* Run out of space for Replication sites */ + AFSVL_NOREPSERVER = 363533, /* No such Replication server site exists */ + AFSVL_DUPREPSERVER = 363534, /* Replication site already exists */ + AFSVL_RWNOTFOUND = 363535, /* Parent R/W entry not found */ + AFSVL_BADREFCOUNT = 363536, /* Illegal Reference Count number */ + AFSVL_SIZEEXCEEDED = 363537, /* Vl size for attributes exceeded */ + AFSVL_BADENTRY = 363538, /* Bad incoming vl entry */ + AFSVL_BADVOLIDBUMP = 363539, /* Illegal max volid increment */ + AFSVL_IDALREADYHASHED = 363540, /* RO/BACK id already hashed */ + AFSVL_ENTRYLOCKED = 363541, /* Vl entry is already locked */ + AFSVL_BADVOLOPER = 363542, /* Bad volume operation code */ + AFSVL_BADRELLOCKTYPE = 363543, /* Bad release lock type */ + AFSVL_RERELEASE = 363544, /* Status report: last release was aborted */ + AFSVL_BADSERVERFLAG = 363545, /* Invalid replication site server flag */ + AFSVL_PERM = 363546, /* No permission access */ + AFSVL_NOMEM = 363547, /* malloc/realloc failed to alloc enough memory */ +}; + +enum { + YFS_SERVER_INDEX = 0, + YFS_SERVER_UUID = 1, + YFS_SERVER_ENDPOINT = 2, +}; + +enum { + YFS_ENDPOINT_IPV4 = 0, + YFS_ENDPOINT_IPV6 = 1, +}; + +#define YFS_MAXENDPOINTS 16 + +/* + * maps to "struct vldbentry" in vvl-spec.pdf + */ +struct afs_vldbentry { + char name[65]; /* name of volume (with NUL char) */ + afs_voltype_t type; /* volume type */ + unsigned num_servers; /* num servers that hold instances of this vol */ + unsigned clone_id; /* cloning ID */ + + unsigned flags; +#define AFS_VLF_RWEXISTS 0x1000 /* R/W volume exists */ +#define AFS_VLF_ROEXISTS 0x2000 /* R/O volume exists */ +#define AFS_VLF_BACKEXISTS 0x4000 /* backup volume exists */ + + afs_volid_t volume_ids[3]; /* volume IDs */ + + struct { + struct in_addr addr; /* server address */ + unsigned partition; /* partition ID on this server */ + unsigned flags; /* server specific flags */ +#define AFS_VLSF_NEWREPSITE 0x0001 /* Ignore all 'non-new' servers */ +#define AFS_VLSF_ROVOL 0x0002 /* this server holds a R/O instance of the volume */ +#define AFS_VLSF_RWVOL 0x0004 /* this server holds a R/W instance of the volume */ +#define AFS_VLSF_BACKVOL 0x0008 /* this server holds a backup instance of the volume */ +#define AFS_VLSF_UUID 0x0010 /* This server is referred to by its UUID */ +#define AFS_VLSF_DONTUSE 0x0020 /* This server ref should be ignored */ + } servers[8]; +}; + +#define AFS_VLDB_MAXNAMELEN 65 + + +struct afs_ListAddrByAttributes__xdr { + __be32 Mask; +#define AFS_VLADDR_IPADDR 0x1 /* Match by ->ipaddr */ +#define AFS_VLADDR_INDEX 0x2 /* Match by ->index */ +#define AFS_VLADDR_UUID 0x4 /* Match by ->uuid */ + __be32 ipaddr; + __be32 index; + __be32 spare; + struct afs_uuid__xdr uuid; +}; + +struct afs_uvldbentry__xdr { + __be32 name[AFS_VLDB_MAXNAMELEN]; + __be32 nServers; + struct afs_uuid__xdr serverNumber[AFS_NMAXNSERVERS]; + __be32 serverUnique[AFS_NMAXNSERVERS]; + __be32 serverPartition[AFS_NMAXNSERVERS]; + __be32 serverFlags[AFS_NMAXNSERVERS]; + __be32 volumeId[AFS_MAXTYPES]; + __be32 cloneId; + __be32 flags; + __be32 spares1; + __be32 spares2; + __be32 spares3; + __be32 spares4; + __be32 spares5; + __be32 spares6; + __be32 spares7; + __be32 spares8; + __be32 spares9; +}; + +struct afs_address_list { + refcount_t usage; + unsigned int version; + unsigned int nr_addrs; + struct sockaddr_rxrpc addrs[]; +}; + +extern void afs_put_address_list(struct afs_address_list *alist); + +#endif /* AFS_VL_H */ diff --git a/fs/afs/cache.c b/fs/afs/cache.c new file mode 100644 index 000000000..037af93e3 --- /dev/null +++ b/fs/afs/cache.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* AFS caching stuff + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/sched.h> +#include "internal.h" + +static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data, + const void *buffer, + uint16_t buflen, + loff_t object_size); + +struct fscache_netfs afs_cache_netfs = { + .name = "afs", + .version = 2, +}; + +struct fscache_cookie_def afs_cell_cache_index_def = { + .name = "AFS.cell", + .type = FSCACHE_COOKIE_TYPE_INDEX, +}; + +struct fscache_cookie_def afs_volume_cache_index_def = { + .name = "AFS.volume", + .type = FSCACHE_COOKIE_TYPE_INDEX, +}; + +struct fscache_cookie_def afs_vnode_cache_index_def = { + .name = "AFS.vnode", + .type = FSCACHE_COOKIE_TYPE_DATAFILE, + .check_aux = afs_vnode_cache_check_aux, +}; + +/* + * check that the auxiliary data indicates that the entry is still valid + */ +static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data, + const void *buffer, + uint16_t buflen, + loff_t object_size) +{ + struct afs_vnode *vnode = cookie_netfs_data; + struct afs_vnode_cache_aux aux; + + _enter("{%llx,%x,%llx},%p,%u", + vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version, + buffer, buflen); + + memcpy(&aux, buffer, sizeof(aux)); + + /* check the size of the data is what we're expecting */ + if (buflen != sizeof(aux)) { + _leave(" = OBSOLETE [len %hx != %zx]", buflen, sizeof(aux)); + return FSCACHE_CHECKAUX_OBSOLETE; + } + + if (vnode->status.data_version != aux.data_version) { + _leave(" = OBSOLETE [vers %llx != %llx]", + aux.data_version, vnode->status.data_version); + return FSCACHE_CHECKAUX_OBSOLETE; + } + + _leave(" = SUCCESS"); + return FSCACHE_CHECKAUX_OKAY; +} diff --git a/fs/afs/callback.c b/fs/afs/callback.c new file mode 100644 index 000000000..7d9b23d98 --- /dev/null +++ b/fs/afs/callback.c @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2002, 2007 Red Hat, Inc. All rights reserved. + * + * This software may be freely redistributed under the terms of the + * GNU General Public License. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Authors: David Woodhouse <dwmw2@infradead.org> + * David Howells <dhowells@redhat.com> + * + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/circ_buf.h> +#include <linux/sched.h> +#include "internal.h" + +/* + * Allow the fileserver to request callback state (re-)initialisation. + * Unfortunately, UUIDs are not guaranteed unique. + */ +void afs_init_callback_state(struct afs_server *server) +{ + rcu_read_lock(); + do { + server->cb_s_break++; + server = rcu_dereference(server->uuid_next); + } while (0); + rcu_read_unlock(); +} + +/* + * actually break a callback + */ +void __afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reason) +{ + _enter(""); + + clear_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags); + if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { + vnode->cb_break++; + afs_clear_permits(vnode); + + if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB) + afs_lock_may_be_available(vnode); + + trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, true); + } else { + trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, false); + } +} + +void afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reason) +{ + write_seqlock(&vnode->cb_lock); + __afs_break_callback(vnode, reason); + write_sequnlock(&vnode->cb_lock); +} + +/* + * Look up a volume by volume ID under RCU conditions. + */ +static struct afs_volume *afs_lookup_volume_rcu(struct afs_cell *cell, + afs_volid_t vid) +{ + struct afs_volume *volume = NULL; + struct rb_node *p; + int seq = 0; + + do { + /* Unfortunately, rbtree walking doesn't give reliable results + * under just the RCU read lock, so we have to check for + * changes. + */ + read_seqbegin_or_lock(&cell->volume_lock, &seq); + + p = rcu_dereference_raw(cell->volumes.rb_node); + while (p) { + volume = rb_entry(p, struct afs_volume, cell_node); + + if (volume->vid < vid) + p = rcu_dereference_raw(p->rb_left); + else if (volume->vid > vid) + p = rcu_dereference_raw(p->rb_right); + else + break; + volume = NULL; + } + + } while (need_seqretry(&cell->volume_lock, seq)); + + done_seqretry(&cell->volume_lock, seq); + return volume; +} + +/* + * allow the fileserver to explicitly break one callback + * - happens when + * - the backing file is changed + * - a lock is released + */ +static void afs_break_one_callback(struct afs_volume *volume, + struct afs_fid *fid) +{ + struct super_block *sb; + struct afs_vnode *vnode; + struct inode *inode; + + if (fid->vnode == 0 && fid->unique == 0) { + /* The callback break applies to an entire volume. */ + write_lock(&volume->cb_v_break_lock); + volume->cb_v_break++; + trace_afs_cb_break(fid, volume->cb_v_break, + afs_cb_break_for_volume_callback, false); + write_unlock(&volume->cb_v_break_lock); + return; + } + + /* See if we can find a matching inode - even an I_NEW inode needs to + * be marked as it can have its callback broken before we finish + * setting up the local inode. + */ + sb = rcu_dereference(volume->sb); + if (!sb) + return; + + inode = find_inode_rcu(sb, fid->vnode, afs_ilookup5_test_by_fid, fid); + if (inode) { + vnode = AFS_FS_I(inode); + afs_break_callback(vnode, afs_cb_break_for_callback); + } else { + trace_afs_cb_miss(fid, afs_cb_break_for_callback); + } +} + +static void afs_break_some_callbacks(struct afs_server *server, + struct afs_callback_break *cbb, + size_t *_count) +{ + struct afs_callback_break *residue = cbb; + struct afs_volume *volume; + afs_volid_t vid = cbb->fid.vid; + size_t i; + + volume = afs_lookup_volume_rcu(server->cell, vid); + + /* TODO: Find all matching volumes if we couldn't match the server and + * break them anyway. + */ + + for (i = *_count; i > 0; cbb++, i--) { + if (cbb->fid.vid == vid) { + _debug("- Fid { vl=%08llx n=%llu u=%u }", + cbb->fid.vid, + cbb->fid.vnode, + cbb->fid.unique); + --*_count; + if (volume) + afs_break_one_callback(volume, &cbb->fid); + } else { + *residue++ = *cbb; + } + } +} + +/* + * allow the fileserver to break callback promises + */ +void afs_break_callbacks(struct afs_server *server, size_t count, + struct afs_callback_break *callbacks) +{ + _enter("%p,%zu,", server, count); + + ASSERT(server != NULL); + + rcu_read_lock(); + + while (count > 0) + afs_break_some_callbacks(server, callbacks, &count); + + rcu_read_unlock(); + return; +} diff --git a/fs/afs/cell.c b/fs/afs/cell.c new file mode 100644 index 000000000..cfd3c4dab --- /dev/null +++ b/fs/afs/cell.c @@ -0,0 +1,967 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* AFS cell and server record management + * + * Copyright (C) 2002, 2017 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/slab.h> +#include <linux/key.h> +#include <linux/ctype.h> +#include <linux/dns_resolver.h> +#include <linux/sched.h> +#include <linux/inet.h> +#include <linux/namei.h> +#include <keys/rxrpc-type.h> +#include "internal.h" + +static unsigned __read_mostly afs_cell_gc_delay = 10; +static unsigned __read_mostly afs_cell_min_ttl = 10 * 60; +static unsigned __read_mostly afs_cell_max_ttl = 24 * 60 * 60; +static atomic_t cell_debug_id; + +static void afs_queue_cell_manager(struct afs_net *); +static void afs_manage_cell_work(struct work_struct *); + +static void afs_dec_cells_outstanding(struct afs_net *net) +{ + if (atomic_dec_and_test(&net->cells_outstanding)) + wake_up_var(&net->cells_outstanding); +} + +/* + * Set the cell timer to fire after a given delay, assuming it's not already + * set for an earlier time. + */ +static void afs_set_cell_timer(struct afs_net *net, time64_t delay) +{ + if (net->live) { + atomic_inc(&net->cells_outstanding); + if (timer_reduce(&net->cells_timer, jiffies + delay * HZ)) + afs_dec_cells_outstanding(net); + } else { + afs_queue_cell_manager(net); + } +} + +/* + * Look up and get an activation reference on a cell record. The caller must + * hold net->cells_lock at least read-locked. + */ +static struct afs_cell *afs_find_cell_locked(struct afs_net *net, + const char *name, unsigned int namesz, + enum afs_cell_trace reason) +{ + struct afs_cell *cell = NULL; + struct rb_node *p; + int n; + + _enter("%*.*s", namesz, namesz, name); + + if (name && namesz == 0) + return ERR_PTR(-EINVAL); + if (namesz > AFS_MAXCELLNAME) + return ERR_PTR(-ENAMETOOLONG); + + if (!name) { + cell = net->ws_cell; + if (!cell) + return ERR_PTR(-EDESTADDRREQ); + goto found; + } + + p = net->cells.rb_node; + while (p) { + cell = rb_entry(p, struct afs_cell, net_node); + + n = strncasecmp(cell->name, name, + min_t(size_t, cell->name_len, namesz)); + if (n == 0) + n = cell->name_len - namesz; + if (n < 0) + p = p->rb_left; + else if (n > 0) + p = p->rb_right; + else + goto found; + } + + return ERR_PTR(-ENOENT); + +found: + return afs_use_cell(cell, reason); +} + +/* + * Look up and get an activation reference on a cell record. + */ +struct afs_cell *afs_find_cell(struct afs_net *net, + const char *name, unsigned int namesz, + enum afs_cell_trace reason) +{ + struct afs_cell *cell; + + down_read(&net->cells_lock); + cell = afs_find_cell_locked(net, name, namesz, reason); + up_read(&net->cells_lock); + return cell; +} + +/* + * Set up a cell record and fill in its name, VL server address list and + * allocate an anonymous key + */ +static struct afs_cell *afs_alloc_cell(struct afs_net *net, + const char *name, unsigned int namelen, + const char *addresses) +{ + struct afs_vlserver_list *vllist; + struct afs_cell *cell; + int i, ret; + + ASSERT(name); + if (namelen == 0) + return ERR_PTR(-EINVAL); + if (namelen > AFS_MAXCELLNAME) { + _leave(" = -ENAMETOOLONG"); + return ERR_PTR(-ENAMETOOLONG); + } + + /* Prohibit cell names that contain unprintable chars, '/' and '@' or + * that begin with a dot. This also precludes "@cell". + */ + if (name[0] == '.') + return ERR_PTR(-EINVAL); + for (i = 0; i < namelen; i++) { + char ch = name[i]; + if (!isprint(ch) || ch == '/' || ch == '@') + return ERR_PTR(-EINVAL); + } + + _enter("%*.*s,%s", namelen, namelen, name, addresses); + + cell = kzalloc(sizeof(struct afs_cell), GFP_KERNEL); + if (!cell) { + _leave(" = -ENOMEM"); + return ERR_PTR(-ENOMEM); + } + + cell->name = kmalloc(namelen + 1, GFP_KERNEL); + if (!cell->name) { + kfree(cell); + return ERR_PTR(-ENOMEM); + } + + cell->net = net; + cell->name_len = namelen; + for (i = 0; i < namelen; i++) + cell->name[i] = tolower(name[i]); + cell->name[i] = 0; + + atomic_set(&cell->ref, 1); + atomic_set(&cell->active, 0); + INIT_WORK(&cell->manager, afs_manage_cell_work); + cell->volumes = RB_ROOT; + INIT_HLIST_HEAD(&cell->proc_volumes); + seqlock_init(&cell->volume_lock); + cell->fs_servers = RB_ROOT; + seqlock_init(&cell->fs_lock); + rwlock_init(&cell->vl_servers_lock); + cell->flags = (1 << AFS_CELL_FL_CHECK_ALIAS); + + /* Provide a VL server list, filling it in if we were given a list of + * addresses to use. + */ + if (addresses) { + vllist = afs_parse_text_addrs(net, + addresses, strlen(addresses), ':', + VL_SERVICE, AFS_VL_PORT); + if (IS_ERR(vllist)) { + ret = PTR_ERR(vllist); + goto parse_failed; + } + + vllist->source = DNS_RECORD_FROM_CONFIG; + vllist->status = DNS_LOOKUP_NOT_DONE; + cell->dns_expiry = TIME64_MAX; + } else { + ret = -ENOMEM; + vllist = afs_alloc_vlserver_list(0); + if (!vllist) + goto error; + vllist->source = DNS_RECORD_UNAVAILABLE; + vllist->status = DNS_LOOKUP_NOT_DONE; + cell->dns_expiry = ktime_get_real_seconds(); + } + + rcu_assign_pointer(cell->vl_servers, vllist); + + cell->dns_source = vllist->source; + cell->dns_status = vllist->status; + smp_store_release(&cell->dns_lookup_count, 1); /* vs source/status */ + atomic_inc(&net->cells_outstanding); + cell->debug_id = atomic_inc_return(&cell_debug_id); + trace_afs_cell(cell->debug_id, 1, 0, afs_cell_trace_alloc); + + _leave(" = %p", cell); + return cell; + +parse_failed: + if (ret == -EINVAL) + printk(KERN_ERR "kAFS: bad VL server IP address\n"); +error: + kfree(cell->name); + kfree(cell); + _leave(" = %d", ret); + return ERR_PTR(ret); +} + +/* + * afs_lookup_cell - Look up or create a cell record. + * @net: The network namespace + * @name: The name of the cell. + * @namesz: The strlen of the cell name. + * @vllist: A colon/comma separated list of numeric IP addresses or NULL. + * @excl: T if an error should be given if the cell name already exists. + * + * Look up a cell record by name and query the DNS for VL server addresses if + * needed. Note that that actual DNS query is punted off to the manager thread + * so that this function can return immediately if interrupted whilst allowing + * cell records to be shared even if not yet fully constructed. + */ +struct afs_cell *afs_lookup_cell(struct afs_net *net, + const char *name, unsigned int namesz, + const char *vllist, bool excl) +{ + struct afs_cell *cell, *candidate, *cursor; + struct rb_node *parent, **pp; + enum afs_cell_state state; + int ret, n; + + _enter("%s,%s", name, vllist); + + if (!excl) { + cell = afs_find_cell(net, name, namesz, afs_cell_trace_use_lookup); + if (!IS_ERR(cell)) + goto wait_for_cell; + } + + /* Assume we're probably going to create a cell and preallocate and + * mostly set up a candidate record. We can then use this to stash the + * name, the net namespace and VL server addresses. + * + * We also want to do this before we hold any locks as it may involve + * upcalling to userspace to make DNS queries. + */ + candidate = afs_alloc_cell(net, name, namesz, vllist); + if (IS_ERR(candidate)) { + _leave(" = %ld", PTR_ERR(candidate)); + return candidate; + } + + /* Find the insertion point and check to see if someone else added a + * cell whilst we were allocating. + */ + down_write(&net->cells_lock); + + pp = &net->cells.rb_node; + parent = NULL; + while (*pp) { + parent = *pp; + cursor = rb_entry(parent, struct afs_cell, net_node); + + n = strncasecmp(cursor->name, name, + min_t(size_t, cursor->name_len, namesz)); + if (n == 0) + n = cursor->name_len - namesz; + if (n < 0) + pp = &(*pp)->rb_left; + else if (n > 0) + pp = &(*pp)->rb_right; + else + goto cell_already_exists; + } + + cell = candidate; + candidate = NULL; + atomic_set(&cell->active, 2); + trace_afs_cell(cell->debug_id, atomic_read(&cell->ref), 2, afs_cell_trace_insert); + rb_link_node_rcu(&cell->net_node, parent, pp); + rb_insert_color(&cell->net_node, &net->cells); + up_write(&net->cells_lock); + + afs_queue_cell(cell, afs_cell_trace_get_queue_new); + +wait_for_cell: + trace_afs_cell(cell->debug_id, atomic_read(&cell->ref), atomic_read(&cell->active), + afs_cell_trace_wait); + _debug("wait_for_cell"); + wait_var_event(&cell->state, + ({ + state = smp_load_acquire(&cell->state); /* vs error */ + state == AFS_CELL_ACTIVE || state == AFS_CELL_REMOVED; + })); + + /* Check the state obtained from the wait check. */ + if (state == AFS_CELL_REMOVED) { + ret = cell->error; + goto error; + } + + _leave(" = %p [cell]", cell); + return cell; + +cell_already_exists: + _debug("cell exists"); + cell = cursor; + if (excl) { + ret = -EEXIST; + } else { + afs_use_cell(cursor, afs_cell_trace_use_lookup); + ret = 0; + } + up_write(&net->cells_lock); + if (candidate) + afs_put_cell(candidate, afs_cell_trace_put_candidate); + if (ret == 0) + goto wait_for_cell; + goto error_noput; +error: + afs_unuse_cell(net, cell, afs_cell_trace_unuse_lookup); +error_noput: + _leave(" = %d [error]", ret); + return ERR_PTR(ret); +} + +/* + * set the root cell information + * - can be called with a module parameter string + * - can be called from a write to /proc/fs/afs/rootcell + */ +int afs_cell_init(struct afs_net *net, const char *rootcell) +{ + struct afs_cell *old_root, *new_root; + const char *cp, *vllist; + size_t len; + + _enter(""); + + if (!rootcell) { + /* module is loaded with no parameters, or built statically. + * - in the future we might initialize cell DB here. + */ + _leave(" = 0 [no root]"); + return 0; + } + + cp = strchr(rootcell, ':'); + if (!cp) { + _debug("kAFS: no VL server IP addresses specified"); + vllist = NULL; + len = strlen(rootcell); + } else { + vllist = cp + 1; + len = cp - rootcell; + } + + /* allocate a cell record for the root cell */ + new_root = afs_lookup_cell(net, rootcell, len, vllist, false); + if (IS_ERR(new_root)) { + _leave(" = %ld", PTR_ERR(new_root)); + return PTR_ERR(new_root); + } + + if (!test_and_set_bit(AFS_CELL_FL_NO_GC, &new_root->flags)) + afs_use_cell(new_root, afs_cell_trace_use_pin); + + /* install the new cell */ + down_write(&net->cells_lock); + afs_see_cell(new_root, afs_cell_trace_see_ws); + old_root = net->ws_cell; + net->ws_cell = new_root; + up_write(&net->cells_lock); + + afs_unuse_cell(net, old_root, afs_cell_trace_unuse_ws); + _leave(" = 0"); + return 0; +} + +/* + * Update a cell's VL server address list from the DNS. + */ +static int afs_update_cell(struct afs_cell *cell) +{ + struct afs_vlserver_list *vllist, *old = NULL, *p; + unsigned int min_ttl = READ_ONCE(afs_cell_min_ttl); + unsigned int max_ttl = READ_ONCE(afs_cell_max_ttl); + time64_t now, expiry = 0; + int ret = 0; + + _enter("%s", cell->name); + + vllist = afs_dns_query(cell, &expiry); + if (IS_ERR(vllist)) { + ret = PTR_ERR(vllist); + + _debug("%s: fail %d", cell->name, ret); + if (ret == -ENOMEM) + goto out_wake; + + vllist = afs_alloc_vlserver_list(0); + if (!vllist) { + if (ret >= 0) + ret = -ENOMEM; + goto out_wake; + } + + switch (ret) { + case -ENODATA: + case -EDESTADDRREQ: + vllist->status = DNS_LOOKUP_GOT_NOT_FOUND; + break; + case -EAGAIN: + case -ECONNREFUSED: + vllist->status = DNS_LOOKUP_GOT_TEMP_FAILURE; + break; + default: + vllist->status = DNS_LOOKUP_GOT_LOCAL_FAILURE; + break; + } + } + + _debug("%s: got list %d %d", cell->name, vllist->source, vllist->status); + cell->dns_status = vllist->status; + + now = ktime_get_real_seconds(); + if (min_ttl > max_ttl) + max_ttl = min_ttl; + if (expiry < now + min_ttl) + expiry = now + min_ttl; + else if (expiry > now + max_ttl) + expiry = now + max_ttl; + + _debug("%s: status %d", cell->name, vllist->status); + if (vllist->source == DNS_RECORD_UNAVAILABLE) { + switch (vllist->status) { + case DNS_LOOKUP_GOT_NOT_FOUND: + /* The DNS said that the cell does not exist or there + * weren't any addresses to be had. + */ + cell->dns_expiry = expiry; + break; + + case DNS_LOOKUP_BAD: + case DNS_LOOKUP_GOT_LOCAL_FAILURE: + case DNS_LOOKUP_GOT_TEMP_FAILURE: + case DNS_LOOKUP_GOT_NS_FAILURE: + default: + cell->dns_expiry = now + 10; + break; + } + } else { + cell->dns_expiry = expiry; + } + + /* Replace the VL server list if the new record has servers or the old + * record doesn't. + */ + write_lock(&cell->vl_servers_lock); + p = rcu_dereference_protected(cell->vl_servers, true); + if (vllist->nr_servers > 0 || p->nr_servers == 0) { + rcu_assign_pointer(cell->vl_servers, vllist); + cell->dns_source = vllist->source; + old = p; + } + write_unlock(&cell->vl_servers_lock); + afs_put_vlserverlist(cell->net, old); + +out_wake: + smp_store_release(&cell->dns_lookup_count, + cell->dns_lookup_count + 1); /* vs source/status */ + wake_up_var(&cell->dns_lookup_count); + _leave(" = %d", ret); + return ret; +} + +/* + * Destroy a cell record + */ +static void afs_cell_destroy(struct rcu_head *rcu) +{ + struct afs_cell *cell = container_of(rcu, struct afs_cell, rcu); + struct afs_net *net = cell->net; + int u; + + _enter("%p{%s}", cell, cell->name); + + u = atomic_read(&cell->ref); + ASSERTCMP(u, ==, 0); + trace_afs_cell(cell->debug_id, u, atomic_read(&cell->active), afs_cell_trace_free); + + afs_put_vlserverlist(net, rcu_access_pointer(cell->vl_servers)); + afs_unuse_cell(net, cell->alias_of, afs_cell_trace_unuse_alias); + key_put(cell->anonymous_key); + kfree(cell->name); + kfree(cell); + + afs_dec_cells_outstanding(net); + _leave(" [destroyed]"); +} + +/* + * Queue the cell manager. + */ +static void afs_queue_cell_manager(struct afs_net *net) +{ + int outstanding = atomic_inc_return(&net->cells_outstanding); + + _enter("%d", outstanding); + + if (!queue_work(afs_wq, &net->cells_manager)) + afs_dec_cells_outstanding(net); +} + +/* + * Cell management timer. We have an increment on cells_outstanding that we + * need to pass along to the work item. + */ +void afs_cells_timer(struct timer_list *timer) +{ + struct afs_net *net = container_of(timer, struct afs_net, cells_timer); + + _enter(""); + if (!queue_work(afs_wq, &net->cells_manager)) + afs_dec_cells_outstanding(net); +} + +/* + * Get a reference on a cell record. + */ +struct afs_cell *afs_get_cell(struct afs_cell *cell, enum afs_cell_trace reason) +{ + int u; + + if (atomic_read(&cell->ref) <= 0) + BUG(); + + u = atomic_inc_return(&cell->ref); + trace_afs_cell(cell->debug_id, u, atomic_read(&cell->active), reason); + return cell; +} + +/* + * Drop a reference on a cell record. + */ +void afs_put_cell(struct afs_cell *cell, enum afs_cell_trace reason) +{ + if (cell) { + unsigned int debug_id = cell->debug_id; + unsigned int u, a; + + a = atomic_read(&cell->active); + u = atomic_dec_return(&cell->ref); + trace_afs_cell(debug_id, u, a, reason); + if (u == 0) { + a = atomic_read(&cell->active); + WARN(a != 0, "Cell active count %u > 0\n", a); + call_rcu(&cell->rcu, afs_cell_destroy); + } + } +} + +/* + * Note a cell becoming more active. + */ +struct afs_cell *afs_use_cell(struct afs_cell *cell, enum afs_cell_trace reason) +{ + int u, a; + + if (atomic_read(&cell->ref) <= 0) + BUG(); + + u = atomic_read(&cell->ref); + a = atomic_inc_return(&cell->active); + trace_afs_cell(cell->debug_id, u, a, reason); + return cell; +} + +/* + * Record a cell becoming less active. When the active counter reaches 1, it + * is scheduled for destruction, but may get reactivated. + */ +void afs_unuse_cell(struct afs_net *net, struct afs_cell *cell, enum afs_cell_trace reason) +{ + unsigned int debug_id; + time64_t now, expire_delay; + int u, a; + + if (!cell) + return; + + _enter("%s", cell->name); + + now = ktime_get_real_seconds(); + cell->last_inactive = now; + expire_delay = 0; + if (cell->vl_servers->nr_servers) + expire_delay = afs_cell_gc_delay; + + debug_id = cell->debug_id; + u = atomic_read(&cell->ref); + a = atomic_dec_return(&cell->active); + trace_afs_cell(debug_id, u, a, reason); + WARN_ON(a == 0); + if (a == 1) + /* 'cell' may now be garbage collected. */ + afs_set_cell_timer(net, expire_delay); +} + +/* + * Note that a cell has been seen. + */ +void afs_see_cell(struct afs_cell *cell, enum afs_cell_trace reason) +{ + int u, a; + + u = atomic_read(&cell->ref); + a = atomic_read(&cell->active); + trace_afs_cell(cell->debug_id, u, a, reason); +} + +/* + * Queue a cell for management, giving the workqueue a ref to hold. + */ +void afs_queue_cell(struct afs_cell *cell, enum afs_cell_trace reason) +{ + afs_get_cell(cell, reason); + if (!queue_work(afs_wq, &cell->manager)) + afs_put_cell(cell, afs_cell_trace_put_queue_fail); +} + +/* + * Allocate a key to use as a placeholder for anonymous user security. + */ +static int afs_alloc_anon_key(struct afs_cell *cell) +{ + struct key *key; + char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp; + + /* Create a key to represent an anonymous user. */ + memcpy(keyname, "afs@", 4); + dp = keyname + 4; + cp = cell->name; + do { + *dp++ = tolower(*cp); + } while (*cp++); + + key = rxrpc_get_null_key(keyname); + if (IS_ERR(key)) + return PTR_ERR(key); + + cell->anonymous_key = key; + + _debug("anon key %p{%x}", + cell->anonymous_key, key_serial(cell->anonymous_key)); + return 0; +} + +/* + * Activate a cell. + */ +static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell) +{ + struct hlist_node **p; + struct afs_cell *pcell; + int ret; + + if (!cell->anonymous_key) { + ret = afs_alloc_anon_key(cell); + if (ret < 0) + return ret; + } + +#ifdef CONFIG_AFS_FSCACHE + cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index, + &afs_cell_cache_index_def, + cell->name, strlen(cell->name), + NULL, 0, + cell, 0, true); +#endif + ret = afs_proc_cell_setup(cell); + if (ret < 0) + return ret; + + mutex_lock(&net->proc_cells_lock); + for (p = &net->proc_cells.first; *p; p = &(*p)->next) { + pcell = hlist_entry(*p, struct afs_cell, proc_link); + if (strcmp(cell->name, pcell->name) < 0) + break; + } + + cell->proc_link.pprev = p; + cell->proc_link.next = *p; + rcu_assign_pointer(*p, &cell->proc_link.next); + if (cell->proc_link.next) + cell->proc_link.next->pprev = &cell->proc_link.next; + + afs_dynroot_mkdir(net, cell); + mutex_unlock(&net->proc_cells_lock); + return 0; +} + +/* + * Deactivate a cell. + */ +static void afs_deactivate_cell(struct afs_net *net, struct afs_cell *cell) +{ + _enter("%s", cell->name); + + afs_proc_cell_remove(cell); + + mutex_lock(&net->proc_cells_lock); + hlist_del_rcu(&cell->proc_link); + afs_dynroot_rmdir(net, cell); + mutex_unlock(&net->proc_cells_lock); + +#ifdef CONFIG_AFS_FSCACHE + fscache_relinquish_cookie(cell->cache, NULL, false); + cell->cache = NULL; +#endif + + _leave(""); +} + +/* + * Manage a cell record, initialising and destroying it, maintaining its DNS + * records. + */ +static void afs_manage_cell(struct afs_cell *cell) +{ + struct afs_net *net = cell->net; + int ret, active; + + _enter("%s", cell->name); + +again: + _debug("state %u", cell->state); + switch (cell->state) { + case AFS_CELL_INACTIVE: + case AFS_CELL_FAILED: + down_write(&net->cells_lock); + active = 1; + if (atomic_try_cmpxchg_relaxed(&cell->active, &active, 0)) { + rb_erase(&cell->net_node, &net->cells); + trace_afs_cell(cell->debug_id, atomic_read(&cell->ref), 0, + afs_cell_trace_unuse_delete); + smp_store_release(&cell->state, AFS_CELL_REMOVED); + } + up_write(&net->cells_lock); + if (cell->state == AFS_CELL_REMOVED) { + wake_up_var(&cell->state); + goto final_destruction; + } + if (cell->state == AFS_CELL_FAILED) + goto done; + smp_store_release(&cell->state, AFS_CELL_UNSET); + wake_up_var(&cell->state); + goto again; + + case AFS_CELL_UNSET: + smp_store_release(&cell->state, AFS_CELL_ACTIVATING); + wake_up_var(&cell->state); + goto again; + + case AFS_CELL_ACTIVATING: + ret = afs_activate_cell(net, cell); + if (ret < 0) + goto activation_failed; + + smp_store_release(&cell->state, AFS_CELL_ACTIVE); + wake_up_var(&cell->state); + goto again; + + case AFS_CELL_ACTIVE: + if (atomic_read(&cell->active) > 1) { + if (test_and_clear_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags)) { + ret = afs_update_cell(cell); + if (ret < 0) + cell->error = ret; + } + goto done; + } + smp_store_release(&cell->state, AFS_CELL_DEACTIVATING); + wake_up_var(&cell->state); + goto again; + + case AFS_CELL_DEACTIVATING: + if (atomic_read(&cell->active) > 1) + goto reverse_deactivation; + afs_deactivate_cell(net, cell); + smp_store_release(&cell->state, AFS_CELL_INACTIVE); + wake_up_var(&cell->state); + goto again; + + case AFS_CELL_REMOVED: + goto done; + + default: + break; + } + _debug("bad state %u", cell->state); + BUG(); /* Unhandled state */ + +activation_failed: + cell->error = ret; + afs_deactivate_cell(net, cell); + + smp_store_release(&cell->state, AFS_CELL_FAILED); /* vs error */ + wake_up_var(&cell->state); + goto again; + +reverse_deactivation: + smp_store_release(&cell->state, AFS_CELL_ACTIVE); + wake_up_var(&cell->state); + _leave(" [deact->act]"); + return; + +done: + _leave(" [done %u]", cell->state); + return; + +final_destruction: + /* The root volume is pinning the cell */ + afs_put_volume(cell->net, cell->root_volume, afs_volume_trace_put_cell_root); + cell->root_volume = NULL; + afs_put_cell(cell, afs_cell_trace_put_destroy); +} + +static void afs_manage_cell_work(struct work_struct *work) +{ + struct afs_cell *cell = container_of(work, struct afs_cell, manager); + + afs_manage_cell(cell); + afs_put_cell(cell, afs_cell_trace_put_queue_work); +} + +/* + * Manage the records of cells known to a network namespace. This includes + * updating the DNS records and garbage collecting unused cells that were + * automatically added. + * + * Note that constructed cell records may only be removed from net->cells by + * this work item, so it is safe for this work item to stash a cursor pointing + * into the tree and then return to caller (provided it skips cells that are + * still under construction). + * + * Note also that we were given an increment on net->cells_outstanding by + * whoever queued us that we need to deal with before returning. + */ +void afs_manage_cells(struct work_struct *work) +{ + struct afs_net *net = container_of(work, struct afs_net, cells_manager); + struct rb_node *cursor; + time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX; + bool purging = !net->live; + + _enter(""); + + /* Trawl the cell database looking for cells that have expired from + * lack of use and cells whose DNS results have expired and dispatch + * their managers. + */ + down_read(&net->cells_lock); + + for (cursor = rb_first(&net->cells); cursor; cursor = rb_next(cursor)) { + struct afs_cell *cell = + rb_entry(cursor, struct afs_cell, net_node); + unsigned active; + bool sched_cell = false; + + active = atomic_read(&cell->active); + trace_afs_cell(cell->debug_id, atomic_read(&cell->ref), + active, afs_cell_trace_manage); + + ASSERTCMP(active, >=, 1); + + if (purging) { + if (test_and_clear_bit(AFS_CELL_FL_NO_GC, &cell->flags)) { + active = atomic_dec_return(&cell->active); + trace_afs_cell(cell->debug_id, atomic_read(&cell->ref), + active, afs_cell_trace_unuse_pin); + } + } + + if (active == 1) { + struct afs_vlserver_list *vllist; + time64_t expire_at = cell->last_inactive; + + read_lock(&cell->vl_servers_lock); + vllist = rcu_dereference_protected( + cell->vl_servers, + lockdep_is_held(&cell->vl_servers_lock)); + if (vllist->nr_servers > 0) + expire_at += afs_cell_gc_delay; + read_unlock(&cell->vl_servers_lock); + if (purging || expire_at <= now) + sched_cell = true; + else if (expire_at < next_manage) + next_manage = expire_at; + } + + if (!purging) { + if (test_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags)) + sched_cell = true; + } + + if (sched_cell) + afs_queue_cell(cell, afs_cell_trace_get_queue_manage); + } + + up_read(&net->cells_lock); + + /* Update the timer on the way out. We have to pass an increment on + * cells_outstanding in the namespace that we are in to the timer or + * the work scheduler. + */ + if (!purging && next_manage < TIME64_MAX) { + now = ktime_get_real_seconds(); + + if (next_manage - now <= 0) { + if (queue_work(afs_wq, &net->cells_manager)) + atomic_inc(&net->cells_outstanding); + } else { + afs_set_cell_timer(net, next_manage - now); + } + } + + afs_dec_cells_outstanding(net); + _leave(" [%d]", atomic_read(&net->cells_outstanding)); +} + +/* + * Purge in-memory cell database. + */ +void afs_cell_purge(struct afs_net *net) +{ + struct afs_cell *ws; + + _enter(""); + + down_write(&net->cells_lock); + ws = net->ws_cell; + net->ws_cell = NULL; + up_write(&net->cells_lock); + afs_unuse_cell(net, ws, afs_cell_trace_unuse_ws); + + _debug("del timer"); + if (del_timer_sync(&net->cells_timer)) + atomic_dec(&net->cells_outstanding); + + _debug("kick mgr"); + afs_queue_cell_manager(net); + + _debug("wait"); + wait_var_event(&net->cells_outstanding, + !atomic_read(&net->cells_outstanding)); + _leave(""); +} diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c new file mode 100644 index 000000000..2a528b704 --- /dev/null +++ b/fs/afs/cmservice.c @@ -0,0 +1,667 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* AFS Cache Manager Service + * + * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/ip.h> +#include "internal.h" +#include "afs_cm.h" +#include "protocol_yfs.h" + +static int afs_deliver_cb_init_call_back_state(struct afs_call *); +static int afs_deliver_cb_init_call_back_state3(struct afs_call *); +static int afs_deliver_cb_probe(struct afs_call *); +static int afs_deliver_cb_callback(struct afs_call *); +static int afs_deliver_cb_probe_uuid(struct afs_call *); +static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *); +static void afs_cm_destructor(struct afs_call *); +static void SRXAFSCB_CallBack(struct work_struct *); +static void SRXAFSCB_InitCallBackState(struct work_struct *); +static void SRXAFSCB_Probe(struct work_struct *); +static void SRXAFSCB_ProbeUuid(struct work_struct *); +static void SRXAFSCB_TellMeAboutYourself(struct work_struct *); + +static int afs_deliver_yfs_cb_callback(struct afs_call *); + +/* + * CB.CallBack operation type + */ +static const struct afs_call_type afs_SRXCBCallBack = { + .name = "CB.CallBack", + .deliver = afs_deliver_cb_callback, + .destructor = afs_cm_destructor, + .work = SRXAFSCB_CallBack, +}; + +/* + * CB.InitCallBackState operation type + */ +static const struct afs_call_type afs_SRXCBInitCallBackState = { + .name = "CB.InitCallBackState", + .deliver = afs_deliver_cb_init_call_back_state, + .destructor = afs_cm_destructor, + .work = SRXAFSCB_InitCallBackState, +}; + +/* + * CB.InitCallBackState3 operation type + */ +static const struct afs_call_type afs_SRXCBInitCallBackState3 = { + .name = "CB.InitCallBackState3", + .deliver = afs_deliver_cb_init_call_back_state3, + .destructor = afs_cm_destructor, + .work = SRXAFSCB_InitCallBackState, +}; + +/* + * CB.Probe operation type + */ +static const struct afs_call_type afs_SRXCBProbe = { + .name = "CB.Probe", + .deliver = afs_deliver_cb_probe, + .destructor = afs_cm_destructor, + .work = SRXAFSCB_Probe, +}; + +/* + * CB.ProbeUuid operation type + */ +static const struct afs_call_type afs_SRXCBProbeUuid = { + .name = "CB.ProbeUuid", + .deliver = afs_deliver_cb_probe_uuid, + .destructor = afs_cm_destructor, + .work = SRXAFSCB_ProbeUuid, +}; + +/* + * CB.TellMeAboutYourself operation type + */ +static const struct afs_call_type afs_SRXCBTellMeAboutYourself = { + .name = "CB.TellMeAboutYourself", + .deliver = afs_deliver_cb_tell_me_about_yourself, + .destructor = afs_cm_destructor, + .work = SRXAFSCB_TellMeAboutYourself, +}; + +/* + * YFS CB.CallBack operation type + */ +static const struct afs_call_type afs_SRXYFSCB_CallBack = { + .name = "YFSCB.CallBack", + .deliver = afs_deliver_yfs_cb_callback, + .destructor = afs_cm_destructor, + .work = SRXAFSCB_CallBack, +}; + +/* + * route an incoming cache manager call + * - return T if supported, F if not + */ +bool afs_cm_incoming_call(struct afs_call *call) +{ + _enter("{%u, CB.OP %u}", call->service_id, call->operation_ID); + + switch (call->operation_ID) { + case CBCallBack: + call->type = &afs_SRXCBCallBack; + return true; + case CBInitCallBackState: + call->type = &afs_SRXCBInitCallBackState; + return true; + case CBInitCallBackState3: + call->type = &afs_SRXCBInitCallBackState3; + return true; + case CBProbe: + call->type = &afs_SRXCBProbe; + return true; + case CBProbeUuid: + call->type = &afs_SRXCBProbeUuid; + return true; + case CBTellMeAboutYourself: + call->type = &afs_SRXCBTellMeAboutYourself; + return true; + case YFSCBCallBack: + if (call->service_id != YFS_CM_SERVICE) + return false; + call->type = &afs_SRXYFSCB_CallBack; + return true; + default: + return false; + } +} + +/* + * Find the server record by peer address and record a probe to the cache + * manager from a server. + */ +static int afs_find_cm_server_by_peer(struct afs_call *call) +{ + struct sockaddr_rxrpc srx; + struct afs_server *server; + + rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); + + server = afs_find_server(call->net, &srx); + if (!server) { + trace_afs_cm_no_server(call, &srx); + return 0; + } + + call->server = server; + return 0; +} + +/* + * Find the server record by server UUID and record a probe to the cache + * manager from a server. + */ +static int afs_find_cm_server_by_uuid(struct afs_call *call, + struct afs_uuid *uuid) +{ + struct afs_server *server; + + rcu_read_lock(); + server = afs_find_server_by_uuid(call->net, call->request); + rcu_read_unlock(); + if (!server) { + trace_afs_cm_no_server_u(call, call->request); + return 0; + } + + call->server = server; + return 0; +} + +/* + * Clean up a cache manager call. + */ +static void afs_cm_destructor(struct afs_call *call) +{ + kfree(call->buffer); + call->buffer = NULL; +} + +/* + * Abort a service call from within an action function. + */ +static void afs_abort_service_call(struct afs_call *call, u32 abort_code, int error, + const char *why) +{ + rxrpc_kernel_abort_call(call->net->socket, call->rxcall, + abort_code, error, why); + afs_set_call_complete(call, error, 0); +} + +/* + * The server supplied a list of callbacks that it wanted to break. + */ +static void SRXAFSCB_CallBack(struct work_struct *work) +{ + struct afs_call *call = container_of(work, struct afs_call, work); + + _enter(""); + + /* We need to break the callbacks before sending the reply as the + * server holds up change visibility till it receives our reply so as + * to maintain cache coherency. + */ + if (call->server) { + trace_afs_server(call->server, + atomic_read(&call->server->ref), + atomic_read(&call->server->active), + afs_server_trace_callback); + afs_break_callbacks(call->server, call->count, call->request); + } + + afs_send_empty_reply(call); + afs_put_call(call); + _leave(""); +} + +/* + * deliver request data to a CB.CallBack call + */ +static int afs_deliver_cb_callback(struct afs_call *call) +{ + struct afs_callback_break *cb; + __be32 *bp; + int ret, loop; + + _enter("{%u}", call->unmarshall); + + switch (call->unmarshall) { + case 0: + afs_extract_to_tmp(call); + call->unmarshall++; + + /* extract the FID array and its count in two steps */ + fallthrough; + case 1: + _debug("extract FID count"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + call->count = ntohl(call->tmp); + _debug("FID count: %u", call->count); + if (call->count > AFSCBMAX) + return afs_protocol_error(call, afs_eproto_cb_fid_count); + + call->buffer = kmalloc(array3_size(call->count, 3, 4), + GFP_KERNEL); + if (!call->buffer) + return -ENOMEM; + afs_extract_to_buf(call, call->count * 3 * 4); + call->unmarshall++; + + fallthrough; + case 2: + _debug("extract FID array"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + _debug("unmarshall FID array"); + call->request = kcalloc(call->count, + sizeof(struct afs_callback_break), + GFP_KERNEL); + if (!call->request) + return -ENOMEM; + + cb = call->request; + bp = call->buffer; + for (loop = call->count; loop > 0; loop--, cb++) { + cb->fid.vid = ntohl(*bp++); + cb->fid.vnode = ntohl(*bp++); + cb->fid.unique = ntohl(*bp++); + } + + afs_extract_to_tmp(call); + call->unmarshall++; + + /* extract the callback array and its count in two steps */ + fallthrough; + case 3: + _debug("extract CB count"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + call->count2 = ntohl(call->tmp); + _debug("CB count: %u", call->count2); + if (call->count2 != call->count && call->count2 != 0) + return afs_protocol_error(call, afs_eproto_cb_count); + call->iter = &call->def_iter; + iov_iter_discard(&call->def_iter, READ, call->count2 * 3 * 4); + call->unmarshall++; + + fallthrough; + case 4: + _debug("extract discard %zu/%u", + iov_iter_count(call->iter), call->count2 * 3 * 4); + + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; + + call->unmarshall++; + case 5: + break; + } + + if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) + return afs_io_error(call, afs_io_error_cm_reply); + + /* we'll need the file server record as that tells us which set of + * vnodes to operate upon */ + return afs_find_cm_server_by_peer(call); +} + +/* + * allow the fileserver to request callback state (re-)initialisation + */ +static void SRXAFSCB_InitCallBackState(struct work_struct *work) +{ + struct afs_call *call = container_of(work, struct afs_call, work); + + _enter("{%p}", call->server); + + if (call->server) + afs_init_callback_state(call->server); + afs_send_empty_reply(call); + afs_put_call(call); + _leave(""); +} + +/* + * deliver request data to a CB.InitCallBackState call + */ +static int afs_deliver_cb_init_call_back_state(struct afs_call *call) +{ + int ret; + + _enter(""); + + afs_extract_discard(call, 0); + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; + + /* we'll need the file server record as that tells us which set of + * vnodes to operate upon */ + return afs_find_cm_server_by_peer(call); +} + +/* + * deliver request data to a CB.InitCallBackState3 call + */ +static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) +{ + struct afs_uuid *r; + unsigned loop; + __be32 *b; + int ret; + + _enter(""); + + _enter("{%u}", call->unmarshall); + + switch (call->unmarshall) { + case 0: + call->buffer = kmalloc_array(11, sizeof(__be32), GFP_KERNEL); + if (!call->buffer) + return -ENOMEM; + afs_extract_to_buf(call, 11 * sizeof(__be32)); + call->unmarshall++; + + fallthrough; + case 1: + _debug("extract UUID"); + ret = afs_extract_data(call, false); + switch (ret) { + case 0: break; + case -EAGAIN: return 0; + default: return ret; + } + + _debug("unmarshall UUID"); + call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL); + if (!call->request) + return -ENOMEM; + + b = call->buffer; + r = call->request; + r->time_low = b[0]; + r->time_mid = htons(ntohl(b[1])); + r->time_hi_and_version = htons(ntohl(b[2])); + r->clock_seq_hi_and_reserved = ntohl(b[3]); + r->clock_seq_low = ntohl(b[4]); + + for (loop = 0; loop < 6; loop++) + r->node[loop] = ntohl(b[loop + 5]); + + call->unmarshall++; + + case 2: + break; + } + + if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) + return afs_io_error(call, afs_io_error_cm_reply); + + /* we'll need the file server record as that tells us which set of + * vnodes to operate upon */ + return afs_find_cm_server_by_uuid(call, call->request); +} + +/* + * allow the fileserver to see if the cache manager is still alive + */ +static void SRXAFSCB_Probe(struct work_struct *work) +{ + struct afs_call *call = container_of(work, struct afs_call, work); + + _enter(""); + afs_send_empty_reply(call); + afs_put_call(call); + _leave(""); +} + +/* + * deliver request data to a CB.Probe call + */ +static int afs_deliver_cb_probe(struct afs_call *call) +{ + int ret; + + _enter(""); + + afs_extract_discard(call, 0); + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; + + if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) + return afs_io_error(call, afs_io_error_cm_reply); + return afs_find_cm_server_by_peer(call); +} + +/* + * Allow the fileserver to quickly find out if the cache manager has been + * rebooted. + */ +static void SRXAFSCB_ProbeUuid(struct work_struct *work) +{ + struct afs_call *call = container_of(work, struct afs_call, work); + struct afs_uuid *r = call->request; + + _enter(""); + + if (memcmp(r, &call->net->uuid, sizeof(call->net->uuid)) == 0) + afs_send_empty_reply(call); + else + afs_abort_service_call(call, 1, 1, "K-1"); + + afs_put_call(call); + _leave(""); +} + +/* + * deliver request data to a CB.ProbeUuid call + */ +static int afs_deliver_cb_probe_uuid(struct afs_call *call) +{ + struct afs_uuid *r; + unsigned loop; + __be32 *b; + int ret; + + _enter("{%u}", call->unmarshall); + + switch (call->unmarshall) { + case 0: + call->buffer = kmalloc_array(11, sizeof(__be32), GFP_KERNEL); + if (!call->buffer) + return -ENOMEM; + afs_extract_to_buf(call, 11 * sizeof(__be32)); + call->unmarshall++; + + fallthrough; + case 1: + _debug("extract UUID"); + ret = afs_extract_data(call, false); + switch (ret) { + case 0: break; + case -EAGAIN: return 0; + default: return ret; + } + + _debug("unmarshall UUID"); + call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL); + if (!call->request) + return -ENOMEM; + + b = call->buffer; + r = call->request; + r->time_low = b[0]; + r->time_mid = htons(ntohl(b[1])); + r->time_hi_and_version = htons(ntohl(b[2])); + r->clock_seq_hi_and_reserved = ntohl(b[3]); + r->clock_seq_low = ntohl(b[4]); + + for (loop = 0; loop < 6; loop++) + r->node[loop] = ntohl(b[loop + 5]); + + call->unmarshall++; + + case 2: + break; + } + + if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) + return afs_io_error(call, afs_io_error_cm_reply); + return afs_find_cm_server_by_peer(call); +} + +/* + * allow the fileserver to ask about the cache manager's capabilities + */ +static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work) +{ + struct afs_call *call = container_of(work, struct afs_call, work); + int loop; + + struct { + struct /* InterfaceAddr */ { + __be32 nifs; + __be32 uuid[11]; + __be32 ifaddr[32]; + __be32 netmask[32]; + __be32 mtu[32]; + } ia; + struct /* Capabilities */ { + __be32 capcount; + __be32 caps[1]; + } cap; + } reply; + + _enter(""); + + memset(&reply, 0, sizeof(reply)); + + reply.ia.uuid[0] = call->net->uuid.time_low; + reply.ia.uuid[1] = htonl(ntohs(call->net->uuid.time_mid)); + reply.ia.uuid[2] = htonl(ntohs(call->net->uuid.time_hi_and_version)); + reply.ia.uuid[3] = htonl((s8) call->net->uuid.clock_seq_hi_and_reserved); + reply.ia.uuid[4] = htonl((s8) call->net->uuid.clock_seq_low); + for (loop = 0; loop < 6; loop++) + reply.ia.uuid[loop + 5] = htonl((s8) call->net->uuid.node[loop]); + + reply.cap.capcount = htonl(1); + reply.cap.caps[0] = htonl(AFS_CAP_ERROR_TRANSLATION); + afs_send_simple_reply(call, &reply, sizeof(reply)); + afs_put_call(call); + _leave(""); +} + +/* + * deliver request data to a CB.TellMeAboutYourself call + */ +static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call) +{ + int ret; + + _enter(""); + + afs_extract_discard(call, 0); + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; + + if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) + return afs_io_error(call, afs_io_error_cm_reply); + return afs_find_cm_server_by_peer(call); +} + +/* + * deliver request data to a YFS CB.CallBack call + */ +static int afs_deliver_yfs_cb_callback(struct afs_call *call) +{ + struct afs_callback_break *cb; + struct yfs_xdr_YFSFid *bp; + size_t size; + int ret, loop; + + _enter("{%u}", call->unmarshall); + + switch (call->unmarshall) { + case 0: + afs_extract_to_tmp(call); + call->unmarshall++; + + /* extract the FID array and its count in two steps */ + fallthrough; + case 1: + _debug("extract FID count"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + call->count = ntohl(call->tmp); + _debug("FID count: %u", call->count); + if (call->count > YFSCBMAX) + return afs_protocol_error(call, afs_eproto_cb_fid_count); + + size = array_size(call->count, sizeof(struct yfs_xdr_YFSFid)); + call->buffer = kmalloc(size, GFP_KERNEL); + if (!call->buffer) + return -ENOMEM; + afs_extract_to_buf(call, size); + call->unmarshall++; + + fallthrough; + case 2: + _debug("extract FID array"); + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; + + _debug("unmarshall FID array"); + call->request = kcalloc(call->count, + sizeof(struct afs_callback_break), + GFP_KERNEL); + if (!call->request) + return -ENOMEM; + + cb = call->request; + bp = call->buffer; + for (loop = call->count; loop > 0; loop--, cb++) { + cb->fid.vid = xdr_to_u64(bp->volume); + cb->fid.vnode = xdr_to_u64(bp->vnode.lo); + cb->fid.vnode_hi = ntohl(bp->vnode.hi); + cb->fid.unique = ntohl(bp->vnode.unique); + bp++; + } + + afs_extract_to_tmp(call); + call->unmarshall++; + + case 3: + break; + } + + if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) + return afs_io_error(call, afs_io_error_cm_reply); + + /* We'll need the file server record as that tells us which set of + * vnodes to operate upon. + */ + return afs_find_cm_server_by_peer(call); +} diff --git a/fs/afs/dir.c b/fs/afs/dir.c new file mode 100644 index 000000000..a59d6293a --- /dev/null +++ b/fs/afs/dir.c @@ -0,0 +1,1989 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* dir.c: AFS filesystem directory handling + * + * Copyright (C) 2002, 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/namei.h> +#include <linux/pagemap.h> +#include <linux/swap.h> +#include <linux/ctype.h> +#include <linux/sched.h> +#include <linux/task_io_accounting_ops.h> +#include "internal.h" +#include "afs_fs.h" +#include "xdr_fs.h" + +static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags); +static int afs_dir_open(struct inode *inode, struct file *file); +static int afs_readdir(struct file *file, struct dir_context *ctx); +static int afs_d_revalidate(struct dentry *dentry, unsigned int flags); +static int afs_d_delete(const struct dentry *dentry); +static void afs_d_iput(struct dentry *dentry, struct inode *inode); +static int afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int nlen, + loff_t fpos, u64 ino, unsigned dtype); +static int afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen, + loff_t fpos, u64 ino, unsigned dtype); +static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, + bool excl); +static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); +static int afs_rmdir(struct inode *dir, struct dentry *dentry); +static int afs_unlink(struct inode *dir, struct dentry *dentry); +static int afs_link(struct dentry *from, struct inode *dir, + struct dentry *dentry); +static int afs_symlink(struct inode *dir, struct dentry *dentry, + const char *content); +static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags); +static int afs_dir_releasepage(struct page *page, gfp_t gfp_flags); +static void afs_dir_invalidatepage(struct page *page, unsigned int offset, + unsigned int length); + +static int afs_dir_set_page_dirty(struct page *page) +{ + BUG(); /* This should never happen. */ +} + +const struct file_operations afs_dir_file_operations = { + .open = afs_dir_open, + .release = afs_release, + .iterate_shared = afs_readdir, + .lock = afs_lock, + .llseek = generic_file_llseek, +}; + +const struct inode_operations afs_dir_inode_operations = { + .create = afs_create, + .lookup = afs_lookup, + .link = afs_link, + .unlink = afs_unlink, + .symlink = afs_symlink, + .mkdir = afs_mkdir, + .rmdir = afs_rmdir, + .rename = afs_rename, + .permission = afs_permission, + .getattr = afs_getattr, + .setattr = afs_setattr, +}; + +const struct address_space_operations afs_dir_aops = { + .set_page_dirty = afs_dir_set_page_dirty, + .releasepage = afs_dir_releasepage, + .invalidatepage = afs_dir_invalidatepage, +}; + +const struct dentry_operations afs_fs_dentry_operations = { + .d_revalidate = afs_d_revalidate, + .d_delete = afs_d_delete, + .d_release = afs_d_release, + .d_automount = afs_d_automount, + .d_iput = afs_d_iput, +}; + +struct afs_lookup_one_cookie { + struct dir_context ctx; + struct qstr name; + bool found; + struct afs_fid fid; +}; + +struct afs_lookup_cookie { + struct dir_context ctx; + struct qstr name; + bool found; + bool one_only; + unsigned short nr_fids; + struct afs_fid fids[50]; +}; + +/* + * check that a directory page is valid + */ +static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page, + loff_t i_size) +{ + struct afs_xdr_dir_page *dbuf; + loff_t latter, off; + int tmp, qty; + + /* Determine how many magic numbers there should be in this page, but + * we must take care because the directory may change size under us. + */ + off = page_offset(page); + if (i_size <= off) + goto checked; + + latter = i_size - off; + if (latter >= PAGE_SIZE) + qty = PAGE_SIZE; + else + qty = latter; + qty /= sizeof(union afs_xdr_dir_block); + + /* check them */ + dbuf = kmap(page); + for (tmp = 0; tmp < qty; tmp++) { + if (dbuf->blocks[tmp].hdr.magic != AFS_DIR_MAGIC) { + printk("kAFS: %s(%lx): bad magic %d/%d is %04hx\n", + __func__, dvnode->vfs_inode.i_ino, tmp, qty, + ntohs(dbuf->blocks[tmp].hdr.magic)); + trace_afs_dir_check_failed(dvnode, off, i_size); + kunmap(page); + trace_afs_file_error(dvnode, -EIO, afs_file_error_dir_bad_magic); + goto error; + } + + /* Make sure each block is NUL terminated so we can reasonably + * use string functions on it. The filenames in the page + * *should* be NUL-terminated anyway. + */ + ((u8 *)&dbuf->blocks[tmp])[AFS_DIR_BLOCK_SIZE - 1] = 0; + } + + kunmap(page); + +checked: + afs_stat_v(dvnode, n_read_dir); + return true; + +error: + return false; +} + +/* + * Check the contents of a directory that we've just read. + */ +static bool afs_dir_check_pages(struct afs_vnode *dvnode, struct afs_read *req) +{ + struct afs_xdr_dir_page *dbuf; + unsigned int i, j, qty = PAGE_SIZE / sizeof(union afs_xdr_dir_block); + + for (i = 0; i < req->nr_pages; i++) + if (!afs_dir_check_page(dvnode, req->pages[i], req->actual_len)) + goto bad; + return true; + +bad: + pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx r=%llx\n", + dvnode->fid.vid, dvnode->fid.vnode, + req->file_size, req->len, req->actual_len, req->remain); + pr_warn("DIR %llx %x %x %x\n", + req->pos, req->index, req->nr_pages, req->offset); + + for (i = 0; i < req->nr_pages; i++) { + dbuf = kmap(req->pages[i]); + for (j = 0; j < qty; j++) { + union afs_xdr_dir_block *block = &dbuf->blocks[j]; + + pr_warn("[%02x] %32phN\n", i * qty + j, block); + } + kunmap(req->pages[i]); + } + return false; +} + +/* + * open an AFS directory file + */ +static int afs_dir_open(struct inode *inode, struct file *file) +{ + _enter("{%lu}", inode->i_ino); + + BUILD_BUG_ON(sizeof(union afs_xdr_dir_block) != 2048); + BUILD_BUG_ON(sizeof(union afs_xdr_dirent) != 32); + + if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(inode)->flags)) + return -ENOENT; + + return afs_open(inode, file); +} + +/* + * Read the directory into the pagecache in one go, scrubbing the previous + * contents. The list of pages is returned, pinning them so that they don't + * get reclaimed during the iteration. + */ +static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key) + __acquires(&dvnode->validate_lock) +{ + struct afs_read *req; + loff_t i_size; + int nr_pages, nr_inline, i, n; + int ret = -ENOMEM; + +retry: + i_size = i_size_read(&dvnode->vfs_inode); + if (i_size < 2048) + return ERR_PTR(afs_bad(dvnode, afs_file_error_dir_small)); + if (i_size > 2048 * 1024) { + trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big); + return ERR_PTR(-EFBIG); + } + + _enter("%llu", i_size); + + /* Get a request record to hold the page list. We want to hold it + * inline if we can, but we don't want to make an order 1 allocation. + */ + nr_pages = (i_size + PAGE_SIZE - 1) / PAGE_SIZE; + nr_inline = nr_pages; + if (nr_inline > (PAGE_SIZE - sizeof(*req)) / sizeof(struct page *)) + nr_inline = 0; + + req = kzalloc(struct_size(req, array, nr_inline), GFP_KERNEL); + if (!req) + return ERR_PTR(-ENOMEM); + + refcount_set(&req->usage, 1); + req->nr_pages = nr_pages; + req->actual_len = i_size; /* May change */ + req->len = nr_pages * PAGE_SIZE; /* We can ask for more than there is */ + req->data_version = dvnode->status.data_version; /* May change */ + if (nr_inline > 0) { + req->pages = req->array; + } else { + req->pages = kcalloc(nr_pages, sizeof(struct page *), + GFP_KERNEL); + if (!req->pages) + goto error; + } + + /* Get a list of all the pages that hold or will hold the directory + * content. We need to fill in any gaps that we might find where the + * memory reclaimer has been at work. If there are any gaps, we will + * need to reread the entire directory contents. + */ + i = 0; + do { + n = find_get_pages_contig(dvnode->vfs_inode.i_mapping, i, + req->nr_pages - i, + req->pages + i); + _debug("find %u at %u/%u", n, i, req->nr_pages); + if (n == 0) { + gfp_t gfp = dvnode->vfs_inode.i_mapping->gfp_mask; + + if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_stat_v(dvnode, n_inval); + + ret = -ENOMEM; + req->pages[i] = __page_cache_alloc(gfp); + if (!req->pages[i]) + goto error; + ret = add_to_page_cache_lru(req->pages[i], + dvnode->vfs_inode.i_mapping, + i, gfp); + if (ret < 0) + goto error; + + attach_page_private(req->pages[i], (void *)1); + unlock_page(req->pages[i]); + i++; + } else { + i += n; + } + } while (i < req->nr_pages); + + /* If we're going to reload, we need to lock all the pages to prevent + * races. + */ + ret = -ERESTARTSYS; + if (down_read_killable(&dvnode->validate_lock) < 0) + goto error; + + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + goto success; + + up_read(&dvnode->validate_lock); + if (down_write_killable(&dvnode->validate_lock) < 0) + goto error; + + if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) { + trace_afs_reload_dir(dvnode); + ret = afs_fetch_data(dvnode, key, req); + if (ret < 0) + goto error_unlock; + + task_io_account_read(PAGE_SIZE * req->nr_pages); + + if (req->len < req->file_size) + goto content_has_grown; + + /* Validate the data we just read. */ + ret = -EIO; + if (!afs_dir_check_pages(dvnode, req)) + goto error_unlock; + + // TODO: Trim excess pages + + set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags); + } + + downgrade_write(&dvnode->validate_lock); +success: + return req; + +error_unlock: + up_write(&dvnode->validate_lock); +error: + afs_put_read(req); + _leave(" = %d", ret); + return ERR_PTR(ret); + +content_has_grown: + up_write(&dvnode->validate_lock); + afs_put_read(req); + goto retry; +} + +/* + * deal with one block in an AFS directory + */ +static int afs_dir_iterate_block(struct afs_vnode *dvnode, + struct dir_context *ctx, + union afs_xdr_dir_block *block, + unsigned blkoff) +{ + union afs_xdr_dirent *dire; + unsigned offset, next, curr; + size_t nlen; + int tmp; + + _enter("%u,%x,%p,,",(unsigned)ctx->pos,blkoff,block); + + curr = (ctx->pos - blkoff) / sizeof(union afs_xdr_dirent); + + /* walk through the block, an entry at a time */ + for (offset = (blkoff == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS); + offset < AFS_DIR_SLOTS_PER_BLOCK; + offset = next + ) { + next = offset + 1; + + /* skip entries marked unused in the bitmap */ + if (!(block->hdr.bitmap[offset / 8] & + (1 << (offset % 8)))) { + _debug("ENT[%zu.%u]: unused", + blkoff / sizeof(union afs_xdr_dir_block), offset); + if (offset >= curr) + ctx->pos = blkoff + + next * sizeof(union afs_xdr_dirent); + continue; + } + + /* got a valid entry */ + dire = &block->dirents[offset]; + nlen = strnlen(dire->u.name, + sizeof(*block) - + offset * sizeof(union afs_xdr_dirent)); + + _debug("ENT[%zu.%u]: %s %zu \"%s\"", + blkoff / sizeof(union afs_xdr_dir_block), offset, + (offset < curr ? "skip" : "fill"), + nlen, dire->u.name); + + /* work out where the next possible entry is */ + for (tmp = nlen; tmp > 15; tmp -= sizeof(union afs_xdr_dirent)) { + if (next >= AFS_DIR_SLOTS_PER_BLOCK) { + _debug("ENT[%zu.%u]:" + " %u travelled beyond end dir block" + " (len %u/%zu)", + blkoff / sizeof(union afs_xdr_dir_block), + offset, next, tmp, nlen); + return afs_bad(dvnode, afs_file_error_dir_over_end); + } + if (!(block->hdr.bitmap[next / 8] & + (1 << (next % 8)))) { + _debug("ENT[%zu.%u]:" + " %u unmarked extension (len %u/%zu)", + blkoff / sizeof(union afs_xdr_dir_block), + offset, next, tmp, nlen); + return afs_bad(dvnode, afs_file_error_dir_unmarked_ext); + } + + _debug("ENT[%zu.%u]: ext %u/%zu", + blkoff / sizeof(union afs_xdr_dir_block), + next, tmp, nlen); + next++; + } + + /* skip if starts before the current position */ + if (offset < curr) { + if (next > curr) + ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent); + continue; + } + + /* found the next entry */ + if (!dir_emit(ctx, dire->u.name, nlen, + ntohl(dire->u.vnode), + (ctx->actor == afs_lookup_filldir || + ctx->actor == afs_lookup_one_filldir)? + ntohl(dire->u.unique) : DT_UNKNOWN)) { + _leave(" = 0 [full]"); + return 0; + } + + ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent); + } + + _leave(" = 1 [more]"); + return 1; +} + +/* + * iterate through the data blob that lists the contents of an AFS directory + */ +static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, + struct key *key, afs_dataversion_t *_dir_version) +{ + struct afs_vnode *dvnode = AFS_FS_I(dir); + struct afs_xdr_dir_page *dbuf; + union afs_xdr_dir_block *dblock; + struct afs_read *req; + struct page *page; + unsigned blkoff, limit; + int ret; + + _enter("{%lu},%u,,", dir->i_ino, (unsigned)ctx->pos); + + if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dir)->flags)) { + _leave(" = -ESTALE"); + return -ESTALE; + } + + req = afs_read_dir(dvnode, key); + if (IS_ERR(req)) + return PTR_ERR(req); + *_dir_version = req->data_version; + + /* round the file position up to the next entry boundary */ + ctx->pos += sizeof(union afs_xdr_dirent) - 1; + ctx->pos &= ~(sizeof(union afs_xdr_dirent) - 1); + + /* walk through the blocks in sequence */ + ret = 0; + while (ctx->pos < req->actual_len) { + blkoff = ctx->pos & ~(sizeof(union afs_xdr_dir_block) - 1); + + /* Fetch the appropriate page from the directory and re-add it + * to the LRU. + */ + page = req->pages[blkoff / PAGE_SIZE]; + if (!page) { + ret = afs_bad(dvnode, afs_file_error_dir_missing_page); + break; + } + mark_page_accessed(page); + + limit = blkoff & ~(PAGE_SIZE - 1); + + dbuf = kmap(page); + + /* deal with the individual blocks stashed on this page */ + do { + dblock = &dbuf->blocks[(blkoff % PAGE_SIZE) / + sizeof(union afs_xdr_dir_block)]; + ret = afs_dir_iterate_block(dvnode, ctx, dblock, blkoff); + if (ret != 1) { + kunmap(page); + goto out; + } + + blkoff += sizeof(union afs_xdr_dir_block); + + } while (ctx->pos < dir->i_size && blkoff < limit); + + kunmap(page); + ret = 0; + } + +out: + up_read(&dvnode->validate_lock); + afs_put_read(req); + _leave(" = %d", ret); + return ret; +} + +/* + * read an AFS directory + */ +static int afs_readdir(struct file *file, struct dir_context *ctx) +{ + afs_dataversion_t dir_version; + + return afs_dir_iterate(file_inode(file), ctx, afs_file_key(file), + &dir_version); +} + +/* + * Search the directory for a single name + * - if afs_dir_iterate_block() spots this function, it'll pass the FID + * uniquifier through dtype + */ +static int afs_lookup_one_filldir(struct dir_context *ctx, const char *name, + int nlen, loff_t fpos, u64 ino, unsigned dtype) +{ + struct afs_lookup_one_cookie *cookie = + container_of(ctx, struct afs_lookup_one_cookie, ctx); + + _enter("{%s,%u},%s,%u,,%llu,%u", + cookie->name.name, cookie->name.len, name, nlen, + (unsigned long long) ino, dtype); + + /* insanity checks first */ + BUILD_BUG_ON(sizeof(union afs_xdr_dir_block) != 2048); + BUILD_BUG_ON(sizeof(union afs_xdr_dirent) != 32); + + if (cookie->name.len != nlen || + memcmp(cookie->name.name, name, nlen) != 0) { + _leave(" = 0 [no]"); + return 0; + } + + cookie->fid.vnode = ino; + cookie->fid.unique = dtype; + cookie->found = 1; + + _leave(" = -1 [found]"); + return -1; +} + +/* + * Do a lookup of a single name in a directory + * - just returns the FID the dentry name maps to if found + */ +static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry, + struct afs_fid *fid, struct key *key, + afs_dataversion_t *_dir_version) +{ + struct afs_super_info *as = dir->i_sb->s_fs_info; + struct afs_lookup_one_cookie cookie = { + .ctx.actor = afs_lookup_one_filldir, + .name = dentry->d_name, + .fid.vid = as->volume->vid + }; + int ret; + + _enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry); + + /* search the directory */ + ret = afs_dir_iterate(dir, &cookie.ctx, key, _dir_version); + if (ret < 0) { + _leave(" = %d [iter]", ret); + return ret; + } + + ret = -ENOENT; + if (!cookie.found) { + _leave(" = -ENOENT [not found]"); + return -ENOENT; + } + + *fid = cookie.fid; + _leave(" = 0 { vn=%llu u=%u }", fid->vnode, fid->unique); + return 0; +} + +/* + * search the directory for a name + * - if afs_dir_iterate_block() spots this function, it'll pass the FID + * uniquifier through dtype + */ +static int afs_lookup_filldir(struct dir_context *ctx, const char *name, + int nlen, loff_t fpos, u64 ino, unsigned dtype) +{ + struct afs_lookup_cookie *cookie = + container_of(ctx, struct afs_lookup_cookie, ctx); + int ret; + + _enter("{%s,%u},%s,%u,,%llu,%u", + cookie->name.name, cookie->name.len, name, nlen, + (unsigned long long) ino, dtype); + + /* insanity checks first */ + BUILD_BUG_ON(sizeof(union afs_xdr_dir_block) != 2048); + BUILD_BUG_ON(sizeof(union afs_xdr_dirent) != 32); + + if (cookie->found) { + if (cookie->nr_fids < 50) { + cookie->fids[cookie->nr_fids].vnode = ino; + cookie->fids[cookie->nr_fids].unique = dtype; + cookie->nr_fids++; + } + } else if (cookie->name.len == nlen && + memcmp(cookie->name.name, name, nlen) == 0) { + cookie->fids[1].vnode = ino; + cookie->fids[1].unique = dtype; + cookie->found = 1; + if (cookie->one_only) + return -1; + } + + ret = cookie->nr_fids >= 50 ? -1 : 0; + _leave(" = %d", ret); + return ret; +} + +/* + * Deal with the result of a successful lookup operation. Turn all the files + * into inodes and save the first one - which is the one we actually want. + */ +static void afs_do_lookup_success(struct afs_operation *op) +{ + struct afs_vnode_param *vp; + struct afs_vnode *vnode; + struct inode *inode; + u32 abort_code; + int i; + + _enter(""); + + for (i = 0; i < op->nr_files; i++) { + switch (i) { + case 0: + vp = &op->file[0]; + abort_code = vp->scb.status.abort_code; + if (abort_code != 0) { + op->ac.abort_code = abort_code; + op->error = afs_abort_to_error(abort_code); + } + break; + + case 1: + vp = &op->file[1]; + break; + + default: + vp = &op->more_files[i - 2]; + break; + } + + if (!vp->scb.have_status && !vp->scb.have_error) + continue; + + _debug("do [%u]", i); + if (vp->vnode) { + if (!test_bit(AFS_VNODE_UNSET, &vp->vnode->flags)) + afs_vnode_commit_status(op, vp); + } else if (vp->scb.status.abort_code == 0) { + inode = afs_iget(op, vp); + if (!IS_ERR(inode)) { + vnode = AFS_FS_I(inode); + afs_cache_permit(vnode, op->key, + 0 /* Assume vnode->cb_break is 0 */ + + op->cb_v_break, + &vp->scb); + vp->vnode = vnode; + vp->put_vnode = true; + } + } else { + _debug("- abort %d %llx:%llx.%x", + vp->scb.status.abort_code, + vp->fid.vid, vp->fid.vnode, vp->fid.unique); + } + } + + _leave(""); +} + +static const struct afs_operation_ops afs_inline_bulk_status_operation = { + .issue_afs_rpc = afs_fs_inline_bulk_status, + .issue_yfs_rpc = yfs_fs_inline_bulk_status, + .success = afs_do_lookup_success, +}; + +static const struct afs_operation_ops afs_lookup_fetch_status_operation = { + .issue_afs_rpc = afs_fs_fetch_status, + .issue_yfs_rpc = yfs_fs_fetch_status, + .success = afs_do_lookup_success, + .aborted = afs_check_for_remote_deletion, +}; + +/* + * See if we know that the server we expect to use doesn't support + * FS.InlineBulkStatus. + */ +static bool afs_server_supports_ibulk(struct afs_vnode *dvnode) +{ + struct afs_server_list *slist; + struct afs_volume *volume = dvnode->volume; + struct afs_server *server; + bool ret = true; + int i; + + if (!test_bit(AFS_VOLUME_MAYBE_NO_IBULK, &volume->flags)) + return true; + + rcu_read_lock(); + slist = rcu_dereference(volume->servers); + + for (i = 0; i < slist->nr_servers; i++) { + server = slist->servers[i].server; + if (server == dvnode->cb_server) { + if (test_bit(AFS_SERVER_FL_NO_IBULK, &server->flags)) + ret = false; + break; + } + } + + rcu_read_unlock(); + return ret; +} + +/* + * Do a lookup in a directory. We make use of bulk lookup to query a slew of + * files in one go and create inodes for them. The inode of the file we were + * asked for is returned. + */ +static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, + struct key *key) +{ + struct afs_lookup_cookie *cookie; + struct afs_vnode_param *vp; + struct afs_operation *op; + struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode; + struct inode *inode = NULL, *ti; + afs_dataversion_t data_version = READ_ONCE(dvnode->status.data_version); + long ret; + int i; + + _enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry); + + cookie = kzalloc(sizeof(struct afs_lookup_cookie), GFP_KERNEL); + if (!cookie) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < ARRAY_SIZE(cookie->fids); i++) + cookie->fids[i].vid = dvnode->fid.vid; + cookie->ctx.actor = afs_lookup_filldir; + cookie->name = dentry->d_name; + cookie->nr_fids = 2; /* slot 0 is saved for the fid we actually want + * and slot 1 for the directory */ + + if (!afs_server_supports_ibulk(dvnode)) + cookie->one_only = true; + + /* search the directory */ + ret = afs_dir_iterate(dir, &cookie->ctx, key, &data_version); + if (ret < 0) + goto out; + + dentry->d_fsdata = (void *)(unsigned long)data_version; + + ret = -ENOENT; + if (!cookie->found) + goto out; + + /* Check to see if we already have an inode for the primary fid. */ + inode = ilookup5(dir->i_sb, cookie->fids[1].vnode, + afs_ilookup5_test_by_fid, &cookie->fids[1]); + if (inode) + goto out; /* We do */ + + /* Okay, we didn't find it. We need to query the server - and whilst + * we're doing that, we're going to attempt to look up a bunch of other + * vnodes also. + */ + op = afs_alloc_operation(NULL, dvnode->volume); + if (IS_ERR(op)) { + ret = PTR_ERR(op); + goto out; + } + + afs_op_set_vnode(op, 0, dvnode); + afs_op_set_fid(op, 1, &cookie->fids[1]); + + op->nr_files = cookie->nr_fids; + _debug("nr_files %u", op->nr_files); + + /* Need space for examining all the selected files */ + op->error = -ENOMEM; + if (op->nr_files > 2) { + op->more_files = kvcalloc(op->nr_files - 2, + sizeof(struct afs_vnode_param), + GFP_KERNEL); + if (!op->more_files) + goto out_op; + + for (i = 2; i < op->nr_files; i++) { + vp = &op->more_files[i - 2]; + vp->fid = cookie->fids[i]; + + /* Find any inodes that already exist and get their + * callback counters. + */ + ti = ilookup5_nowait(dir->i_sb, vp->fid.vnode, + afs_ilookup5_test_by_fid, &vp->fid); + if (!IS_ERR_OR_NULL(ti)) { + vnode = AFS_FS_I(ti); + vp->dv_before = vnode->status.data_version; + vp->cb_break_before = afs_calc_vnode_cb_break(vnode); + vp->vnode = vnode; + vp->put_vnode = true; + vp->speculative = true; /* vnode not locked */ + } + } + } + + /* Try FS.InlineBulkStatus first. Abort codes for the individual + * lookups contained therein are stored in the reply without aborting + * the whole operation. + */ + op->error = -ENOTSUPP; + if (!cookie->one_only) { + op->ops = &afs_inline_bulk_status_operation; + afs_begin_vnode_operation(op); + afs_wait_for_operation(op); + } + + if (op->error == -ENOTSUPP) { + /* We could try FS.BulkStatus next, but this aborts the entire + * op if any of the lookups fails - so, for the moment, revert + * to FS.FetchStatus for op->file[1]. + */ + op->fetch_status.which = 1; + op->ops = &afs_lookup_fetch_status_operation; + afs_begin_vnode_operation(op); + afs_wait_for_operation(op); + } + inode = ERR_PTR(op->error); + +out_op: + if (op->error == 0) { + inode = &op->file[1].vnode->vfs_inode; + op->file[1].vnode = NULL; + } + + if (op->file[0].scb.have_status) + dentry->d_fsdata = (void *)(unsigned long)op->file[0].scb.status.data_version; + else + dentry->d_fsdata = (void *)(unsigned long)op->file[0].dv_before; + ret = afs_put_operation(op); +out: + kfree(cookie); + _leave(""); + return inode ?: ERR_PTR(ret); +} + +/* + * Look up an entry in a directory with @sys substitution. + */ +static struct dentry *afs_lookup_atsys(struct inode *dir, struct dentry *dentry, + struct key *key) +{ + struct afs_sysnames *subs; + struct afs_net *net = afs_i2net(dir); + struct dentry *ret; + char *buf, *p, *name; + int len, i; + + _enter(""); + + ret = ERR_PTR(-ENOMEM); + p = buf = kmalloc(AFSNAMEMAX, GFP_KERNEL); + if (!buf) + goto out_p; + if (dentry->d_name.len > 4) { + memcpy(p, dentry->d_name.name, dentry->d_name.len - 4); + p += dentry->d_name.len - 4; + } + + /* There is an ordered list of substitutes that we have to try. */ + read_lock(&net->sysnames_lock); + subs = net->sysnames; + refcount_inc(&subs->usage); + read_unlock(&net->sysnames_lock); + + for (i = 0; i < subs->nr; i++) { + name = subs->subs[i]; + len = dentry->d_name.len - 4 + strlen(name); + if (len >= AFSNAMEMAX) { + ret = ERR_PTR(-ENAMETOOLONG); + goto out_s; + } + + strcpy(p, name); + ret = lookup_one_len(buf, dentry->d_parent, len); + if (IS_ERR(ret) || d_is_positive(ret)) + goto out_s; + dput(ret); + } + + /* We don't want to d_add() the @sys dentry here as we don't want to + * the cached dentry to hide changes to the sysnames list. + */ + ret = NULL; +out_s: + afs_put_sysnames(subs); + kfree(buf); +out_p: + key_put(key); + return ret; +} + +/* + * look up an entry in a directory + */ +static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + struct afs_vnode *dvnode = AFS_FS_I(dir); + struct afs_fid fid = {}; + struct inode *inode; + struct dentry *d; + struct key *key; + int ret; + + _enter("{%llx:%llu},%p{%pd},", + dvnode->fid.vid, dvnode->fid.vnode, dentry, dentry); + + ASSERTCMP(d_inode(dentry), ==, NULL); + + if (dentry->d_name.len >= AFSNAMEMAX) { + _leave(" = -ENAMETOOLONG"); + return ERR_PTR(-ENAMETOOLONG); + } + + if (test_bit(AFS_VNODE_DELETED, &dvnode->flags)) { + _leave(" = -ESTALE"); + return ERR_PTR(-ESTALE); + } + + key = afs_request_key(dvnode->volume->cell); + if (IS_ERR(key)) { + _leave(" = %ld [key]", PTR_ERR(key)); + return ERR_CAST(key); + } + + ret = afs_validate(dvnode, key); + if (ret < 0) { + key_put(key); + _leave(" = %d [val]", ret); + return ERR_PTR(ret); + } + + if (dentry->d_name.len >= 4 && + dentry->d_name.name[dentry->d_name.len - 4] == '@' && + dentry->d_name.name[dentry->d_name.len - 3] == 's' && + dentry->d_name.name[dentry->d_name.len - 2] == 'y' && + dentry->d_name.name[dentry->d_name.len - 1] == 's') + return afs_lookup_atsys(dir, dentry, key); + + afs_stat_v(dvnode, n_lookup); + inode = afs_do_lookup(dir, dentry, key); + key_put(key); + if (inode == ERR_PTR(-ENOENT)) + inode = afs_try_auto_mntpt(dentry, dir); + + if (!IS_ERR_OR_NULL(inode)) + fid = AFS_FS_I(inode)->fid; + + _debug("splice %p", dentry->d_inode); + d = d_splice_alias(inode, dentry); + if (!IS_ERR_OR_NULL(d)) { + d->d_fsdata = dentry->d_fsdata; + trace_afs_lookup(dvnode, &d->d_name, &fid); + } else { + trace_afs_lookup(dvnode, &dentry->d_name, &fid); + } + _leave(""); + return d; +} + +/* + * Check the validity of a dentry under RCU conditions. + */ +static int afs_d_revalidate_rcu(struct dentry *dentry) +{ + struct afs_vnode *dvnode; + struct dentry *parent; + struct inode *dir; + long dir_version, de_version; + + _enter("%p", dentry); + + /* Check the parent directory is still valid first. */ + parent = READ_ONCE(dentry->d_parent); + dir = d_inode_rcu(parent); + if (!dir) + return -ECHILD; + dvnode = AFS_FS_I(dir); + if (test_bit(AFS_VNODE_DELETED, &dvnode->flags)) + return -ECHILD; + + if (!afs_check_validity(dvnode)) + return -ECHILD; + + /* We only need to invalidate a dentry if the server's copy changed + * behind our back. If we made the change, it's no problem. Note that + * on a 32-bit system, we only have 32 bits in the dentry to store the + * version. + */ + dir_version = (long)READ_ONCE(dvnode->status.data_version); + de_version = (long)READ_ONCE(dentry->d_fsdata); + if (de_version != dir_version) { + dir_version = (long)READ_ONCE(dvnode->invalid_before); + if (de_version - dir_version < 0) + return -ECHILD; + } + + return 1; /* Still valid */ +} + +/* + * check that a dentry lookup hit has found a valid entry + * - NOTE! the hit can be a negative hit too, so we can't assume we have an + * inode + */ +static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + struct afs_vnode *vnode, *dir; + struct afs_fid fid; + struct dentry *parent; + struct inode *inode; + struct key *key; + afs_dataversion_t dir_version, invalid_before; + long de_version; + int ret; + + if (flags & LOOKUP_RCU) + return afs_d_revalidate_rcu(dentry); + + if (d_really_is_positive(dentry)) { + vnode = AFS_FS_I(d_inode(dentry)); + _enter("{v={%llx:%llu} n=%pd fl=%lx},", + vnode->fid.vid, vnode->fid.vnode, dentry, + vnode->flags); + } else { + _enter("{neg n=%pd}", dentry); + } + + key = afs_request_key(AFS_FS_S(dentry->d_sb)->volume->cell); + if (IS_ERR(key)) + key = NULL; + + /* Hold the parent dentry so we can peer at it */ + parent = dget_parent(dentry); + dir = AFS_FS_I(d_inode(parent)); + + /* validate the parent directory */ + afs_validate(dir, key); + + if (test_bit(AFS_VNODE_DELETED, &dir->flags)) { + _debug("%pd: parent dir deleted", dentry); + goto not_found; + } + + /* We only need to invalidate a dentry if the server's copy changed + * behind our back. If we made the change, it's no problem. Note that + * on a 32-bit system, we only have 32 bits in the dentry to store the + * version. + */ + dir_version = dir->status.data_version; + de_version = (long)dentry->d_fsdata; + if (de_version == (long)dir_version) + goto out_valid_noupdate; + + invalid_before = dir->invalid_before; + if (de_version - (long)invalid_before >= 0) + goto out_valid; + + _debug("dir modified"); + afs_stat_v(dir, n_reval); + + /* search the directory for this vnode */ + ret = afs_do_lookup_one(&dir->vfs_inode, dentry, &fid, key, &dir_version); + switch (ret) { + case 0: + /* the filename maps to something */ + if (d_really_is_negative(dentry)) + goto not_found; + inode = d_inode(dentry); + if (is_bad_inode(inode)) { + printk("kAFS: afs_d_revalidate: %pd2 has bad inode\n", + dentry); + goto not_found; + } + + vnode = AFS_FS_I(inode); + + /* if the vnode ID has changed, then the dirent points to a + * different file */ + if (fid.vnode != vnode->fid.vnode) { + _debug("%pd: dirent changed [%llu != %llu]", + dentry, fid.vnode, + vnode->fid.vnode); + goto not_found; + } + + /* if the vnode ID uniqifier has changed, then the file has + * been deleted and replaced, and the original vnode ID has + * been reused */ + if (fid.unique != vnode->fid.unique) { + _debug("%pd: file deleted (uq %u -> %u I:%u)", + dentry, fid.unique, + vnode->fid.unique, + vnode->vfs_inode.i_generation); + goto not_found; + } + goto out_valid; + + case -ENOENT: + /* the filename is unknown */ + _debug("%pd: dirent not found", dentry); + if (d_really_is_positive(dentry)) + goto not_found; + goto out_valid; + + default: + _debug("failed to iterate dir %pd: %d", + parent, ret); + goto not_found; + } + +out_valid: + dentry->d_fsdata = (void *)(unsigned long)dir_version; +out_valid_noupdate: + dput(parent); + key_put(key); + _leave(" = 1 [valid]"); + return 1; + +not_found: + _debug("dropping dentry %pd2", dentry); + dput(parent); + key_put(key); + + _leave(" = 0 [bad]"); + return 0; +} + +/* + * allow the VFS to enquire as to whether a dentry should be unhashed (mustn't + * sleep) + * - called from dput() when d_count is going to 0. + * - return 1 to request dentry be unhashed, 0 otherwise + */ +static int afs_d_delete(const struct dentry *dentry) +{ + _enter("%pd", dentry); + + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) + goto zap; + + if (d_really_is_positive(dentry) && + (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(d_inode(dentry))->flags) || + test_bit(AFS_VNODE_PSEUDODIR, &AFS_FS_I(d_inode(dentry))->flags))) + goto zap; + + _leave(" = 0 [keep]"); + return 0; + +zap: + _leave(" = 1 [zap]"); + return 1; +} + +/* + * Clean up sillyrename files on dentry removal. + */ +static void afs_d_iput(struct dentry *dentry, struct inode *inode) +{ + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) + afs_silly_iput(dentry, inode); + iput(inode); +} + +/* + * handle dentry release + */ +void afs_d_release(struct dentry *dentry) +{ + _enter("%pd", dentry); +} + +void afs_check_for_remote_deletion(struct afs_operation *op) +{ + struct afs_vnode *vnode = op->file[0].vnode; + + switch (op->ac.abort_code) { + case VNOVNODE: + set_bit(AFS_VNODE_DELETED, &vnode->flags); + afs_break_callback(vnode, afs_cb_break_for_deleted); + } +} + +/* + * Create a new inode for create/mkdir/symlink + */ +static void afs_vnode_new_inode(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[1]; + struct afs_vnode *vnode; + struct inode *inode; + + _enter(""); + + ASSERTCMP(op->error, ==, 0); + + inode = afs_iget(op, vp); + if (IS_ERR(inode)) { + /* ENOMEM or EINTR at a really inconvenient time - just abandon + * the new directory on the server. + */ + op->error = PTR_ERR(inode); + return; + } + + vnode = AFS_FS_I(inode); + set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags); + if (!op->error) + afs_cache_permit(vnode, op->key, vnode->cb_break, &vp->scb); + d_instantiate(op->dentry, inode); +} + +static void afs_create_success(struct afs_operation *op) +{ + _enter("op=%08x", op->debug_id); + op->ctime = op->file[0].scb.status.mtime_client; + afs_vnode_commit_status(op, &op->file[0]); + afs_update_dentry_version(op, &op->file[0], op->dentry); + afs_vnode_new_inode(op); +} + +static void afs_create_edit_dir(struct afs_operation *op) +{ + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[1]; + struct afs_vnode *dvnode = dvp->vnode; + + _enter("op=%08x", op->debug_id); + + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == dvp->dv_before + dvp->dv_delta) + afs_edit_dir_add(dvnode, &op->dentry->d_name, &vp->fid, + op->create.reason); + up_write(&dvnode->validate_lock); +} + +static void afs_create_put(struct afs_operation *op) +{ + _enter("op=%08x", op->debug_id); + + if (op->error) + d_drop(op->dentry); +} + +static const struct afs_operation_ops afs_mkdir_operation = { + .issue_afs_rpc = afs_fs_make_dir, + .issue_yfs_rpc = yfs_fs_make_dir, + .success = afs_create_success, + .aborted = afs_check_for_remote_deletion, + .edit_dir = afs_create_edit_dir, + .put = afs_create_put, +}; + +/* + * create a directory on an AFS filesystem + */ +static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + struct afs_operation *op; + struct afs_vnode *dvnode = AFS_FS_I(dir); + + _enter("{%llx:%llu},{%pd},%ho", + dvnode->fid.vid, dvnode->fid.vnode, dentry, mode); + + op = afs_alloc_operation(NULL, dvnode->volume); + if (IS_ERR(op)) { + d_drop(dentry); + return PTR_ERR(op); + } + + afs_op_set_vnode(op, 0, dvnode); + op->file[0].dv_delta = 1; + op->file[0].modification = true; + op->file[0].update_ctime = true; + op->dentry = dentry; + op->create.mode = S_IFDIR | mode; + op->create.reason = afs_edit_dir_for_mkdir; + op->mtime = current_time(dir); + op->ops = &afs_mkdir_operation; + return afs_do_sync_operation(op); +} + +/* + * Remove a subdir from a directory. + */ +static void afs_dir_remove_subdir(struct dentry *dentry) +{ + if (d_really_is_positive(dentry)) { + struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); + + clear_nlink(&vnode->vfs_inode); + set_bit(AFS_VNODE_DELETED, &vnode->flags); + clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + } +} + +static void afs_rmdir_success(struct afs_operation *op) +{ + _enter("op=%08x", op->debug_id); + op->ctime = op->file[0].scb.status.mtime_client; + afs_vnode_commit_status(op, &op->file[0]); + afs_update_dentry_version(op, &op->file[0], op->dentry); +} + +static void afs_rmdir_edit_dir(struct afs_operation *op) +{ + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode *dvnode = dvp->vnode; + + _enter("op=%08x", op->debug_id); + afs_dir_remove_subdir(op->dentry); + + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == dvp->dv_before + dvp->dv_delta) + afs_edit_dir_remove(dvnode, &op->dentry->d_name, + afs_edit_dir_for_rmdir); + up_write(&dvnode->validate_lock); +} + +static void afs_rmdir_put(struct afs_operation *op) +{ + _enter("op=%08x", op->debug_id); + if (op->file[1].vnode) + up_write(&op->file[1].vnode->rmdir_lock); +} + +static const struct afs_operation_ops afs_rmdir_operation = { + .issue_afs_rpc = afs_fs_remove_dir, + .issue_yfs_rpc = yfs_fs_remove_dir, + .success = afs_rmdir_success, + .aborted = afs_check_for_remote_deletion, + .edit_dir = afs_rmdir_edit_dir, + .put = afs_rmdir_put, +}; + +/* + * remove a directory from an AFS filesystem + */ +static int afs_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct afs_operation *op; + struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL; + int ret; + + _enter("{%llx:%llu},{%pd}", + dvnode->fid.vid, dvnode->fid.vnode, dentry); + + op = afs_alloc_operation(NULL, dvnode->volume); + if (IS_ERR(op)) + return PTR_ERR(op); + + afs_op_set_vnode(op, 0, dvnode); + op->file[0].dv_delta = 1; + op->file[0].modification = true; + op->file[0].update_ctime = true; + + op->dentry = dentry; + op->ops = &afs_rmdir_operation; + + /* Try to make sure we have a callback promise on the victim. */ + if (d_really_is_positive(dentry)) { + vnode = AFS_FS_I(d_inode(dentry)); + ret = afs_validate(vnode, op->key); + if (ret < 0) + goto error; + } + + if (vnode) { + ret = down_write_killable(&vnode->rmdir_lock); + if (ret < 0) + goto error; + op->file[1].vnode = vnode; + } + + return afs_do_sync_operation(op); + +error: + return afs_put_operation(op); +} + +/* + * Remove a link to a file or symlink from a directory. + * + * If the file was not deleted due to excess hard links, the fileserver will + * break the callback promise on the file - if it had one - before it returns + * to us, and if it was deleted, it won't + * + * However, if we didn't have a callback promise outstanding, or it was + * outstanding on a different server, then it won't break it either... + */ +static void afs_dir_remove_link(struct afs_operation *op) +{ + struct afs_vnode *dvnode = op->file[0].vnode; + struct afs_vnode *vnode = op->file[1].vnode; + struct dentry *dentry = op->dentry; + int ret; + + if (op->error != 0 || + (op->file[1].scb.have_status && op->file[1].scb.have_error)) + return; + if (d_really_is_positive(dentry)) + return; + + if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { + /* Already done */ + } else if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) { + write_seqlock(&vnode->cb_lock); + drop_nlink(&vnode->vfs_inode); + if (vnode->vfs_inode.i_nlink == 0) { + set_bit(AFS_VNODE_DELETED, &vnode->flags); + __afs_break_callback(vnode, afs_cb_break_for_unlink); + } + write_sequnlock(&vnode->cb_lock); + } else { + afs_break_callback(vnode, afs_cb_break_for_unlink); + + if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) + _debug("AFS_VNODE_DELETED"); + + ret = afs_validate(vnode, op->key); + if (ret != -ESTALE) + op->error = ret; + } + + _debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, op->error); +} + +static void afs_unlink_success(struct afs_operation *op) +{ + _enter("op=%08x", op->debug_id); + op->ctime = op->file[0].scb.status.mtime_client; + afs_check_dir_conflict(op, &op->file[0]); + afs_vnode_commit_status(op, &op->file[0]); + afs_vnode_commit_status(op, &op->file[1]); + afs_update_dentry_version(op, &op->file[0], op->dentry); + afs_dir_remove_link(op); +} + +static void afs_unlink_edit_dir(struct afs_operation *op) +{ + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode *dvnode = dvp->vnode; + + _enter("op=%08x", op->debug_id); + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == dvp->dv_before + dvp->dv_delta) + afs_edit_dir_remove(dvnode, &op->dentry->d_name, + afs_edit_dir_for_unlink); + up_write(&dvnode->validate_lock); +} + +static void afs_unlink_put(struct afs_operation *op) +{ + _enter("op=%08x", op->debug_id); + if (op->unlink.need_rehash && op->error < 0 && op->error != -ENOENT) + d_rehash(op->dentry); +} + +static const struct afs_operation_ops afs_unlink_operation = { + .issue_afs_rpc = afs_fs_remove_file, + .issue_yfs_rpc = yfs_fs_remove_file, + .success = afs_unlink_success, + .aborted = afs_check_for_remote_deletion, + .edit_dir = afs_unlink_edit_dir, + .put = afs_unlink_put, +}; + +/* + * Remove a file or symlink from an AFS filesystem. + */ +static int afs_unlink(struct inode *dir, struct dentry *dentry) +{ + struct afs_operation *op; + struct afs_vnode *dvnode = AFS_FS_I(dir); + struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); + int ret; + + _enter("{%llx:%llu},{%pd}", + dvnode->fid.vid, dvnode->fid.vnode, dentry); + + if (dentry->d_name.len >= AFSNAMEMAX) + return -ENAMETOOLONG; + + op = afs_alloc_operation(NULL, dvnode->volume); + if (IS_ERR(op)) + return PTR_ERR(op); + + afs_op_set_vnode(op, 0, dvnode); + op->file[0].dv_delta = 1; + op->file[0].modification = true; + op->file[0].update_ctime = true; + + /* Try to make sure we have a callback promise on the victim. */ + ret = afs_validate(vnode, op->key); + if (ret < 0) { + op->error = ret; + goto error; + } + + spin_lock(&dentry->d_lock); + if (d_count(dentry) > 1) { + spin_unlock(&dentry->d_lock); + /* Start asynchronous writeout of the inode */ + write_inode_now(d_inode(dentry), 0); + op->error = afs_sillyrename(dvnode, vnode, dentry, op->key); + goto error; + } + if (!d_unhashed(dentry)) { + /* Prevent a race with RCU lookup. */ + __d_drop(dentry); + op->unlink.need_rehash = true; + } + spin_unlock(&dentry->d_lock); + + op->file[1].vnode = vnode; + op->file[1].update_ctime = true; + op->file[1].op_unlinked = true; + op->dentry = dentry; + op->ops = &afs_unlink_operation; + afs_begin_vnode_operation(op); + afs_wait_for_operation(op); + + /* If there was a conflict with a third party, check the status of the + * unlinked vnode. + */ + if (op->error == 0 && (op->flags & AFS_OPERATION_DIR_CONFLICT)) { + op->file[1].update_ctime = false; + op->fetch_status.which = 1; + op->ops = &afs_fetch_status_operation; + afs_begin_vnode_operation(op); + afs_wait_for_operation(op); + } + + return afs_put_operation(op); + +error: + return afs_put_operation(op); +} + +static const struct afs_operation_ops afs_create_operation = { + .issue_afs_rpc = afs_fs_create_file, + .issue_yfs_rpc = yfs_fs_create_file, + .success = afs_create_success, + .aborted = afs_check_for_remote_deletion, + .edit_dir = afs_create_edit_dir, + .put = afs_create_put, +}; + +/* + * create a regular file on an AFS filesystem + */ +static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, + bool excl) +{ + struct afs_operation *op; + struct afs_vnode *dvnode = AFS_FS_I(dir); + int ret = -ENAMETOOLONG; + + _enter("{%llx:%llu},{%pd},%ho", + dvnode->fid.vid, dvnode->fid.vnode, dentry, mode); + + if (dentry->d_name.len >= AFSNAMEMAX) + goto error; + + op = afs_alloc_operation(NULL, dvnode->volume); + if (IS_ERR(op)) { + ret = PTR_ERR(op); + goto error; + } + + afs_op_set_vnode(op, 0, dvnode); + op->file[0].dv_delta = 1; + op->file[0].modification = true; + op->file[0].update_ctime = true; + + op->dentry = dentry; + op->create.mode = S_IFREG | mode; + op->create.reason = afs_edit_dir_for_create; + op->mtime = current_time(dir); + op->ops = &afs_create_operation; + return afs_do_sync_operation(op); + +error: + d_drop(dentry); + _leave(" = %d", ret); + return ret; +} + +static void afs_link_success(struct afs_operation *op) +{ + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[1]; + + _enter("op=%08x", op->debug_id); + op->ctime = dvp->scb.status.mtime_client; + afs_vnode_commit_status(op, dvp); + afs_vnode_commit_status(op, vp); + afs_update_dentry_version(op, dvp, op->dentry); + if (op->dentry_2->d_parent == op->dentry->d_parent) + afs_update_dentry_version(op, dvp, op->dentry_2); + ihold(&vp->vnode->vfs_inode); + d_instantiate(op->dentry, &vp->vnode->vfs_inode); +} + +static void afs_link_put(struct afs_operation *op) +{ + _enter("op=%08x", op->debug_id); + if (op->error) + d_drop(op->dentry); +} + +static const struct afs_operation_ops afs_link_operation = { + .issue_afs_rpc = afs_fs_link, + .issue_yfs_rpc = yfs_fs_link, + .success = afs_link_success, + .aborted = afs_check_for_remote_deletion, + .edit_dir = afs_create_edit_dir, + .put = afs_link_put, +}; + +/* + * create a hard link between files in an AFS filesystem + */ +static int afs_link(struct dentry *from, struct inode *dir, + struct dentry *dentry) +{ + struct afs_operation *op; + struct afs_vnode *dvnode = AFS_FS_I(dir); + struct afs_vnode *vnode = AFS_FS_I(d_inode(from)); + int ret = -ENAMETOOLONG; + + _enter("{%llx:%llu},{%llx:%llu},{%pd}", + vnode->fid.vid, vnode->fid.vnode, + dvnode->fid.vid, dvnode->fid.vnode, + dentry); + + if (dentry->d_name.len >= AFSNAMEMAX) + goto error; + + op = afs_alloc_operation(NULL, dvnode->volume); + if (IS_ERR(op)) { + ret = PTR_ERR(op); + goto error; + } + + afs_op_set_vnode(op, 0, dvnode); + afs_op_set_vnode(op, 1, vnode); + op->file[0].dv_delta = 1; + op->file[0].modification = true; + op->file[0].update_ctime = true; + op->file[1].update_ctime = true; + + op->dentry = dentry; + op->dentry_2 = from; + op->ops = &afs_link_operation; + op->create.reason = afs_edit_dir_for_link; + return afs_do_sync_operation(op); + +error: + d_drop(dentry); + _leave(" = %d", ret); + return ret; +} + +static const struct afs_operation_ops afs_symlink_operation = { + .issue_afs_rpc = afs_fs_symlink, + .issue_yfs_rpc = yfs_fs_symlink, + .success = afs_create_success, + .aborted = afs_check_for_remote_deletion, + .edit_dir = afs_create_edit_dir, + .put = afs_create_put, +}; + +/* + * create a symlink in an AFS filesystem + */ +static int afs_symlink(struct inode *dir, struct dentry *dentry, + const char *content) +{ + struct afs_operation *op; + struct afs_vnode *dvnode = AFS_FS_I(dir); + int ret; + + _enter("{%llx:%llu},{%pd},%s", + dvnode->fid.vid, dvnode->fid.vnode, dentry, + content); + + ret = -ENAMETOOLONG; + if (dentry->d_name.len >= AFSNAMEMAX) + goto error; + + ret = -EINVAL; + if (strlen(content) >= AFSPATHMAX) + goto error; + + op = afs_alloc_operation(NULL, dvnode->volume); + if (IS_ERR(op)) { + ret = PTR_ERR(op); + goto error; + } + + afs_op_set_vnode(op, 0, dvnode); + op->file[0].dv_delta = 1; + + op->dentry = dentry; + op->ops = &afs_symlink_operation; + op->create.reason = afs_edit_dir_for_symlink; + op->create.symlink = content; + op->mtime = current_time(dir); + return afs_do_sync_operation(op); + +error: + d_drop(dentry); + _leave(" = %d", ret); + return ret; +} + +static void afs_rename_success(struct afs_operation *op) +{ + _enter("op=%08x", op->debug_id); + + op->ctime = op->file[0].scb.status.mtime_client; + afs_check_dir_conflict(op, &op->file[1]); + afs_vnode_commit_status(op, &op->file[0]); + if (op->file[1].vnode != op->file[0].vnode) { + op->ctime = op->file[1].scb.status.mtime_client; + afs_vnode_commit_status(op, &op->file[1]); + } +} + +static void afs_rename_edit_dir(struct afs_operation *op) +{ + struct afs_vnode_param *orig_dvp = &op->file[0]; + struct afs_vnode_param *new_dvp = &op->file[1]; + struct afs_vnode *orig_dvnode = orig_dvp->vnode; + struct afs_vnode *new_dvnode = new_dvp->vnode; + struct afs_vnode *vnode = AFS_FS_I(d_inode(op->dentry)); + struct dentry *old_dentry = op->dentry; + struct dentry *new_dentry = op->dentry_2; + struct inode *new_inode; + + _enter("op=%08x", op->debug_id); + + if (op->rename.rehash) { + d_rehash(op->rename.rehash); + op->rename.rehash = NULL; + } + + down_write(&orig_dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags) && + orig_dvnode->status.data_version == orig_dvp->dv_before + orig_dvp->dv_delta) + afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name, + afs_edit_dir_for_rename_0); + + if (new_dvnode != orig_dvnode) { + up_write(&orig_dvnode->validate_lock); + down_write(&new_dvnode->validate_lock); + } + + if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags) && + new_dvnode->status.data_version == new_dvp->dv_before + new_dvp->dv_delta) { + if (!op->rename.new_negative) + afs_edit_dir_remove(new_dvnode, &new_dentry->d_name, + afs_edit_dir_for_rename_1); + + afs_edit_dir_add(new_dvnode, &new_dentry->d_name, + &vnode->fid, afs_edit_dir_for_rename_2); + } + + new_inode = d_inode(new_dentry); + if (new_inode) { + spin_lock(&new_inode->i_lock); + if (S_ISDIR(new_inode->i_mode)) + clear_nlink(new_inode); + else if (new_inode->i_nlink > 0) + drop_nlink(new_inode); + spin_unlock(&new_inode->i_lock); + } + + /* Now we can update d_fsdata on the dentries to reflect their + * new parent's data_version. + * + * Note that if we ever implement RENAME_EXCHANGE, we'll have + * to update both dentries with opposing dir versions. + */ + afs_update_dentry_version(op, new_dvp, op->dentry); + afs_update_dentry_version(op, new_dvp, op->dentry_2); + + d_move(old_dentry, new_dentry); + + up_write(&new_dvnode->validate_lock); +} + +static void afs_rename_put(struct afs_operation *op) +{ + _enter("op=%08x", op->debug_id); + if (op->rename.rehash) + d_rehash(op->rename.rehash); + dput(op->rename.tmp); + if (op->error) + d_rehash(op->dentry); +} + +static const struct afs_operation_ops afs_rename_operation = { + .issue_afs_rpc = afs_fs_rename, + .issue_yfs_rpc = yfs_fs_rename, + .success = afs_rename_success, + .edit_dir = afs_rename_edit_dir, + .put = afs_rename_put, +}; + +/* + * rename a file in an AFS filesystem and/or move it between directories + */ +static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + struct afs_operation *op; + struct afs_vnode *orig_dvnode, *new_dvnode, *vnode; + int ret; + + if (flags) + return -EINVAL; + + /* Don't allow silly-rename files be moved around. */ + if (old_dentry->d_flags & DCACHE_NFSFS_RENAMED) + return -EINVAL; + + vnode = AFS_FS_I(d_inode(old_dentry)); + orig_dvnode = AFS_FS_I(old_dir); + new_dvnode = AFS_FS_I(new_dir); + + _enter("{%llx:%llu},{%llx:%llu},{%llx:%llu},{%pd}", + orig_dvnode->fid.vid, orig_dvnode->fid.vnode, + vnode->fid.vid, vnode->fid.vnode, + new_dvnode->fid.vid, new_dvnode->fid.vnode, + new_dentry); + + op = afs_alloc_operation(NULL, orig_dvnode->volume); + if (IS_ERR(op)) + return PTR_ERR(op); + + afs_op_set_vnode(op, 0, orig_dvnode); + afs_op_set_vnode(op, 1, new_dvnode); /* May be same as orig_dvnode */ + op->file[0].dv_delta = 1; + op->file[1].dv_delta = 1; + op->file[0].modification = true; + op->file[1].modification = true; + op->file[0].update_ctime = true; + op->file[1].update_ctime = true; + + op->dentry = old_dentry; + op->dentry_2 = new_dentry; + op->rename.new_negative = d_is_negative(new_dentry); + op->ops = &afs_rename_operation; + + /* For non-directories, check whether the target is busy and if so, + * make a copy of the dentry and then do a silly-rename. If the + * silly-rename succeeds, the copied dentry is hashed and becomes the + * new target. + */ + if (d_is_positive(new_dentry) && !d_is_dir(new_dentry)) { + /* To prevent any new references to the target during the + * rename, we unhash the dentry in advance. + */ + if (!d_unhashed(new_dentry)) { + d_drop(new_dentry); + op->rename.rehash = new_dentry; + } + + if (d_count(new_dentry) > 2) { + /* copy the target dentry's name */ + ret = -ENOMEM; + op->rename.tmp = d_alloc(new_dentry->d_parent, + &new_dentry->d_name); + if (!op->rename.tmp) + goto error; + + ret = afs_sillyrename(new_dvnode, + AFS_FS_I(d_inode(new_dentry)), + new_dentry, op->key); + if (ret) + goto error; + + op->dentry_2 = op->rename.tmp; + op->rename.rehash = NULL; + op->rename.new_negative = true; + } + } + + /* This bit is potentially nasty as there's a potential race with + * afs_d_revalidate{,_rcu}(). We have to change d_fsdata on the dentry + * to reflect it's new parent's new data_version after the op, but + * d_revalidate may see old_dentry between the op having taken place + * and the version being updated. + * + * So drop the old_dentry for now to make other threads go through + * lookup instead - which we hold a lock against. + */ + d_drop(old_dentry); + + return afs_do_sync_operation(op); + +error: + return afs_put_operation(op); +} + +/* + * Release a directory page and clean up its private state if it's not busy + * - return true if the page can now be released, false if not + */ +static int afs_dir_releasepage(struct page *page, gfp_t gfp_flags) +{ + struct afs_vnode *dvnode = AFS_FS_I(page->mapping->host); + + _enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, page->index); + + detach_page_private(page); + + /* The directory will need reloading. */ + if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_stat_v(dvnode, n_relpg); + return 1; +} + +/* + * invalidate part or all of a page + * - release a page and clean up its private data if offset is 0 (indicating + * the entire page) + */ +static void afs_dir_invalidatepage(struct page *page, unsigned int offset, + unsigned int length) +{ + struct afs_vnode *dvnode = AFS_FS_I(page->mapping->host); + + _enter("{%lu},%u,%u", page->index, offset, length); + + BUG_ON(!PageLocked(page)); + + /* The directory will need reloading. */ + if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_stat_v(dvnode, n_inval); + + /* we clean up only if the entire page is being invalidated */ + if (offset == 0 && length == PAGE_SIZE) + detach_page_private(page); +} diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c new file mode 100644 index 000000000..3a9cffc08 --- /dev/null +++ b/fs/afs/dir_edit.c @@ -0,0 +1,491 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* AFS filesystem directory editing + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/namei.h> +#include <linux/pagemap.h> +#include <linux/iversion.h> +#include "internal.h" +#include "xdr_fs.h" + +/* + * Find a number of contiguous clear bits in a directory block bitmask. + * + * There are 64 slots, which means we can load the entire bitmap into a + * variable. The first bit doesn't count as it corresponds to the block header + * slot. nr_slots is between 1 and 9. + */ +static int afs_find_contig_bits(union afs_xdr_dir_block *block, unsigned int nr_slots) +{ + u64 bitmap; + u32 mask; + int bit, n; + + bitmap = (u64)block->hdr.bitmap[0] << 0 * 8; + bitmap |= (u64)block->hdr.bitmap[1] << 1 * 8; + bitmap |= (u64)block->hdr.bitmap[2] << 2 * 8; + bitmap |= (u64)block->hdr.bitmap[3] << 3 * 8; + bitmap |= (u64)block->hdr.bitmap[4] << 4 * 8; + bitmap |= (u64)block->hdr.bitmap[5] << 5 * 8; + bitmap |= (u64)block->hdr.bitmap[6] << 6 * 8; + bitmap |= (u64)block->hdr.bitmap[7] << 7 * 8; + bitmap >>= 1; /* The first entry is metadata */ + bit = 1; + mask = (1 << nr_slots) - 1; + + do { + if (sizeof(unsigned long) == 8) + n = ffz(bitmap); + else + n = ((u32)bitmap) != 0 ? + ffz((u32)bitmap) : + ffz((u32)(bitmap >> 32)) + 32; + bitmap >>= n; + bit += n; + + if ((bitmap & mask) == 0) { + if (bit > 64 - nr_slots) + return -1; + return bit; + } + + n = __ffs(bitmap); + bitmap >>= n; + bit += n; + } while (bitmap); + + return -1; +} + +/* + * Set a number of contiguous bits in the directory block bitmap. + */ +static void afs_set_contig_bits(union afs_xdr_dir_block *block, + int bit, unsigned int nr_slots) +{ + u64 mask; + + mask = (1 << nr_slots) - 1; + mask <<= bit; + + block->hdr.bitmap[0] |= (u8)(mask >> 0 * 8); + block->hdr.bitmap[1] |= (u8)(mask >> 1 * 8); + block->hdr.bitmap[2] |= (u8)(mask >> 2 * 8); + block->hdr.bitmap[3] |= (u8)(mask >> 3 * 8); + block->hdr.bitmap[4] |= (u8)(mask >> 4 * 8); + block->hdr.bitmap[5] |= (u8)(mask >> 5 * 8); + block->hdr.bitmap[6] |= (u8)(mask >> 6 * 8); + block->hdr.bitmap[7] |= (u8)(mask >> 7 * 8); +} + +/* + * Clear a number of contiguous bits in the directory block bitmap. + */ +static void afs_clear_contig_bits(union afs_xdr_dir_block *block, + int bit, unsigned int nr_slots) +{ + u64 mask; + + mask = (1 << nr_slots) - 1; + mask <<= bit; + + block->hdr.bitmap[0] &= ~(u8)(mask >> 0 * 8); + block->hdr.bitmap[1] &= ~(u8)(mask >> 1 * 8); + block->hdr.bitmap[2] &= ~(u8)(mask >> 2 * 8); + block->hdr.bitmap[3] &= ~(u8)(mask >> 3 * 8); + block->hdr.bitmap[4] &= ~(u8)(mask >> 4 * 8); + block->hdr.bitmap[5] &= ~(u8)(mask >> 5 * 8); + block->hdr.bitmap[6] &= ~(u8)(mask >> 6 * 8); + block->hdr.bitmap[7] &= ~(u8)(mask >> 7 * 8); +} + +/* + * Scan a directory block looking for a dirent of the right name. + */ +static int afs_dir_scan_block(union afs_xdr_dir_block *block, struct qstr *name, + unsigned int blocknum) +{ + union afs_xdr_dirent *de; + u64 bitmap; + int d, len, n; + + _enter(""); + + bitmap = (u64)block->hdr.bitmap[0] << 0 * 8; + bitmap |= (u64)block->hdr.bitmap[1] << 1 * 8; + bitmap |= (u64)block->hdr.bitmap[2] << 2 * 8; + bitmap |= (u64)block->hdr.bitmap[3] << 3 * 8; + bitmap |= (u64)block->hdr.bitmap[4] << 4 * 8; + bitmap |= (u64)block->hdr.bitmap[5] << 5 * 8; + bitmap |= (u64)block->hdr.bitmap[6] << 6 * 8; + bitmap |= (u64)block->hdr.bitmap[7] << 7 * 8; + + for (d = (blocknum == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS); + d < AFS_DIR_SLOTS_PER_BLOCK; + d++) { + if (!((bitmap >> d) & 1)) + continue; + de = &block->dirents[d]; + if (de->u.valid != 1) + continue; + + /* The block was NUL-terminated by afs_dir_check_page(). */ + len = strlen(de->u.name); + if (len == name->len && + memcmp(de->u.name, name->name, name->len) == 0) + return d; + + n = round_up(12 + len + 1 + 4, AFS_DIR_DIRENT_SIZE); + n /= AFS_DIR_DIRENT_SIZE; + d += n - 1; + } + + return -1; +} + +/* + * Initialise a new directory block. Note that block 0 is special and contains + * some extra metadata. + */ +static void afs_edit_init_block(union afs_xdr_dir_block *meta, + union afs_xdr_dir_block *block, int block_num) +{ + memset(block, 0, sizeof(*block)); + block->hdr.npages = htons(1); + block->hdr.magic = AFS_DIR_MAGIC; + block->hdr.bitmap[0] = 1; + + if (block_num == 0) { + block->hdr.bitmap[0] = 0xff; + block->hdr.bitmap[1] = 0x1f; + memset(block->meta.alloc_ctrs, + AFS_DIR_SLOTS_PER_BLOCK, + sizeof(block->meta.alloc_ctrs)); + meta->meta.alloc_ctrs[0] = + AFS_DIR_SLOTS_PER_BLOCK - AFS_DIR_RESV_BLOCKS0; + } + + if (block_num < AFS_DIR_BLOCKS_WITH_CTR) + meta->meta.alloc_ctrs[block_num] = + AFS_DIR_SLOTS_PER_BLOCK - AFS_DIR_RESV_BLOCKS; +} + +/* + * Edit a directory's file data to add a new directory entry. Doing this after + * create, mkdir, symlink, link or rename if the data version number is + * incremented by exactly one avoids the need to re-download the entire + * directory contents. + * + * The caller must hold the inode locked. + */ +void afs_edit_dir_add(struct afs_vnode *vnode, + struct qstr *name, struct afs_fid *new_fid, + enum afs_edit_dir_reason why) +{ + union afs_xdr_dir_block *meta, *block; + struct afs_xdr_dir_page *meta_page, *dir_page; + union afs_xdr_dirent *de; + struct page *page0, *page; + unsigned int need_slots, nr_blocks, b; + pgoff_t index; + loff_t i_size; + gfp_t gfp; + int slot; + + _enter(",,{%d,%s},", name->len, name->name); + + i_size = i_size_read(&vnode->vfs_inode); + if (i_size > AFS_DIR_BLOCK_SIZE * AFS_DIR_MAX_BLOCKS || + (i_size & (AFS_DIR_BLOCK_SIZE - 1))) { + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + return; + } + + gfp = vnode->vfs_inode.i_mapping->gfp_mask; + page0 = find_or_create_page(vnode->vfs_inode.i_mapping, 0, gfp); + if (!page0) { + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + _leave(" [fgp]"); + return; + } + + /* Work out how many slots we're going to need. */ + need_slots = round_up(12 + name->len + 1 + 4, AFS_DIR_DIRENT_SIZE); + need_slots /= AFS_DIR_DIRENT_SIZE; + + meta_page = kmap(page0); + meta = &meta_page->blocks[0]; + if (i_size == 0) + goto new_directory; + nr_blocks = i_size / AFS_DIR_BLOCK_SIZE; + + /* Find a block that has sufficient slots available. Each VM page + * contains two or more directory blocks. + */ + for (b = 0; b < nr_blocks + 1; b++) { + /* If the directory extended into a new page, then we need to + * tack a new page on the end. + */ + index = b / AFS_DIR_BLOCKS_PER_PAGE; + if (index == 0) { + page = page0; + dir_page = meta_page; + } else { + if (nr_blocks >= AFS_DIR_MAX_BLOCKS) + goto error; + gfp = vnode->vfs_inode.i_mapping->gfp_mask; + page = find_or_create_page(vnode->vfs_inode.i_mapping, + index, gfp); + if (!page) + goto error; + if (!PagePrivate(page)) + attach_page_private(page, (void *)1); + dir_page = kmap(page); + } + + /* Abandon the edit if we got a callback break. */ + if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) + goto invalidated; + + block = &dir_page->blocks[b % AFS_DIR_BLOCKS_PER_PAGE]; + + _debug("block %u: %2u %3u %u", + b, + (b < AFS_DIR_BLOCKS_WITH_CTR) ? meta->meta.alloc_ctrs[b] : 99, + ntohs(block->hdr.npages), + ntohs(block->hdr.magic)); + + /* Initialise the block if necessary. */ + if (b == nr_blocks) { + _debug("init %u", b); + afs_edit_init_block(meta, block, b); + afs_set_i_size(vnode, (b + 1) * AFS_DIR_BLOCK_SIZE); + } + + /* Only lower dir pages have a counter in the header. */ + if (b >= AFS_DIR_BLOCKS_WITH_CTR || + meta->meta.alloc_ctrs[b] >= need_slots) { + /* We need to try and find one or more consecutive + * slots to hold the entry. + */ + slot = afs_find_contig_bits(block, need_slots); + if (slot >= 0) { + _debug("slot %u", slot); + goto found_space; + } + } + + if (page != page0) { + unlock_page(page); + kunmap(page); + put_page(page); + } + } + + /* There are no spare slots of sufficient size, yet the operation + * succeeded. Download the directory again. + */ + trace_afs_edit_dir(vnode, why, afs_edit_dir_create_nospc, 0, 0, 0, 0, name->name); + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + goto out_unmap; + +new_directory: + afs_edit_init_block(meta, meta, 0); + i_size = AFS_DIR_BLOCK_SIZE; + afs_set_i_size(vnode, i_size); + slot = AFS_DIR_RESV_BLOCKS0; + page = page0; + block = meta; + nr_blocks = 1; + b = 0; + +found_space: + /* Set the dirent slot. */ + trace_afs_edit_dir(vnode, why, afs_edit_dir_create, b, slot, + new_fid->vnode, new_fid->unique, name->name); + de = &block->dirents[slot]; + de->u.valid = 1; + de->u.unused[0] = 0; + de->u.hash_next = 0; // TODO: Really need to maintain this + de->u.vnode = htonl(new_fid->vnode); + de->u.unique = htonl(new_fid->unique); + memcpy(de->u.name, name->name, name->len + 1); + de->u.name[name->len] = 0; + + /* Adjust the bitmap. */ + afs_set_contig_bits(block, slot, need_slots); + if (page != page0) { + unlock_page(page); + kunmap(page); + put_page(page); + } + + /* Adjust the allocation counter. */ + if (b < AFS_DIR_BLOCKS_WITH_CTR) + meta->meta.alloc_ctrs[b] -= need_slots; + + inode_inc_iversion_raw(&vnode->vfs_inode); + afs_stat_v(vnode, n_dir_cr); + _debug("Insert %s in %u[%u]", name->name, b, slot); + +out_unmap: + unlock_page(page0); + kunmap(page0); + put_page(page0); + _leave(""); + return; + +invalidated: + trace_afs_edit_dir(vnode, why, afs_edit_dir_create_inval, 0, 0, 0, 0, name->name); + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + if (page != page0) { + kunmap(page); + put_page(page); + } + goto out_unmap; + +error: + trace_afs_edit_dir(vnode, why, afs_edit_dir_create_error, 0, 0, 0, 0, name->name); + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + goto out_unmap; +} + +/* + * Edit a directory's file data to remove a new directory entry. Doing this + * after unlink, rmdir or rename if the data version number is incremented by + * exactly one avoids the need to re-download the entire directory contents. + * + * The caller must hold the inode locked. + */ +void afs_edit_dir_remove(struct afs_vnode *vnode, + struct qstr *name, enum afs_edit_dir_reason why) +{ + struct afs_xdr_dir_page *meta_page, *dir_page; + union afs_xdr_dir_block *meta, *block; + union afs_xdr_dirent *de; + struct page *page0, *page; + unsigned int need_slots, nr_blocks, b; + pgoff_t index; + loff_t i_size; + int slot; + + _enter(",,{%d,%s},", name->len, name->name); + + i_size = i_size_read(&vnode->vfs_inode); + if (i_size < AFS_DIR_BLOCK_SIZE || + i_size > AFS_DIR_BLOCK_SIZE * AFS_DIR_MAX_BLOCKS || + (i_size & (AFS_DIR_BLOCK_SIZE - 1))) { + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + return; + } + nr_blocks = i_size / AFS_DIR_BLOCK_SIZE; + + page0 = find_lock_page(vnode->vfs_inode.i_mapping, 0); + if (!page0) { + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + _leave(" [fgp]"); + return; + } + + /* Work out how many slots we're going to discard. */ + need_slots = round_up(12 + name->len + 1 + 4, AFS_DIR_DIRENT_SIZE); + need_slots /= AFS_DIR_DIRENT_SIZE; + + meta_page = kmap(page0); + meta = &meta_page->blocks[0]; + + /* Find a page that has sufficient slots available. Each VM page + * contains two or more directory blocks. + */ + for (b = 0; b < nr_blocks; b++) { + index = b / AFS_DIR_BLOCKS_PER_PAGE; + if (index != 0) { + page = find_lock_page(vnode->vfs_inode.i_mapping, index); + if (!page) + goto error; + dir_page = kmap(page); + } else { + page = page0; + dir_page = meta_page; + } + + /* Abandon the edit if we got a callback break. */ + if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) + goto invalidated; + + block = &dir_page->blocks[b % AFS_DIR_BLOCKS_PER_PAGE]; + + if (b > AFS_DIR_BLOCKS_WITH_CTR || + meta->meta.alloc_ctrs[b] <= AFS_DIR_SLOTS_PER_BLOCK - 1 - need_slots) { + slot = afs_dir_scan_block(block, name, b); + if (slot >= 0) + goto found_dirent; + } + + if (page != page0) { + unlock_page(page); + kunmap(page); + put_page(page); + } + } + + /* Didn't find the dirent to clobber. Download the directory again. */ + trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_noent, + 0, 0, 0, 0, name->name); + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + goto out_unmap; + +found_dirent: + de = &block->dirents[slot]; + + trace_afs_edit_dir(vnode, why, afs_edit_dir_delete, b, slot, + ntohl(de->u.vnode), ntohl(de->u.unique), + name->name); + + memset(de, 0, sizeof(*de) * need_slots); + + /* Adjust the bitmap. */ + afs_clear_contig_bits(block, slot, need_slots); + if (page != page0) { + unlock_page(page); + kunmap(page); + put_page(page); + } + + /* Adjust the allocation counter. */ + if (b < AFS_DIR_BLOCKS_WITH_CTR) + meta->meta.alloc_ctrs[b] += need_slots; + + inode_set_iversion_raw(&vnode->vfs_inode, vnode->status.data_version); + afs_stat_v(vnode, n_dir_rm); + _debug("Remove %s from %u[%u]", name->name, b, slot); + +out_unmap: + unlock_page(page0); + kunmap(page0); + put_page(page0); + _leave(""); + return; + +invalidated: + trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_inval, + 0, 0, 0, 0, name->name); + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + if (page != page0) { + unlock_page(page); + kunmap(page); + put_page(page); + } + goto out_unmap; + +error: + trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_error, + 0, 0, 0, 0, name->name); + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + goto out_unmap; +} diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c new file mode 100644 index 000000000..dae9a57d7 --- /dev/null +++ b/fs/afs/dir_silly.c @@ -0,0 +1,282 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* AFS silly rename handling + * + * Copyright (C) 2019 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * - Derived from NFS's sillyrename. + */ + +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/namei.h> +#include <linux/fsnotify.h> +#include "internal.h" + +static void afs_silly_rename_success(struct afs_operation *op) +{ + _enter("op=%08x", op->debug_id); + + afs_check_dir_conflict(op, &op->file[0]); + afs_vnode_commit_status(op, &op->file[0]); +} + +static void afs_silly_rename_edit_dir(struct afs_operation *op) +{ + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode *dvnode = dvp->vnode; + struct afs_vnode *vnode = AFS_FS_I(d_inode(op->dentry)); + struct dentry *old = op->dentry; + struct dentry *new = op->dentry_2; + + spin_lock(&old->d_lock); + old->d_flags |= DCACHE_NFSFS_RENAMED; + spin_unlock(&old->d_lock); + if (dvnode->silly_key != op->key) { + key_put(dvnode->silly_key); + dvnode->silly_key = key_get(op->key); + } + + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == dvp->dv_before + dvp->dv_delta) { + afs_edit_dir_remove(dvnode, &old->d_name, + afs_edit_dir_for_silly_0); + afs_edit_dir_add(dvnode, &new->d_name, + &vnode->fid, afs_edit_dir_for_silly_1); + } + up_write(&dvnode->validate_lock); +} + +static const struct afs_operation_ops afs_silly_rename_operation = { + .issue_afs_rpc = afs_fs_rename, + .issue_yfs_rpc = yfs_fs_rename, + .success = afs_silly_rename_success, + .edit_dir = afs_silly_rename_edit_dir, +}; + +/* + * Actually perform the silly rename step. + */ +static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode, + struct dentry *old, struct dentry *new, + struct key *key) +{ + struct afs_operation *op; + + _enter("%pd,%pd", old, new); + + op = afs_alloc_operation(key, dvnode->volume); + if (IS_ERR(op)) + return PTR_ERR(op); + + afs_op_set_vnode(op, 0, dvnode); + afs_op_set_vnode(op, 1, dvnode); + op->file[0].dv_delta = 1; + op->file[1].dv_delta = 1; + op->file[0].modification = true; + op->file[1].modification = true; + op->file[0].update_ctime = true; + op->file[1].update_ctime = true; + + op->dentry = old; + op->dentry_2 = new; + op->ops = &afs_silly_rename_operation; + + trace_afs_silly_rename(vnode, false); + return afs_do_sync_operation(op); +} + +/** + * afs_sillyrename - Perform a silly-rename of a dentry + * + * AFS is stateless and the server doesn't know when the client is holding a + * file open. To prevent application problems when a file is unlinked while + * it's still open, the client performs a "silly-rename". That is, it renames + * the file to a hidden file in the same directory, and only performs the + * unlink once the last reference to it is put. + * + * The final cleanup is done during dentry_iput. + */ +int afs_sillyrename(struct afs_vnode *dvnode, struct afs_vnode *vnode, + struct dentry *dentry, struct key *key) +{ + static unsigned int sillycounter; + struct dentry *sdentry = NULL; + unsigned char silly[16]; + int ret = -EBUSY; + + _enter(""); + + /* We don't allow a dentry to be silly-renamed twice. */ + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) + return -EBUSY; + + sdentry = NULL; + do { + int slen; + + dput(sdentry); + sillycounter++; + + /* Create a silly name. Note that the ".__afs" prefix is + * understood by the salvager and must not be changed. + */ + slen = scnprintf(silly, sizeof(silly), ".__afs%04X", sillycounter); + sdentry = lookup_one_len(silly, dentry->d_parent, slen); + + /* N.B. Better to return EBUSY here ... it could be dangerous + * to delete the file while it's in use. + */ + if (IS_ERR(sdentry)) + goto out; + } while (!d_is_negative(sdentry)); + + ihold(&vnode->vfs_inode); + + ret = afs_do_silly_rename(dvnode, vnode, dentry, sdentry, key); + switch (ret) { + case 0: + /* The rename succeeded. */ + set_bit(AFS_VNODE_SILLY_DELETED, &vnode->flags); + d_move(dentry, sdentry); + break; + case -ERESTARTSYS: + /* The result of the rename is unknown. Play it safe by forcing + * a new lookup. + */ + d_drop(dentry); + d_drop(sdentry); + } + + iput(&vnode->vfs_inode); + dput(sdentry); +out: + _leave(" = %d", ret); + return ret; +} + +static void afs_silly_unlink_success(struct afs_operation *op) +{ + _enter("op=%08x", op->debug_id); + afs_check_dir_conflict(op, &op->file[0]); + afs_vnode_commit_status(op, &op->file[0]); + afs_vnode_commit_status(op, &op->file[1]); + afs_update_dentry_version(op, &op->file[0], op->dentry); +} + +static void afs_silly_unlink_edit_dir(struct afs_operation *op) +{ + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode *dvnode = dvp->vnode; + + _enter("op=%08x", op->debug_id); + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == dvp->dv_before + dvp->dv_delta) + afs_edit_dir_remove(dvnode, &op->dentry->d_name, + afs_edit_dir_for_unlink); + up_write(&dvnode->validate_lock); +} + +static const struct afs_operation_ops afs_silly_unlink_operation = { + .issue_afs_rpc = afs_fs_remove_file, + .issue_yfs_rpc = yfs_fs_remove_file, + .success = afs_silly_unlink_success, + .aborted = afs_check_for_remote_deletion, + .edit_dir = afs_silly_unlink_edit_dir, +}; + +/* + * Tell the server to remove a sillyrename file. + */ +static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode, + struct dentry *dentry, struct key *key) +{ + struct afs_operation *op; + + _enter(""); + + op = afs_alloc_operation(NULL, dvnode->volume); + if (IS_ERR(op)) + return PTR_ERR(op); + + afs_op_set_vnode(op, 0, dvnode); + afs_op_set_vnode(op, 1, vnode); + op->file[0].dv_delta = 1; + op->file[0].modification = true; + op->file[0].update_ctime = true; + op->file[1].op_unlinked = true; + op->file[1].update_ctime = true; + + op->dentry = dentry; + op->ops = &afs_silly_unlink_operation; + + trace_afs_silly_rename(vnode, true); + afs_begin_vnode_operation(op); + afs_wait_for_operation(op); + + /* If there was a conflict with a third party, check the status of the + * unlinked vnode. + */ + if (op->error == 0 && (op->flags & AFS_OPERATION_DIR_CONFLICT)) { + op->file[1].update_ctime = false; + op->fetch_status.which = 1; + op->ops = &afs_fetch_status_operation; + afs_begin_vnode_operation(op); + afs_wait_for_operation(op); + } + + return afs_put_operation(op); +} + +/* + * Remove sillyrename file on iput. + */ +int afs_silly_iput(struct dentry *dentry, struct inode *inode) +{ + struct afs_vnode *dvnode = AFS_FS_I(d_inode(dentry->d_parent)); + struct afs_vnode *vnode = AFS_FS_I(inode); + struct dentry *alias; + int ret; + + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); + + _enter("%p{%pd},%llx", dentry, dentry, vnode->fid.vnode); + + down_read(&dvnode->rmdir_lock); + + alias = d_alloc_parallel(dentry->d_parent, &dentry->d_name, &wq); + if (IS_ERR(alias)) { + up_read(&dvnode->rmdir_lock); + return 0; + } + + if (!d_in_lookup(alias)) { + /* We raced with lookup... See if we need to transfer the + * sillyrename information to the aliased dentry. + */ + ret = 0; + spin_lock(&alias->d_lock); + if (d_really_is_positive(alias) && + !(alias->d_flags & DCACHE_NFSFS_RENAMED)) { + alias->d_flags |= DCACHE_NFSFS_RENAMED; + ret = 1; + } + spin_unlock(&alias->d_lock); + up_read(&dvnode->rmdir_lock); + dput(alias); + return ret; + } + + /* Stop lock-release from complaining. */ + spin_lock(&vnode->lock); + vnode->lock_state = AFS_VNODE_LOCK_DELETED; + trace_afs_flock_ev(vnode, NULL, afs_flock_silly_delete, 0); + spin_unlock(&vnode->lock); + + afs_do_silly_unlink(dvnode, vnode, dentry, dvnode->silly_key); + up_read(&dvnode->rmdir_lock); + d_lookup_done(alias); + dput(alias); + return 1; +} diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c new file mode 100644 index 000000000..96b404d9e --- /dev/null +++ b/fs/afs/dynroot.c @@ -0,0 +1,396 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* AFS dynamic root handling + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/fs.h> +#include <linux/namei.h> +#include <linux/dns_resolver.h> +#include "internal.h" + +static atomic_t afs_autocell_ino; + +/* + * iget5() comparator for inode created by autocell operations + * + * These pseudo inodes don't match anything. + */ +static int afs_iget5_pseudo_test(struct inode *inode, void *opaque) +{ + return 0; +} + +/* + * iget5() inode initialiser + */ +static int afs_iget5_pseudo_set(struct inode *inode, void *opaque) +{ + struct afs_super_info *as = AFS_FS_S(inode->i_sb); + struct afs_vnode *vnode = AFS_FS_I(inode); + struct afs_fid *fid = opaque; + + vnode->volume = as->volume; + vnode->fid = *fid; + inode->i_ino = fid->vnode; + inode->i_generation = fid->unique; + return 0; +} + +/* + * Create an inode for a dynamic root directory or an autocell dynamic + * automount dir. + */ +struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root) +{ + struct afs_super_info *as = AFS_FS_S(sb); + struct afs_vnode *vnode; + struct inode *inode; + struct afs_fid fid = {}; + + _enter(""); + + if (as->volume) + fid.vid = as->volume->vid; + if (root) { + fid.vnode = 1; + fid.unique = 1; + } else { + fid.vnode = atomic_inc_return(&afs_autocell_ino); + fid.unique = 0; + } + + inode = iget5_locked(sb, fid.vnode, + afs_iget5_pseudo_test, afs_iget5_pseudo_set, &fid); + if (!inode) { + _leave(" = -ENOMEM"); + return ERR_PTR(-ENOMEM); + } + + _debug("GOT INODE %p { ino=%lu, vl=%llx, vn=%llx, u=%x }", + inode, inode->i_ino, fid.vid, fid.vnode, fid.unique); + + vnode = AFS_FS_I(inode); + + /* there shouldn't be an existing inode */ + BUG_ON(!(inode->i_state & I_NEW)); + + inode->i_size = 0; + inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; + if (root) { + inode->i_op = &afs_dynroot_inode_operations; + inode->i_fop = &simple_dir_operations; + } else { + inode->i_op = &afs_autocell_inode_operations; + } + set_nlink(inode, 2); + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; + inode->i_ctime = inode->i_atime = inode->i_mtime = current_time(inode); + inode->i_blocks = 0; + inode->i_generation = 0; + + set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags); + if (!root) { + set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); + inode->i_flags |= S_AUTOMOUNT; + } + + inode->i_flags |= S_NOATIME; + unlock_new_inode(inode); + _leave(" = %p", inode); + return inode; +} + +/* + * Probe to see if a cell may exist. This prevents positive dentries from + * being created unnecessarily. + */ +static int afs_probe_cell_name(struct dentry *dentry) +{ + struct afs_cell *cell; + struct afs_net *net = afs_d2net(dentry); + const char *name = dentry->d_name.name; + size_t len = dentry->d_name.len; + char *result = NULL; + int ret; + + /* Names prefixed with a dot are R/W mounts. */ + if (name[0] == '.') { + if (len == 1) + return -EINVAL; + name++; + len--; + } + + cell = afs_find_cell(net, name, len, afs_cell_trace_use_probe); + if (!IS_ERR(cell)) { + afs_unuse_cell(net, cell, afs_cell_trace_unuse_probe); + return 0; + } + + ret = dns_query(net->net, "afsdb", name, len, "srv=1", + &result, NULL, false); + if (ret == -ENODATA || ret == -ENOKEY || ret == 0) + ret = -ENOENT; + if (ret > 0 && ret >= sizeof(struct dns_server_list_v1_header)) { + struct dns_server_list_v1_header *v1 = (void *)result; + + if (v1->hdr.zero == 0 && + v1->hdr.content == DNS_PAYLOAD_IS_SERVER_LIST && + v1->hdr.version == 1 && + (v1->status != DNS_LOOKUP_GOOD && + v1->status != DNS_LOOKUP_GOOD_WITH_BAD)) + return -ENOENT; + + } + + kfree(result); + return ret; +} + +/* + * Try to auto mount the mountpoint with pseudo directory, if the autocell + * operation is setted. + */ +struct inode *afs_try_auto_mntpt(struct dentry *dentry, struct inode *dir) +{ + struct afs_vnode *vnode = AFS_FS_I(dir); + struct inode *inode; + int ret = -ENOENT; + + _enter("%p{%pd}, {%llx:%llu}", + dentry, dentry, vnode->fid.vid, vnode->fid.vnode); + + if (!test_bit(AFS_VNODE_AUTOCELL, &vnode->flags)) + goto out; + + ret = afs_probe_cell_name(dentry); + if (ret < 0) + goto out; + + inode = afs_iget_pseudo_dir(dir->i_sb, false); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + goto out; + } + + _leave("= %p", inode); + return inode; + +out: + _leave("= %d", ret); + return ret == -ENOENT ? NULL : ERR_PTR(ret); +} + +/* + * Look up @cell in a dynroot directory. This is a substitution for the + * local cell name for the net namespace. + */ +static struct dentry *afs_lookup_atcell(struct dentry *dentry) +{ + struct afs_cell *cell; + struct afs_net *net = afs_d2net(dentry); + struct dentry *ret; + char *name; + int len; + + if (!net->ws_cell) + return ERR_PTR(-ENOENT); + + ret = ERR_PTR(-ENOMEM); + name = kmalloc(AFS_MAXCELLNAME + 1, GFP_KERNEL); + if (!name) + goto out_p; + + down_read(&net->cells_lock); + cell = net->ws_cell; + if (cell) { + len = cell->name_len; + memcpy(name, cell->name, len + 1); + } + up_read(&net->cells_lock); + + ret = ERR_PTR(-ENOENT); + if (!cell) + goto out_n; + + ret = lookup_one_len(name, dentry->d_parent, len); + + /* We don't want to d_add() the @cell dentry here as we don't want to + * the cached dentry to hide changes to the local cell name. + */ + +out_n: + kfree(name); +out_p: + return ret; +} + +/* + * Look up an entry in a dynroot directory. + */ +static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + _enter("%pd", dentry); + + ASSERTCMP(d_inode(dentry), ==, NULL); + + if (flags & LOOKUP_CREATE) + return ERR_PTR(-EOPNOTSUPP); + + if (dentry->d_name.len >= AFSNAMEMAX) { + _leave(" = -ENAMETOOLONG"); + return ERR_PTR(-ENAMETOOLONG); + } + + if (dentry->d_name.len == 5 && + memcmp(dentry->d_name.name, "@cell", 5) == 0) + return afs_lookup_atcell(dentry); + + return d_splice_alias(afs_try_auto_mntpt(dentry, dir), dentry); +} + +const struct inode_operations afs_dynroot_inode_operations = { + .lookup = afs_dynroot_lookup, +}; + +/* + * Dirs in the dynamic root don't need revalidation. + */ +static int afs_dynroot_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + return 1; +} + +const struct dentry_operations afs_dynroot_dentry_operations = { + .d_revalidate = afs_dynroot_d_revalidate, + .d_delete = always_delete_dentry, + .d_release = afs_d_release, + .d_automount = afs_d_automount, +}; + +/* + * Create a manually added cell mount directory. + * - The caller must hold net->proc_cells_lock + */ +int afs_dynroot_mkdir(struct afs_net *net, struct afs_cell *cell) +{ + struct super_block *sb = net->dynroot_sb; + struct dentry *root, *subdir; + int ret; + + if (!sb || atomic_read(&sb->s_active) == 0) + return 0; + + /* Let the ->lookup op do the creation */ + root = sb->s_root; + inode_lock(root->d_inode); + subdir = lookup_one_len(cell->name, root, cell->name_len); + if (IS_ERR(subdir)) { + ret = PTR_ERR(subdir); + goto unlock; + } + + /* Note that we're retaining an extra ref on the dentry */ + subdir->d_fsdata = (void *)1UL; + ret = 0; +unlock: + inode_unlock(root->d_inode); + return ret; +} + +/* + * Remove a manually added cell mount directory. + * - The caller must hold net->proc_cells_lock + */ +void afs_dynroot_rmdir(struct afs_net *net, struct afs_cell *cell) +{ + struct super_block *sb = net->dynroot_sb; + struct dentry *root, *subdir; + + if (!sb || atomic_read(&sb->s_active) == 0) + return; + + root = sb->s_root; + inode_lock(root->d_inode); + + /* Don't want to trigger a lookup call, which will re-add the cell */ + subdir = try_lookup_one_len(cell->name, root, cell->name_len); + if (IS_ERR_OR_NULL(subdir)) { + _debug("lookup %ld", PTR_ERR(subdir)); + goto no_dentry; + } + + _debug("rmdir %pd %u", subdir, d_count(subdir)); + + if (subdir->d_fsdata) { + _debug("unpin %u", d_count(subdir)); + subdir->d_fsdata = NULL; + dput(subdir); + } + dput(subdir); +no_dentry: + inode_unlock(root->d_inode); + _leave(""); +} + +/* + * Populate a newly created dynamic root with cell names. + */ +int afs_dynroot_populate(struct super_block *sb) +{ + struct afs_cell *cell; + struct afs_net *net = afs_sb2net(sb); + int ret; + + mutex_lock(&net->proc_cells_lock); + + net->dynroot_sb = sb; + hlist_for_each_entry(cell, &net->proc_cells, proc_link) { + ret = afs_dynroot_mkdir(net, cell); + if (ret < 0) + goto error; + } + + ret = 0; +out: + mutex_unlock(&net->proc_cells_lock); + return ret; + +error: + net->dynroot_sb = NULL; + goto out; +} + +/* + * When a dynamic root that's in the process of being destroyed, depopulate it + * of pinned directories. + */ +void afs_dynroot_depopulate(struct super_block *sb) +{ + struct afs_net *net = afs_sb2net(sb); + struct dentry *root = sb->s_root, *subdir, *tmp; + + /* Prevent more subdirs from being created */ + mutex_lock(&net->proc_cells_lock); + if (net->dynroot_sb == sb) + net->dynroot_sb = NULL; + mutex_unlock(&net->proc_cells_lock); + + if (root) { + inode_lock(root->d_inode); + + /* Remove all the pins for dirs created for manually added cells */ + list_for_each_entry_safe(subdir, tmp, &root->d_subdirs, d_child) { + if (subdir->d_fsdata) { + subdir->d_fsdata = NULL; + dput(subdir); + } + } + + inode_unlock(root->d_inode); + } +} diff --git a/fs/afs/file.c b/fs/afs/file.c new file mode 100644 index 000000000..960b64268 --- /dev/null +++ b/fs/afs/file.c @@ -0,0 +1,733 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* AFS filesystem file handling + * + * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/pagemap.h> +#include <linux/writeback.h> +#include <linux/gfp.h> +#include <linux/task_io_accounting_ops.h> +#include <linux/mm.h> +#include "internal.h" + +static int afs_file_mmap(struct file *file, struct vm_area_struct *vma); +static int afs_readpage(struct file *file, struct page *page); +static void afs_invalidatepage(struct page *page, unsigned int offset, + unsigned int length); +static int afs_releasepage(struct page *page, gfp_t gfp_flags); + +static int afs_readpages(struct file *filp, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages); + +const struct file_operations afs_file_operations = { + .open = afs_open, + .release = afs_release, + .llseek = generic_file_llseek, + .read_iter = generic_file_read_iter, + .write_iter = afs_file_write, + .mmap = afs_file_mmap, + .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, + .fsync = afs_fsync, + .lock = afs_lock, + .flock = afs_flock, +}; + +const struct inode_operations afs_file_inode_operations = { + .getattr = afs_getattr, + .setattr = afs_setattr, + .permission = afs_permission, +}; + +const struct address_space_operations afs_fs_aops = { + .readpage = afs_readpage, + .readpages = afs_readpages, + .set_page_dirty = afs_set_page_dirty, + .launder_page = afs_launder_page, + .releasepage = afs_releasepage, + .invalidatepage = afs_invalidatepage, + .write_begin = afs_write_begin, + .write_end = afs_write_end, + .writepage = afs_writepage, + .writepages = afs_writepages, +}; + +static const struct vm_operations_struct afs_vm_ops = { + .fault = filemap_fault, + .map_pages = filemap_map_pages, + .page_mkwrite = afs_page_mkwrite, +}; + +/* + * Discard a pin on a writeback key. + */ +void afs_put_wb_key(struct afs_wb_key *wbk) +{ + if (wbk && refcount_dec_and_test(&wbk->usage)) { + key_put(wbk->key); + kfree(wbk); + } +} + +/* + * Cache key for writeback. + */ +int afs_cache_wb_key(struct afs_vnode *vnode, struct afs_file *af) +{ + struct afs_wb_key *wbk, *p; + + wbk = kzalloc(sizeof(struct afs_wb_key), GFP_KERNEL); + if (!wbk) + return -ENOMEM; + refcount_set(&wbk->usage, 2); + wbk->key = af->key; + + spin_lock(&vnode->wb_lock); + list_for_each_entry(p, &vnode->wb_keys, vnode_link) { + if (p->key == wbk->key) + goto found; + } + + key_get(wbk->key); + list_add_tail(&wbk->vnode_link, &vnode->wb_keys); + spin_unlock(&vnode->wb_lock); + af->wb = wbk; + return 0; + +found: + refcount_inc(&p->usage); + spin_unlock(&vnode->wb_lock); + af->wb = p; + kfree(wbk); + return 0; +} + +/* + * open an AFS file or directory and attach a key to it + */ +int afs_open(struct inode *inode, struct file *file) +{ + struct afs_vnode *vnode = AFS_FS_I(inode); + struct afs_file *af; + struct key *key; + int ret; + + _enter("{%llx:%llu},", vnode->fid.vid, vnode->fid.vnode); + + key = afs_request_key(vnode->volume->cell); + if (IS_ERR(key)) { + ret = PTR_ERR(key); + goto error; + } + + af = kzalloc(sizeof(*af), GFP_KERNEL); + if (!af) { + ret = -ENOMEM; + goto error_key; + } + af->key = key; + + ret = afs_validate(vnode, key); + if (ret < 0) + goto error_af; + + if (file->f_mode & FMODE_WRITE) { + ret = afs_cache_wb_key(vnode, af); + if (ret < 0) + goto error_af; + } + + if (file->f_flags & O_TRUNC) + set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags); + + file->private_data = af; + _leave(" = 0"); + return 0; + +error_af: + kfree(af); +error_key: + key_put(key); +error: + _leave(" = %d", ret); + return ret; +} + +/* + * release an AFS file or directory and discard its key + */ +int afs_release(struct inode *inode, struct file *file) +{ + struct afs_vnode *vnode = AFS_FS_I(inode); + struct afs_file *af = file->private_data; + int ret = 0; + + _enter("{%llx:%llu},", vnode->fid.vid, vnode->fid.vnode); + + if ((file->f_mode & FMODE_WRITE)) + ret = vfs_fsync(file, 0); + + file->private_data = NULL; + if (af->wb) + afs_put_wb_key(af->wb); + key_put(af->key); + kfree(af); + afs_prune_wb_keys(vnode); + _leave(" = %d", ret); + return ret; +} + +/* + * Dispose of a ref to a read record. + */ +void afs_put_read(struct afs_read *req) +{ + int i; + + if (refcount_dec_and_test(&req->usage)) { + if (req->pages) { + for (i = 0; i < req->nr_pages; i++) + if (req->pages[i]) + put_page(req->pages[i]); + if (req->pages != req->array) + kfree(req->pages); + } + kfree(req); + } +} + +#ifdef CONFIG_AFS_FSCACHE +/* + * deal with notification that a page was read from the cache + */ +static void afs_file_readpage_read_complete(struct page *page, + void *data, + int error) +{ + _enter("%p,%p,%d", page, data, error); + + /* if the read completes with an error, we just unlock the page and let + * the VM reissue the readpage */ + if (!error) + SetPageUptodate(page); + unlock_page(page); +} +#endif + +static void afs_fetch_data_success(struct afs_operation *op) +{ + struct afs_vnode *vnode = op->file[0].vnode; + + _enter("op=%08x", op->debug_id); + afs_vnode_commit_status(op, &op->file[0]); + afs_stat_v(vnode, n_fetches); + atomic_long_add(op->fetch.req->actual_len, &op->net->n_fetch_bytes); +} + +static void afs_fetch_data_put(struct afs_operation *op) +{ + afs_put_read(op->fetch.req); +} + +static const struct afs_operation_ops afs_fetch_data_operation = { + .issue_afs_rpc = afs_fs_fetch_data, + .issue_yfs_rpc = yfs_fs_fetch_data, + .success = afs_fetch_data_success, + .aborted = afs_check_for_remote_deletion, + .put = afs_fetch_data_put, +}; + +/* + * Fetch file data from the volume. + */ +int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *req) +{ + struct afs_operation *op; + + _enter("%s{%llx:%llu.%u},%x,,,", + vnode->volume->name, + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + key_serial(key)); + + op = afs_alloc_operation(key, vnode->volume); + if (IS_ERR(op)) + return PTR_ERR(op); + + afs_op_set_vnode(op, 0, vnode); + + op->fetch.req = afs_get_read(req); + op->ops = &afs_fetch_data_operation; + return afs_do_sync_operation(op); +} + +/* + * read page from file, directory or symlink, given a key to use + */ +int afs_page_filler(void *data, struct page *page) +{ + struct inode *inode = page->mapping->host; + struct afs_vnode *vnode = AFS_FS_I(inode); + struct afs_read *req; + struct key *key = data; + int ret; + + _enter("{%x},{%lu},{%lu}", key_serial(key), inode->i_ino, page->index); + + BUG_ON(!PageLocked(page)); + + ret = -ESTALE; + if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) + goto error; + + /* is it cached? */ +#ifdef CONFIG_AFS_FSCACHE + ret = fscache_read_or_alloc_page(vnode->cache, + page, + afs_file_readpage_read_complete, + NULL, + GFP_KERNEL); +#else + ret = -ENOBUFS; +#endif + switch (ret) { + /* read BIO submitted (page in cache) */ + case 0: + break; + + /* page not yet cached */ + case -ENODATA: + _debug("cache said ENODATA"); + goto go_on; + + /* page will not be cached */ + case -ENOBUFS: + _debug("cache said ENOBUFS"); + + fallthrough; + default: + go_on: + req = kzalloc(struct_size(req, array, 1), GFP_KERNEL); + if (!req) + goto enomem; + + /* We request a full page. If the page is a partial one at the + * end of the file, the server will return a short read and the + * unmarshalling code will clear the unfilled space. + */ + refcount_set(&req->usage, 1); + req->pos = (loff_t)page->index << PAGE_SHIFT; + req->len = PAGE_SIZE; + req->nr_pages = 1; + req->pages = req->array; + req->pages[0] = page; + get_page(page); + + /* read the contents of the file from the server into the + * page */ + ret = afs_fetch_data(vnode, key, req); + afs_put_read(req); + + if (ret < 0) { + if (ret == -ENOENT) { + _debug("got NOENT from server" + " - marking file deleted and stale"); + set_bit(AFS_VNODE_DELETED, &vnode->flags); + ret = -ESTALE; + } + +#ifdef CONFIG_AFS_FSCACHE + fscache_uncache_page(vnode->cache, page); +#endif + BUG_ON(PageFsCache(page)); + + if (ret == -EINTR || + ret == -ENOMEM || + ret == -ERESTARTSYS || + ret == -EAGAIN) + goto error; + goto io_error; + } + + SetPageUptodate(page); + + /* send the page to the cache */ +#ifdef CONFIG_AFS_FSCACHE + if (PageFsCache(page) && + fscache_write_page(vnode->cache, page, vnode->status.size, + GFP_KERNEL) != 0) { + fscache_uncache_page(vnode->cache, page); + BUG_ON(PageFsCache(page)); + } +#endif + unlock_page(page); + } + + _leave(" = 0"); + return 0; + +io_error: + SetPageError(page); + goto error; +enomem: + ret = -ENOMEM; +error: + unlock_page(page); + _leave(" = %d", ret); + return ret; +} + +/* + * read page from file, directory or symlink, given a file to nominate the key + * to be used + */ +static int afs_readpage(struct file *file, struct page *page) +{ + struct key *key; + int ret; + + if (file) { + key = afs_file_key(file); + ASSERT(key != NULL); + ret = afs_page_filler(key, page); + } else { + struct inode *inode = page->mapping->host; + key = afs_request_key(AFS_FS_S(inode->i_sb)->cell); + if (IS_ERR(key)) { + ret = PTR_ERR(key); + } else { + ret = afs_page_filler(key, page); + key_put(key); + } + } + return ret; +} + +/* + * Make pages available as they're filled. + */ +static void afs_readpages_page_done(struct afs_read *req) +{ +#ifdef CONFIG_AFS_FSCACHE + struct afs_vnode *vnode = req->vnode; +#endif + struct page *page = req->pages[req->index]; + + req->pages[req->index] = NULL; + SetPageUptodate(page); + + /* send the page to the cache */ +#ifdef CONFIG_AFS_FSCACHE + if (PageFsCache(page) && + fscache_write_page(vnode->cache, page, vnode->status.size, + GFP_KERNEL) != 0) { + fscache_uncache_page(vnode->cache, page); + BUG_ON(PageFsCache(page)); + } +#endif + unlock_page(page); + put_page(page); +} + +/* + * Read a contiguous set of pages. + */ +static int afs_readpages_one(struct file *file, struct address_space *mapping, + struct list_head *pages) +{ + struct afs_vnode *vnode = AFS_FS_I(mapping->host); + struct afs_read *req; + struct list_head *p; + struct page *first, *page; + struct key *key = afs_file_key(file); + pgoff_t index; + int ret, n, i; + + /* Count the number of contiguous pages at the front of the list. Note + * that the list goes prev-wards rather than next-wards. + */ + first = lru_to_page(pages); + index = first->index + 1; + n = 1; + for (p = first->lru.prev; p != pages; p = p->prev) { + page = list_entry(p, struct page, lru); + if (page->index != index) + break; + index++; + n++; + } + + req = kzalloc(struct_size(req, array, n), GFP_NOFS); + if (!req) + return -ENOMEM; + + refcount_set(&req->usage, 1); + req->vnode = vnode; + req->page_done = afs_readpages_page_done; + req->pos = first->index; + req->pos <<= PAGE_SHIFT; + req->pages = req->array; + + /* Transfer the pages to the request. We add them in until one fails + * to add to the LRU and then we stop (as that'll make a hole in the + * contiguous run. + * + * Note that it's possible for the file size to change whilst we're + * doing this, but we rely on the server returning less than we asked + * for if the file shrank. We also rely on this to deal with a partial + * page at the end of the file. + */ + do { + page = lru_to_page(pages); + list_del(&page->lru); + index = page->index; + if (add_to_page_cache_lru(page, mapping, index, + readahead_gfp_mask(mapping))) { +#ifdef CONFIG_AFS_FSCACHE + fscache_uncache_page(vnode->cache, page); +#endif + put_page(page); + break; + } + + req->pages[req->nr_pages++] = page; + req->len += PAGE_SIZE; + } while (req->nr_pages < n); + + if (req->nr_pages == 0) { + kfree(req); + return 0; + } + + ret = afs_fetch_data(vnode, key, req); + if (ret < 0) + goto error; + + task_io_account_read(PAGE_SIZE * req->nr_pages); + afs_put_read(req); + return 0; + +error: + if (ret == -ENOENT) { + _debug("got NOENT from server" + " - marking file deleted and stale"); + set_bit(AFS_VNODE_DELETED, &vnode->flags); + ret = -ESTALE; + } + + for (i = 0; i < req->nr_pages; i++) { + page = req->pages[i]; + if (page) { +#ifdef CONFIG_AFS_FSCACHE + fscache_uncache_page(vnode->cache, page); +#endif + SetPageError(page); + unlock_page(page); + } + } + + afs_put_read(req); + return ret; +} + +/* + * read a set of pages + */ +static int afs_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + struct key *key = afs_file_key(file); + struct afs_vnode *vnode; + int ret = 0; + + _enter("{%d},{%lu},,%d", + key_serial(key), mapping->host->i_ino, nr_pages); + + ASSERT(key != NULL); + + vnode = AFS_FS_I(mapping->host); + if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { + _leave(" = -ESTALE"); + return -ESTALE; + } + + /* attempt to read as many of the pages as possible */ +#ifdef CONFIG_AFS_FSCACHE + ret = fscache_read_or_alloc_pages(vnode->cache, + mapping, + pages, + &nr_pages, + afs_file_readpage_read_complete, + NULL, + mapping_gfp_mask(mapping)); +#else + ret = -ENOBUFS; +#endif + + switch (ret) { + /* all pages are being read from the cache */ + case 0: + BUG_ON(!list_empty(pages)); + BUG_ON(nr_pages != 0); + _leave(" = 0 [reading all]"); + return 0; + + /* there were pages that couldn't be read from the cache */ + case -ENODATA: + case -ENOBUFS: + break; + + /* other error */ + default: + _leave(" = %d", ret); + return ret; + } + + while (!list_empty(pages)) { + ret = afs_readpages_one(file, mapping, pages); + if (ret < 0) + break; + } + + _leave(" = %d [netting]", ret); + return ret; +} + +/* + * Adjust the dirty region of the page on truncation or full invalidation, + * getting rid of the markers altogether if the region is entirely invalidated. + */ +static void afs_invalidate_dirty(struct page *page, unsigned int offset, + unsigned int length) +{ + struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); + unsigned long priv; + unsigned int f, t, end = offset + length; + + priv = page_private(page); + + /* we clean up only if the entire page is being invalidated */ + if (offset == 0 && length == thp_size(page)) + goto full_invalidate; + + /* If the page was dirtied by page_mkwrite(), the PTE stays writable + * and we don't get another notification to tell us to expand it + * again. + */ + if (afs_is_page_dirty_mmapped(priv)) + return; + + /* We may need to shorten the dirty region */ + f = afs_page_dirty_from(priv); + t = afs_page_dirty_to(priv); + + if (t <= offset || f >= end) + return; /* Doesn't overlap */ + + if (f < offset && t > end) + return; /* Splits the dirty region - just absorb it */ + + if (f >= offset && t <= end) + goto undirty; + + if (f < offset) + t = offset; + else + f = end; + if (f == t) + goto undirty; + + priv = afs_page_dirty(f, t); + set_page_private(page, priv); + trace_afs_page_dirty(vnode, tracepoint_string("trunc"), page->index, priv); + return; + +undirty: + trace_afs_page_dirty(vnode, tracepoint_string("undirty"), page->index, priv); + clear_page_dirty_for_io(page); +full_invalidate: + priv = (unsigned long)detach_page_private(page); + trace_afs_page_dirty(vnode, tracepoint_string("inval"), page->index, priv); +} + +/* + * invalidate part or all of a page + * - release a page and clean up its private data if offset is 0 (indicating + * the entire page) + */ +static void afs_invalidatepage(struct page *page, unsigned int offset, + unsigned int length) +{ + _enter("{%lu},%u,%u", page->index, offset, length); + + BUG_ON(!PageLocked(page)); + +#ifdef CONFIG_AFS_FSCACHE + /* we clean up only if the entire page is being invalidated */ + if (offset == 0 && length == PAGE_SIZE) { + if (PageFsCache(page)) { + struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); + fscache_wait_on_page_write(vnode->cache, page); + fscache_uncache_page(vnode->cache, page); + } + } +#endif + + if (PagePrivate(page)) + afs_invalidate_dirty(page, offset, length); + + _leave(""); +} + +/* + * release a page and clean up its private state if it's not busy + * - return true if the page can now be released, false if not + */ +static int afs_releasepage(struct page *page, gfp_t gfp_flags) +{ + struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); + unsigned long priv; + + _enter("{{%llx:%llu}[%lu],%lx},%x", + vnode->fid.vid, vnode->fid.vnode, page->index, page->flags, + gfp_flags); + + /* deny if page is being written to the cache and the caller hasn't + * elected to wait */ +#ifdef CONFIG_AFS_FSCACHE + if (!fscache_maybe_release_page(vnode->cache, page, gfp_flags)) { + _leave(" = F [cache busy]"); + return 0; + } +#endif + + if (PagePrivate(page)) { + priv = (unsigned long)detach_page_private(page); + trace_afs_page_dirty(vnode, tracepoint_string("rel"), + page->index, priv); + } + + /* indicate that the page can be released */ + _leave(" = T"); + return 1; +} + +/* + * Handle setting up a memory mapping on an AFS file. + */ +static int afs_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + int ret; + + ret = generic_file_mmap(file, vma); + if (ret == 0) + vma->vm_ops = &afs_vm_ops; + return ret; +} diff --git a/fs/afs/flock.c b/fs/afs/flock.c new file mode 100644 index 000000000..466ad609f --- /dev/null +++ b/fs/afs/flock.c @@ -0,0 +1,881 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* AFS file locking support + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include "internal.h" + +#define AFS_LOCK_GRANTED 0 +#define AFS_LOCK_PENDING 1 +#define AFS_LOCK_YOUR_TRY 2 + +struct workqueue_struct *afs_lock_manager; + +static void afs_next_locker(struct afs_vnode *vnode, int error); +static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl); +static void afs_fl_release_private(struct file_lock *fl); + +static const struct file_lock_operations afs_lock_ops = { + .fl_copy_lock = afs_fl_copy_lock, + .fl_release_private = afs_fl_release_private, +}; + +static inline void afs_set_lock_state(struct afs_vnode *vnode, enum afs_lock_state state) +{ + _debug("STATE %u -> %u", vnode->lock_state, state); + vnode->lock_state = state; +} + +static atomic_t afs_file_lock_debug_id; + +/* + * if the callback is broken on this vnode, then the lock may now be available + */ +void afs_lock_may_be_available(struct afs_vnode *vnode) +{ + _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode); + + spin_lock(&vnode->lock); + if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB) + afs_next_locker(vnode, 0); + trace_afs_flock_ev(vnode, NULL, afs_flock_callback_break, 0); + spin_unlock(&vnode->lock); +} + +/* + * the lock will time out in 5 minutes unless we extend it, so schedule + * extension in a bit less than that time + */ +static void afs_schedule_lock_extension(struct afs_vnode *vnode) +{ + ktime_t expires_at, now, duration; + u64 duration_j; + + expires_at = ktime_add_ms(vnode->locked_at, AFS_LOCKWAIT * 1000 / 2); + now = ktime_get_real(); + duration = ktime_sub(expires_at, now); + if (duration <= 0) + duration_j = 0; + else + duration_j = nsecs_to_jiffies(ktime_to_ns(duration)); + + queue_delayed_work(afs_lock_manager, &vnode->lock_work, duration_j); +} + +/* + * In the case of successful completion of a lock operation, record the time + * the reply appeared and start the lock extension timer. + */ +void afs_lock_op_done(struct afs_call *call) +{ + struct afs_operation *op = call->op; + struct afs_vnode *vnode = op->file[0].vnode; + + if (call->error == 0) { + spin_lock(&vnode->lock); + trace_afs_flock_ev(vnode, NULL, afs_flock_timestamp, 0); + vnode->locked_at = call->issue_time; + afs_schedule_lock_extension(vnode); + spin_unlock(&vnode->lock); + } +} + +/* + * grant one or more locks (readlocks are allowed to jump the queue if the + * first lock in the queue is itself a readlock) + * - the caller must hold the vnode lock + */ +static void afs_grant_locks(struct afs_vnode *vnode) +{ + struct file_lock *p, *_p; + bool exclusive = (vnode->lock_type == AFS_LOCK_WRITE); + + list_for_each_entry_safe(p, _p, &vnode->pending_locks, fl_u.afs.link) { + if (!exclusive && p->fl_type == F_WRLCK) + continue; + + list_move_tail(&p->fl_u.afs.link, &vnode->granted_locks); + p->fl_u.afs.state = AFS_LOCK_GRANTED; + trace_afs_flock_op(vnode, p, afs_flock_op_grant); + wake_up(&p->fl_wait); + } +} + +/* + * If an error is specified, reject every pending lock that matches the + * authentication and type of the lock we failed to get. If there are any + * remaining lockers, try to wake up one of them to have a go. + */ +static void afs_next_locker(struct afs_vnode *vnode, int error) +{ + struct file_lock *p, *_p, *next = NULL; + struct key *key = vnode->lock_key; + unsigned int fl_type = F_RDLCK; + + _enter(""); + + if (vnode->lock_type == AFS_LOCK_WRITE) + fl_type = F_WRLCK; + + list_for_each_entry_safe(p, _p, &vnode->pending_locks, fl_u.afs.link) { + if (error && + p->fl_type == fl_type && + afs_file_key(p->fl_file) == key) { + list_del_init(&p->fl_u.afs.link); + p->fl_u.afs.state = error; + wake_up(&p->fl_wait); + } + + /* Select the next locker to hand off to. */ + if (next && + (next->fl_type == F_WRLCK || p->fl_type == F_RDLCK)) + continue; + next = p; + } + + vnode->lock_key = NULL; + key_put(key); + + if (next) { + afs_set_lock_state(vnode, AFS_VNODE_LOCK_SETTING); + next->fl_u.afs.state = AFS_LOCK_YOUR_TRY; + trace_afs_flock_op(vnode, next, afs_flock_op_wake); + wake_up(&next->fl_wait); + } else { + afs_set_lock_state(vnode, AFS_VNODE_LOCK_NONE); + trace_afs_flock_ev(vnode, NULL, afs_flock_no_lockers, 0); + } + + _leave(""); +} + +/* + * Kill off all waiters in the the pending lock queue due to the vnode being + * deleted. + */ +static void afs_kill_lockers_enoent(struct afs_vnode *vnode) +{ + struct file_lock *p; + + afs_set_lock_state(vnode, AFS_VNODE_LOCK_DELETED); + + while (!list_empty(&vnode->pending_locks)) { + p = list_entry(vnode->pending_locks.next, + struct file_lock, fl_u.afs.link); + list_del_init(&p->fl_u.afs.link); + p->fl_u.afs.state = -ENOENT; + wake_up(&p->fl_wait); + } + + key_put(vnode->lock_key); + vnode->lock_key = NULL; +} + +static void afs_lock_success(struct afs_operation *op) +{ + _enter("op=%08x", op->debug_id); + afs_vnode_commit_status(op, &op->file[0]); +} + +static const struct afs_operation_ops afs_set_lock_operation = { + .issue_afs_rpc = afs_fs_set_lock, + .issue_yfs_rpc = yfs_fs_set_lock, + .success = afs_lock_success, + .aborted = afs_check_for_remote_deletion, +}; + +/* + * Get a lock on a file + */ +static int afs_set_lock(struct afs_vnode *vnode, struct key *key, + afs_lock_type_t type) +{ + struct afs_operation *op; + + _enter("%s{%llx:%llu.%u},%x,%u", + vnode->volume->name, + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + key_serial(key), type); + + op = afs_alloc_operation(key, vnode->volume); + if (IS_ERR(op)) + return PTR_ERR(op); + + afs_op_set_vnode(op, 0, vnode); + + op->lock.type = type; + op->ops = &afs_set_lock_operation; + return afs_do_sync_operation(op); +} + +static const struct afs_operation_ops afs_extend_lock_operation = { + .issue_afs_rpc = afs_fs_extend_lock, + .issue_yfs_rpc = yfs_fs_extend_lock, + .success = afs_lock_success, +}; + +/* + * Extend a lock on a file + */ +static int afs_extend_lock(struct afs_vnode *vnode, struct key *key) +{ + struct afs_operation *op; + + _enter("%s{%llx:%llu.%u},%x", + vnode->volume->name, + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + key_serial(key)); + + op = afs_alloc_operation(key, vnode->volume); + if (IS_ERR(op)) + return PTR_ERR(op); + + afs_op_set_vnode(op, 0, vnode); + + op->flags |= AFS_OPERATION_UNINTR; + op->ops = &afs_extend_lock_operation; + return afs_do_sync_operation(op); +} + +static const struct afs_operation_ops afs_release_lock_operation = { + .issue_afs_rpc = afs_fs_release_lock, + .issue_yfs_rpc = yfs_fs_release_lock, + .success = afs_lock_success, +}; + +/* + * Release a lock on a file + */ +static int afs_release_lock(struct afs_vnode *vnode, struct key *key) +{ + struct afs_operation *op; + + _enter("%s{%llx:%llu.%u},%x", + vnode->volume->name, + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + key_serial(key)); + + op = afs_alloc_operation(key, vnode->volume); + if (IS_ERR(op)) + return PTR_ERR(op); + + afs_op_set_vnode(op, 0, vnode); + + op->flags |= AFS_OPERATION_UNINTR; + op->ops = &afs_release_lock_operation; + return afs_do_sync_operation(op); +} + +/* + * do work for a lock, including: + * - probing for a lock we're waiting on but didn't get immediately + * - extending a lock that's close to timing out + */ +void afs_lock_work(struct work_struct *work) +{ + struct afs_vnode *vnode = + container_of(work, struct afs_vnode, lock_work.work); + struct key *key; + int ret; + + _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode); + + spin_lock(&vnode->lock); + +again: + _debug("wstate %u for %p", vnode->lock_state, vnode); + switch (vnode->lock_state) { + case AFS_VNODE_LOCK_NEED_UNLOCK: + afs_set_lock_state(vnode, AFS_VNODE_LOCK_UNLOCKING); + trace_afs_flock_ev(vnode, NULL, afs_flock_work_unlocking, 0); + spin_unlock(&vnode->lock); + + /* attempt to release the server lock; if it fails, we just + * wait 5 minutes and it'll expire anyway */ + ret = afs_release_lock(vnode, vnode->lock_key); + if (ret < 0 && vnode->lock_state != AFS_VNODE_LOCK_DELETED) { + trace_afs_flock_ev(vnode, NULL, afs_flock_release_fail, + ret); + printk(KERN_WARNING "AFS:" + " Failed to release lock on {%llx:%llx} error %d\n", + vnode->fid.vid, vnode->fid.vnode, ret); + } + + spin_lock(&vnode->lock); + if (ret == -ENOENT) + afs_kill_lockers_enoent(vnode); + else + afs_next_locker(vnode, 0); + spin_unlock(&vnode->lock); + return; + + /* If we've already got a lock, then it must be time to extend that + * lock as AFS locks time out after 5 minutes. + */ + case AFS_VNODE_LOCK_GRANTED: + _debug("extend"); + + ASSERT(!list_empty(&vnode->granted_locks)); + + key = key_get(vnode->lock_key); + afs_set_lock_state(vnode, AFS_VNODE_LOCK_EXTENDING); + trace_afs_flock_ev(vnode, NULL, afs_flock_work_extending, 0); + spin_unlock(&vnode->lock); + + ret = afs_extend_lock(vnode, key); /* RPC */ + key_put(key); + + if (ret < 0) { + trace_afs_flock_ev(vnode, NULL, afs_flock_extend_fail, + ret); + pr_warn("AFS: Failed to extend lock on {%llx:%llx} error %d\n", + vnode->fid.vid, vnode->fid.vnode, ret); + } + + spin_lock(&vnode->lock); + + if (ret == -ENOENT) { + afs_kill_lockers_enoent(vnode); + spin_unlock(&vnode->lock); + return; + } + + if (vnode->lock_state != AFS_VNODE_LOCK_EXTENDING) + goto again; + afs_set_lock_state(vnode, AFS_VNODE_LOCK_GRANTED); + + if (ret != 0) + queue_delayed_work(afs_lock_manager, &vnode->lock_work, + HZ * 10); + spin_unlock(&vnode->lock); + _leave(" [ext]"); + return; + + /* If we're waiting for a callback to indicate lock release, we can't + * actually rely on this, so need to recheck at regular intervals. The + * problem is that the server might not notify us if the lock just + * expires (say because a client died) rather than being explicitly + * released. + */ + case AFS_VNODE_LOCK_WAITING_FOR_CB: + _debug("retry"); + afs_next_locker(vnode, 0); + spin_unlock(&vnode->lock); + return; + + case AFS_VNODE_LOCK_DELETED: + afs_kill_lockers_enoent(vnode); + spin_unlock(&vnode->lock); + return; + + default: + /* Looks like a lock request was withdrawn. */ + spin_unlock(&vnode->lock); + _leave(" [no]"); + return; + } +} + +/* + * pass responsibility for the unlocking of a vnode on the server to the + * manager thread, lest a pending signal in the calling thread interrupt + * AF_RXRPC + * - the caller must hold the vnode lock + */ +static void afs_defer_unlock(struct afs_vnode *vnode) +{ + _enter("%u", vnode->lock_state); + + if (list_empty(&vnode->granted_locks) && + (vnode->lock_state == AFS_VNODE_LOCK_GRANTED || + vnode->lock_state == AFS_VNODE_LOCK_EXTENDING)) { + cancel_delayed_work(&vnode->lock_work); + + afs_set_lock_state(vnode, AFS_VNODE_LOCK_NEED_UNLOCK); + trace_afs_flock_ev(vnode, NULL, afs_flock_defer_unlock, 0); + queue_delayed_work(afs_lock_manager, &vnode->lock_work, 0); + } +} + +/* + * Check that our view of the file metadata is up to date and check to see + * whether we think that we have a locking permit. + */ +static int afs_do_setlk_check(struct afs_vnode *vnode, struct key *key, + enum afs_flock_mode mode, afs_lock_type_t type) +{ + afs_access_t access; + int ret; + + /* Make sure we've got a callback on this file and that our view of the + * data version is up to date. + */ + ret = afs_validate(vnode, key); + if (ret < 0) + return ret; + + /* Check the permission set to see if we're actually going to be + * allowed to get a lock on this file. + */ + ret = afs_check_permit(vnode, key, &access); + if (ret < 0) + return ret; + + /* At a rough estimation, you need LOCK, WRITE or INSERT perm to + * read-lock a file and WRITE or INSERT perm to write-lock a file. + * + * We can't rely on the server to do this for us since if we want to + * share a read lock that we already have, we won't go the server. + */ + if (type == AFS_LOCK_READ) { + if (!(access & (AFS_ACE_INSERT | AFS_ACE_WRITE | AFS_ACE_LOCK))) + return -EACCES; + } else { + if (!(access & (AFS_ACE_INSERT | AFS_ACE_WRITE))) + return -EACCES; + } + + return 0; +} + +/* + * request a lock on a file on the server + */ +static int afs_do_setlk(struct file *file, struct file_lock *fl) +{ + struct inode *inode = locks_inode(file); + struct afs_vnode *vnode = AFS_FS_I(inode); + enum afs_flock_mode mode = AFS_FS_S(inode->i_sb)->flock_mode; + afs_lock_type_t type; + struct key *key = afs_file_key(file); + bool partial, no_server_lock = false; + int ret; + + if (mode == afs_flock_mode_unset) + mode = afs_flock_mode_openafs; + + _enter("{%llx:%llu},%llu-%llu,%u,%u", + vnode->fid.vid, vnode->fid.vnode, + fl->fl_start, fl->fl_end, fl->fl_type, mode); + + fl->fl_ops = &afs_lock_ops; + INIT_LIST_HEAD(&fl->fl_u.afs.link); + fl->fl_u.afs.state = AFS_LOCK_PENDING; + + partial = (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX); + type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE; + if (mode == afs_flock_mode_write && partial) + type = AFS_LOCK_WRITE; + + ret = afs_do_setlk_check(vnode, key, mode, type); + if (ret < 0) + return ret; + + trace_afs_flock_op(vnode, fl, afs_flock_op_set_lock); + + /* AFS3 protocol only supports full-file locks and doesn't provide any + * method of upgrade/downgrade, so we need to emulate for partial-file + * locks. + * + * The OpenAFS client only gets a server lock for a full-file lock and + * keeps partial-file locks local. Allow this behaviour to be emulated + * (as the default). + */ + if (mode == afs_flock_mode_local || + (partial && mode == afs_flock_mode_openafs)) { + no_server_lock = true; + goto skip_server_lock; + } + + spin_lock(&vnode->lock); + list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks); + + ret = -ENOENT; + if (vnode->lock_state == AFS_VNODE_LOCK_DELETED) + goto error_unlock; + + /* If we've already got a lock on the server then try to move to having + * the VFS grant the requested lock. Note that this means that other + * clients may get starved out. + */ + _debug("try %u", vnode->lock_state); + if (vnode->lock_state == AFS_VNODE_LOCK_GRANTED) { + if (type == AFS_LOCK_READ) { + _debug("instant readlock"); + list_move_tail(&fl->fl_u.afs.link, &vnode->granted_locks); + fl->fl_u.afs.state = AFS_LOCK_GRANTED; + goto vnode_is_locked_u; + } + + if (vnode->lock_type == AFS_LOCK_WRITE) { + _debug("instant writelock"); + list_move_tail(&fl->fl_u.afs.link, &vnode->granted_locks); + fl->fl_u.afs.state = AFS_LOCK_GRANTED; + goto vnode_is_locked_u; + } + } + + if (vnode->lock_state == AFS_VNODE_LOCK_NONE && + !(fl->fl_flags & FL_SLEEP)) { + ret = -EAGAIN; + if (type == AFS_LOCK_READ) { + if (vnode->status.lock_count == -1) + goto lock_is_contended; /* Write locked */ + } else { + if (vnode->status.lock_count != 0) + goto lock_is_contended; /* Locked */ + } + } + + if (vnode->lock_state != AFS_VNODE_LOCK_NONE) + goto need_to_wait; + +try_to_lock: + /* We don't have a lock on this vnode and we aren't currently waiting + * for one either, so ask the server for a lock. + * + * Note that we need to be careful if we get interrupted by a signal + * after dispatching the request as we may still get the lock, even + * though we don't wait for the reply (it's not too bad a problem - the + * lock will expire in 5 mins anyway). + */ + trace_afs_flock_ev(vnode, fl, afs_flock_try_to_lock, 0); + vnode->lock_key = key_get(key); + vnode->lock_type = type; + afs_set_lock_state(vnode, AFS_VNODE_LOCK_SETTING); + spin_unlock(&vnode->lock); + + ret = afs_set_lock(vnode, key, type); /* RPC */ + + spin_lock(&vnode->lock); + switch (ret) { + case -EKEYREJECTED: + case -EKEYEXPIRED: + case -EKEYREVOKED: + case -EPERM: + case -EACCES: + fl->fl_u.afs.state = ret; + trace_afs_flock_ev(vnode, fl, afs_flock_fail_perm, ret); + list_del_init(&fl->fl_u.afs.link); + afs_next_locker(vnode, ret); + goto error_unlock; + + case -ENOENT: + fl->fl_u.afs.state = ret; + trace_afs_flock_ev(vnode, fl, afs_flock_fail_other, ret); + list_del_init(&fl->fl_u.afs.link); + afs_kill_lockers_enoent(vnode); + goto error_unlock; + + default: + fl->fl_u.afs.state = ret; + trace_afs_flock_ev(vnode, fl, afs_flock_fail_other, ret); + list_del_init(&fl->fl_u.afs.link); + afs_next_locker(vnode, 0); + goto error_unlock; + + case -EWOULDBLOCK: + /* The server doesn't have a lock-waiting queue, so the client + * will have to retry. The server will break the outstanding + * callbacks on a file when a lock is released. + */ + ASSERT(list_empty(&vnode->granted_locks)); + ASSERTCMP(vnode->pending_locks.next, ==, &fl->fl_u.afs.link); + goto lock_is_contended; + + case 0: + afs_set_lock_state(vnode, AFS_VNODE_LOCK_GRANTED); + trace_afs_flock_ev(vnode, fl, afs_flock_acquired, type); + afs_grant_locks(vnode); + goto vnode_is_locked_u; + } + +vnode_is_locked_u: + spin_unlock(&vnode->lock); +vnode_is_locked: + /* the lock has been granted by the server... */ + ASSERTCMP(fl->fl_u.afs.state, ==, AFS_LOCK_GRANTED); + +skip_server_lock: + /* ... but the VFS still needs to distribute access on this client. */ + trace_afs_flock_ev(vnode, fl, afs_flock_vfs_locking, 0); + ret = locks_lock_file_wait(file, fl); + trace_afs_flock_ev(vnode, fl, afs_flock_vfs_lock, ret); + if (ret < 0) + goto vfs_rejected_lock; + + /* Again, make sure we've got a callback on this file and, again, make + * sure that our view of the data version is up to date (we ignore + * errors incurred here and deal with the consequences elsewhere). + */ + afs_validate(vnode, key); + _leave(" = 0"); + return 0; + +lock_is_contended: + if (!(fl->fl_flags & FL_SLEEP)) { + list_del_init(&fl->fl_u.afs.link); + afs_next_locker(vnode, 0); + ret = -EAGAIN; + goto error_unlock; + } + + afs_set_lock_state(vnode, AFS_VNODE_LOCK_WAITING_FOR_CB); + trace_afs_flock_ev(vnode, fl, afs_flock_would_block, ret); + queue_delayed_work(afs_lock_manager, &vnode->lock_work, HZ * 5); + +need_to_wait: + /* We're going to have to wait. Either this client doesn't have a lock + * on the server yet and we need to wait for a callback to occur, or + * the client does have a lock on the server, but it's shared and we + * need an exclusive lock. + */ + spin_unlock(&vnode->lock); + + trace_afs_flock_ev(vnode, fl, afs_flock_waiting, 0); + ret = wait_event_interruptible(fl->fl_wait, + fl->fl_u.afs.state != AFS_LOCK_PENDING); + trace_afs_flock_ev(vnode, fl, afs_flock_waited, ret); + + if (fl->fl_u.afs.state >= 0 && fl->fl_u.afs.state != AFS_LOCK_GRANTED) { + spin_lock(&vnode->lock); + + switch (fl->fl_u.afs.state) { + case AFS_LOCK_YOUR_TRY: + fl->fl_u.afs.state = AFS_LOCK_PENDING; + goto try_to_lock; + case AFS_LOCK_PENDING: + if (ret > 0) { + /* We need to retry the lock. We may not be + * notified by the server if it just expired + * rather than being released. + */ + ASSERTCMP(vnode->lock_state, ==, AFS_VNODE_LOCK_WAITING_FOR_CB); + afs_set_lock_state(vnode, AFS_VNODE_LOCK_SETTING); + fl->fl_u.afs.state = AFS_LOCK_PENDING; + goto try_to_lock; + } + goto error_unlock; + case AFS_LOCK_GRANTED: + default: + break; + } + + spin_unlock(&vnode->lock); + } + + if (fl->fl_u.afs.state == AFS_LOCK_GRANTED) + goto vnode_is_locked; + ret = fl->fl_u.afs.state; + goto error; + +vfs_rejected_lock: + /* The VFS rejected the lock we just obtained, so we have to discard + * what we just got. We defer this to the lock manager work item to + * deal with. + */ + _debug("vfs refused %d", ret); + if (no_server_lock) + goto error; + spin_lock(&vnode->lock); + list_del_init(&fl->fl_u.afs.link); + afs_defer_unlock(vnode); + +error_unlock: + spin_unlock(&vnode->lock); +error: + _leave(" = %d", ret); + return ret; +} + +/* + * unlock on a file on the server + */ +static int afs_do_unlk(struct file *file, struct file_lock *fl) +{ + struct afs_vnode *vnode = AFS_FS_I(locks_inode(file)); + int ret; + + _enter("{%llx:%llu},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type); + + trace_afs_flock_op(vnode, fl, afs_flock_op_unlock); + + /* Flush all pending writes before doing anything with locks. */ + vfs_fsync(file, 0); + + ret = locks_lock_file_wait(file, fl); + _leave(" = %d [%u]", ret, vnode->lock_state); + return ret; +} + +/* + * return information about a lock we currently hold, if indeed we hold one + */ +static int afs_do_getlk(struct file *file, struct file_lock *fl) +{ + struct afs_vnode *vnode = AFS_FS_I(locks_inode(file)); + struct key *key = afs_file_key(file); + int ret, lock_count; + + _enter(""); + + if (vnode->lock_state == AFS_VNODE_LOCK_DELETED) + return -ENOENT; + + fl->fl_type = F_UNLCK; + + /* check local lock records first */ + posix_test_lock(file, fl); + if (fl->fl_type == F_UNLCK) { + /* no local locks; consult the server */ + ret = afs_fetch_status(vnode, key, false, NULL); + if (ret < 0) + goto error; + + lock_count = READ_ONCE(vnode->status.lock_count); + if (lock_count != 0) { + if (lock_count > 0) + fl->fl_type = F_RDLCK; + else + fl->fl_type = F_WRLCK; + fl->fl_start = 0; + fl->fl_end = OFFSET_MAX; + fl->fl_pid = 0; + } + } + + ret = 0; +error: + _leave(" = %d [%hd]", ret, fl->fl_type); + return ret; +} + +/* + * manage POSIX locks on a file + */ +int afs_lock(struct file *file, int cmd, struct file_lock *fl) +{ + struct afs_vnode *vnode = AFS_FS_I(locks_inode(file)); + enum afs_flock_operation op; + int ret; + + _enter("{%llx:%llu},%d,{t=%x,fl=%x,r=%Ld:%Ld}", + vnode->fid.vid, vnode->fid.vnode, cmd, + fl->fl_type, fl->fl_flags, + (long long) fl->fl_start, (long long) fl->fl_end); + + /* AFS doesn't support mandatory locks */ + if (__mandatory_lock(&vnode->vfs_inode) && fl->fl_type != F_UNLCK) + return -ENOLCK; + + if (IS_GETLK(cmd)) + return afs_do_getlk(file, fl); + + fl->fl_u.afs.debug_id = atomic_inc_return(&afs_file_lock_debug_id); + trace_afs_flock_op(vnode, fl, afs_flock_op_lock); + + if (fl->fl_type == F_UNLCK) + ret = afs_do_unlk(file, fl); + else + ret = afs_do_setlk(file, fl); + + switch (ret) { + case 0: op = afs_flock_op_return_ok; break; + case -EAGAIN: op = afs_flock_op_return_eagain; break; + case -EDEADLK: op = afs_flock_op_return_edeadlk; break; + default: op = afs_flock_op_return_error; break; + } + trace_afs_flock_op(vnode, fl, op); + return ret; +} + +/* + * manage FLOCK locks on a file + */ +int afs_flock(struct file *file, int cmd, struct file_lock *fl) +{ + struct afs_vnode *vnode = AFS_FS_I(locks_inode(file)); + enum afs_flock_operation op; + int ret; + + _enter("{%llx:%llu},%d,{t=%x,fl=%x}", + vnode->fid.vid, vnode->fid.vnode, cmd, + fl->fl_type, fl->fl_flags); + + /* + * No BSD flocks over NFS allowed. + * Note: we could try to fake a POSIX lock request here by + * using ((u32) filp | 0x80000000) or some such as the pid. + * Not sure whether that would be unique, though, or whether + * that would break in other places. + */ + if (!(fl->fl_flags & FL_FLOCK)) + return -ENOLCK; + + fl->fl_u.afs.debug_id = atomic_inc_return(&afs_file_lock_debug_id); + trace_afs_flock_op(vnode, fl, afs_flock_op_flock); + + /* we're simulating flock() locks using posix locks on the server */ + if (fl->fl_type == F_UNLCK) + ret = afs_do_unlk(file, fl); + else + ret = afs_do_setlk(file, fl); + + switch (ret) { + case 0: op = afs_flock_op_return_ok; break; + case -EAGAIN: op = afs_flock_op_return_eagain; break; + case -EDEADLK: op = afs_flock_op_return_edeadlk; break; + default: op = afs_flock_op_return_error; break; + } + trace_afs_flock_op(vnode, fl, op); + return ret; +} + +/* + * the POSIX lock management core VFS code copies the lock record and adds the + * copy into its own list, so we need to add that copy to the vnode's lock + * queue in the same place as the original (which will be deleted shortly + * after) + */ +static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl) +{ + struct afs_vnode *vnode = AFS_FS_I(locks_inode(fl->fl_file)); + + _enter(""); + + new->fl_u.afs.debug_id = atomic_inc_return(&afs_file_lock_debug_id); + + spin_lock(&vnode->lock); + trace_afs_flock_op(vnode, new, afs_flock_op_copy_lock); + list_add(&new->fl_u.afs.link, &fl->fl_u.afs.link); + spin_unlock(&vnode->lock); +} + +/* + * need to remove this lock from the vnode queue when it's removed from the + * VFS's list + */ +static void afs_fl_release_private(struct file_lock *fl) +{ + struct afs_vnode *vnode = AFS_FS_I(locks_inode(fl->fl_file)); + + _enter(""); + + spin_lock(&vnode->lock); + + trace_afs_flock_op(vnode, fl, afs_flock_op_release_lock); + list_del_init(&fl->fl_u.afs.link); + if (list_empty(&vnode->granted_locks)) + afs_defer_unlock(vnode); + + _debug("state %u for %p", vnode->lock_state, vnode); + spin_unlock(&vnode->lock); +} diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c new file mode 100644 index 000000000..a82515b47 --- /dev/null +++ b/fs/afs/fs_operation.c @@ -0,0 +1,257 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Fileserver-directed operation handling. + * + * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include "internal.h" + +static atomic_t afs_operation_debug_counter; + +/* + * Create an operation against a volume. + */ +struct afs_operation *afs_alloc_operation(struct key *key, struct afs_volume *volume) +{ + struct afs_operation *op; + + _enter(""); + + op = kzalloc(sizeof(*op), GFP_KERNEL); + if (!op) + return ERR_PTR(-ENOMEM); + + if (!key) { + key = afs_request_key(volume->cell); + if (IS_ERR(key)) { + kfree(op); + return ERR_CAST(key); + } + } else { + key_get(key); + } + + op->key = key; + op->volume = afs_get_volume(volume, afs_volume_trace_get_new_op); + op->net = volume->cell->net; + op->cb_v_break = volume->cb_v_break; + op->debug_id = atomic_inc_return(&afs_operation_debug_counter); + op->error = -EDESTADDRREQ; + op->ac.error = SHRT_MAX; + + _leave(" = [op=%08x]", op->debug_id); + return op; +} + +/* + * Lock the vnode(s) being operated upon. + */ +static bool afs_get_io_locks(struct afs_operation *op) +{ + struct afs_vnode *vnode = op->file[0].vnode; + struct afs_vnode *vnode2 = op->file[1].vnode; + + _enter(""); + + if (op->flags & AFS_OPERATION_UNINTR) { + mutex_lock(&vnode->io_lock); + op->flags |= AFS_OPERATION_LOCK_0; + _leave(" = t [1]"); + return true; + } + + if (!vnode2 || !op->file[1].need_io_lock || vnode == vnode2) + vnode2 = NULL; + + if (vnode2 > vnode) + swap(vnode, vnode2); + + if (mutex_lock_interruptible(&vnode->io_lock) < 0) { + op->error = -ERESTARTSYS; + op->flags |= AFS_OPERATION_STOP; + _leave(" = f [I 0]"); + return false; + } + op->flags |= AFS_OPERATION_LOCK_0; + + if (vnode2) { + if (mutex_lock_interruptible_nested(&vnode2->io_lock, 1) < 0) { + op->error = -ERESTARTSYS; + op->flags |= AFS_OPERATION_STOP; + mutex_unlock(&vnode->io_lock); + op->flags &= ~AFS_OPERATION_LOCK_0; + _leave(" = f [I 1]"); + return false; + } + op->flags |= AFS_OPERATION_LOCK_1; + } + + _leave(" = t [2]"); + return true; +} + +static void afs_drop_io_locks(struct afs_operation *op) +{ + struct afs_vnode *vnode = op->file[0].vnode; + struct afs_vnode *vnode2 = op->file[1].vnode; + + _enter(""); + + if (op->flags & AFS_OPERATION_LOCK_1) + mutex_unlock(&vnode2->io_lock); + if (op->flags & AFS_OPERATION_LOCK_0) + mutex_unlock(&vnode->io_lock); +} + +static void afs_prepare_vnode(struct afs_operation *op, struct afs_vnode_param *vp, + unsigned int index) +{ + struct afs_vnode *vnode = vp->vnode; + + if (vnode) { + vp->fid = vnode->fid; + vp->dv_before = vnode->status.data_version; + vp->cb_break_before = afs_calc_vnode_cb_break(vnode); + if (vnode->lock_state != AFS_VNODE_LOCK_NONE) + op->flags |= AFS_OPERATION_CUR_ONLY; + if (vp->modification) + set_bit(AFS_VNODE_MODIFYING, &vnode->flags); + } + + if (vp->fid.vnode) + _debug("PREP[%u] {%llx:%llu.%u}", + index, vp->fid.vid, vp->fid.vnode, vp->fid.unique); +} + +/* + * Begin an operation on the fileserver. + * + * Fileserver operations are serialised on the server by vnode, so we serialise + * them here also using the io_lock. + */ +bool afs_begin_vnode_operation(struct afs_operation *op) +{ + struct afs_vnode *vnode = op->file[0].vnode; + + ASSERT(vnode); + + _enter(""); + + if (op->file[0].need_io_lock) + if (!afs_get_io_locks(op)) + return false; + + afs_prepare_vnode(op, &op->file[0], 0); + afs_prepare_vnode(op, &op->file[1], 1); + op->cb_v_break = op->volume->cb_v_break; + _leave(" = true"); + return true; +} + +/* + * Tidy up a filesystem cursor and unlock the vnode. + */ +static void afs_end_vnode_operation(struct afs_operation *op) +{ + _enter(""); + + if (op->error == -EDESTADDRREQ || + op->error == -EADDRNOTAVAIL || + op->error == -ENETUNREACH || + op->error == -EHOSTUNREACH) + afs_dump_edestaddrreq(op); + + afs_drop_io_locks(op); + + if (op->error == -ECONNABORTED) + op->error = afs_abort_to_error(op->ac.abort_code); +} + +/* + * Wait for an in-progress operation to complete. + */ +void afs_wait_for_operation(struct afs_operation *op) +{ + _enter(""); + + while (afs_select_fileserver(op)) { + op->cb_s_break = op->server->cb_s_break; + if (test_bit(AFS_SERVER_FL_IS_YFS, &op->server->flags) && + op->ops->issue_yfs_rpc) + op->ops->issue_yfs_rpc(op); + else if (op->ops->issue_afs_rpc) + op->ops->issue_afs_rpc(op); + else + op->ac.error = -ENOTSUPP; + + if (op->call) + op->error = afs_wait_for_call_to_complete(op->call, &op->ac); + } + + switch (op->error) { + case 0: + _debug("success"); + op->ops->success(op); + break; + case -ECONNABORTED: + if (op->ops->aborted) + op->ops->aborted(op); + break; + default: + break; + } + + afs_end_vnode_operation(op); + + if (op->error == 0 && op->ops->edit_dir) { + _debug("edit_dir"); + op->ops->edit_dir(op); + } + _leave(""); +} + +/* + * Dispose of an operation. + */ +int afs_put_operation(struct afs_operation *op) +{ + int i, ret = op->error; + + _enter("op=%08x,%d", op->debug_id, ret); + + if (op->ops && op->ops->put) + op->ops->put(op); + if (op->file[0].modification) + clear_bit(AFS_VNODE_MODIFYING, &op->file[0].vnode->flags); + if (op->file[1].modification && op->file[1].vnode != op->file[0].vnode) + clear_bit(AFS_VNODE_MODIFYING, &op->file[1].vnode->flags); + if (op->file[0].put_vnode) + iput(&op->file[0].vnode->vfs_inode); + if (op->file[1].put_vnode) + iput(&op->file[1].vnode->vfs_inode); + + if (op->more_files) { + for (i = 0; i < op->nr_files - 2; i++) + if (op->more_files[i].put_vnode) + iput(&op->more_files[i].vnode->vfs_inode); + kfree(op->more_files); + } + + afs_end_cursor(&op->ac); + afs_put_serverlist(op->net, op->server_list); + afs_put_volume(op->net, op->volume, afs_volume_trace_put_put_op); + key_put(op->key); + kfree(op); + return ret; +} + +int afs_do_sync_operation(struct afs_operation *op) +{ + afs_begin_vnode_operation(op); + afs_wait_for_operation(op); + return afs_put_operation(op); +} diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c new file mode 100644 index 000000000..def80365f --- /dev/null +++ b/fs/afs/fs_probe.c @@ -0,0 +1,472 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* AFS fileserver probing + * + * Copyright (C) 2018, 2020 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/sched.h> +#include <linux/slab.h> +#include "afs_fs.h" +#include "internal.h" +#include "protocol_yfs.h" + +static unsigned int afs_fs_probe_fast_poll_interval = 30 * HZ; +static unsigned int afs_fs_probe_slow_poll_interval = 5 * 60 * HZ; + +/* + * Start the probe polling timer. We have to supply it with an inc on the + * outstanding server count. + */ +static void afs_schedule_fs_probe(struct afs_net *net, + struct afs_server *server, bool fast) +{ + unsigned long atj; + + if (!net->live) + return; + + atj = server->probed_at; + atj += fast ? afs_fs_probe_fast_poll_interval : afs_fs_probe_slow_poll_interval; + + afs_inc_servers_outstanding(net); + if (timer_reduce(&net->fs_probe_timer, atj)) + afs_dec_servers_outstanding(net); +} + +/* + * Handle the completion of a set of probes. + */ +static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server) +{ + bool responded = server->probe.responded; + + write_seqlock(&net->fs_lock); + if (responded) { + list_add_tail(&server->probe_link, &net->fs_probe_slow); + } else { + server->rtt = UINT_MAX; + clear_bit(AFS_SERVER_FL_RESPONDING, &server->flags); + list_add_tail(&server->probe_link, &net->fs_probe_fast); + } + write_sequnlock(&net->fs_lock); + + afs_schedule_fs_probe(net, server, !responded); +} + +/* + * Handle the completion of a probe. + */ +static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server) +{ + _enter(""); + + if (atomic_dec_and_test(&server->probe_outstanding)) + afs_finished_fs_probe(net, server); + + wake_up_all(&server->probe_wq); +} + +/* + * Handle inability to send a probe due to ENOMEM when trying to allocate a + * call struct. + */ +static void afs_fs_probe_not_done(struct afs_net *net, + struct afs_server *server, + struct afs_addr_cursor *ac) +{ + struct afs_addr_list *alist = ac->alist; + unsigned int index = ac->index; + + _enter(""); + + trace_afs_io_error(0, -ENOMEM, afs_io_error_fs_probe_fail); + spin_lock(&server->probe_lock); + + server->probe.local_failure = true; + if (server->probe.error == 0) + server->probe.error = -ENOMEM; + + set_bit(index, &alist->failed); + + spin_unlock(&server->probe_lock); + return afs_done_one_fs_probe(net, server); +} + +/* + * Process the result of probing a fileserver. This is called after successful + * or failed delivery of an FS.GetCapabilities operation. + */ +void afs_fileserver_probe_result(struct afs_call *call) +{ + struct afs_addr_list *alist = call->alist; + struct afs_server *server = call->server; + unsigned int index = call->addr_ix; + unsigned int rtt_us = 0; + int ret = call->error; + + _enter("%pU,%u", &server->uuid, index); + + spin_lock(&server->probe_lock); + + switch (ret) { + case 0: + server->probe.error = 0; + goto responded; + case -ECONNABORTED: + if (!server->probe.responded) { + server->probe.abort_code = call->abort_code; + server->probe.error = ret; + } + goto responded; + case -ENOMEM: + case -ENONET: + clear_bit(index, &alist->responded); + server->probe.local_failure = true; + trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail); + goto out; + case -ECONNRESET: /* Responded, but call expired. */ + case -ERFKILL: + case -EADDRNOTAVAIL: + case -ENETUNREACH: + case -EHOSTUNREACH: + case -EHOSTDOWN: + case -ECONNREFUSED: + case -ETIMEDOUT: + case -ETIME: + default: + clear_bit(index, &alist->responded); + set_bit(index, &alist->failed); + if (!server->probe.responded && + (server->probe.error == 0 || + server->probe.error == -ETIMEDOUT || + server->probe.error == -ETIME)) + server->probe.error = ret; + trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail); + goto out; + } + +responded: + clear_bit(index, &alist->failed); + + if (call->service_id == YFS_FS_SERVICE) { + server->probe.is_yfs = true; + set_bit(AFS_SERVER_FL_IS_YFS, &server->flags); + alist->addrs[index].srx_service = call->service_id; + } else { + server->probe.not_yfs = true; + if (!server->probe.is_yfs) { + clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags); + alist->addrs[index].srx_service = call->service_id; + } + } + + rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us); + if (rtt_us < server->probe.rtt) { + server->probe.rtt = rtt_us; + server->rtt = rtt_us; + alist->preferred = index; + } + + smp_wmb(); /* Set rtt before responded. */ + server->probe.responded = true; + set_bit(index, &alist->responded); + set_bit(AFS_SERVER_FL_RESPONDING, &server->flags); +out: + spin_unlock(&server->probe_lock); + + _debug("probe %pU [%u] %pISpc rtt=%u ret=%d", + &server->uuid, index, &alist->addrs[index].transport, + rtt_us, ret); + + return afs_done_one_fs_probe(call->net, server); +} + +/* + * Probe one or all of a fileserver's addresses to find out the best route and + * to query its capabilities. + */ +void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server, + struct key *key, bool all) +{ + struct afs_addr_cursor ac = { + .index = 0, + }; + + _enter("%pU", &server->uuid); + + read_lock(&server->fs_lock); + ac.alist = rcu_dereference_protected(server->addresses, + lockdep_is_held(&server->fs_lock)); + afs_get_addrlist(ac.alist); + read_unlock(&server->fs_lock); + + server->probed_at = jiffies; + atomic_set(&server->probe_outstanding, all ? ac.alist->nr_addrs : 1); + memset(&server->probe, 0, sizeof(server->probe)); + server->probe.rtt = UINT_MAX; + + ac.index = ac.alist->preferred; + if (ac.index < 0 || ac.index >= ac.alist->nr_addrs) + all = true; + + if (all) { + for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) + if (!afs_fs_get_capabilities(net, server, &ac, key)) + afs_fs_probe_not_done(net, server, &ac); + } else { + if (!afs_fs_get_capabilities(net, server, &ac, key)) + afs_fs_probe_not_done(net, server, &ac); + } + + afs_put_addrlist(ac.alist); +} + +/* + * Wait for the first as-yet untried fileserver to respond. + */ +int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried) +{ + struct wait_queue_entry *waits; + struct afs_server *server; + unsigned int rtt = UINT_MAX, rtt_s; + bool have_responders = false; + int pref = -1, i; + + _enter("%u,%lx", slist->nr_servers, untried); + + /* Only wait for servers that have a probe outstanding. */ + for (i = 0; i < slist->nr_servers; i++) { + if (test_bit(i, &untried)) { + server = slist->servers[i].server; + if (!atomic_read(&server->probe_outstanding)) + __clear_bit(i, &untried); + if (server->probe.responded) + have_responders = true; + } + } + if (have_responders || !untried) + return 0; + + waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL); + if (!waits) + return -ENOMEM; + + for (i = 0; i < slist->nr_servers; i++) { + if (test_bit(i, &untried)) { + server = slist->servers[i].server; + init_waitqueue_entry(&waits[i], current); + add_wait_queue(&server->probe_wq, &waits[i]); + } + } + + for (;;) { + bool still_probing = false; + + set_current_state(TASK_INTERRUPTIBLE); + for (i = 0; i < slist->nr_servers; i++) { + if (test_bit(i, &untried)) { + server = slist->servers[i].server; + if (server->probe.responded) + goto stop; + if (atomic_read(&server->probe_outstanding)) + still_probing = true; + } + } + + if (!still_probing || signal_pending(current)) + goto stop; + schedule(); + } + +stop: + set_current_state(TASK_RUNNING); + + for (i = 0; i < slist->nr_servers; i++) { + if (test_bit(i, &untried)) { + server = slist->servers[i].server; + rtt_s = READ_ONCE(server->rtt); + if (test_bit(AFS_SERVER_FL_RESPONDING, &server->flags) && + rtt_s < rtt) { + pref = i; + rtt = rtt_s; + } + + remove_wait_queue(&server->probe_wq, &waits[i]); + } + } + + kfree(waits); + + if (pref == -1 && signal_pending(current)) + return -ERESTARTSYS; + + if (pref >= 0) + slist->preferred = pref; + return 0; +} + +/* + * Probe timer. We have an increment on fs_outstanding that we need to pass + * along to the work item. + */ +void afs_fs_probe_timer(struct timer_list *timer) +{ + struct afs_net *net = container_of(timer, struct afs_net, fs_probe_timer); + + if (!net->live || !queue_work(afs_wq, &net->fs_prober)) + afs_dec_servers_outstanding(net); +} + +/* + * Dispatch a probe to a server. + */ +static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server, bool all) + __releases(&net->fs_lock) +{ + struct key *key = NULL; + + /* We remove it from the queues here - it will be added back to + * one of the queues on the completion of the probe. + */ + list_del_init(&server->probe_link); + + afs_get_server(server, afs_server_trace_get_probe); + write_sequnlock(&net->fs_lock); + + afs_fs_probe_fileserver(net, server, key, all); + afs_put_server(net, server, afs_server_trace_put_probe); +} + +/* + * Probe a server immediately without waiting for its due time to come + * round. This is used when all of the addresses have been tried. + */ +void afs_probe_fileserver(struct afs_net *net, struct afs_server *server) +{ + write_seqlock(&net->fs_lock); + if (!list_empty(&server->probe_link)) + return afs_dispatch_fs_probe(net, server, true); + write_sequnlock(&net->fs_lock); +} + +/* + * Probe dispatcher to regularly dispatch probes to keep NAT alive. + */ +void afs_fs_probe_dispatcher(struct work_struct *work) +{ + struct afs_net *net = container_of(work, struct afs_net, fs_prober); + struct afs_server *fast, *slow, *server; + unsigned long nowj, timer_at, poll_at; + bool first_pass = true, set_timer = false; + + if (!net->live) { + afs_dec_servers_outstanding(net); + return; + } + + _enter(""); + + if (list_empty(&net->fs_probe_fast) && list_empty(&net->fs_probe_slow)) { + afs_dec_servers_outstanding(net); + _leave(" [none]"); + return; + } + +again: + write_seqlock(&net->fs_lock); + + fast = slow = server = NULL; + nowj = jiffies; + timer_at = nowj + MAX_JIFFY_OFFSET; + + if (!list_empty(&net->fs_probe_fast)) { + fast = list_first_entry(&net->fs_probe_fast, struct afs_server, probe_link); + poll_at = fast->probed_at + afs_fs_probe_fast_poll_interval; + if (time_before(nowj, poll_at)) { + timer_at = poll_at; + set_timer = true; + fast = NULL; + } + } + + if (!list_empty(&net->fs_probe_slow)) { + slow = list_first_entry(&net->fs_probe_slow, struct afs_server, probe_link); + poll_at = slow->probed_at + afs_fs_probe_slow_poll_interval; + if (time_before(nowj, poll_at)) { + if (time_before(poll_at, timer_at)) + timer_at = poll_at; + set_timer = true; + slow = NULL; + } + } + + server = fast ?: slow; + if (server) + _debug("probe %pU", &server->uuid); + + if (server && (first_pass || !need_resched())) { + afs_dispatch_fs_probe(net, server, server == fast); + first_pass = false; + goto again; + } + + write_sequnlock(&net->fs_lock); + + if (server) { + if (!queue_work(afs_wq, &net->fs_prober)) + afs_dec_servers_outstanding(net); + _leave(" [requeue]"); + } else if (set_timer) { + if (timer_reduce(&net->fs_probe_timer, timer_at)) + afs_dec_servers_outstanding(net); + _leave(" [timer]"); + } else { + afs_dec_servers_outstanding(net); + _leave(" [quiesce]"); + } +} + +/* + * Wait for a probe on a particular fileserver to complete for 2s. + */ +int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr) +{ + struct wait_queue_entry wait; + unsigned long timo = 2 * HZ; + + if (atomic_read(&server->probe_outstanding) == 0) + goto dont_wait; + + init_wait_entry(&wait, 0); + for (;;) { + prepare_to_wait_event(&server->probe_wq, &wait, + is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); + if (timo == 0 || + server->probe.responded || + atomic_read(&server->probe_outstanding) == 0 || + (is_intr && signal_pending(current))) + break; + timo = schedule_timeout(timo); + } + + finish_wait(&server->probe_wq, &wait); + +dont_wait: + if (server->probe.responded) + return 0; + if (is_intr && signal_pending(current)) + return -ERESTARTSYS; + if (timo == 0) + return -ETIME; + return -EDESTADDRREQ; +} + +/* + * Clean up the probing when the namespace is killed off. + */ +void afs_fs_probe_cleanup(struct afs_net *net) +{ + if (del_timer_sync(&net->fs_probe_timer)) + afs_dec_servers_outstanding(net); +} diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c new file mode 100644 index 000000000..0048a32cb --- /dev/null +++ b/fs/afs/fsclient.c @@ -0,0 +1,2106 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* AFS File Server client stubs + * + * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/circ_buf.h> +#include <linux/iversion.h> +#include "internal.h" +#include "afs_fs.h" +#include "xdr_fs.h" + +/* + * decode an AFSFid block + */ +static void xdr_decode_AFSFid(const __be32 **_bp, struct afs_fid *fid) +{ + const __be32 *bp = *_bp; + + fid->vid = ntohl(*bp++); + fid->vnode = ntohl(*bp++); + fid->unique = ntohl(*bp++); + *_bp = bp; +} + +/* + * Dump a bad file status record. + */ +static void xdr_dump_bad(const __be32 *bp) +{ + __be32 x[4]; + int i; + + pr_notice("AFS XDR: Bad status record\n"); + for (i = 0; i < 5 * 4 * 4; i += 16) { + memcpy(x, bp, 16); + bp += 4; + pr_notice("%03x: %08x %08x %08x %08x\n", + i, ntohl(x[0]), ntohl(x[1]), ntohl(x[2]), ntohl(x[3])); + } + + memcpy(x, bp, 4); + pr_notice("0x50: %08x\n", ntohl(x[0])); +} + +/* + * decode an AFSFetchStatus block + */ +static void xdr_decode_AFSFetchStatus(const __be32 **_bp, + struct afs_call *call, + struct afs_status_cb *scb) +{ + const struct afs_xdr_AFSFetchStatus *xdr = (const void *)*_bp; + struct afs_file_status *status = &scb->status; + bool inline_error = (call->operation_ID == afs_FS_InlineBulkStatus); + u64 data_version, size; + u32 type, abort_code; + + abort_code = ntohl(xdr->abort_code); + + if (xdr->if_version != htonl(AFS_FSTATUS_VERSION)) { + if (xdr->if_version == htonl(0) && + abort_code != 0 && + inline_error) { + /* The OpenAFS fileserver has a bug in FS.InlineBulkStatus + * whereby it doesn't set the interface version in the error + * case. + */ + status->abort_code = abort_code; + scb->have_error = true; + goto advance; + } + + pr_warn("Unknown AFSFetchStatus version %u\n", ntohl(xdr->if_version)); + goto bad; + } + + if (abort_code != 0 && inline_error) { + status->abort_code = abort_code; + scb->have_error = true; + goto advance; + } + + type = ntohl(xdr->type); + switch (type) { + case AFS_FTYPE_FILE: + case AFS_FTYPE_DIR: + case AFS_FTYPE_SYMLINK: + status->type = type; + break; + default: + goto bad; + } + + status->nlink = ntohl(xdr->nlink); + status->author = ntohl(xdr->author); + status->owner = ntohl(xdr->owner); + status->caller_access = ntohl(xdr->caller_access); /* Ticket dependent */ + status->anon_access = ntohl(xdr->anon_access); + status->mode = ntohl(xdr->mode) & S_IALLUGO; + status->group = ntohl(xdr->group); + status->lock_count = ntohl(xdr->lock_count); + + status->mtime_client.tv_sec = ntohl(xdr->mtime_client); + status->mtime_client.tv_nsec = 0; + status->mtime_server.tv_sec = ntohl(xdr->mtime_server); + status->mtime_server.tv_nsec = 0; + + size = (u64)ntohl(xdr->size_lo); + size |= (u64)ntohl(xdr->size_hi) << 32; + status->size = size; + + data_version = (u64)ntohl(xdr->data_version_lo); + data_version |= (u64)ntohl(xdr->data_version_hi) << 32; + status->data_version = data_version; + scb->have_status = true; +advance: + *_bp = (const void *)*_bp + sizeof(*xdr); + return; + +bad: + xdr_dump_bad(*_bp); + afs_protocol_error(call, afs_eproto_bad_status); + goto advance; +} + +static time64_t xdr_decode_expiry(struct afs_call *call, u32 expiry) +{ + return ktime_divns(call->issue_time, NSEC_PER_SEC) + expiry; +} + +static void xdr_decode_AFSCallBack(const __be32 **_bp, + struct afs_call *call, + struct afs_status_cb *scb) +{ + struct afs_callback *cb = &scb->callback; + const __be32 *bp = *_bp; + + bp++; /* version */ + cb->expires_at = xdr_decode_expiry(call, ntohl(*bp++)); + bp++; /* type */ + scb->have_cb = true; + *_bp = bp; +} + +/* + * decode an AFSVolSync block + */ +static void xdr_decode_AFSVolSync(const __be32 **_bp, + struct afs_volsync *volsync) +{ + const __be32 *bp = *_bp; + u32 creation; + + creation = ntohl(*bp++); + bp++; /* spare2 */ + bp++; /* spare3 */ + bp++; /* spare4 */ + bp++; /* spare5 */ + bp++; /* spare6 */ + *_bp = bp; + + if (volsync) + volsync->creation = creation; +} + +/* + * encode the requested attributes into an AFSStoreStatus block + */ +static void xdr_encode_AFS_StoreStatus(__be32 **_bp, struct iattr *attr) +{ + __be32 *bp = *_bp; + u32 mask = 0, mtime = 0, owner = 0, group = 0, mode = 0; + + mask = 0; + if (attr->ia_valid & ATTR_MTIME) { + mask |= AFS_SET_MTIME; + mtime = attr->ia_mtime.tv_sec; + } + + if (attr->ia_valid & ATTR_UID) { + mask |= AFS_SET_OWNER; + owner = from_kuid(&init_user_ns, attr->ia_uid); + } + + if (attr->ia_valid & ATTR_GID) { + mask |= AFS_SET_GROUP; + group = from_kgid(&init_user_ns, attr->ia_gid); + } + + if (attr->ia_valid & ATTR_MODE) { + mask |= AFS_SET_MODE; + mode = attr->ia_mode & S_IALLUGO; + } + + *bp++ = htonl(mask); + *bp++ = htonl(mtime); + *bp++ = htonl(owner); + *bp++ = htonl(group); + *bp++ = htonl(mode); + *bp++ = 0; /* segment size */ + *_bp = bp; +} + +/* + * decode an AFSFetchVolumeStatus block + */ +static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp, + struct afs_volume_status *vs) +{ + const __be32 *bp = *_bp; + + vs->vid = ntohl(*bp++); + vs->parent_id = ntohl(*bp++); + vs->online = ntohl(*bp++); + vs->in_service = ntohl(*bp++); + vs->blessed = ntohl(*bp++); + vs->needs_salvage = ntohl(*bp++); + vs->type = ntohl(*bp++); + vs->min_quota = ntohl(*bp++); + vs->max_quota = ntohl(*bp++); + vs->blocks_in_use = ntohl(*bp++); + vs->part_blocks_avail = ntohl(*bp++); + vs->part_max_blocks = ntohl(*bp++); + vs->vol_copy_date = 0; + vs->vol_backup_date = 0; + *_bp = bp; +} + +/* + * deliver reply data to an FS.FetchStatus + */ +static int afs_deliver_fs_fetch_status(struct afs_call *call) +{ + struct afs_operation *op = call->op; + struct afs_vnode_param *vp = &op->file[op->fetch_status.which]; + const __be32 *bp; + int ret; + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + xdr_decode_AFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_AFSCallBack(&bp, call, &vp->scb); + xdr_decode_AFSVolSync(&bp, &op->volsync); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.FetchStatus operation type + */ +static const struct afs_call_type afs_RXFSFetchStatus = { + .name = "FS.FetchStatus", + .op = afs_FS_FetchStatus, + .deliver = afs_deliver_fs_fetch_status, + .destructor = afs_flat_call_destructor, +}; + +/* + * fetch the status information for a file + */ +void afs_fs_fetch_status(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[op->fetch_status.which]; + struct afs_call *call; + __be32 *bp; + + _enter(",%x,{%llx:%llu},,", + key_serial(op->key), vp->fid.vid, vp->fid.vnode); + + call = afs_alloc_flat_call(op->net, &afs_RXFSFetchStatus, + 16, (21 + 3 + 6) * 4); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + bp[0] = htonl(FSFETCHSTATUS); + bp[1] = htonl(vp->fid.vid); + bp[2] = htonl(vp->fid.vnode); + bp[3] = htonl(vp->fid.unique); + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * deliver reply data to an FS.FetchData + */ +static int afs_deliver_fs_fetch_data(struct afs_call *call) +{ + struct afs_operation *op = call->op; + struct afs_vnode_param *vp = &op->file[0]; + struct afs_read *req = op->fetch.req; + const __be32 *bp; + unsigned int size; + int ret; + + _enter("{%u,%zu/%llu}", + call->unmarshall, iov_iter_count(call->iter), req->actual_len); + + switch (call->unmarshall) { + case 0: + req->actual_len = 0; + req->index = 0; + req->offset = req->pos & (PAGE_SIZE - 1); + call->unmarshall++; + if (call->operation_ID == FSFETCHDATA64) { + afs_extract_to_tmp64(call); + } else { + call->tmp_u = htonl(0); + afs_extract_to_tmp(call); + } + fallthrough; + + /* extract the returned data length */ + case 1: + _debug("extract data length"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + req->actual_len = be64_to_cpu(call->tmp64); + _debug("DATA length: %llu", req->actual_len); + req->remain = min(req->len, req->actual_len); + if (req->remain == 0) + goto no_more_data; + + call->unmarshall++; + + begin_page: + ASSERTCMP(req->index, <, req->nr_pages); + if (req->remain > PAGE_SIZE - req->offset) + size = PAGE_SIZE - req->offset; + else + size = req->remain; + call->bvec[0].bv_len = size; + call->bvec[0].bv_offset = req->offset; + call->bvec[0].bv_page = req->pages[req->index]; + iov_iter_bvec(&call->def_iter, READ, call->bvec, 1, size); + ASSERTCMP(size, <=, PAGE_SIZE); + fallthrough; + + /* extract the returned data */ + case 2: + _debug("extract data %zu/%llu", + iov_iter_count(call->iter), req->remain); + + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + req->remain -= call->bvec[0].bv_len; + req->offset += call->bvec[0].bv_len; + ASSERTCMP(req->offset, <=, PAGE_SIZE); + if (req->offset == PAGE_SIZE) { + req->offset = 0; + req->index++; + if (req->remain > 0) + goto begin_page; + } + + ASSERTCMP(req->remain, ==, 0); + if (req->actual_len <= req->len) + goto no_more_data; + + /* Discard any excess data the server gave us */ + afs_extract_discard(call, req->actual_len - req->len); + call->unmarshall = 3; + fallthrough; + + case 3: + _debug("extract discard %zu/%llu", + iov_iter_count(call->iter), req->actual_len - req->len); + + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + no_more_data: + call->unmarshall = 4; + afs_extract_to_buf(call, (21 + 3 + 6) * 4); + fallthrough; + + /* extract the metadata */ + case 4: + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; + + bp = call->buffer; + xdr_decode_AFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_AFSCallBack(&bp, call, &vp->scb); + xdr_decode_AFSVolSync(&bp, &op->volsync); + + req->data_version = vp->scb.status.data_version; + req->file_size = vp->scb.status.size; + + call->unmarshall++; + + case 5: + break; + } + + for (; req->index < req->nr_pages; req->index++) { + if (req->offset < PAGE_SIZE) + zero_user_segment(req->pages[req->index], + req->offset, PAGE_SIZE); + req->offset = 0; + } + + if (req->page_done) + for (req->index = 0; req->index < req->nr_pages; req->index++) + req->page_done(req); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.FetchData operation type + */ +static const struct afs_call_type afs_RXFSFetchData = { + .name = "FS.FetchData", + .op = afs_FS_FetchData, + .deliver = afs_deliver_fs_fetch_data, + .destructor = afs_flat_call_destructor, +}; + +static const struct afs_call_type afs_RXFSFetchData64 = { + .name = "FS.FetchData64", + .op = afs_FS_FetchData64, + .deliver = afs_deliver_fs_fetch_data, + .destructor = afs_flat_call_destructor, +}; + +/* + * fetch data from a very large file + */ +static void afs_fs_fetch_data64(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct afs_read *req = op->fetch.req; + struct afs_call *call; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(op->net, &afs_RXFSFetchData64, 32, (21 + 3 + 6) * 4); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + bp[0] = htonl(FSFETCHDATA64); + bp[1] = htonl(vp->fid.vid); + bp[2] = htonl(vp->fid.vnode); + bp[3] = htonl(vp->fid.unique); + bp[4] = htonl(upper_32_bits(req->pos)); + bp[5] = htonl(lower_32_bits(req->pos)); + bp[6] = 0; + bp[7] = htonl(lower_32_bits(req->len)); + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * fetch data from a file + */ +void afs_fs_fetch_data(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct afs_call *call; + struct afs_read *req = op->fetch.req; + __be32 *bp; + + if (upper_32_bits(req->pos) || + upper_32_bits(req->len) || + upper_32_bits(req->pos + req->len)) + return afs_fs_fetch_data64(op); + + _enter(""); + + call = afs_alloc_flat_call(op->net, &afs_RXFSFetchData, 24, (21 + 3 + 6) * 4); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + bp[0] = htonl(FSFETCHDATA); + bp[1] = htonl(vp->fid.vid); + bp[2] = htonl(vp->fid.vnode); + bp[3] = htonl(vp->fid.unique); + bp[4] = htonl(lower_32_bits(req->pos)); + bp[5] = htonl(lower_32_bits(req->len)); + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * deliver reply data to an FS.CreateFile or an FS.MakeDir + */ +static int afs_deliver_fs_create_vnode(struct afs_call *call) +{ + struct afs_operation *op = call->op; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[1]; + const __be32 *bp; + int ret; + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + xdr_decode_AFSFid(&bp, &op->file[1].fid); + xdr_decode_AFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_AFSFetchStatus(&bp, call, &dvp->scb); + xdr_decode_AFSCallBack(&bp, call, &vp->scb); + xdr_decode_AFSVolSync(&bp, &op->volsync); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.CreateFile and FS.MakeDir operation type + */ +static const struct afs_call_type afs_RXFSCreateFile = { + .name = "FS.CreateFile", + .op = afs_FS_CreateFile, + .deliver = afs_deliver_fs_create_vnode, + .destructor = afs_flat_call_destructor, +}; + +/* + * Create a file. + */ +void afs_fs_create_file(struct afs_operation *op) +{ + const struct qstr *name = &op->dentry->d_name; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_call *call; + size_t namesz, reqsz, padsz; + __be32 *bp; + + _enter(""); + + namesz = name->len; + padsz = (4 - (namesz & 3)) & 3; + reqsz = (5 * 4) + namesz + padsz + (6 * 4); + + call = afs_alloc_flat_call(op->net, &afs_RXFSCreateFile, + reqsz, (3 + 21 + 21 + 3 + 6) * 4); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSCREATEFILE); + *bp++ = htonl(dvp->fid.vid); + *bp++ = htonl(dvp->fid.vnode); + *bp++ = htonl(dvp->fid.unique); + *bp++ = htonl(namesz); + memcpy(bp, name->name, namesz); + bp = (void *) bp + namesz; + if (padsz > 0) { + memset(bp, 0, padsz); + bp = (void *) bp + padsz; + } + *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME); + *bp++ = htonl(op->mtime.tv_sec); /* mtime */ + *bp++ = 0; /* owner */ + *bp++ = 0; /* group */ + *bp++ = htonl(op->create.mode & S_IALLUGO); /* unix mode */ + *bp++ = 0; /* segment size */ + + trace_afs_make_fs_call1(call, &dvp->fid, name); + afs_make_op_call(op, call, GFP_NOFS); +} + +static const struct afs_call_type afs_RXFSMakeDir = { + .name = "FS.MakeDir", + .op = afs_FS_MakeDir, + .deliver = afs_deliver_fs_create_vnode, + .destructor = afs_flat_call_destructor, +}; + +/* + * Create a new directory + */ +void afs_fs_make_dir(struct afs_operation *op) +{ + const struct qstr *name = &op->dentry->d_name; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_call *call; + size_t namesz, reqsz, padsz; + __be32 *bp; + + _enter(""); + + namesz = name->len; + padsz = (4 - (namesz & 3)) & 3; + reqsz = (5 * 4) + namesz + padsz + (6 * 4); + + call = afs_alloc_flat_call(op->net, &afs_RXFSMakeDir, + reqsz, (3 + 21 + 21 + 3 + 6) * 4); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSMAKEDIR); + *bp++ = htonl(dvp->fid.vid); + *bp++ = htonl(dvp->fid.vnode); + *bp++ = htonl(dvp->fid.unique); + *bp++ = htonl(namesz); + memcpy(bp, name->name, namesz); + bp = (void *) bp + namesz; + if (padsz > 0) { + memset(bp, 0, padsz); + bp = (void *) bp + padsz; + } + *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME); + *bp++ = htonl(op->mtime.tv_sec); /* mtime */ + *bp++ = 0; /* owner */ + *bp++ = 0; /* group */ + *bp++ = htonl(op->create.mode & S_IALLUGO); /* unix mode */ + *bp++ = 0; /* segment size */ + + trace_afs_make_fs_call1(call, &dvp->fid, name); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * Deliver reply data to any operation that returns status and volume sync. + */ +static int afs_deliver_fs_file_status_and_vol(struct afs_call *call) +{ + struct afs_operation *op = call->op; + struct afs_vnode_param *vp = &op->file[0]; + const __be32 *bp; + int ret; + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + xdr_decode_AFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_AFSVolSync(&bp, &op->volsync); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.RemoveFile operation type + */ +static const struct afs_call_type afs_RXFSRemoveFile = { + .name = "FS.RemoveFile", + .op = afs_FS_RemoveFile, + .deliver = afs_deliver_fs_file_status_and_vol, + .destructor = afs_flat_call_destructor, +}; + +/* + * Remove a file. + */ +void afs_fs_remove_file(struct afs_operation *op) +{ + const struct qstr *name = &op->dentry->d_name; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_call *call; + size_t namesz, reqsz, padsz; + __be32 *bp; + + _enter(""); + + namesz = name->len; + padsz = (4 - (namesz & 3)) & 3; + reqsz = (5 * 4) + namesz + padsz; + + call = afs_alloc_flat_call(op->net, &afs_RXFSRemoveFile, + reqsz, (21 + 6) * 4); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSREMOVEFILE); + *bp++ = htonl(dvp->fid.vid); + *bp++ = htonl(dvp->fid.vnode); + *bp++ = htonl(dvp->fid.unique); + *bp++ = htonl(namesz); + memcpy(bp, name->name, namesz); + bp = (void *) bp + namesz; + if (padsz > 0) { + memset(bp, 0, padsz); + bp = (void *) bp + padsz; + } + + trace_afs_make_fs_call1(call, &dvp->fid, name); + afs_make_op_call(op, call, GFP_NOFS); +} + +static const struct afs_call_type afs_RXFSRemoveDir = { + .name = "FS.RemoveDir", + .op = afs_FS_RemoveDir, + .deliver = afs_deliver_fs_file_status_and_vol, + .destructor = afs_flat_call_destructor, +}; + +/* + * Remove a directory. + */ +void afs_fs_remove_dir(struct afs_operation *op) +{ + const struct qstr *name = &op->dentry->d_name; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_call *call; + size_t namesz, reqsz, padsz; + __be32 *bp; + + _enter(""); + + namesz = name->len; + padsz = (4 - (namesz & 3)) & 3; + reqsz = (5 * 4) + namesz + padsz; + + call = afs_alloc_flat_call(op->net, &afs_RXFSRemoveDir, + reqsz, (21 + 6) * 4); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSREMOVEDIR); + *bp++ = htonl(dvp->fid.vid); + *bp++ = htonl(dvp->fid.vnode); + *bp++ = htonl(dvp->fid.unique); + *bp++ = htonl(namesz); + memcpy(bp, name->name, namesz); + bp = (void *) bp + namesz; + if (padsz > 0) { + memset(bp, 0, padsz); + bp = (void *) bp + padsz; + } + + trace_afs_make_fs_call1(call, &dvp->fid, name); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * deliver reply data to an FS.Link + */ +static int afs_deliver_fs_link(struct afs_call *call) +{ + struct afs_operation *op = call->op; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[1]; + const __be32 *bp; + int ret; + + _enter("{%u}", call->unmarshall); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + xdr_decode_AFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_AFSFetchStatus(&bp, call, &dvp->scb); + xdr_decode_AFSVolSync(&bp, &op->volsync); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.Link operation type + */ +static const struct afs_call_type afs_RXFSLink = { + .name = "FS.Link", + .op = afs_FS_Link, + .deliver = afs_deliver_fs_link, + .destructor = afs_flat_call_destructor, +}; + +/* + * make a hard link + */ +void afs_fs_link(struct afs_operation *op) +{ + const struct qstr *name = &op->dentry->d_name; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[1]; + struct afs_call *call; + size_t namesz, reqsz, padsz; + __be32 *bp; + + _enter(""); + + namesz = name->len; + padsz = (4 - (namesz & 3)) & 3; + reqsz = (5 * 4) + namesz + padsz + (3 * 4); + + call = afs_alloc_flat_call(op->net, &afs_RXFSLink, reqsz, (21 + 21 + 6) * 4); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSLINK); + *bp++ = htonl(dvp->fid.vid); + *bp++ = htonl(dvp->fid.vnode); + *bp++ = htonl(dvp->fid.unique); + *bp++ = htonl(namesz); + memcpy(bp, name->name, namesz); + bp = (void *) bp + namesz; + if (padsz > 0) { + memset(bp, 0, padsz); + bp = (void *) bp + padsz; + } + *bp++ = htonl(vp->fid.vid); + *bp++ = htonl(vp->fid.vnode); + *bp++ = htonl(vp->fid.unique); + + trace_afs_make_fs_call1(call, &vp->fid, name); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * deliver reply data to an FS.Symlink + */ +static int afs_deliver_fs_symlink(struct afs_call *call) +{ + struct afs_operation *op = call->op; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[1]; + const __be32 *bp; + int ret; + + _enter("{%u}", call->unmarshall); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + xdr_decode_AFSFid(&bp, &vp->fid); + xdr_decode_AFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_AFSFetchStatus(&bp, call, &dvp->scb); + xdr_decode_AFSVolSync(&bp, &op->volsync); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.Symlink operation type + */ +static const struct afs_call_type afs_RXFSSymlink = { + .name = "FS.Symlink", + .op = afs_FS_Symlink, + .deliver = afs_deliver_fs_symlink, + .destructor = afs_flat_call_destructor, +}; + +/* + * create a symbolic link + */ +void afs_fs_symlink(struct afs_operation *op) +{ + const struct qstr *name = &op->dentry->d_name; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_call *call; + size_t namesz, reqsz, padsz, c_namesz, c_padsz; + __be32 *bp; + + _enter(""); + + namesz = name->len; + padsz = (4 - (namesz & 3)) & 3; + + c_namesz = strlen(op->create.symlink); + c_padsz = (4 - (c_namesz & 3)) & 3; + + reqsz = (6 * 4) + namesz + padsz + c_namesz + c_padsz + (6 * 4); + + call = afs_alloc_flat_call(op->net, &afs_RXFSSymlink, reqsz, + (3 + 21 + 21 + 6) * 4); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSSYMLINK); + *bp++ = htonl(dvp->fid.vid); + *bp++ = htonl(dvp->fid.vnode); + *bp++ = htonl(dvp->fid.unique); + *bp++ = htonl(namesz); + memcpy(bp, name->name, namesz); + bp = (void *) bp + namesz; + if (padsz > 0) { + memset(bp, 0, padsz); + bp = (void *) bp + padsz; + } + *bp++ = htonl(c_namesz); + memcpy(bp, op->create.symlink, c_namesz); + bp = (void *) bp + c_namesz; + if (c_padsz > 0) { + memset(bp, 0, c_padsz); + bp = (void *) bp + c_padsz; + } + *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME); + *bp++ = htonl(op->mtime.tv_sec); /* mtime */ + *bp++ = 0; /* owner */ + *bp++ = 0; /* group */ + *bp++ = htonl(S_IRWXUGO); /* unix mode */ + *bp++ = 0; /* segment size */ + + trace_afs_make_fs_call1(call, &dvp->fid, name); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * deliver reply data to an FS.Rename + */ +static int afs_deliver_fs_rename(struct afs_call *call) +{ + struct afs_operation *op = call->op; + struct afs_vnode_param *orig_dvp = &op->file[0]; + struct afs_vnode_param *new_dvp = &op->file[1]; + const __be32 *bp; + int ret; + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + bp = call->buffer; + /* If the two dirs are the same, we have two copies of the same status + * report, so we just decode it twice. + */ + xdr_decode_AFSFetchStatus(&bp, call, &orig_dvp->scb); + xdr_decode_AFSFetchStatus(&bp, call, &new_dvp->scb); + xdr_decode_AFSVolSync(&bp, &op->volsync); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.Rename operation type + */ +static const struct afs_call_type afs_RXFSRename = { + .name = "FS.Rename", + .op = afs_FS_Rename, + .deliver = afs_deliver_fs_rename, + .destructor = afs_flat_call_destructor, +}; + +/* + * Rename/move a file or directory. + */ +void afs_fs_rename(struct afs_operation *op) +{ + struct afs_vnode_param *orig_dvp = &op->file[0]; + struct afs_vnode_param *new_dvp = &op->file[1]; + const struct qstr *orig_name = &op->dentry->d_name; + const struct qstr *new_name = &op->dentry_2->d_name; + struct afs_call *call; + size_t reqsz, o_namesz, o_padsz, n_namesz, n_padsz; + __be32 *bp; + + _enter(""); + + o_namesz = orig_name->len; + o_padsz = (4 - (o_namesz & 3)) & 3; + + n_namesz = new_name->len; + n_padsz = (4 - (n_namesz & 3)) & 3; + + reqsz = (4 * 4) + + 4 + o_namesz + o_padsz + + (3 * 4) + + 4 + n_namesz + n_padsz; + + call = afs_alloc_flat_call(op->net, &afs_RXFSRename, reqsz, (21 + 21 + 6) * 4); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSRENAME); + *bp++ = htonl(orig_dvp->fid.vid); + *bp++ = htonl(orig_dvp->fid.vnode); + *bp++ = htonl(orig_dvp->fid.unique); + *bp++ = htonl(o_namesz); + memcpy(bp, orig_name->name, o_namesz); + bp = (void *) bp + o_namesz; + if (o_padsz > 0) { + memset(bp, 0, o_padsz); + bp = (void *) bp + o_padsz; + } + + *bp++ = htonl(new_dvp->fid.vid); + *bp++ = htonl(new_dvp->fid.vnode); + *bp++ = htonl(new_dvp->fid.unique); + *bp++ = htonl(n_namesz); + memcpy(bp, new_name->name, n_namesz); + bp = (void *) bp + n_namesz; + if (n_padsz > 0) { + memset(bp, 0, n_padsz); + bp = (void *) bp + n_padsz; + } + + trace_afs_make_fs_call2(call, &orig_dvp->fid, orig_name, new_name); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * Deliver reply data to FS.StoreData or FS.StoreStatus + */ +static int afs_deliver_fs_store_data(struct afs_call *call) +{ + struct afs_operation *op = call->op; + struct afs_vnode_param *vp = &op->file[0]; + const __be32 *bp; + int ret; + + _enter(""); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + xdr_decode_AFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_AFSVolSync(&bp, &op->volsync); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.StoreData operation type + */ +static const struct afs_call_type afs_RXFSStoreData = { + .name = "FS.StoreData", + .op = afs_FS_StoreData, + .deliver = afs_deliver_fs_store_data, + .destructor = afs_flat_call_destructor, +}; + +static const struct afs_call_type afs_RXFSStoreData64 = { + .name = "FS.StoreData64", + .op = afs_FS_StoreData64, + .deliver = afs_deliver_fs_store_data, + .destructor = afs_flat_call_destructor, +}; + +/* + * store a set of pages to a very large file + */ +static void afs_fs_store_data64(struct afs_operation *op, + loff_t pos, loff_t size, loff_t i_size) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct afs_call *call; + __be32 *bp; + + _enter(",%x,{%llx:%llu},,", + key_serial(op->key), vp->fid.vid, vp->fid.vnode); + + call = afs_alloc_flat_call(op->net, &afs_RXFSStoreData64, + (4 + 6 + 3 * 2) * 4, + (21 + 6) * 4); + if (!call) + return afs_op_nomem(op); + + call->send_pages = true; + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSSTOREDATA64); + *bp++ = htonl(vp->fid.vid); + *bp++ = htonl(vp->fid.vnode); + *bp++ = htonl(vp->fid.unique); + + *bp++ = htonl(AFS_SET_MTIME); /* mask */ + *bp++ = htonl(op->mtime.tv_sec); /* mtime */ + *bp++ = 0; /* owner */ + *bp++ = 0; /* group */ + *bp++ = 0; /* unix mode */ + *bp++ = 0; /* segment size */ + + *bp++ = htonl(upper_32_bits(pos)); + *bp++ = htonl(lower_32_bits(pos)); + *bp++ = htonl(upper_32_bits(size)); + *bp++ = htonl(lower_32_bits(size)); + *bp++ = htonl(upper_32_bits(i_size)); + *bp++ = htonl(lower_32_bits(i_size)); + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * store a set of pages + */ +void afs_fs_store_data(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct afs_call *call; + loff_t size, pos, i_size; + __be32 *bp; + + _enter(",%x,{%llx:%llu},,", + key_serial(op->key), vp->fid.vid, vp->fid.vnode); + + size = (loff_t)op->store.last_to - (loff_t)op->store.first_offset; + if (op->store.first != op->store.last) + size += (loff_t)(op->store.last - op->store.first) << PAGE_SHIFT; + pos = (loff_t)op->store.first << PAGE_SHIFT; + pos += op->store.first_offset; + + i_size = i_size_read(&vp->vnode->vfs_inode); + if (pos + size > i_size) + i_size = size + pos; + + _debug("size %llx, at %llx, i_size %llx", + (unsigned long long) size, (unsigned long long) pos, + (unsigned long long) i_size); + + if (upper_32_bits(pos) || upper_32_bits(i_size) || upper_32_bits(size) || + upper_32_bits(pos + size)) + return afs_fs_store_data64(op, pos, size, i_size); + + call = afs_alloc_flat_call(op->net, &afs_RXFSStoreData, + (4 + 6 + 3) * 4, + (21 + 6) * 4); + if (!call) + return afs_op_nomem(op); + + call->send_pages = true; + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSSTOREDATA); + *bp++ = htonl(vp->fid.vid); + *bp++ = htonl(vp->fid.vnode); + *bp++ = htonl(vp->fid.unique); + + *bp++ = htonl(AFS_SET_MTIME); /* mask */ + *bp++ = htonl(op->mtime.tv_sec); /* mtime */ + *bp++ = 0; /* owner */ + *bp++ = 0; /* group */ + *bp++ = 0; /* unix mode */ + *bp++ = 0; /* segment size */ + + *bp++ = htonl(lower_32_bits(pos)); + *bp++ = htonl(lower_32_bits(size)); + *bp++ = htonl(lower_32_bits(i_size)); + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * FS.StoreStatus operation type + */ +static const struct afs_call_type afs_RXFSStoreStatus = { + .name = "FS.StoreStatus", + .op = afs_FS_StoreStatus, + .deliver = afs_deliver_fs_store_data, + .destructor = afs_flat_call_destructor, +}; + +static const struct afs_call_type afs_RXFSStoreData_as_Status = { + .name = "FS.StoreData", + .op = afs_FS_StoreData, + .deliver = afs_deliver_fs_store_data, + .destructor = afs_flat_call_destructor, +}; + +static const struct afs_call_type afs_RXFSStoreData64_as_Status = { + .name = "FS.StoreData64", + .op = afs_FS_StoreData64, + .deliver = afs_deliver_fs_store_data, + .destructor = afs_flat_call_destructor, +}; + +/* + * set the attributes on a very large file, using FS.StoreData rather than + * FS.StoreStatus so as to alter the file size also + */ +static void afs_fs_setattr_size64(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct afs_call *call; + struct iattr *attr = op->setattr.attr; + __be32 *bp; + + _enter(",%x,{%llx:%llu},,", + key_serial(op->key), vp->fid.vid, vp->fid.vnode); + + ASSERT(attr->ia_valid & ATTR_SIZE); + + call = afs_alloc_flat_call(op->net, &afs_RXFSStoreData64_as_Status, + (4 + 6 + 3 * 2) * 4, + (21 + 6) * 4); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSSTOREDATA64); + *bp++ = htonl(vp->fid.vid); + *bp++ = htonl(vp->fid.vnode); + *bp++ = htonl(vp->fid.unique); + + xdr_encode_AFS_StoreStatus(&bp, attr); + + *bp++ = htonl(upper_32_bits(attr->ia_size)); /* position of start of write */ + *bp++ = htonl(lower_32_bits(attr->ia_size)); + *bp++ = 0; /* size of write */ + *bp++ = 0; + *bp++ = htonl(upper_32_bits(attr->ia_size)); /* new file length */ + *bp++ = htonl(lower_32_bits(attr->ia_size)); + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * set the attributes on a file, using FS.StoreData rather than FS.StoreStatus + * so as to alter the file size also + */ +static void afs_fs_setattr_size(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct afs_call *call; + struct iattr *attr = op->setattr.attr; + __be32 *bp; + + _enter(",%x,{%llx:%llu},,", + key_serial(op->key), vp->fid.vid, vp->fid.vnode); + + ASSERT(attr->ia_valid & ATTR_SIZE); + if (upper_32_bits(attr->ia_size)) + return afs_fs_setattr_size64(op); + + call = afs_alloc_flat_call(op->net, &afs_RXFSStoreData_as_Status, + (4 + 6 + 3) * 4, + (21 + 6) * 4); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSSTOREDATA); + *bp++ = htonl(vp->fid.vid); + *bp++ = htonl(vp->fid.vnode); + *bp++ = htonl(vp->fid.unique); + + xdr_encode_AFS_StoreStatus(&bp, attr); + + *bp++ = htonl(attr->ia_size); /* position of start of write */ + *bp++ = 0; /* size of write */ + *bp++ = htonl(attr->ia_size); /* new file length */ + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * set the attributes on a file, using FS.StoreData if there's a change in file + * size, and FS.StoreStatus otherwise + */ +void afs_fs_setattr(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct afs_call *call; + struct iattr *attr = op->setattr.attr; + __be32 *bp; + + if (attr->ia_valid & ATTR_SIZE) + return afs_fs_setattr_size(op); + + _enter(",%x,{%llx:%llu},,", + key_serial(op->key), vp->fid.vid, vp->fid.vnode); + + call = afs_alloc_flat_call(op->net, &afs_RXFSStoreStatus, + (4 + 6) * 4, + (21 + 6) * 4); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSSTORESTATUS); + *bp++ = htonl(vp->fid.vid); + *bp++ = htonl(vp->fid.vnode); + *bp++ = htonl(vp->fid.unique); + + xdr_encode_AFS_StoreStatus(&bp, op->setattr.attr); + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * deliver reply data to an FS.GetVolumeStatus + */ +static int afs_deliver_fs_get_volume_status(struct afs_call *call) +{ + struct afs_operation *op = call->op; + const __be32 *bp; + char *p; + u32 size; + int ret; + + _enter("{%u}", call->unmarshall); + + switch (call->unmarshall) { + case 0: + call->unmarshall++; + afs_extract_to_buf(call, 12 * 4); + fallthrough; + + /* extract the returned status record */ + case 1: + _debug("extract status"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + bp = call->buffer; + xdr_decode_AFSFetchVolumeStatus(&bp, &op->volstatus.vs); + call->unmarshall++; + afs_extract_to_tmp(call); + fallthrough; + + /* extract the volume name length */ + case 2: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + call->count = ntohl(call->tmp); + _debug("volname length: %u", call->count); + if (call->count >= AFSNAMEMAX) + return afs_protocol_error(call, afs_eproto_volname_len); + size = (call->count + 3) & ~3; /* It's padded */ + afs_extract_to_buf(call, size); + call->unmarshall++; + fallthrough; + + /* extract the volume name */ + case 3: + _debug("extract volname"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + p = call->buffer; + p[call->count] = 0; + _debug("volname '%s'", p); + afs_extract_to_tmp(call); + call->unmarshall++; + fallthrough; + + /* extract the offline message length */ + case 4: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + call->count = ntohl(call->tmp); + _debug("offline msg length: %u", call->count); + if (call->count >= AFSNAMEMAX) + return afs_protocol_error(call, afs_eproto_offline_msg_len); + size = (call->count + 3) & ~3; /* It's padded */ + afs_extract_to_buf(call, size); + call->unmarshall++; + fallthrough; + + /* extract the offline message */ + case 5: + _debug("extract offline"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + p = call->buffer; + p[call->count] = 0; + _debug("offline '%s'", p); + + afs_extract_to_tmp(call); + call->unmarshall++; + fallthrough; + + /* extract the message of the day length */ + case 6: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + call->count = ntohl(call->tmp); + _debug("motd length: %u", call->count); + if (call->count >= AFSNAMEMAX) + return afs_protocol_error(call, afs_eproto_motd_len); + size = (call->count + 3) & ~3; /* It's padded */ + afs_extract_to_buf(call, size); + call->unmarshall++; + fallthrough; + + /* extract the message of the day */ + case 7: + _debug("extract motd"); + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; + + p = call->buffer; + p[call->count] = 0; + _debug("motd '%s'", p); + + call->unmarshall++; + + case 8: + break; + } + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.GetVolumeStatus operation type + */ +static const struct afs_call_type afs_RXFSGetVolumeStatus = { + .name = "FS.GetVolumeStatus", + .op = afs_FS_GetVolumeStatus, + .deliver = afs_deliver_fs_get_volume_status, + .destructor = afs_flat_call_destructor, +}; + +/* + * fetch the status of a volume + */ +void afs_fs_get_volume_status(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct afs_call *call; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(op->net, &afs_RXFSGetVolumeStatus, 2 * 4, + max(12 * 4, AFSOPAQUEMAX + 1)); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + bp[0] = htonl(FSGETVOLUMESTATUS); + bp[1] = htonl(vp->fid.vid); + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * deliver reply data to an FS.SetLock, FS.ExtendLock or FS.ReleaseLock + */ +static int afs_deliver_fs_xxxx_lock(struct afs_call *call) +{ + struct afs_operation *op = call->op; + const __be32 *bp; + int ret; + + _enter("{%u}", call->unmarshall); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + xdr_decode_AFSVolSync(&bp, &op->volsync); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.SetLock operation type + */ +static const struct afs_call_type afs_RXFSSetLock = { + .name = "FS.SetLock", + .op = afs_FS_SetLock, + .deliver = afs_deliver_fs_xxxx_lock, + .done = afs_lock_op_done, + .destructor = afs_flat_call_destructor, +}; + +/* + * FS.ExtendLock operation type + */ +static const struct afs_call_type afs_RXFSExtendLock = { + .name = "FS.ExtendLock", + .op = afs_FS_ExtendLock, + .deliver = afs_deliver_fs_xxxx_lock, + .done = afs_lock_op_done, + .destructor = afs_flat_call_destructor, +}; + +/* + * FS.ReleaseLock operation type + */ +static const struct afs_call_type afs_RXFSReleaseLock = { + .name = "FS.ReleaseLock", + .op = afs_FS_ReleaseLock, + .deliver = afs_deliver_fs_xxxx_lock, + .destructor = afs_flat_call_destructor, +}; + +/* + * Set a lock on a file + */ +void afs_fs_set_lock(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct afs_call *call; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(op->net, &afs_RXFSSetLock, 5 * 4, 6 * 4); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSSETLOCK); + *bp++ = htonl(vp->fid.vid); + *bp++ = htonl(vp->fid.vnode); + *bp++ = htonl(vp->fid.unique); + *bp++ = htonl(op->lock.type); + + trace_afs_make_fs_calli(call, &vp->fid, op->lock.type); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * extend a lock on a file + */ +void afs_fs_extend_lock(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct afs_call *call; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(op->net, &afs_RXFSExtendLock, 4 * 4, 6 * 4); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSEXTENDLOCK); + *bp++ = htonl(vp->fid.vid); + *bp++ = htonl(vp->fid.vnode); + *bp++ = htonl(vp->fid.unique); + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * release a lock on a file + */ +void afs_fs_release_lock(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct afs_call *call; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(op->net, &afs_RXFSReleaseLock, 4 * 4, 6 * 4); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSRELEASELOCK); + *bp++ = htonl(vp->fid.vid); + *bp++ = htonl(vp->fid.vnode); + *bp++ = htonl(vp->fid.unique); + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * Deliver reply data to an FS.GiveUpAllCallBacks operation. + */ +static int afs_deliver_fs_give_up_all_callbacks(struct afs_call *call) +{ + return afs_transfer_reply(call); +} + +/* + * FS.GiveUpAllCallBacks operation type + */ +static const struct afs_call_type afs_RXFSGiveUpAllCallBacks = { + .name = "FS.GiveUpAllCallBacks", + .op = afs_FS_GiveUpAllCallBacks, + .deliver = afs_deliver_fs_give_up_all_callbacks, + .destructor = afs_flat_call_destructor, +}; + +/* + * Flush all the callbacks we have on a server. + */ +int afs_fs_give_up_all_callbacks(struct afs_net *net, + struct afs_server *server, + struct afs_addr_cursor *ac, + struct key *key) +{ + struct afs_call *call; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(net, &afs_RXFSGiveUpAllCallBacks, 1 * 4, 0); + if (!call) + return -ENOMEM; + + call->key = key; + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSGIVEUPALLCALLBACKS); + + call->server = afs_use_server(server, afs_server_trace_give_up_cb); + afs_make_call(ac, call, GFP_NOFS); + return afs_wait_for_call_to_complete(call, ac); +} + +/* + * Deliver reply data to an FS.GetCapabilities operation. + */ +static int afs_deliver_fs_get_capabilities(struct afs_call *call) +{ + u32 count; + int ret; + + _enter("{%u,%zu}", call->unmarshall, iov_iter_count(call->iter)); + + switch (call->unmarshall) { + case 0: + afs_extract_to_tmp(call); + call->unmarshall++; + fallthrough; + + /* Extract the capabilities word count */ + case 1: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + count = ntohl(call->tmp); + + call->count = count; + call->count2 = count; + afs_extract_discard(call, count * sizeof(__be32)); + call->unmarshall++; + fallthrough; + + /* Extract capabilities words */ + case 2: + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; + + /* TODO: Examine capabilities */ + + call->unmarshall++; + break; + } + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.GetCapabilities operation type + */ +static const struct afs_call_type afs_RXFSGetCapabilities = { + .name = "FS.GetCapabilities", + .op = afs_FS_GetCapabilities, + .deliver = afs_deliver_fs_get_capabilities, + .done = afs_fileserver_probe_result, + .destructor = afs_flat_call_destructor, +}; + +/* + * Probe a fileserver for the capabilities that it supports. This RPC can + * reply with up to 196 words. The operation is asynchronous and if we managed + * to allocate a call, true is returned the result is delivered through the + * ->done() - otherwise we return false to indicate we didn't even try. + */ +bool afs_fs_get_capabilities(struct afs_net *net, struct afs_server *server, + struct afs_addr_cursor *ac, struct key *key) +{ + struct afs_call *call; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(net, &afs_RXFSGetCapabilities, 1 * 4, 16 * 4); + if (!call) + return false; + + call->key = key; + call->server = afs_use_server(server, afs_server_trace_get_caps); + call->upgrade = true; + call->async = true; + call->max_lifespan = AFS_PROBE_MAX_LIFESPAN; + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSGETCAPABILITIES); + + trace_afs_make_fs_call(call, NULL); + afs_make_call(ac, call, GFP_NOFS); + afs_put_call(call); + return true; +} + +/* + * Deliver reply data to an FS.InlineBulkStatus call + */ +static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) +{ + struct afs_operation *op = call->op; + struct afs_status_cb *scb; + const __be32 *bp; + u32 tmp; + int ret; + + _enter("{%u}", call->unmarshall); + + switch (call->unmarshall) { + case 0: + afs_extract_to_tmp(call); + call->unmarshall++; + fallthrough; + + /* Extract the file status count and array in two steps */ + case 1: + _debug("extract status count"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + tmp = ntohl(call->tmp); + _debug("status count: %u/%u", tmp, op->nr_files); + if (tmp != op->nr_files) + return afs_protocol_error(call, afs_eproto_ibulkst_count); + + call->count = 0; + call->unmarshall++; + more_counts: + afs_extract_to_buf(call, 21 * sizeof(__be32)); + fallthrough; + + case 2: + _debug("extract status array %u", call->count); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + switch (call->count) { + case 0: + scb = &op->file[0].scb; + break; + case 1: + scb = &op->file[1].scb; + break; + default: + scb = &op->more_files[call->count - 2].scb; + break; + } + + bp = call->buffer; + xdr_decode_AFSFetchStatus(&bp, call, scb); + + call->count++; + if (call->count < op->nr_files) + goto more_counts; + + call->count = 0; + call->unmarshall++; + afs_extract_to_tmp(call); + fallthrough; + + /* Extract the callback count and array in two steps */ + case 3: + _debug("extract CB count"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + tmp = ntohl(call->tmp); + _debug("CB count: %u", tmp); + if (tmp != op->nr_files) + return afs_protocol_error(call, afs_eproto_ibulkst_cb_count); + call->count = 0; + call->unmarshall++; + more_cbs: + afs_extract_to_buf(call, 3 * sizeof(__be32)); + fallthrough; + + case 4: + _debug("extract CB array"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + _debug("unmarshall CB array"); + switch (call->count) { + case 0: + scb = &op->file[0].scb; + break; + case 1: + scb = &op->file[1].scb; + break; + default: + scb = &op->more_files[call->count - 2].scb; + break; + } + + bp = call->buffer; + xdr_decode_AFSCallBack(&bp, call, scb); + call->count++; + if (call->count < op->nr_files) + goto more_cbs; + + afs_extract_to_buf(call, 6 * sizeof(__be32)); + call->unmarshall++; + fallthrough; + + case 5: + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; + + bp = call->buffer; + xdr_decode_AFSVolSync(&bp, &op->volsync); + + call->unmarshall++; + + case 6: + break; + } + + _leave(" = 0 [done]"); + return 0; +} + +static void afs_done_fs_inline_bulk_status(struct afs_call *call) +{ + if (call->error == -ECONNABORTED && + call->abort_code == RX_INVALID_OPERATION) { + set_bit(AFS_SERVER_FL_NO_IBULK, &call->server->flags); + if (call->op) + set_bit(AFS_VOLUME_MAYBE_NO_IBULK, &call->op->volume->flags); + } +} + +/* + * FS.InlineBulkStatus operation type + */ +static const struct afs_call_type afs_RXFSInlineBulkStatus = { + .name = "FS.InlineBulkStatus", + .op = afs_FS_InlineBulkStatus, + .deliver = afs_deliver_fs_inline_bulk_status, + .done = afs_done_fs_inline_bulk_status, + .destructor = afs_flat_call_destructor, +}; + +/* + * Fetch the status information for up to 50 files + */ +void afs_fs_inline_bulk_status(struct afs_operation *op) +{ + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[1]; + struct afs_call *call; + __be32 *bp; + int i; + + if (test_bit(AFS_SERVER_FL_NO_IBULK, &op->server->flags)) { + op->error = -ENOTSUPP; + return; + } + + _enter(",%x,{%llx:%llu},%u", + key_serial(op->key), vp->fid.vid, vp->fid.vnode, op->nr_files); + + call = afs_alloc_flat_call(op->net, &afs_RXFSInlineBulkStatus, + (2 + op->nr_files * 3) * 4, + 21 * 4); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSINLINEBULKSTATUS); + *bp++ = htonl(op->nr_files); + *bp++ = htonl(dvp->fid.vid); + *bp++ = htonl(dvp->fid.vnode); + *bp++ = htonl(dvp->fid.unique); + *bp++ = htonl(vp->fid.vid); + *bp++ = htonl(vp->fid.vnode); + *bp++ = htonl(vp->fid.unique); + for (i = 0; i < op->nr_files - 2; i++) { + *bp++ = htonl(op->more_files[i].fid.vid); + *bp++ = htonl(op->more_files[i].fid.vnode); + *bp++ = htonl(op->more_files[i].fid.unique); + } + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * deliver reply data to an FS.FetchACL + */ +static int afs_deliver_fs_fetch_acl(struct afs_call *call) +{ + struct afs_operation *op = call->op; + struct afs_vnode_param *vp = &op->file[0]; + struct afs_acl *acl; + const __be32 *bp; + unsigned int size; + int ret; + + _enter("{%u}", call->unmarshall); + + switch (call->unmarshall) { + case 0: + afs_extract_to_tmp(call); + call->unmarshall++; + fallthrough; + + /* extract the returned data length */ + case 1: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + size = call->count2 = ntohl(call->tmp); + size = round_up(size, 4); + + acl = kmalloc(struct_size(acl, data, size), GFP_KERNEL); + if (!acl) + return -ENOMEM; + op->acl = acl; + acl->size = call->count2; + afs_extract_begin(call, acl->data, size); + call->unmarshall++; + fallthrough; + + /* extract the returned data */ + case 2: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + afs_extract_to_buf(call, (21 + 6) * 4); + call->unmarshall++; + fallthrough; + + /* extract the metadata */ + case 3: + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; + + bp = call->buffer; + xdr_decode_AFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_AFSVolSync(&bp, &op->volsync); + + call->unmarshall++; + + case 4: + break; + } + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.FetchACL operation type + */ +static const struct afs_call_type afs_RXFSFetchACL = { + .name = "FS.FetchACL", + .op = afs_FS_FetchACL, + .deliver = afs_deliver_fs_fetch_acl, +}; + +/* + * Fetch the ACL for a file. + */ +void afs_fs_fetch_acl(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct afs_call *call; + __be32 *bp; + + _enter(",%x,{%llx:%llu},,", + key_serial(op->key), vp->fid.vid, vp->fid.vnode); + + call = afs_alloc_flat_call(op->net, &afs_RXFSFetchACL, 16, (21 + 6) * 4); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + bp[0] = htonl(FSFETCHACL); + bp[1] = htonl(vp->fid.vid); + bp[2] = htonl(vp->fid.vnode); + bp[3] = htonl(vp->fid.unique); + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_KERNEL); +} + +/* + * FS.StoreACL operation type + */ +static const struct afs_call_type afs_RXFSStoreACL = { + .name = "FS.StoreACL", + .op = afs_FS_StoreACL, + .deliver = afs_deliver_fs_file_status_and_vol, + .destructor = afs_flat_call_destructor, +}; + +/* + * Fetch the ACL for a file. + */ +void afs_fs_store_acl(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct afs_call *call; + const struct afs_acl *acl = op->acl; + size_t size; + __be32 *bp; + + _enter(",%x,{%llx:%llu},,", + key_serial(op->key), vp->fid.vid, vp->fid.vnode); + + size = round_up(acl->size, 4); + call = afs_alloc_flat_call(op->net, &afs_RXFSStoreACL, + 5 * 4 + size, (21 + 6) * 4); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + bp[0] = htonl(FSSTOREACL); + bp[1] = htonl(vp->fid.vid); + bp[2] = htonl(vp->fid.vnode); + bp[3] = htonl(vp->fid.unique); + bp[4] = htonl(acl->size); + memcpy(&bp[5], acl->data, acl->size); + if (acl->size != size) + memset((void *)&bp[5] + acl->size, 0, size - acl->size); + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_KERNEL); +} diff --git a/fs/afs/inode.c b/fs/afs/inode.c new file mode 100644 index 000000000..fdca4262f --- /dev/null +++ b/fs/afs/inode.c @@ -0,0 +1,926 @@ +/* + * Copyright (c) 2002 Red Hat, Inc. All rights reserved. + * + * This software may be freely redistributed under the terms of the + * GNU General Public License. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Authors: David Woodhouse <dwmw2@infradead.org> + * David Howells <dhowells@redhat.com> + * + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/pagemap.h> +#include <linux/sched.h> +#include <linux/mount.h> +#include <linux/namei.h> +#include <linux/iversion.h> +#include "internal.h" +#include "afs_fs.h" + +static const struct inode_operations afs_symlink_inode_operations = { + .get_link = page_get_link, +}; + +static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *parent_vnode) +{ + static unsigned long once_only; + + pr_warn("kAFS: AFS vnode with undefined type %u\n", vnode->status.type); + pr_warn("kAFS: A=%d m=%o s=%llx v=%llx\n", + vnode->status.abort_code, + vnode->status.mode, + vnode->status.size, + vnode->status.data_version); + pr_warn("kAFS: vnode %llx:%llx:%x\n", + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique); + if (parent_vnode) + pr_warn("kAFS: dir %llx:%llx:%x\n", + parent_vnode->fid.vid, + parent_vnode->fid.vnode, + parent_vnode->fid.unique); + + if (!test_and_set_bit(0, &once_only)) + dump_stack(); +} + +/* + * Initialise an inode from the vnode status. + */ +static int afs_inode_init_from_status(struct afs_operation *op, + struct afs_vnode_param *vp, + struct afs_vnode *vnode) +{ + struct afs_file_status *status = &vp->scb.status; + struct inode *inode = AFS_VNODE_TO_I(vnode); + struct timespec64 t; + + _enter("{%llx:%llu.%u} %s", + vp->fid.vid, vp->fid.vnode, vp->fid.unique, + op->type ? op->type->name : "???"); + + _debug("FS: ft=%d lk=%d sz=%llu ver=%Lu mod=%hu", + status->type, + status->nlink, + (unsigned long long) status->size, + status->data_version, + status->mode); + + write_seqlock(&vnode->cb_lock); + + vnode->cb_v_break = op->cb_v_break; + vnode->cb_s_break = op->cb_s_break; + vnode->status = *status; + + t = status->mtime_client; + inode->i_ctime = t; + inode->i_mtime = t; + inode->i_atime = t; + inode->i_flags |= S_NOATIME; + inode->i_uid = make_kuid(&init_user_ns, status->owner); + inode->i_gid = make_kgid(&init_user_ns, status->group); + set_nlink(&vnode->vfs_inode, status->nlink); + + switch (status->type) { + case AFS_FTYPE_FILE: + inode->i_mode = S_IFREG | (status->mode & S_IALLUGO); + inode->i_op = &afs_file_inode_operations; + inode->i_fop = &afs_file_operations; + inode->i_mapping->a_ops = &afs_fs_aops; + break; + case AFS_FTYPE_DIR: + inode->i_mode = S_IFDIR | (status->mode & S_IALLUGO); + inode->i_op = &afs_dir_inode_operations; + inode->i_fop = &afs_dir_file_operations; + inode->i_mapping->a_ops = &afs_dir_aops; + break; + case AFS_FTYPE_SYMLINK: + /* Symlinks with a mode of 0644 are actually mountpoints. */ + if ((status->mode & 0777) == 0644) { + inode->i_flags |= S_AUTOMOUNT; + + set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); + + inode->i_mode = S_IFDIR | 0555; + inode->i_op = &afs_mntpt_inode_operations; + inode->i_fop = &afs_mntpt_file_operations; + inode->i_mapping->a_ops = &afs_fs_aops; + } else { + inode->i_mode = S_IFLNK | status->mode; + inode->i_op = &afs_symlink_inode_operations; + inode->i_mapping->a_ops = &afs_fs_aops; + } + inode_nohighmem(inode); + break; + default: + dump_vnode(vnode, op->file[0].vnode != vnode ? op->file[0].vnode : NULL); + write_sequnlock(&vnode->cb_lock); + return afs_protocol_error(NULL, afs_eproto_file_type); + } + + afs_set_i_size(vnode, status->size); + + vnode->invalid_before = status->data_version; + inode_set_iversion_raw(&vnode->vfs_inode, status->data_version); + + if (!vp->scb.have_cb) { + /* it's a symlink we just created (the fileserver + * didn't give us a callback) */ + vnode->cb_expires_at = ktime_get_real_seconds(); + } else { + vnode->cb_expires_at = vp->scb.callback.expires_at; + vnode->cb_server = op->server; + set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + } + + write_sequnlock(&vnode->cb_lock); + return 0; +} + +/* + * Update the core inode struct from a returned status record. + */ +static void afs_apply_status(struct afs_operation *op, + struct afs_vnode_param *vp) +{ + struct afs_file_status *status = &vp->scb.status; + struct afs_vnode *vnode = vp->vnode; + struct inode *inode = &vnode->vfs_inode; + struct timespec64 t; + umode_t mode; + bool data_changed = false; + bool change_size = vp->set_size; + + _enter("{%llx:%llu.%u} %s", + vp->fid.vid, vp->fid.vnode, vp->fid.unique, + op->type ? op->type->name : "???"); + + BUG_ON(test_bit(AFS_VNODE_UNSET, &vnode->flags)); + + if (status->type != vnode->status.type) { + pr_warn("Vnode %llx:%llx:%x changed type %u to %u\n", + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + status->type, vnode->status.type); + afs_protocol_error(NULL, afs_eproto_bad_status); + return; + } + + if (status->nlink != vnode->status.nlink) + set_nlink(inode, status->nlink); + + if (status->owner != vnode->status.owner) + inode->i_uid = make_kuid(&init_user_ns, status->owner); + + if (status->group != vnode->status.group) + inode->i_gid = make_kgid(&init_user_ns, status->group); + + if (status->mode != vnode->status.mode) { + mode = inode->i_mode; + mode &= ~S_IALLUGO; + mode |= status->mode & S_IALLUGO; + WRITE_ONCE(inode->i_mode, mode); + } + + t = status->mtime_client; + inode->i_mtime = t; + if (vp->update_ctime) + inode->i_ctime = op->ctime; + + if (vnode->status.data_version != status->data_version) + data_changed = true; + + vnode->status = *status; + + if (vp->dv_before + vp->dv_delta != status->data_version) { + if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) + pr_warn("kAFS: vnode modified {%llx:%llu} %llx->%llx %s\n", + vnode->fid.vid, vnode->fid.vnode, + (unsigned long long)vp->dv_before + vp->dv_delta, + (unsigned long long)status->data_version, + op->type ? op->type->name : "???"); + + vnode->invalid_before = status->data_version; + if (vnode->status.type == AFS_FTYPE_DIR) { + if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) + afs_stat_v(vnode, n_inval); + } else { + set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags); + } + change_size = true; + data_changed = true; + } else if (vnode->status.type == AFS_FTYPE_DIR) { + /* Expected directory change is handled elsewhere so + * that we can locally edit the directory and save on a + * download. + */ + if (test_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) + data_changed = false; + change_size = true; + } + + if (data_changed) { + inode_set_iversion_raw(inode, status->data_version); + + /* Only update the size if the data version jumped. If the + * file is being modified locally, then we might have our own + * idea of what the size should be that's not the same as + * what's on the server. + */ + if (change_size) { + afs_set_i_size(vnode, status->size); + inode->i_ctime = t; + inode->i_atime = t; + } + } +} + +/* + * Apply a callback to a vnode. + */ +static void afs_apply_callback(struct afs_operation *op, + struct afs_vnode_param *vp) +{ + struct afs_callback *cb = &vp->scb.callback; + struct afs_vnode *vnode = vp->vnode; + + if (!afs_cb_is_broken(vp->cb_break_before, vnode)) { + vnode->cb_expires_at = cb->expires_at; + vnode->cb_server = op->server; + set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + } +} + +/* + * Apply the received status and callback to an inode all in the same critical + * section to avoid races with afs_validate(). + */ +void afs_vnode_commit_status(struct afs_operation *op, struct afs_vnode_param *vp) +{ + struct afs_vnode *vnode = vp->vnode; + + _enter(""); + + write_seqlock(&vnode->cb_lock); + + if (vp->scb.have_error) { + /* A YFS server will return this from RemoveFile2 and AFS and + * YFS will return this from InlineBulkStatus. + */ + if (vp->scb.status.abort_code == VNOVNODE) { + set_bit(AFS_VNODE_DELETED, &vnode->flags); + clear_nlink(&vnode->vfs_inode); + __afs_break_callback(vnode, afs_cb_break_for_deleted); + op->flags &= ~AFS_OPERATION_DIR_CONFLICT; + } + } else if (vp->scb.have_status) { + if (vp->speculative && + (test_bit(AFS_VNODE_MODIFYING, &vnode->flags) || + vp->dv_before != vnode->status.data_version)) + /* Ignore the result of a speculative bulk status fetch + * if it splits around a modification op, thereby + * appearing to regress the data version. + */ + goto out; + afs_apply_status(op, vp); + if (vp->scb.have_cb) + afs_apply_callback(op, vp); + } else if (vp->op_unlinked && !(op->flags & AFS_OPERATION_DIR_CONFLICT)) { + drop_nlink(&vnode->vfs_inode); + if (vnode->vfs_inode.i_nlink == 0) { + set_bit(AFS_VNODE_DELETED, &vnode->flags); + __afs_break_callback(vnode, afs_cb_break_for_deleted); + } + } + +out: + write_sequnlock(&vnode->cb_lock); + + if (vp->scb.have_status) + afs_cache_permit(vnode, op->key, vp->cb_break_before, &vp->scb); +} + +static void afs_fetch_status_success(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[op->fetch_status.which]; + struct afs_vnode *vnode = vp->vnode; + int ret; + + if (vnode->vfs_inode.i_state & I_NEW) { + ret = afs_inode_init_from_status(op, vp, vnode); + op->error = ret; + if (ret == 0) + afs_cache_permit(vnode, op->key, vp->cb_break_before, &vp->scb); + } else { + afs_vnode_commit_status(op, vp); + } +} + +const struct afs_operation_ops afs_fetch_status_operation = { + .issue_afs_rpc = afs_fs_fetch_status, + .issue_yfs_rpc = yfs_fs_fetch_status, + .success = afs_fetch_status_success, + .aborted = afs_check_for_remote_deletion, +}; + +/* + * Fetch file status from the volume. + */ +int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool is_new, + afs_access_t *_caller_access) +{ + struct afs_operation *op; + + _enter("%s,{%llx:%llu.%u,S=%lx}", + vnode->volume->name, + vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique, + vnode->flags); + + op = afs_alloc_operation(key, vnode->volume); + if (IS_ERR(op)) + return PTR_ERR(op); + + afs_op_set_vnode(op, 0, vnode); + + op->nr_files = 1; + op->ops = &afs_fetch_status_operation; + afs_begin_vnode_operation(op); + afs_wait_for_operation(op); + + if (_caller_access) + *_caller_access = op->file[0].scb.status.caller_access; + return afs_put_operation(op); +} + +/* + * ilookup() comparator + */ +int afs_ilookup5_test_by_fid(struct inode *inode, void *opaque) +{ + struct afs_vnode *vnode = AFS_FS_I(inode); + struct afs_fid *fid = opaque; + + return (fid->vnode == vnode->fid.vnode && + fid->vnode_hi == vnode->fid.vnode_hi && + fid->unique == vnode->fid.unique); +} + +/* + * iget5() comparator + */ +static int afs_iget5_test(struct inode *inode, void *opaque) +{ + struct afs_vnode_param *vp = opaque; + //struct afs_vnode *vnode = AFS_FS_I(inode); + + return afs_ilookup5_test_by_fid(inode, &vp->fid); +} + +/* + * iget5() inode initialiser + */ +static int afs_iget5_set(struct inode *inode, void *opaque) +{ + struct afs_vnode_param *vp = opaque; + struct afs_super_info *as = AFS_FS_S(inode->i_sb); + struct afs_vnode *vnode = AFS_FS_I(inode); + + vnode->volume = as->volume; + vnode->fid = vp->fid; + + /* YFS supports 96-bit vnode IDs, but Linux only supports + * 64-bit inode numbers. + */ + inode->i_ino = vnode->fid.vnode; + inode->i_generation = vnode->fid.unique; + return 0; +} + +/* + * Get a cache cookie for an inode. + */ +static void afs_get_inode_cache(struct afs_vnode *vnode) +{ +#ifdef CONFIG_AFS_FSCACHE + struct { + u32 vnode_id; + u32 unique; + u32 vnode_id_ext[2]; /* Allow for a 96-bit key */ + } __packed key; + struct afs_vnode_cache_aux aux; + + if (vnode->status.type == AFS_FTYPE_DIR) { + vnode->cache = NULL; + return; + } + + key.vnode_id = vnode->fid.vnode; + key.unique = vnode->fid.unique; + key.vnode_id_ext[0] = vnode->fid.vnode >> 32; + key.vnode_id_ext[1] = vnode->fid.vnode_hi; + aux.data_version = vnode->status.data_version; + + vnode->cache = fscache_acquire_cookie(vnode->volume->cache, + &afs_vnode_cache_index_def, + &key, sizeof(key), + &aux, sizeof(aux), + vnode, vnode->status.size, true); +#endif +} + +/* + * inode retrieval + */ +struct inode *afs_iget(struct afs_operation *op, struct afs_vnode_param *vp) +{ + struct afs_vnode_param *dvp = &op->file[0]; + struct super_block *sb = dvp->vnode->vfs_inode.i_sb; + struct afs_vnode *vnode; + struct inode *inode; + int ret; + + _enter(",{%llx:%llu.%u},,", vp->fid.vid, vp->fid.vnode, vp->fid.unique); + + inode = iget5_locked(sb, vp->fid.vnode, afs_iget5_test, afs_iget5_set, vp); + if (!inode) { + _leave(" = -ENOMEM"); + return ERR_PTR(-ENOMEM); + } + + vnode = AFS_FS_I(inode); + + _debug("GOT INODE %p { vl=%llx vn=%llx, u=%x }", + inode, vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); + + /* deal with an existing inode */ + if (!(inode->i_state & I_NEW)) { + _leave(" = %p", inode); + return inode; + } + + ret = afs_inode_init_from_status(op, vp, vnode); + if (ret < 0) + goto bad_inode; + + afs_get_inode_cache(vnode); + + /* success */ + clear_bit(AFS_VNODE_UNSET, &vnode->flags); + unlock_new_inode(inode); + _leave(" = %p", inode); + return inode; + + /* failure */ +bad_inode: + iget_failed(inode); + _leave(" = %d [bad]", ret); + return ERR_PTR(ret); +} + +static int afs_iget5_set_root(struct inode *inode, void *opaque) +{ + struct afs_super_info *as = AFS_FS_S(inode->i_sb); + struct afs_vnode *vnode = AFS_FS_I(inode); + + vnode->volume = as->volume; + vnode->fid.vid = as->volume->vid, + vnode->fid.vnode = 1; + vnode->fid.unique = 1; + inode->i_ino = 1; + inode->i_generation = 1; + return 0; +} + +/* + * Set up the root inode for a volume. This is always vnode 1, unique 1 within + * the volume. + */ +struct inode *afs_root_iget(struct super_block *sb, struct key *key) +{ + struct afs_super_info *as = AFS_FS_S(sb); + struct afs_operation *op; + struct afs_vnode *vnode; + struct inode *inode; + int ret; + + _enter(",{%llx},,", as->volume->vid); + + inode = iget5_locked(sb, 1, NULL, afs_iget5_set_root, NULL); + if (!inode) { + _leave(" = -ENOMEM"); + return ERR_PTR(-ENOMEM); + } + + _debug("GOT ROOT INODE %p { vl=%llx }", inode, as->volume->vid); + + BUG_ON(!(inode->i_state & I_NEW)); + + vnode = AFS_FS_I(inode); + vnode->cb_v_break = as->volume->cb_v_break, + + op = afs_alloc_operation(key, as->volume); + if (IS_ERR(op)) { + ret = PTR_ERR(op); + goto error; + } + + afs_op_set_vnode(op, 0, vnode); + + op->nr_files = 1; + op->ops = &afs_fetch_status_operation; + ret = afs_do_sync_operation(op); + if (ret < 0) + goto error; + + afs_get_inode_cache(vnode); + + clear_bit(AFS_VNODE_UNSET, &vnode->flags); + unlock_new_inode(inode); + _leave(" = %p", inode); + return inode; + +error: + iget_failed(inode); + _leave(" = %d [bad]", ret); + return ERR_PTR(ret); +} + +/* + * mark the data attached to an inode as obsolete due to a write on the server + * - might also want to ditch all the outstanding writes and dirty pages + */ +static void afs_zap_data(struct afs_vnode *vnode) +{ + _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode); + +#ifdef CONFIG_AFS_FSCACHE + fscache_invalidate(vnode->cache); +#endif + + /* nuke all the non-dirty pages that aren't locked, mapped or being + * written back in a regular file and completely discard the pages in a + * directory or symlink */ + if (S_ISREG(vnode->vfs_inode.i_mode)) + invalidate_remote_inode(&vnode->vfs_inode); + else + invalidate_inode_pages2(vnode->vfs_inode.i_mapping); +} + +/* + * Get the server reinit counter for a vnode's current server. + */ +static bool afs_get_s_break_rcu(struct afs_vnode *vnode, unsigned int *_s_break) +{ + struct afs_server_list *slist = rcu_dereference(vnode->volume->servers); + struct afs_server *server; + int i; + + for (i = 0; i < slist->nr_servers; i++) { + server = slist->servers[i].server; + if (server == vnode->cb_server) { + *_s_break = READ_ONCE(server->cb_s_break); + return true; + } + } + + return false; +} + +/* + * Check the validity of a vnode/inode. + */ +bool afs_check_validity(struct afs_vnode *vnode) +{ + struct afs_volume *volume = vnode->volume; + enum afs_cb_break_reason need_clear = afs_cb_break_no_break; + time64_t now = ktime_get_real_seconds(); + bool valid; + unsigned int cb_break, cb_s_break, cb_v_break; + int seq = 0; + + do { + read_seqbegin_or_lock(&vnode->cb_lock, &seq); + cb_v_break = READ_ONCE(volume->cb_v_break); + cb_break = vnode->cb_break; + + if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags) && + afs_get_s_break_rcu(vnode, &cb_s_break)) { + if (vnode->cb_s_break != cb_s_break || + vnode->cb_v_break != cb_v_break) { + vnode->cb_s_break = cb_s_break; + vnode->cb_v_break = cb_v_break; + need_clear = afs_cb_break_for_vsbreak; + valid = false; + } else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { + need_clear = afs_cb_break_for_zap; + valid = false; + } else if (vnode->cb_expires_at - 10 <= now) { + need_clear = afs_cb_break_for_lapsed; + valid = false; + } else { + valid = true; + } + } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { + valid = true; + } else { + vnode->cb_v_break = cb_v_break; + valid = false; + } + + } while (need_seqretry(&vnode->cb_lock, seq)); + + done_seqretry(&vnode->cb_lock, seq); + + if (need_clear != afs_cb_break_no_break) { + write_seqlock(&vnode->cb_lock); + if (cb_break == vnode->cb_break) + __afs_break_callback(vnode, need_clear); + else + trace_afs_cb_miss(&vnode->fid, need_clear); + write_sequnlock(&vnode->cb_lock); + valid = false; + } + + return valid; +} + +/* + * validate a vnode/inode + * - there are several things we need to check + * - parent dir data changes (rm, rmdir, rename, mkdir, create, link, + * symlink) + * - parent dir metadata changed (security changes) + * - dentry data changed (write, truncate) + * - dentry metadata changed (security changes) + */ +int afs_validate(struct afs_vnode *vnode, struct key *key) +{ + bool valid; + int ret; + + _enter("{v={%llx:%llu} fl=%lx},%x", + vnode->fid.vid, vnode->fid.vnode, vnode->flags, + key_serial(key)); + + rcu_read_lock(); + valid = afs_check_validity(vnode); + rcu_read_unlock(); + + if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) + clear_nlink(&vnode->vfs_inode); + + if (valid) + goto valid; + + down_write(&vnode->validate_lock); + + /* if the promise has expired, we need to check the server again to get + * a new promise - note that if the (parent) directory's metadata was + * changed then the security may be different and we may no longer have + * access */ + if (!test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { + _debug("not promised"); + ret = afs_fetch_status(vnode, key, false, NULL); + if (ret < 0) { + if (ret == -ENOENT) { + set_bit(AFS_VNODE_DELETED, &vnode->flags); + ret = -ESTALE; + } + goto error_unlock; + } + _debug("new promise [fl=%lx]", vnode->flags); + } + + if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { + _debug("file already deleted"); + ret = -ESTALE; + goto error_unlock; + } + + /* if the vnode's data version number changed then its contents are + * different */ + if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) + afs_zap_data(vnode); + up_write(&vnode->validate_lock); +valid: + _leave(" = 0"); + return 0; + +error_unlock: + up_write(&vnode->validate_lock); + _leave(" = %d", ret); + return ret; +} + +/* + * read the attributes of an inode + */ +int afs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) +{ + struct inode *inode = d_inode(path->dentry); + struct afs_vnode *vnode = AFS_FS_I(inode); + struct key *key; + int ret, seq = 0; + + _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation); + + if (vnode->volume && + !(query_flags & AT_STATX_DONT_SYNC) && + !test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { + key = afs_request_key(vnode->volume->cell); + if (IS_ERR(key)) + return PTR_ERR(key); + ret = afs_validate(vnode, key); + key_put(key); + if (ret < 0) + return ret; + } + + do { + read_seqbegin_or_lock(&vnode->cb_lock, &seq); + generic_fillattr(inode, stat); + if (test_bit(AFS_VNODE_SILLY_DELETED, &vnode->flags) && + stat->nlink > 0) + stat->nlink -= 1; + } while (need_seqretry(&vnode->cb_lock, seq)); + + done_seqretry(&vnode->cb_lock, seq); + return 0; +} + +/* + * discard an AFS inode + */ +int afs_drop_inode(struct inode *inode) +{ + _enter(""); + + if (test_bit(AFS_VNODE_PSEUDODIR, &AFS_FS_I(inode)->flags)) + return generic_delete_inode(inode); + else + return generic_drop_inode(inode); +} + +/* + * clear an AFS inode + */ +void afs_evict_inode(struct inode *inode) +{ + struct afs_vnode *vnode; + + vnode = AFS_FS_I(inode); + + _enter("{%llx:%llu.%d}", + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique); + + _debug("CLEAR INODE %p", inode); + + ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode); + + truncate_inode_pages_final(&inode->i_data); + clear_inode(inode); + + while (!list_empty(&vnode->wb_keys)) { + struct afs_wb_key *wbk = list_entry(vnode->wb_keys.next, + struct afs_wb_key, vnode_link); + list_del(&wbk->vnode_link); + afs_put_wb_key(wbk); + } + +#ifdef CONFIG_AFS_FSCACHE + { + struct afs_vnode_cache_aux aux; + + aux.data_version = vnode->status.data_version; + fscache_relinquish_cookie(vnode->cache, &aux, + test_bit(AFS_VNODE_DELETED, &vnode->flags)); + vnode->cache = NULL; + } +#endif + + afs_prune_wb_keys(vnode); + afs_put_permits(rcu_access_pointer(vnode->permit_cache)); + key_put(vnode->silly_key); + vnode->silly_key = NULL; + key_put(vnode->lock_key); + vnode->lock_key = NULL; + _leave(""); +} + +static void afs_setattr_success(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct inode *inode = &vp->vnode->vfs_inode; + loff_t old_i_size = i_size_read(inode); + + op->setattr.old_i_size = old_i_size; + afs_vnode_commit_status(op, vp); + /* inode->i_size has now been changed. */ + + if (op->setattr.attr->ia_valid & ATTR_SIZE) { + loff_t size = op->setattr.attr->ia_size; + if (size > old_i_size) + pagecache_isize_extended(inode, old_i_size, size); + } +} + +static void afs_setattr_edit_file(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct inode *inode = &vp->vnode->vfs_inode; + + if (op->setattr.attr->ia_valid & ATTR_SIZE) { + loff_t size = op->setattr.attr->ia_size; + loff_t i_size = op->setattr.old_i_size; + + if (size < i_size) + truncate_pagecache(inode, size); + } +} + +static const struct afs_operation_ops afs_setattr_operation = { + .issue_afs_rpc = afs_fs_setattr, + .issue_yfs_rpc = yfs_fs_setattr, + .success = afs_setattr_success, + .edit_dir = afs_setattr_edit_file, +}; + +/* + * set the attributes of an inode + */ +int afs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct afs_operation *op; + struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); + int ret; + + _enter("{%llx:%llu},{n=%pd},%x", + vnode->fid.vid, vnode->fid.vnode, dentry, + attr->ia_valid); + + if (!(attr->ia_valid & (ATTR_SIZE | ATTR_MODE | ATTR_UID | ATTR_GID | + ATTR_MTIME | ATTR_MTIME_SET | ATTR_TIMES_SET | + ATTR_TOUCH))) { + _leave(" = 0 [unsupported]"); + return 0; + } + + if (attr->ia_valid & ATTR_SIZE) { + if (!S_ISREG(vnode->vfs_inode.i_mode)) + return -EISDIR; + + ret = inode_newsize_ok(&vnode->vfs_inode, attr->ia_size); + if (ret) + return ret; + + if (attr->ia_size == i_size_read(&vnode->vfs_inode)) + attr->ia_valid &= ~ATTR_SIZE; + } + + /* flush any dirty data outstanding on a regular file */ + if (S_ISREG(vnode->vfs_inode.i_mode)) + filemap_write_and_wait(vnode->vfs_inode.i_mapping); + + /* Prevent any new writebacks from starting whilst we do this. */ + down_write(&vnode->validate_lock); + + op = afs_alloc_operation(((attr->ia_valid & ATTR_FILE) ? + afs_file_key(attr->ia_file) : NULL), + vnode->volume); + if (IS_ERR(op)) { + ret = PTR_ERR(op); + goto out_unlock; + } + + afs_op_set_vnode(op, 0, vnode); + op->setattr.attr = attr; + + if (attr->ia_valid & ATTR_SIZE) { + op->file[0].dv_delta = 1; + op->file[0].set_size = true; + } + op->ctime = attr->ia_ctime; + op->file[0].update_ctime = 1; + op->file[0].modification = true; + + op->ops = &afs_setattr_operation; + ret = afs_do_sync_operation(op); + +out_unlock: + up_write(&vnode->validate_lock); + _leave(" = %d", ret); + return ret; +} diff --git a/fs/afs/internal.h b/fs/afs/internal.h new file mode 100644 index 000000000..31c7a5621 --- /dev/null +++ b/fs/afs/internal.h @@ -0,0 +1,1744 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* internal AFS stuff + * + * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/ktime.h> +#include <linux/fs.h> +#include <linux/pagemap.h> +#include <linux/rxrpc.h> +#include <linux/key.h> +#include <linux/workqueue.h> +#include <linux/sched.h> +#include <linux/fscache.h> +#include <linux/backing-dev.h> +#include <linux/uuid.h> +#include <linux/mm_types.h> +#include <linux/dns_resolver.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> + +#include "afs.h" +#include "afs_vl.h" + +#define AFS_CELL_MAX_ADDRS 15 + +struct pagevec; +struct afs_call; + +/* + * Partial file-locking emulation mode. (The problem being that AFS3 only + * allows whole-file locks and no upgrading/downgrading). + */ +enum afs_flock_mode { + afs_flock_mode_unset, + afs_flock_mode_local, /* Local locking only */ + afs_flock_mode_openafs, /* Don't get server lock for a partial lock */ + afs_flock_mode_strict, /* Always get a server lock for a partial lock */ + afs_flock_mode_write, /* Get an exclusive server lock for a partial lock */ +}; + +struct afs_fs_context { + bool force; /* T to force cell type */ + bool autocell; /* T if set auto mount operation */ + bool dyn_root; /* T if dynamic root */ + bool no_cell; /* T if the source is "none" (for dynroot) */ + enum afs_flock_mode flock_mode; /* Partial file-locking emulation mode */ + afs_voltype_t type; /* type of volume requested */ + unsigned int volnamesz; /* size of volume name */ + const char *volname; /* name of volume to mount */ + struct afs_net *net; /* the AFS net namespace stuff */ + struct afs_cell *cell; /* cell in which to find volume */ + struct afs_volume *volume; /* volume record */ + struct key *key; /* key to use for secure mounting */ +}; + +enum afs_call_state { + AFS_CALL_CL_REQUESTING, /* Client: Request is being sent */ + AFS_CALL_CL_AWAIT_REPLY, /* Client: Awaiting reply */ + AFS_CALL_CL_PROC_REPLY, /* Client: rxrpc call complete; processing reply */ + AFS_CALL_SV_AWAIT_OP_ID, /* Server: Awaiting op ID */ + AFS_CALL_SV_AWAIT_REQUEST, /* Server: Awaiting request data */ + AFS_CALL_SV_REPLYING, /* Server: Replying */ + AFS_CALL_SV_AWAIT_ACK, /* Server: Awaiting final ACK */ + AFS_CALL_COMPLETE, /* Completed or failed */ +}; + +/* + * List of server addresses. + */ +struct afs_addr_list { + struct rcu_head rcu; + refcount_t usage; + u32 version; /* Version */ + unsigned char max_addrs; + unsigned char nr_addrs; + unsigned char preferred; /* Preferred address */ + unsigned char nr_ipv4; /* Number of IPv4 addresses */ + enum dns_record_source source:8; + enum dns_lookup_status status:8; + unsigned long failed; /* Mask of addrs that failed locally/ICMP */ + unsigned long responded; /* Mask of addrs that responded */ + struct sockaddr_rxrpc addrs[]; +#define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8)) +}; + +/* + * a record of an in-progress RxRPC call + */ +struct afs_call { + const struct afs_call_type *type; /* type of call */ + struct afs_addr_list *alist; /* Address is alist[addr_ix] */ + wait_queue_head_t waitq; /* processes awaiting completion */ + struct work_struct async_work; /* async I/O processor */ + struct work_struct work; /* actual work processor */ + struct rxrpc_call *rxcall; /* RxRPC call handle */ + struct key *key; /* security for this call */ + struct afs_net *net; /* The network namespace */ + struct afs_server *server; /* The fileserver record if fs op (pins ref) */ + struct afs_vlserver *vlserver; /* The vlserver record if vl op */ + void *request; /* request data (first part) */ + struct iov_iter def_iter; /* Default buffer/data iterator */ + struct iov_iter *iter; /* Iterator currently in use */ + union { /* Convenience for ->def_iter */ + struct kvec kvec[1]; + struct bio_vec bvec[1]; + }; + void *buffer; /* reply receive buffer */ + union { + long ret0; /* Value to reply with instead of 0 */ + struct afs_addr_list *ret_alist; + struct afs_vldb_entry *ret_vldb; + char *ret_str; + }; + struct afs_operation *op; + unsigned int server_index; + atomic_t usage; + enum afs_call_state state; + spinlock_t state_lock; + int error; /* error code */ + u32 abort_code; /* Remote abort ID or 0 */ + unsigned int max_lifespan; /* Maximum lifespan to set if not 0 */ + unsigned request_size; /* size of request data */ + unsigned reply_max; /* maximum size of reply */ + unsigned count2; /* count used in unmarshalling */ + unsigned char unmarshall; /* unmarshalling phase */ + unsigned char addr_ix; /* Address in ->alist */ + bool drop_ref; /* T if need to drop ref for incoming call */ + bool send_pages; /* T if data from mapping should be sent */ + bool need_attention; /* T if RxRPC poked us */ + bool async; /* T if asynchronous */ + bool upgrade; /* T to request service upgrade */ + bool intr; /* T if interruptible */ + bool unmarshalling_error; /* T if an unmarshalling error occurred */ + u16 service_id; /* Actual service ID (after upgrade) */ + unsigned int debug_id; /* Trace ID */ + u32 operation_ID; /* operation ID for an incoming call */ + u32 count; /* count for use in unmarshalling */ + union { /* place to extract temporary data */ + struct { + __be32 tmp_u; + __be32 tmp; + } __attribute__((packed)); + __be64 tmp64; + }; + ktime_t issue_time; /* Time of issue of operation */ +}; + +struct afs_call_type { + const char *name; + unsigned int op; /* Really enum afs_fs_operation */ + + /* deliver request or reply data to an call + * - returning an error will cause the call to be aborted + */ + int (*deliver)(struct afs_call *call); + + /* clean up a call */ + void (*destructor)(struct afs_call *call); + + /* Work function */ + void (*work)(struct work_struct *work); + + /* Call done function (gets called immediately on success or failure) */ + void (*done)(struct afs_call *call); +}; + +/* + * Key available for writeback on a file. + */ +struct afs_wb_key { + refcount_t usage; + struct key *key; + struct list_head vnode_link; /* Link in vnode->wb_keys */ +}; + +/* + * AFS open file information record. Pointed to by file->private_data. + */ +struct afs_file { + struct key *key; /* The key this file was opened with */ + struct afs_wb_key *wb; /* Writeback key record for this file */ +}; + +static inline struct key *afs_file_key(struct file *file) +{ + struct afs_file *af = file->private_data; + + return af->key; +} + +/* + * Record of an outstanding read operation on a vnode. + */ +struct afs_read { + loff_t pos; /* Where to start reading */ + loff_t len; /* How much we're asking for */ + loff_t actual_len; /* How much we're actually getting */ + loff_t remain; /* Amount remaining */ + loff_t file_size; /* File size returned by server */ + afs_dataversion_t data_version; /* Version number returned by server */ + refcount_t usage; + unsigned int index; /* Which page we're reading into */ + unsigned int nr_pages; + unsigned int offset; /* offset into current page */ + struct afs_vnode *vnode; + void (*page_done)(struct afs_read *); + struct page **pages; + struct page *array[]; +}; + +/* + * AFS superblock private data + * - there's one superblock per volume + */ +struct afs_super_info { + struct net *net_ns; /* Network namespace */ + struct afs_cell *cell; /* The cell in which the volume resides */ + struct afs_volume *volume; /* volume record */ + enum afs_flock_mode flock_mode:8; /* File locking emulation mode */ + bool dyn_root; /* True if dynamic root */ +}; + +static inline struct afs_super_info *AFS_FS_S(struct super_block *sb) +{ + return sb->s_fs_info; +} + +extern struct file_system_type afs_fs_type; + +/* + * Set of substitutes for @sys. + */ +struct afs_sysnames { +#define AFS_NR_SYSNAME 16 + char *subs[AFS_NR_SYSNAME]; + refcount_t usage; + unsigned short nr; + char blank[1]; +}; + +/* + * AFS network namespace record. + */ +struct afs_net { + struct net *net; /* Backpointer to the owning net namespace */ + struct afs_uuid uuid; + bool live; /* F if this namespace is being removed */ + + /* AF_RXRPC I/O stuff */ + struct socket *socket; + struct afs_call *spare_incoming_call; + struct work_struct charge_preallocation_work; + struct mutex socket_mutex; + atomic_t nr_outstanding_calls; + atomic_t nr_superblocks; + + /* Cell database */ + struct rb_root cells; + struct afs_cell *ws_cell; + struct work_struct cells_manager; + struct timer_list cells_timer; + atomic_t cells_outstanding; + struct rw_semaphore cells_lock; + struct mutex cells_alias_lock; + + struct mutex proc_cells_lock; + struct hlist_head proc_cells; + + /* Known servers. Theoretically each fileserver can only be in one + * cell, but in practice, people create aliases and subsets and there's + * no easy way to distinguish them. + */ + seqlock_t fs_lock; /* For fs_servers, fs_probe_*, fs_proc */ + struct rb_root fs_servers; /* afs_server (by server UUID or address) */ + struct list_head fs_probe_fast; /* List of afs_server to probe at 30s intervals */ + struct list_head fs_probe_slow; /* List of afs_server to probe at 5m intervals */ + struct hlist_head fs_proc; /* procfs servers list */ + + struct hlist_head fs_addresses4; /* afs_server (by lowest IPv4 addr) */ + struct hlist_head fs_addresses6; /* afs_server (by lowest IPv6 addr) */ + seqlock_t fs_addr_lock; /* For fs_addresses[46] */ + + struct work_struct fs_manager; + struct timer_list fs_timer; + + struct work_struct fs_prober; + struct timer_list fs_probe_timer; + atomic_t servers_outstanding; + + /* File locking renewal management */ + struct mutex lock_manager_mutex; + + /* Misc */ + struct super_block *dynroot_sb; /* Dynamic root mount superblock */ + struct proc_dir_entry *proc_afs; /* /proc/net/afs directory */ + struct afs_sysnames *sysnames; + rwlock_t sysnames_lock; + + /* Statistics counters */ + atomic_t n_lookup; /* Number of lookups done */ + atomic_t n_reval; /* Number of dentries needing revalidation */ + atomic_t n_inval; /* Number of invalidations by the server */ + atomic_t n_relpg; /* Number of invalidations by releasepage */ + atomic_t n_read_dir; /* Number of directory pages read */ + atomic_t n_dir_cr; /* Number of directory entry creation edits */ + atomic_t n_dir_rm; /* Number of directory entry removal edits */ + atomic_t n_stores; /* Number of store ops */ + atomic_long_t n_store_bytes; /* Number of bytes stored */ + atomic_long_t n_fetch_bytes; /* Number of bytes fetched */ + atomic_t n_fetches; /* Number of data fetch ops */ +}; + +extern const char afs_init_sysname[]; + +enum afs_cell_state { + AFS_CELL_UNSET, + AFS_CELL_ACTIVATING, + AFS_CELL_ACTIVE, + AFS_CELL_DEACTIVATING, + AFS_CELL_INACTIVE, + AFS_CELL_FAILED, + AFS_CELL_REMOVED, +}; + +/* + * AFS cell record. + * + * This is a tricky concept to get right as it is possible to create aliases + * simply by pointing AFSDB/SRV records for two names at the same set of VL + * servers; it is also possible to do things like setting up two sets of VL + * servers, one of which provides a superset of the volumes provided by the + * other (for internal/external division, for example). + * + * Cells only exist in the sense that (a) a cell's name maps to a set of VL + * servers and (b) a cell's name is used by the client to select the key to use + * for authentication and encryption. The cell name is not typically used in + * the protocol. + * + * Two cells are determined to be aliases if they have an explicit alias (YFS + * only), share any VL servers in common or have at least one volume in common. + * "In common" means that the address list of the VL servers or the fileservers + * share at least one endpoint. + */ +struct afs_cell { + union { + struct rcu_head rcu; + struct rb_node net_node; /* Node in net->cells */ + }; + struct afs_net *net; + struct afs_cell *alias_of; /* The cell this is an alias of */ + struct afs_volume *root_volume; /* The root.cell volume if there is one */ + struct key *anonymous_key; /* anonymous user key for this cell */ + struct work_struct manager; /* Manager for init/deinit/dns */ + struct hlist_node proc_link; /* /proc cell list link */ +#ifdef CONFIG_AFS_FSCACHE + struct fscache_cookie *cache; /* caching cookie */ +#endif + time64_t dns_expiry; /* Time AFSDB/SRV record expires */ + time64_t last_inactive; /* Time of last drop of usage count */ + atomic_t ref; /* Struct refcount */ + atomic_t active; /* Active usage counter */ + unsigned long flags; +#define AFS_CELL_FL_NO_GC 0 /* The cell was added manually, don't auto-gc */ +#define AFS_CELL_FL_DO_LOOKUP 1 /* DNS lookup requested */ +#define AFS_CELL_FL_CHECK_ALIAS 2 /* Need to check for aliases */ + enum afs_cell_state state; + short error; + enum dns_record_source dns_source:8; /* Latest source of data from lookup */ + enum dns_lookup_status dns_status:8; /* Latest status of data from lookup */ + unsigned int dns_lookup_count; /* Counter of DNS lookups */ + unsigned int debug_id; + + /* The volumes belonging to this cell */ + struct rb_root volumes; /* Tree of volumes on this server */ + struct hlist_head proc_volumes; /* procfs volume list */ + seqlock_t volume_lock; /* For volumes */ + + /* Active fileserver interaction state. */ + struct rb_root fs_servers; /* afs_server (by server UUID) */ + seqlock_t fs_lock; /* For fs_servers */ + + /* VL server list. */ + rwlock_t vl_servers_lock; /* Lock on vl_servers */ + struct afs_vlserver_list __rcu *vl_servers; + + u8 name_len; /* Length of name */ + char *name; /* Cell name, case-flattened and NUL-padded */ +}; + +/* + * Volume Location server record. + */ +struct afs_vlserver { + struct rcu_head rcu; + struct afs_addr_list __rcu *addresses; /* List of addresses for this VL server */ + unsigned long flags; +#define AFS_VLSERVER_FL_PROBED 0 /* The VL server has been probed */ +#define AFS_VLSERVER_FL_PROBING 1 /* VL server is being probed */ +#define AFS_VLSERVER_FL_IS_YFS 2 /* Server is YFS not AFS */ +#define AFS_VLSERVER_FL_RESPONDING 3 /* VL server is responding */ + rwlock_t lock; /* Lock on addresses */ + atomic_t usage; + unsigned int rtt; /* Server's current RTT in uS */ + + /* Probe state */ + wait_queue_head_t probe_wq; + atomic_t probe_outstanding; + spinlock_t probe_lock; + struct { + unsigned int rtt; /* RTT in uS */ + u32 abort_code; + short error; + unsigned short flags; +#define AFS_VLSERVER_PROBE_RESPONDED 0x01 /* At least once response (may be abort) */ +#define AFS_VLSERVER_PROBE_IS_YFS 0x02 /* The peer appears to be YFS */ +#define AFS_VLSERVER_PROBE_NOT_YFS 0x04 /* The peer appears not to be YFS */ +#define AFS_VLSERVER_PROBE_LOCAL_FAILURE 0x08 /* A local failure prevented a probe */ + } probe; + + u16 port; + u16 name_len; /* Length of name */ + char name[]; /* Server name, case-flattened */ +}; + +/* + * Weighted list of Volume Location servers. + */ +struct afs_vlserver_entry { + u16 priority; /* Preference (as SRV) */ + u16 weight; /* Weight (as SRV) */ + enum dns_record_source source:8; + enum dns_lookup_status status:8; + struct afs_vlserver *server; +}; + +struct afs_vlserver_list { + struct rcu_head rcu; + atomic_t usage; + u8 nr_servers; + u8 index; /* Server currently in use */ + u8 preferred; /* Preferred server */ + enum dns_record_source source:8; + enum dns_lookup_status status:8; + rwlock_t lock; + struct afs_vlserver_entry servers[]; +}; + +/* + * Cached VLDB entry. + * + * This is pointed to by cell->vldb_entries, indexed by name. + */ +struct afs_vldb_entry { + afs_volid_t vid[3]; /* Volume IDs for R/W, R/O and Bak volumes */ + + unsigned long flags; +#define AFS_VLDB_HAS_RW 0 /* - R/W volume exists */ +#define AFS_VLDB_HAS_RO 1 /* - R/O volume exists */ +#define AFS_VLDB_HAS_BAK 2 /* - Backup volume exists */ +#define AFS_VLDB_QUERY_VALID 3 /* - Record is valid */ +#define AFS_VLDB_QUERY_ERROR 4 /* - VL server returned error */ + + uuid_t fs_server[AFS_NMAXNSERVERS]; + u32 addr_version[AFS_NMAXNSERVERS]; /* Registration change counters */ + u8 fs_mask[AFS_NMAXNSERVERS]; +#define AFS_VOL_VTM_RW 0x01 /* R/W version of the volume is available (on this server) */ +#define AFS_VOL_VTM_RO 0x02 /* R/O version of the volume is available (on this server) */ +#define AFS_VOL_VTM_BAK 0x04 /* backup version of the volume is available (on this server) */ + short error; + u8 nr_servers; /* Number of server records */ + u8 name_len; + u8 name[AFS_MAXVOLNAME + 1]; /* NUL-padded volume name */ +}; + +/* + * Record of fileserver with which we're actively communicating. + */ +struct afs_server { + struct rcu_head rcu; + union { + uuid_t uuid; /* Server ID */ + struct afs_uuid _uuid; + }; + + struct afs_addr_list __rcu *addresses; + struct afs_cell *cell; /* Cell to which belongs (pins ref) */ + struct rb_node uuid_rb; /* Link in net->fs_servers */ + struct afs_server __rcu *uuid_next; /* Next server with same UUID */ + struct afs_server *uuid_prev; /* Previous server with same UUID */ + struct list_head probe_link; /* Link in net->fs_probe_list */ + struct hlist_node addr4_link; /* Link in net->fs_addresses4 */ + struct hlist_node addr6_link; /* Link in net->fs_addresses6 */ + struct hlist_node proc_link; /* Link in net->fs_proc */ + struct afs_server *gc_next; /* Next server in manager's list */ + time64_t unuse_time; /* Time at which last unused */ + unsigned long flags; +#define AFS_SERVER_FL_RESPONDING 0 /* The server is responding */ +#define AFS_SERVER_FL_UPDATING 1 +#define AFS_SERVER_FL_NEEDS_UPDATE 2 /* Fileserver address list is out of date */ +#define AFS_SERVER_FL_NOT_READY 4 /* The record is not ready for use */ +#define AFS_SERVER_FL_NOT_FOUND 5 /* VL server says no such server */ +#define AFS_SERVER_FL_VL_FAIL 6 /* Failed to access VL server */ +#define AFS_SERVER_FL_MAY_HAVE_CB 8 /* May have callbacks on this fileserver */ +#define AFS_SERVER_FL_IS_YFS 16 /* Server is YFS not AFS */ +#define AFS_SERVER_FL_NO_IBULK 17 /* Fileserver doesn't support FS.InlineBulkStatus */ +#define AFS_SERVER_FL_NO_RM2 18 /* Fileserver doesn't support YFS.RemoveFile2 */ + atomic_t ref; /* Object refcount */ + atomic_t active; /* Active user count */ + u32 addr_version; /* Address list version */ + unsigned int rtt; /* Server's current RTT in uS */ + unsigned int debug_id; /* Debugging ID for traces */ + + /* file service access */ + rwlock_t fs_lock; /* access lock */ + + /* callback promise management */ + unsigned cb_s_break; /* Break-everything counter. */ + + /* Probe state */ + unsigned long probed_at; /* Time last probe was dispatched (jiffies) */ + wait_queue_head_t probe_wq; + atomic_t probe_outstanding; + spinlock_t probe_lock; + struct { + unsigned int rtt; /* RTT in uS */ + u32 abort_code; + short error; + bool responded:1; + bool is_yfs:1; + bool not_yfs:1; + bool local_failure:1; + } probe; +}; + +/* + * Replaceable volume server list. + */ +struct afs_server_entry { + struct afs_server *server; +}; + +struct afs_server_list { + struct rcu_head rcu; + afs_volid_t vids[AFS_MAXTYPES]; /* Volume IDs */ + refcount_t usage; + unsigned char nr_servers; + unsigned char preferred; /* Preferred server */ + unsigned short vnovol_mask; /* Servers to be skipped due to VNOVOL */ + unsigned int seq; /* Set to ->servers_seq when installed */ + rwlock_t lock; + struct afs_server_entry servers[]; +}; + +/* + * Live AFS volume management. + */ +struct afs_volume { + union { + struct rcu_head rcu; + afs_volid_t vid; /* volume ID */ + }; + atomic_t usage; + time64_t update_at; /* Time at which to next update */ + struct afs_cell *cell; /* Cell to which belongs (pins ref) */ + struct rb_node cell_node; /* Link in cell->volumes */ + struct hlist_node proc_link; /* Link in cell->proc_volumes */ + struct super_block __rcu *sb; /* Superblock on which inodes reside */ + unsigned long flags; +#define AFS_VOLUME_NEEDS_UPDATE 0 /* - T if an update needs performing */ +#define AFS_VOLUME_UPDATING 1 /* - T if an update is in progress */ +#define AFS_VOLUME_WAIT 2 /* - T if users must wait for update */ +#define AFS_VOLUME_DELETED 3 /* - T if volume appears deleted */ +#define AFS_VOLUME_OFFLINE 4 /* - T if volume offline notice given */ +#define AFS_VOLUME_BUSY 5 /* - T if volume busy notice given */ +#define AFS_VOLUME_MAYBE_NO_IBULK 6 /* - T if some servers don't have InlineBulkStatus */ +#ifdef CONFIG_AFS_FSCACHE + struct fscache_cookie *cache; /* caching cookie */ +#endif + struct afs_server_list __rcu *servers; /* List of servers on which volume resides */ + rwlock_t servers_lock; /* Lock for ->servers */ + unsigned int servers_seq; /* Incremented each time ->servers changes */ + + unsigned cb_v_break; /* Break-everything counter. */ + rwlock_t cb_v_break_lock; + + afs_voltype_t type; /* type of volume */ + char type_force; /* force volume type (suppress R/O -> R/W) */ + u8 name_len; + u8 name[AFS_MAXVOLNAME + 1]; /* NUL-padded volume name */ +}; + +enum afs_lock_state { + AFS_VNODE_LOCK_NONE, /* The vnode has no lock on the server */ + AFS_VNODE_LOCK_WAITING_FOR_CB, /* We're waiting for the server to break the callback */ + AFS_VNODE_LOCK_SETTING, /* We're asking the server for a lock */ + AFS_VNODE_LOCK_GRANTED, /* We have a lock on the server */ + AFS_VNODE_LOCK_EXTENDING, /* We're extending a lock on the server */ + AFS_VNODE_LOCK_NEED_UNLOCK, /* We need to unlock on the server */ + AFS_VNODE_LOCK_UNLOCKING, /* We're telling the server to unlock */ + AFS_VNODE_LOCK_DELETED, /* The vnode has been deleted whilst we have a lock */ +}; + +/* + * AFS inode private data. + * + * Note that afs_alloc_inode() *must* reset anything that could incorrectly + * leak from one inode to another. + */ +struct afs_vnode { + struct inode vfs_inode; /* the VFS's inode record */ + + struct afs_volume *volume; /* volume on which vnode resides */ + struct afs_fid fid; /* the file identifier for this inode */ + struct afs_file_status status; /* AFS status info for this file */ + afs_dataversion_t invalid_before; /* Child dentries are invalid before this */ +#ifdef CONFIG_AFS_FSCACHE + struct fscache_cookie *cache; /* caching cookie */ +#endif + struct afs_permits __rcu *permit_cache; /* cache of permits so far obtained */ + struct mutex io_lock; /* Lock for serialising I/O on this mutex */ + struct rw_semaphore validate_lock; /* lock for validating this vnode */ + struct rw_semaphore rmdir_lock; /* Lock for rmdir vs sillyrename */ + struct key *silly_key; /* Silly rename key */ + spinlock_t wb_lock; /* lock for wb_keys */ + spinlock_t lock; /* waitqueue/flags lock */ + unsigned long flags; +#define AFS_VNODE_CB_PROMISED 0 /* Set if vnode has a callback promise */ +#define AFS_VNODE_UNSET 1 /* set if vnode attributes not yet set */ +#define AFS_VNODE_DIR_VALID 2 /* Set if dir contents are valid */ +#define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */ +#define AFS_VNODE_DELETED 4 /* set if vnode deleted on server */ +#define AFS_VNODE_MOUNTPOINT 5 /* set if vnode is a mountpoint symlink */ +#define AFS_VNODE_AUTOCELL 6 /* set if Vnode is an auto mount point */ +#define AFS_VNODE_PSEUDODIR 7 /* set if Vnode is a pseudo directory */ +#define AFS_VNODE_NEW_CONTENT 8 /* Set if file has new content (create/trunc-0) */ +#define AFS_VNODE_SILLY_DELETED 9 /* Set if file has been silly-deleted */ +#define AFS_VNODE_MODIFYING 10 /* Set if we're performing a modification op */ + + struct list_head wb_keys; /* List of keys available for writeback */ + struct list_head pending_locks; /* locks waiting to be granted */ + struct list_head granted_locks; /* locks granted on this file */ + struct delayed_work lock_work; /* work to be done in locking */ + struct key *lock_key; /* Key to be used in lock ops */ + ktime_t locked_at; /* Time at which lock obtained */ + enum afs_lock_state lock_state : 8; + afs_lock_type_t lock_type : 8; + + /* outstanding callback notification on this file */ + void *cb_server; /* Server with callback/filelock */ + unsigned int cb_s_break; /* Mass break counter on ->server */ + unsigned int cb_v_break; /* Mass break counter on ->volume */ + unsigned int cb_break; /* Break counter on vnode */ + seqlock_t cb_lock; /* Lock for ->cb_server, ->status, ->cb_*break */ + + time64_t cb_expires_at; /* time at which callback expires */ +}; + +static inline struct fscache_cookie *afs_vnode_cache(struct afs_vnode *vnode) +{ +#ifdef CONFIG_AFS_FSCACHE + return vnode->cache; +#else + return NULL; +#endif +} + +/* + * cached security record for one user's attempt to access a vnode + */ +struct afs_permit { + struct key *key; /* RxRPC ticket holding a security context */ + afs_access_t access; /* CallerAccess value for this key */ +}; + +/* + * Immutable cache of CallerAccess records from attempts to access vnodes. + * These may be shared between multiple vnodes. + */ +struct afs_permits { + struct rcu_head rcu; + struct hlist_node hash_node; /* Link in hash */ + unsigned long h; /* Hash value for this permit list */ + refcount_t usage; + unsigned short nr_permits; /* Number of records */ + bool invalidated; /* Invalidated due to key change */ + struct afs_permit permits[]; /* List of permits sorted by key pointer */ +}; + +/* + * Error prioritisation and accumulation. + */ +struct afs_error { + short error; /* Accumulated error */ + bool responded; /* T if server responded */ +}; + +/* + * Cursor for iterating over a server's address list. + */ +struct afs_addr_cursor { + struct afs_addr_list *alist; /* Current address list (pins ref) */ + unsigned long tried; /* Tried addresses */ + signed char index; /* Current address */ + bool responded; /* T if the current address responded */ + unsigned short nr_iterations; /* Number of address iterations */ + short error; + u32 abort_code; +}; + +/* + * Cursor for iterating over a set of volume location servers. + */ +struct afs_vl_cursor { + struct afs_addr_cursor ac; + struct afs_cell *cell; /* The cell we're querying */ + struct afs_vlserver_list *server_list; /* Current server list (pins ref) */ + struct afs_vlserver *server; /* Server on which this resides */ + struct key *key; /* Key for the server */ + unsigned long untried; /* Bitmask of untried servers */ + short index; /* Current server */ + short error; + unsigned short flags; +#define AFS_VL_CURSOR_STOP 0x0001 /* Set to cease iteration */ +#define AFS_VL_CURSOR_RETRY 0x0002 /* Set to do a retry */ +#define AFS_VL_CURSOR_RETRIED 0x0004 /* Set if started a retry */ + unsigned short nr_iterations; /* Number of server iterations */ +}; + +/* + * Fileserver operation methods. + */ +struct afs_operation_ops { + void (*issue_afs_rpc)(struct afs_operation *op); + void (*issue_yfs_rpc)(struct afs_operation *op); + void (*success)(struct afs_operation *op); + void (*aborted)(struct afs_operation *op); + void (*edit_dir)(struct afs_operation *op); + void (*put)(struct afs_operation *op); +}; + +struct afs_vnode_param { + struct afs_vnode *vnode; + struct afs_fid fid; /* Fid to access */ + struct afs_status_cb scb; /* Returned status and callback promise */ + afs_dataversion_t dv_before; /* Data version before the call */ + unsigned int cb_break_before; /* cb_break + cb_s_break before the call */ + u8 dv_delta; /* Expected change in data version */ + bool put_vnode:1; /* T if we have a ref on the vnode */ + bool need_io_lock:1; /* T if we need the I/O lock on this */ + bool update_ctime:1; /* Need to update the ctime */ + bool set_size:1; /* Must update i_size */ + bool op_unlinked:1; /* True if file was unlinked by op */ + bool speculative:1; /* T if speculative status fetch (no vnode lock) */ + bool modification:1; /* Set if the content gets modified */ +}; + +/* + * Fileserver operation wrapper, handling server and address rotation + * asynchronously. May make simultaneous calls to multiple servers. + */ +struct afs_operation { + struct afs_net *net; /* Network namespace */ + struct key *key; /* Key for the cell */ + const struct afs_call_type *type; /* Type of call done */ + const struct afs_operation_ops *ops; + + /* Parameters/results for the operation */ + struct afs_volume *volume; /* Volume being accessed */ + struct afs_vnode_param file[2]; + struct afs_vnode_param *more_files; + struct afs_volsync volsync; + struct dentry *dentry; /* Dentry to be altered */ + struct dentry *dentry_2; /* Second dentry to be altered */ + struct timespec64 mtime; /* Modification time to record */ + struct timespec64 ctime; /* Change time to set */ + short nr_files; /* Number of entries in file[], more_files */ + short error; + unsigned int debug_id; + + unsigned int cb_v_break; /* Volume break counter before op */ + unsigned int cb_s_break; /* Server break counter before op */ + + union { + struct { + int which; /* Which ->file[] to fetch for */ + } fetch_status; + struct { + int reason; /* enum afs_edit_dir_reason */ + mode_t mode; + const char *symlink; + } create; + struct { + bool need_rehash; + } unlink; + struct { + struct dentry *rehash; + struct dentry *tmp; + bool new_negative; + } rename; + struct { + struct afs_read *req; + } fetch; + struct { + afs_lock_type_t type; + } lock; + struct { + struct address_space *mapping; /* Pages being written from */ + pgoff_t first; /* first page in mapping to deal with */ + pgoff_t last; /* last page in mapping to deal with */ + unsigned first_offset; /* offset into mapping[first] */ + unsigned last_to; /* amount of mapping[last] */ + bool laundering; /* Laundering page, PG_writeback not set */ + } store; + struct { + struct iattr *attr; + loff_t old_i_size; + } setattr; + struct afs_acl *acl; + struct yfs_acl *yacl; + struct { + struct afs_volume_status vs; + struct kstatfs *buf; + } volstatus; + }; + + /* Fileserver iteration state */ + struct afs_addr_cursor ac; + struct afs_server_list *server_list; /* Current server list (pins ref) */ + struct afs_server *server; /* Server we're using (ref pinned by server_list) */ + struct afs_call *call; + unsigned long untried; /* Bitmask of untried servers */ + short index; /* Current server */ + unsigned short nr_iterations; /* Number of server iterations */ + + unsigned int flags; +#define AFS_OPERATION_STOP 0x0001 /* Set to cease iteration */ +#define AFS_OPERATION_VBUSY 0x0002 /* Set if seen VBUSY */ +#define AFS_OPERATION_VMOVED 0x0004 /* Set if seen VMOVED */ +#define AFS_OPERATION_VNOVOL 0x0008 /* Set if seen VNOVOL */ +#define AFS_OPERATION_CUR_ONLY 0x0010 /* Set if current server only (file lock held) */ +#define AFS_OPERATION_NO_VSLEEP 0x0020 /* Set to prevent sleep on VBUSY, VOFFLINE, ... */ +#define AFS_OPERATION_UNINTR 0x0040 /* Set if op is uninterruptible */ +#define AFS_OPERATION_DOWNGRADE 0x0080 /* Set to retry with downgraded opcode */ +#define AFS_OPERATION_LOCK_0 0x0100 /* Set if have io_lock on file[0] */ +#define AFS_OPERATION_LOCK_1 0x0200 /* Set if have io_lock on file[1] */ +#define AFS_OPERATION_TRIED_ALL 0x0400 /* Set if we've tried all the fileservers */ +#define AFS_OPERATION_RETRY_SERVER 0x0800 /* Set if we should retry the current server */ +#define AFS_OPERATION_DIR_CONFLICT 0x1000 /* Set if we detected a 3rd-party dir change */ +}; + +/* + * Cache auxiliary data. + */ +struct afs_vnode_cache_aux { + u64 data_version; +} __packed; + +/* + * We use page->private to hold the amount of the page that we've written to, + * splitting the field into two parts. However, we need to represent a range + * 0...PAGE_SIZE, so we reduce the resolution if the size of the page + * exceeds what we can encode. + */ +#ifdef CONFIG_64BIT +#define __AFS_PAGE_PRIV_MASK 0x7fffffffUL +#define __AFS_PAGE_PRIV_SHIFT 32 +#define __AFS_PAGE_PRIV_MMAPPED 0x80000000UL +#else +#define __AFS_PAGE_PRIV_MASK 0x7fffUL +#define __AFS_PAGE_PRIV_SHIFT 16 +#define __AFS_PAGE_PRIV_MMAPPED 0x8000UL +#endif + +static inline unsigned int afs_page_dirty_resolution(void) +{ + int shift = PAGE_SHIFT - (__AFS_PAGE_PRIV_SHIFT - 1); + return (shift > 0) ? shift : 0; +} + +static inline size_t afs_page_dirty_from(unsigned long priv) +{ + unsigned long x = priv & __AFS_PAGE_PRIV_MASK; + + /* The lower bound is inclusive */ + return x << afs_page_dirty_resolution(); +} + +static inline size_t afs_page_dirty_to(unsigned long priv) +{ + unsigned long x = (priv >> __AFS_PAGE_PRIV_SHIFT) & __AFS_PAGE_PRIV_MASK; + + /* The upper bound is immediately beyond the region */ + return (x + 1) << afs_page_dirty_resolution(); +} + +static inline unsigned long afs_page_dirty(size_t from, size_t to) +{ + unsigned int res = afs_page_dirty_resolution(); + from >>= res; + to = (to - 1) >> res; + return (to << __AFS_PAGE_PRIV_SHIFT) | from; +} + +static inline unsigned long afs_page_dirty_mmapped(unsigned long priv) +{ + return priv | __AFS_PAGE_PRIV_MMAPPED; +} + +static inline bool afs_is_page_dirty_mmapped(unsigned long priv) +{ + return priv & __AFS_PAGE_PRIV_MMAPPED; +} + +#include <trace/events/afs.h> + +/*****************************************************************************/ +/* + * addr_list.c + */ +static inline struct afs_addr_list *afs_get_addrlist(struct afs_addr_list *alist) +{ + if (alist) + refcount_inc(&alist->usage); + return alist; +} +extern struct afs_addr_list *afs_alloc_addrlist(unsigned int, + unsigned short, + unsigned short); +extern void afs_put_addrlist(struct afs_addr_list *); +extern struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *, + const char *, size_t, char, + unsigned short, unsigned short); +extern struct afs_vlserver_list *afs_dns_query(struct afs_cell *, time64_t *); +extern bool afs_iterate_addresses(struct afs_addr_cursor *); +extern int afs_end_cursor(struct afs_addr_cursor *); + +extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32, u16); +extern void afs_merge_fs_addr6(struct afs_addr_list *, __be32 *, u16); + +/* + * cache.c + */ +#ifdef CONFIG_AFS_FSCACHE +extern struct fscache_netfs afs_cache_netfs; +extern struct fscache_cookie_def afs_cell_cache_index_def; +extern struct fscache_cookie_def afs_volume_cache_index_def; +extern struct fscache_cookie_def afs_vnode_cache_index_def; +#else +#define afs_cell_cache_index_def (*(struct fscache_cookie_def *) NULL) +#define afs_volume_cache_index_def (*(struct fscache_cookie_def *) NULL) +#define afs_vnode_cache_index_def (*(struct fscache_cookie_def *) NULL) +#endif + +/* + * callback.c + */ +extern void afs_init_callback_state(struct afs_server *); +extern void __afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason); +extern void afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason); +extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break *); + +static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode) +{ + return vnode->cb_break + vnode->cb_v_break; +} + +static inline bool afs_cb_is_broken(unsigned int cb_break, + const struct afs_vnode *vnode) +{ + return cb_break != (vnode->cb_break + vnode->volume->cb_v_break); +} + +/* + * cell.c + */ +extern int afs_cell_init(struct afs_net *, const char *); +extern struct afs_cell *afs_find_cell(struct afs_net *, const char *, unsigned, + enum afs_cell_trace); +extern struct afs_cell *afs_lookup_cell(struct afs_net *, const char *, unsigned, + const char *, bool); +extern struct afs_cell *afs_use_cell(struct afs_cell *, enum afs_cell_trace); +extern void afs_unuse_cell(struct afs_net *, struct afs_cell *, enum afs_cell_trace); +extern struct afs_cell *afs_get_cell(struct afs_cell *, enum afs_cell_trace); +extern void afs_see_cell(struct afs_cell *, enum afs_cell_trace); +extern void afs_put_cell(struct afs_cell *, enum afs_cell_trace); +extern void afs_queue_cell(struct afs_cell *, enum afs_cell_trace); +extern void afs_manage_cells(struct work_struct *); +extern void afs_cells_timer(struct timer_list *); +extern void __net_exit afs_cell_purge(struct afs_net *); + +/* + * cmservice.c + */ +extern bool afs_cm_incoming_call(struct afs_call *); + +/* + * dir.c + */ +extern const struct file_operations afs_dir_file_operations; +extern const struct inode_operations afs_dir_inode_operations; +extern const struct address_space_operations afs_dir_aops; +extern const struct dentry_operations afs_fs_dentry_operations; + +extern void afs_d_release(struct dentry *); +extern void afs_check_for_remote_deletion(struct afs_operation *); + +/* + * dir_edit.c + */ +extern void afs_edit_dir_add(struct afs_vnode *, struct qstr *, struct afs_fid *, + enum afs_edit_dir_reason); +extern void afs_edit_dir_remove(struct afs_vnode *, struct qstr *, enum afs_edit_dir_reason); + +/* + * dir_silly.c + */ +extern int afs_sillyrename(struct afs_vnode *, struct afs_vnode *, + struct dentry *, struct key *); +extern int afs_silly_iput(struct dentry *, struct inode *); + +/* + * dynroot.c + */ +extern const struct inode_operations afs_dynroot_inode_operations; +extern const struct dentry_operations afs_dynroot_dentry_operations; + +extern struct inode *afs_try_auto_mntpt(struct dentry *, struct inode *); +extern int afs_dynroot_mkdir(struct afs_net *, struct afs_cell *); +extern void afs_dynroot_rmdir(struct afs_net *, struct afs_cell *); +extern int afs_dynroot_populate(struct super_block *); +extern void afs_dynroot_depopulate(struct super_block *); + +/* + * file.c + */ +extern const struct address_space_operations afs_fs_aops; +extern const struct inode_operations afs_file_inode_operations; +extern const struct file_operations afs_file_operations; + +extern int afs_cache_wb_key(struct afs_vnode *, struct afs_file *); +extern void afs_put_wb_key(struct afs_wb_key *); +extern int afs_open(struct inode *, struct file *); +extern int afs_release(struct inode *, struct file *); +extern int afs_fetch_data(struct afs_vnode *, struct key *, struct afs_read *); +extern int afs_page_filler(void *, struct page *); +extern void afs_put_read(struct afs_read *); + +static inline struct afs_read *afs_get_read(struct afs_read *req) +{ + refcount_inc(&req->usage); + return req; +} + +/* + * flock.c + */ +extern struct workqueue_struct *afs_lock_manager; + +extern void afs_lock_op_done(struct afs_call *); +extern void afs_lock_work(struct work_struct *); +extern void afs_lock_may_be_available(struct afs_vnode *); +extern int afs_lock(struct file *, int, struct file_lock *); +extern int afs_flock(struct file *, int, struct file_lock *); + +/* + * fsclient.c + */ +extern void afs_fs_fetch_status(struct afs_operation *); +extern void afs_fs_fetch_data(struct afs_operation *); +extern void afs_fs_create_file(struct afs_operation *); +extern void afs_fs_make_dir(struct afs_operation *); +extern void afs_fs_remove_file(struct afs_operation *); +extern void afs_fs_remove_dir(struct afs_operation *); +extern void afs_fs_link(struct afs_operation *); +extern void afs_fs_symlink(struct afs_operation *); +extern void afs_fs_rename(struct afs_operation *); +extern void afs_fs_store_data(struct afs_operation *); +extern void afs_fs_setattr(struct afs_operation *); +extern void afs_fs_get_volume_status(struct afs_operation *); +extern void afs_fs_set_lock(struct afs_operation *); +extern void afs_fs_extend_lock(struct afs_operation *); +extern void afs_fs_release_lock(struct afs_operation *); +extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *, + struct afs_addr_cursor *, struct key *); +extern bool afs_fs_get_capabilities(struct afs_net *, struct afs_server *, + struct afs_addr_cursor *, struct key *); +extern void afs_fs_inline_bulk_status(struct afs_operation *); + +struct afs_acl { + u32 size; + u8 data[]; +}; + +extern void afs_fs_fetch_acl(struct afs_operation *); +extern void afs_fs_store_acl(struct afs_operation *); + +/* + * fs_operation.c + */ +extern struct afs_operation *afs_alloc_operation(struct key *, struct afs_volume *); +extern int afs_put_operation(struct afs_operation *); +extern bool afs_begin_vnode_operation(struct afs_operation *); +extern void afs_wait_for_operation(struct afs_operation *); +extern int afs_do_sync_operation(struct afs_operation *); + +static inline void afs_op_nomem(struct afs_operation *op) +{ + op->error = -ENOMEM; +} + +static inline void afs_op_set_vnode(struct afs_operation *op, unsigned int n, + struct afs_vnode *vnode) +{ + op->file[n].vnode = vnode; + op->file[n].need_io_lock = true; +} + +static inline void afs_op_set_fid(struct afs_operation *op, unsigned int n, + const struct afs_fid *fid) +{ + op->file[n].fid = *fid; +} + +/* + * fs_probe.c + */ +extern void afs_fileserver_probe_result(struct afs_call *); +extern void afs_fs_probe_fileserver(struct afs_net *, struct afs_server *, struct key *, bool); +extern int afs_wait_for_fs_probes(struct afs_server_list *, unsigned long); +extern void afs_probe_fileserver(struct afs_net *, struct afs_server *); +extern void afs_fs_probe_dispatcher(struct work_struct *); +extern int afs_wait_for_one_fs_probe(struct afs_server *, bool); +extern void afs_fs_probe_cleanup(struct afs_net *); + +/* + * inode.c + */ +extern const struct afs_operation_ops afs_fetch_status_operation; + +extern void afs_vnode_commit_status(struct afs_operation *, struct afs_vnode_param *); +extern int afs_fetch_status(struct afs_vnode *, struct key *, bool, afs_access_t *); +extern int afs_ilookup5_test_by_fid(struct inode *, void *); +extern struct inode *afs_iget_pseudo_dir(struct super_block *, bool); +extern struct inode *afs_iget(struct afs_operation *, struct afs_vnode_param *); +extern struct inode *afs_root_iget(struct super_block *, struct key *); +extern bool afs_check_validity(struct afs_vnode *); +extern int afs_validate(struct afs_vnode *, struct key *); +extern int afs_getattr(const struct path *, struct kstat *, u32, unsigned int); +extern int afs_setattr(struct dentry *, struct iattr *); +extern void afs_evict_inode(struct inode *); +extern int afs_drop_inode(struct inode *); + +/* + * main.c + */ +extern struct workqueue_struct *afs_wq; +extern int afs_net_id; + +static inline struct afs_net *afs_net(struct net *net) +{ + return net_generic(net, afs_net_id); +} + +static inline struct afs_net *afs_sb2net(struct super_block *sb) +{ + return afs_net(AFS_FS_S(sb)->net_ns); +} + +static inline struct afs_net *afs_d2net(struct dentry *dentry) +{ + return afs_sb2net(dentry->d_sb); +} + +static inline struct afs_net *afs_i2net(struct inode *inode) +{ + return afs_sb2net(inode->i_sb); +} + +static inline struct afs_net *afs_v2net(struct afs_vnode *vnode) +{ + return afs_i2net(&vnode->vfs_inode); +} + +static inline struct afs_net *afs_sock2net(struct sock *sk) +{ + return net_generic(sock_net(sk), afs_net_id); +} + +static inline void __afs_stat(atomic_t *s) +{ + atomic_inc(s); +} + +#define afs_stat_v(vnode, n) __afs_stat(&afs_v2net(vnode)->n) + +/* + * misc.c + */ +extern int afs_abort_to_error(u32); +extern void afs_prioritise_error(struct afs_error *, int, u32); + +/* + * mntpt.c + */ +extern const struct inode_operations afs_mntpt_inode_operations; +extern const struct inode_operations afs_autocell_inode_operations; +extern const struct file_operations afs_mntpt_file_operations; + +extern struct vfsmount *afs_d_automount(struct path *); +extern void afs_mntpt_kill_timer(void); + +/* + * proc.c + */ +#ifdef CONFIG_PROC_FS +extern int __net_init afs_proc_init(struct afs_net *); +extern void __net_exit afs_proc_cleanup(struct afs_net *); +extern int afs_proc_cell_setup(struct afs_cell *); +extern void afs_proc_cell_remove(struct afs_cell *); +extern void afs_put_sysnames(struct afs_sysnames *); +#else +static inline int afs_proc_init(struct afs_net *net) { return 0; } +static inline void afs_proc_cleanup(struct afs_net *net) {} +static inline int afs_proc_cell_setup(struct afs_cell *cell) { return 0; } +static inline void afs_proc_cell_remove(struct afs_cell *cell) {} +static inline void afs_put_sysnames(struct afs_sysnames *sysnames) {} +#endif + +/* + * rotate.c + */ +extern bool afs_select_fileserver(struct afs_operation *); +extern void afs_dump_edestaddrreq(const struct afs_operation *); + +/* + * rxrpc.c + */ +extern struct workqueue_struct *afs_async_calls; + +extern int __net_init afs_open_socket(struct afs_net *); +extern void __net_exit afs_close_socket(struct afs_net *); +extern void afs_charge_preallocation(struct work_struct *); +extern void afs_put_call(struct afs_call *); +extern void afs_make_call(struct afs_addr_cursor *, struct afs_call *, gfp_t); +extern long afs_wait_for_call_to_complete(struct afs_call *, struct afs_addr_cursor *); +extern struct afs_call *afs_alloc_flat_call(struct afs_net *, + const struct afs_call_type *, + size_t, size_t); +extern void afs_flat_call_destructor(struct afs_call *); +extern void afs_send_empty_reply(struct afs_call *); +extern void afs_send_simple_reply(struct afs_call *, const void *, size_t); +extern int afs_extract_data(struct afs_call *, bool); +extern int afs_protocol_error(struct afs_call *, enum afs_eproto_cause); + +static inline void afs_make_op_call(struct afs_operation *op, struct afs_call *call, + gfp_t gfp) +{ + op->call = call; + op->type = call->type; + call->op = op; + call->key = op->key; + call->intr = !(op->flags & AFS_OPERATION_UNINTR); + afs_make_call(&op->ac, call, gfp); +} + +static inline void afs_extract_begin(struct afs_call *call, void *buf, size_t size) +{ + call->kvec[0].iov_base = buf; + call->kvec[0].iov_len = size; + iov_iter_kvec(&call->def_iter, READ, call->kvec, 1, size); +} + +static inline void afs_extract_to_tmp(struct afs_call *call) +{ + afs_extract_begin(call, &call->tmp, sizeof(call->tmp)); +} + +static inline void afs_extract_to_tmp64(struct afs_call *call) +{ + afs_extract_begin(call, &call->tmp64, sizeof(call->tmp64)); +} + +static inline void afs_extract_discard(struct afs_call *call, size_t size) +{ + iov_iter_discard(&call->def_iter, READ, size); +} + +static inline void afs_extract_to_buf(struct afs_call *call, size_t size) +{ + afs_extract_begin(call, call->buffer, size); +} + +static inline int afs_transfer_reply(struct afs_call *call) +{ + return afs_extract_data(call, false); +} + +static inline bool afs_check_call_state(struct afs_call *call, + enum afs_call_state state) +{ + return READ_ONCE(call->state) == state; +} + +static inline bool afs_set_call_state(struct afs_call *call, + enum afs_call_state from, + enum afs_call_state to) +{ + bool ok = false; + + spin_lock_bh(&call->state_lock); + if (call->state == from) { + call->state = to; + trace_afs_call_state(call, from, to, 0, 0); + ok = true; + } + spin_unlock_bh(&call->state_lock); + return ok; +} + +static inline void afs_set_call_complete(struct afs_call *call, + int error, u32 remote_abort) +{ + enum afs_call_state state; + bool ok = false; + + spin_lock_bh(&call->state_lock); + state = call->state; + if (state != AFS_CALL_COMPLETE) { + call->abort_code = remote_abort; + call->error = error; + call->state = AFS_CALL_COMPLETE; + trace_afs_call_state(call, state, AFS_CALL_COMPLETE, + error, remote_abort); + ok = true; + } + spin_unlock_bh(&call->state_lock); + if (ok) { + trace_afs_call_done(call); + + /* Asynchronous calls have two refs to release - one from the alloc and + * one queued with the work item - and we can't just deallocate the + * call because the work item may be queued again. + */ + if (call->drop_ref) + afs_put_call(call); + } +} + +/* + * security.c + */ +extern void afs_put_permits(struct afs_permits *); +extern void afs_clear_permits(struct afs_vnode *); +extern void afs_cache_permit(struct afs_vnode *, struct key *, unsigned int, + struct afs_status_cb *); +extern void afs_zap_permits(struct rcu_head *); +extern struct key *afs_request_key(struct afs_cell *); +extern struct key *afs_request_key_rcu(struct afs_cell *); +extern int afs_check_permit(struct afs_vnode *, struct key *, afs_access_t *); +extern int afs_permission(struct inode *, int); +extern void __exit afs_clean_up_permit_cache(void); + +/* + * server.c + */ +extern spinlock_t afs_server_peer_lock; + +extern struct afs_server *afs_find_server(struct afs_net *, + const struct sockaddr_rxrpc *); +extern struct afs_server *afs_find_server_by_uuid(struct afs_net *, const uuid_t *); +extern struct afs_server *afs_lookup_server(struct afs_cell *, struct key *, const uuid_t *, u32); +extern struct afs_server *afs_get_server(struct afs_server *, enum afs_server_trace); +extern struct afs_server *afs_use_server(struct afs_server *, enum afs_server_trace); +extern void afs_unuse_server(struct afs_net *, struct afs_server *, enum afs_server_trace); +extern void afs_unuse_server_notime(struct afs_net *, struct afs_server *, enum afs_server_trace); +extern void afs_put_server(struct afs_net *, struct afs_server *, enum afs_server_trace); +extern void afs_manage_servers(struct work_struct *); +extern void afs_servers_timer(struct timer_list *); +extern void afs_fs_probe_timer(struct timer_list *); +extern void __net_exit afs_purge_servers(struct afs_net *); +extern bool afs_check_server_record(struct afs_operation *, struct afs_server *); + +static inline void afs_inc_servers_outstanding(struct afs_net *net) +{ + atomic_inc(&net->servers_outstanding); +} + +static inline void afs_dec_servers_outstanding(struct afs_net *net) +{ + if (atomic_dec_and_test(&net->servers_outstanding)) + wake_up_var(&net->servers_outstanding); +} + +static inline bool afs_is_probing_server(struct afs_server *server) +{ + return list_empty(&server->probe_link); +} + +/* + * server_list.c + */ +static inline struct afs_server_list *afs_get_serverlist(struct afs_server_list *slist) +{ + refcount_inc(&slist->usage); + return slist; +} + +extern void afs_put_serverlist(struct afs_net *, struct afs_server_list *); +extern struct afs_server_list *afs_alloc_server_list(struct afs_cell *, struct key *, + struct afs_vldb_entry *, + u8); +extern bool afs_annotate_server_list(struct afs_server_list *, struct afs_server_list *); + +/* + * super.c + */ +extern int __init afs_fs_init(void); +extern void afs_fs_exit(void); + +/* + * vlclient.c + */ +extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *, + const char *, int); +extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *, const uuid_t *); +extern struct afs_call *afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, + struct key *, struct afs_vlserver *, unsigned int); +extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *, const uuid_t *); +extern char *afs_yfsvl_get_cell_name(struct afs_vl_cursor *); + +/* + * vl_alias.c + */ +extern int afs_cell_detect_alias(struct afs_cell *, struct key *); + +/* + * vl_probe.c + */ +extern void afs_vlserver_probe_result(struct afs_call *); +extern int afs_send_vl_probes(struct afs_net *, struct key *, struct afs_vlserver_list *); +extern int afs_wait_for_vl_probes(struct afs_vlserver_list *, unsigned long); + +/* + * vl_rotate.c + */ +extern bool afs_begin_vlserver_operation(struct afs_vl_cursor *, + struct afs_cell *, struct key *); +extern bool afs_select_vlserver(struct afs_vl_cursor *); +extern bool afs_select_current_vlserver(struct afs_vl_cursor *); +extern int afs_end_vlserver_operation(struct afs_vl_cursor *); + +/* + * vlserver_list.c + */ +static inline struct afs_vlserver *afs_get_vlserver(struct afs_vlserver *vlserver) +{ + atomic_inc(&vlserver->usage); + return vlserver; +} + +static inline struct afs_vlserver_list *afs_get_vlserverlist(struct afs_vlserver_list *vllist) +{ + if (vllist) + atomic_inc(&vllist->usage); + return vllist; +} + +extern struct afs_vlserver *afs_alloc_vlserver(const char *, size_t, unsigned short); +extern void afs_put_vlserver(struct afs_net *, struct afs_vlserver *); +extern struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int); +extern void afs_put_vlserverlist(struct afs_net *, struct afs_vlserver_list *); +extern struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *, + const void *, size_t); + +/* + * volume.c + */ +extern struct afs_volume *afs_create_volume(struct afs_fs_context *); +extern void afs_activate_volume(struct afs_volume *); +extern void afs_deactivate_volume(struct afs_volume *); +extern struct afs_volume *afs_get_volume(struct afs_volume *, enum afs_volume_trace); +extern void afs_put_volume(struct afs_net *, struct afs_volume *, enum afs_volume_trace); +extern int afs_check_volume_status(struct afs_volume *, struct afs_operation *); + +/* + * write.c + */ +extern int afs_set_page_dirty(struct page *); +extern int afs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata); +extern int afs_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata); +extern int afs_writepage(struct page *, struct writeback_control *); +extern int afs_writepages(struct address_space *, struct writeback_control *); +extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *); +extern int afs_fsync(struct file *, loff_t, loff_t, int); +extern vm_fault_t afs_page_mkwrite(struct vm_fault *vmf); +extern void afs_prune_wb_keys(struct afs_vnode *); +extern int afs_launder_page(struct page *); + +/* + * xattr.c + */ +extern const struct xattr_handler *afs_xattr_handlers[]; + +/* + * yfsclient.c + */ +extern void yfs_fs_fetch_data(struct afs_operation *); +extern void yfs_fs_create_file(struct afs_operation *); +extern void yfs_fs_make_dir(struct afs_operation *); +extern void yfs_fs_remove_file2(struct afs_operation *); +extern void yfs_fs_remove_file(struct afs_operation *); +extern void yfs_fs_remove_dir(struct afs_operation *); +extern void yfs_fs_link(struct afs_operation *); +extern void yfs_fs_symlink(struct afs_operation *); +extern void yfs_fs_rename(struct afs_operation *); +extern void yfs_fs_store_data(struct afs_operation *); +extern void yfs_fs_setattr(struct afs_operation *); +extern void yfs_fs_get_volume_status(struct afs_operation *); +extern void yfs_fs_set_lock(struct afs_operation *); +extern void yfs_fs_extend_lock(struct afs_operation *); +extern void yfs_fs_release_lock(struct afs_operation *); +extern void yfs_fs_fetch_status(struct afs_operation *); +extern void yfs_fs_inline_bulk_status(struct afs_operation *); + +struct yfs_acl { + struct afs_acl *acl; /* Dir/file/symlink ACL */ + struct afs_acl *vol_acl; /* Whole volume ACL */ + u32 inherit_flag; /* True if ACL is inherited from parent dir */ + u32 num_cleaned; /* Number of ACEs removed due to subject removal */ + unsigned int flags; +#define YFS_ACL_WANT_ACL 0x01 /* Set if caller wants ->acl */ +#define YFS_ACL_WANT_VOL_ACL 0x02 /* Set if caller wants ->vol_acl */ +}; + +extern void yfs_free_opaque_acl(struct yfs_acl *); +extern void yfs_fs_fetch_opaque_acl(struct afs_operation *); +extern void yfs_fs_store_opaque_acl2(struct afs_operation *); + +/* + * Miscellaneous inline functions. + */ +static inline struct afs_vnode *AFS_FS_I(struct inode *inode) +{ + return container_of(inode, struct afs_vnode, vfs_inode); +} + +static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode) +{ + return &vnode->vfs_inode; +} + +/* + * Note that a dentry got changed. We need to set d_fsdata to the data version + * number derived from the result of the operation. It doesn't matter if + * d_fsdata goes backwards as we'll just revalidate. + */ +static inline void afs_update_dentry_version(struct afs_operation *op, + struct afs_vnode_param *dir_vp, + struct dentry *dentry) +{ + if (!op->error) + dentry->d_fsdata = + (void *)(unsigned long)dir_vp->scb.status.data_version; +} + +/* + * Set the file size and block count. Estimate the number of 512 bytes blocks + * used, rounded up to nearest 1K for consistency with other AFS clients. + */ +static inline void afs_set_i_size(struct afs_vnode *vnode, u64 size) +{ + i_size_write(&vnode->vfs_inode, size); + vnode->vfs_inode.i_blocks = ((size + 1023) >> 10) << 1; +} + +/* + * Check for a conflicting operation on a directory that we just unlinked from. + * If someone managed to sneak a link or an unlink in on the file we just + * unlinked, we won't be able to trust nlink on an AFS file (but not YFS). + */ +static inline void afs_check_dir_conflict(struct afs_operation *op, + struct afs_vnode_param *dvp) +{ + if (dvp->dv_before + dvp->dv_delta != dvp->scb.status.data_version) + op->flags |= AFS_OPERATION_DIR_CONFLICT; +} + +static inline int afs_io_error(struct afs_call *call, enum afs_io_error where) +{ + trace_afs_io_error(call->debug_id, -EIO, where); + return -EIO; +} + +static inline int afs_bad(struct afs_vnode *vnode, enum afs_file_error where) +{ + trace_afs_file_error(vnode, -EIO, where); + return -EIO; +} + +/*****************************************************************************/ +/* + * debug tracing + */ +extern unsigned afs_debug; + +#define dbgprintk(FMT,...) \ + printk("[%-6.6s] "FMT"\n", current->comm ,##__VA_ARGS__) + +#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__) +#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) +#define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__) + + +#if defined(__KDEBUG) +#define _enter(FMT,...) kenter(FMT,##__VA_ARGS__) +#define _leave(FMT,...) kleave(FMT,##__VA_ARGS__) +#define _debug(FMT,...) kdebug(FMT,##__VA_ARGS__) + +#elif defined(CONFIG_AFS_DEBUG) +#define AFS_DEBUG_KENTER 0x01 +#define AFS_DEBUG_KLEAVE 0x02 +#define AFS_DEBUG_KDEBUG 0x04 + +#define _enter(FMT,...) \ +do { \ + if (unlikely(afs_debug & AFS_DEBUG_KENTER)) \ + kenter(FMT,##__VA_ARGS__); \ +} while (0) + +#define _leave(FMT,...) \ +do { \ + if (unlikely(afs_debug & AFS_DEBUG_KLEAVE)) \ + kleave(FMT,##__VA_ARGS__); \ +} while (0) + +#define _debug(FMT,...) \ +do { \ + if (unlikely(afs_debug & AFS_DEBUG_KDEBUG)) \ + kdebug(FMT,##__VA_ARGS__); \ +} while (0) + +#else +#define _enter(FMT,...) no_printk("==> %s("FMT")",__func__ ,##__VA_ARGS__) +#define _leave(FMT,...) no_printk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) +#define _debug(FMT,...) no_printk(" "FMT ,##__VA_ARGS__) +#endif + +/* + * debug assertion checking + */ +#if 1 // defined(__KDEBUGALL) + +#define ASSERT(X) \ +do { \ + if (unlikely(!(X))) { \ + printk(KERN_ERR "\n"); \ + printk(KERN_ERR "AFS: Assertion failed\n"); \ + BUG(); \ + } \ +} while(0) + +#define ASSERTCMP(X, OP, Y) \ +do { \ + if (unlikely(!((X) OP (Y)))) { \ + printk(KERN_ERR "\n"); \ + printk(KERN_ERR "AFS: Assertion failed\n"); \ + printk(KERN_ERR "%lu " #OP " %lu is false\n", \ + (unsigned long)(X), (unsigned long)(Y)); \ + printk(KERN_ERR "0x%lx " #OP " 0x%lx is false\n", \ + (unsigned long)(X), (unsigned long)(Y)); \ + BUG(); \ + } \ +} while(0) + +#define ASSERTRANGE(L, OP1, N, OP2, H) \ +do { \ + if (unlikely(!((L) OP1 (N)) || !((N) OP2 (H)))) { \ + printk(KERN_ERR "\n"); \ + printk(KERN_ERR "AFS: Assertion failed\n"); \ + printk(KERN_ERR "%lu "#OP1" %lu "#OP2" %lu is false\n", \ + (unsigned long)(L), (unsigned long)(N), \ + (unsigned long)(H)); \ + printk(KERN_ERR "0x%lx "#OP1" 0x%lx "#OP2" 0x%lx is false\n", \ + (unsigned long)(L), (unsigned long)(N), \ + (unsigned long)(H)); \ + BUG(); \ + } \ +} while(0) + +#define ASSERTIF(C, X) \ +do { \ + if (unlikely((C) && !(X))) { \ + printk(KERN_ERR "\n"); \ + printk(KERN_ERR "AFS: Assertion failed\n"); \ + BUG(); \ + } \ +} while(0) + +#define ASSERTIFCMP(C, X, OP, Y) \ +do { \ + if (unlikely((C) && !((X) OP (Y)))) { \ + printk(KERN_ERR "\n"); \ + printk(KERN_ERR "AFS: Assertion failed\n"); \ + printk(KERN_ERR "%lu " #OP " %lu is false\n", \ + (unsigned long)(X), (unsigned long)(Y)); \ + printk(KERN_ERR "0x%lx " #OP " 0x%lx is false\n", \ + (unsigned long)(X), (unsigned long)(Y)); \ + BUG(); \ + } \ +} while(0) + +#else + +#define ASSERT(X) \ +do { \ +} while(0) + +#define ASSERTCMP(X, OP, Y) \ +do { \ +} while(0) + +#define ASSERTRANGE(L, OP1, N, OP2, H) \ +do { \ +} while(0) + +#define ASSERTIF(C, X) \ +do { \ +} while(0) + +#define ASSERTIFCMP(C, X, OP, Y) \ +do { \ +} while(0) + +#endif /* __KDEBUGALL */ diff --git a/fs/afs/main.c b/fs/afs/main.c new file mode 100644 index 000000000..179004b15 --- /dev/null +++ b/fs/afs/main.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* AFS client file system + * + * Copyright (C) 2002,5 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/init.h> +#include <linux/completion.h> +#include <linux/sched.h> +#include <linux/random.h> +#include <linux/proc_fs.h> +#define CREATE_TRACE_POINTS +#include "internal.h" + +MODULE_DESCRIPTION("AFS Client File System"); +MODULE_AUTHOR("Red Hat, Inc."); +MODULE_LICENSE("GPL"); + +unsigned afs_debug; +module_param_named(debug, afs_debug, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(debug, "AFS debugging mask"); + +static char *rootcell; + +module_param(rootcell, charp, 0); +MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list"); + +struct workqueue_struct *afs_wq; +static struct proc_dir_entry *afs_proc_symlink; + +#if defined(CONFIG_ALPHA) +const char afs_init_sysname[] = "alpha_linux26"; +#elif defined(CONFIG_X86_64) +const char afs_init_sysname[] = "amd64_linux26"; +#elif defined(CONFIG_ARM) +const char afs_init_sysname[] = "arm_linux26"; +#elif defined(CONFIG_ARM64) +const char afs_init_sysname[] = "aarch64_linux26"; +#elif defined(CONFIG_X86_32) +const char afs_init_sysname[] = "i386_linux26"; +#elif defined(CONFIG_IA64) +const char afs_init_sysname[] = "ia64_linux26"; +#elif defined(CONFIG_PPC64) +const char afs_init_sysname[] = "ppc64_linux26"; +#elif defined(CONFIG_PPC32) +const char afs_init_sysname[] = "ppc_linux26"; +#elif defined(CONFIG_S390) +#ifdef CONFIG_64BIT +const char afs_init_sysname[] = "s390x_linux26"; +#else +const char afs_init_sysname[] = "s390_linux26"; +#endif +#elif defined(CONFIG_SPARC64) +const char afs_init_sysname[] = "sparc64_linux26"; +#elif defined(CONFIG_SPARC32) +const char afs_init_sysname[] = "sparc_linux26"; +#else +const char afs_init_sysname[] = "unknown_linux26"; +#endif + +/* + * Initialise an AFS network namespace record. + */ +static int __net_init afs_net_init(struct net *net_ns) +{ + struct afs_sysnames *sysnames; + struct afs_net *net = afs_net(net_ns); + int ret; + + net->net = net_ns; + net->live = true; + generate_random_uuid((unsigned char *)&net->uuid); + + INIT_WORK(&net->charge_preallocation_work, afs_charge_preallocation); + mutex_init(&net->socket_mutex); + + net->cells = RB_ROOT; + init_rwsem(&net->cells_lock); + INIT_WORK(&net->cells_manager, afs_manage_cells); + timer_setup(&net->cells_timer, afs_cells_timer, 0); + + mutex_init(&net->cells_alias_lock); + mutex_init(&net->proc_cells_lock); + INIT_HLIST_HEAD(&net->proc_cells); + + seqlock_init(&net->fs_lock); + net->fs_servers = RB_ROOT; + INIT_LIST_HEAD(&net->fs_probe_fast); + INIT_LIST_HEAD(&net->fs_probe_slow); + INIT_HLIST_HEAD(&net->fs_proc); + + INIT_HLIST_HEAD(&net->fs_addresses4); + INIT_HLIST_HEAD(&net->fs_addresses6); + seqlock_init(&net->fs_addr_lock); + + INIT_WORK(&net->fs_manager, afs_manage_servers); + timer_setup(&net->fs_timer, afs_servers_timer, 0); + INIT_WORK(&net->fs_prober, afs_fs_probe_dispatcher); + timer_setup(&net->fs_probe_timer, afs_fs_probe_timer, 0); + atomic_set(&net->servers_outstanding, 1); + + ret = -ENOMEM; + sysnames = kzalloc(sizeof(*sysnames), GFP_KERNEL); + if (!sysnames) + goto error_sysnames; + sysnames->subs[0] = (char *)&afs_init_sysname; + sysnames->nr = 1; + refcount_set(&sysnames->usage, 1); + net->sysnames = sysnames; + rwlock_init(&net->sysnames_lock); + + /* Register the /proc stuff */ + ret = afs_proc_init(net); + if (ret < 0) + goto error_proc; + + /* Initialise the cell DB */ + ret = afs_cell_init(net, rootcell); + if (ret < 0) + goto error_cell_init; + + /* Create the RxRPC transport */ + ret = afs_open_socket(net); + if (ret < 0) + goto error_open_socket; + + return 0; + +error_open_socket: + net->live = false; + afs_fs_probe_cleanup(net); + afs_cell_purge(net); + afs_purge_servers(net); +error_cell_init: + net->live = false; + afs_proc_cleanup(net); +error_proc: + afs_put_sysnames(net->sysnames); +error_sysnames: + net->live = false; + return ret; +} + +/* + * Clean up and destroy an AFS network namespace record. + */ +static void __net_exit afs_net_exit(struct net *net_ns) +{ + struct afs_net *net = afs_net(net_ns); + + net->live = false; + afs_fs_probe_cleanup(net); + afs_cell_purge(net); + afs_purge_servers(net); + afs_close_socket(net); + afs_proc_cleanup(net); + afs_put_sysnames(net->sysnames); +} + +static struct pernet_operations afs_net_ops = { + .init = afs_net_init, + .exit = afs_net_exit, + .id = &afs_net_id, + .size = sizeof(struct afs_net), +}; + +/* + * initialise the AFS client FS module + */ +static int __init afs_init(void) +{ + int ret = -ENOMEM; + + printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 registering.\n"); + + afs_wq = alloc_workqueue("afs", 0, 0); + if (!afs_wq) + goto error_afs_wq; + afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM, 0); + if (!afs_async_calls) + goto error_async; + afs_lock_manager = alloc_workqueue("kafs_lockd", WQ_MEM_RECLAIM, 0); + if (!afs_lock_manager) + goto error_lockmgr; + +#ifdef CONFIG_AFS_FSCACHE + /* we want to be able to cache */ + ret = fscache_register_netfs(&afs_cache_netfs); + if (ret < 0) + goto error_cache; +#endif + + ret = register_pernet_device(&afs_net_ops); + if (ret < 0) + goto error_net; + + /* register the filesystems */ + ret = afs_fs_init(); + if (ret < 0) + goto error_fs; + + afs_proc_symlink = proc_symlink("fs/afs", NULL, "../self/net/afs"); + if (!afs_proc_symlink) { + ret = -ENOMEM; + goto error_proc; + } + + return ret; + +error_proc: + afs_fs_exit(); +error_fs: + unregister_pernet_device(&afs_net_ops); +error_net: +#ifdef CONFIG_AFS_FSCACHE + fscache_unregister_netfs(&afs_cache_netfs); +error_cache: +#endif + destroy_workqueue(afs_lock_manager); +error_lockmgr: + destroy_workqueue(afs_async_calls); +error_async: + destroy_workqueue(afs_wq); +error_afs_wq: + rcu_barrier(); + printk(KERN_ERR "kAFS: failed to register: %d\n", ret); + return ret; +} + +/* XXX late_initcall is kludgy, but the only alternative seems to create + * a transport upon the first mount, which is worse. Or is it? + */ +late_initcall(afs_init); /* must be called after net/ to create socket */ + +/* + * clean up on module removal + */ +static void __exit afs_exit(void) +{ + printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 unregistering.\n"); + + proc_remove(afs_proc_symlink); + afs_fs_exit(); + unregister_pernet_device(&afs_net_ops); +#ifdef CONFIG_AFS_FSCACHE + fscache_unregister_netfs(&afs_cache_netfs); +#endif + destroy_workqueue(afs_lock_manager); + destroy_workqueue(afs_async_calls); + destroy_workqueue(afs_wq); + afs_clean_up_permit_cache(); + rcu_barrier(); +} + +module_exit(afs_exit); diff --git a/fs/afs/misc.c b/fs/afs/misc.c new file mode 100644 index 000000000..f1dc21629 --- /dev/null +++ b/fs/afs/misc.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* miscellaneous bits + * + * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/errno.h> +#include "internal.h" +#include "afs_fs.h" +#include "protocol_uae.h" + +/* + * convert an AFS abort code to a Linux error number + */ +int afs_abort_to_error(u32 abort_code) +{ + switch (abort_code) { + /* Low errno codes inserted into abort namespace */ + case 13: return -EACCES; + case 27: return -EFBIG; + case 30: return -EROFS; + + /* VICE "special error" codes; 101 - 111 */ + case VSALVAGE: return -EIO; + case VNOVNODE: return -ENOENT; + case VNOVOL: return -ENOMEDIUM; + case VVOLEXISTS: return -EEXIST; + case VNOSERVICE: return -EIO; + case VOFFLINE: return -ENOENT; + case VONLINE: return -EEXIST; + case VDISKFULL: return -ENOSPC; + case VOVERQUOTA: return -EDQUOT; + case VBUSY: return -EBUSY; + case VMOVED: return -ENXIO; + + /* Volume Location server errors */ + case AFSVL_IDEXIST: return -EEXIST; + case AFSVL_IO: return -EREMOTEIO; + case AFSVL_NAMEEXIST: return -EEXIST; + case AFSVL_CREATEFAIL: return -EREMOTEIO; + case AFSVL_NOENT: return -ENOMEDIUM; + case AFSVL_EMPTY: return -ENOMEDIUM; + case AFSVL_ENTDELETED: return -ENOMEDIUM; + case AFSVL_BADNAME: return -EINVAL; + case AFSVL_BADINDEX: return -EINVAL; + case AFSVL_BADVOLTYPE: return -EINVAL; + case AFSVL_BADSERVER: return -EINVAL; + case AFSVL_BADPARTITION: return -EINVAL; + case AFSVL_REPSFULL: return -EFBIG; + case AFSVL_NOREPSERVER: return -ENOENT; + case AFSVL_DUPREPSERVER: return -EEXIST; + case AFSVL_RWNOTFOUND: return -ENOENT; + case AFSVL_BADREFCOUNT: return -EINVAL; + case AFSVL_SIZEEXCEEDED: return -EINVAL; + case AFSVL_BADENTRY: return -EINVAL; + case AFSVL_BADVOLIDBUMP: return -EINVAL; + case AFSVL_IDALREADYHASHED: return -EINVAL; + case AFSVL_ENTRYLOCKED: return -EBUSY; + case AFSVL_BADVOLOPER: return -EBADRQC; + case AFSVL_BADRELLOCKTYPE: return -EINVAL; + case AFSVL_RERELEASE: return -EREMOTEIO; + case AFSVL_BADSERVERFLAG: return -EINVAL; + case AFSVL_PERM: return -EACCES; + case AFSVL_NOMEM: return -EREMOTEIO; + + /* Unified AFS error table */ + case UAEPERM: return -EPERM; + case UAENOENT: return -ENOENT; + case UAEAGAIN: return -EAGAIN; + case UAEACCES: return -EACCES; + case UAEBUSY: return -EBUSY; + case UAEEXIST: return -EEXIST; + case UAENOTDIR: return -ENOTDIR; + case UAEISDIR: return -EISDIR; + case UAEFBIG: return -EFBIG; + case UAENOSPC: return -ENOSPC; + case UAEROFS: return -EROFS; + case UAEMLINK: return -EMLINK; + case UAEDEADLK: return -EDEADLK; + case UAENAMETOOLONG: return -ENAMETOOLONG; + case UAENOLCK: return -ENOLCK; + case UAENOTEMPTY: return -ENOTEMPTY; + case UAELOOP: return -ELOOP; + case UAEOVERFLOW: return -EOVERFLOW; + case UAENOMEDIUM: return -ENOMEDIUM; + case UAEDQUOT: return -EDQUOT; + + /* RXKAD abort codes; from include/rxrpc/packet.h. ET "RXK" == 0x1260B00 */ + case RXKADINCONSISTENCY: return -EPROTO; + case RXKADPACKETSHORT: return -EPROTO; + case RXKADLEVELFAIL: return -EKEYREJECTED; + case RXKADTICKETLEN: return -EKEYREJECTED; + case RXKADOUTOFSEQUENCE: return -EPROTO; + case RXKADNOAUTH: return -EKEYREJECTED; + case RXKADBADKEY: return -EKEYREJECTED; + case RXKADBADTICKET: return -EKEYREJECTED; + case RXKADUNKNOWNKEY: return -EKEYREJECTED; + case RXKADEXPIRED: return -EKEYEXPIRED; + case RXKADSEALEDINCON: return -EKEYREJECTED; + case RXKADDATALEN: return -EKEYREJECTED; + case RXKADILLEGALLEVEL: return -EKEYREJECTED; + + case RXGEN_OPCODE: return -ENOTSUPP; + + default: return -EREMOTEIO; + } +} + +/* + * Select the error to report from a set of errors. + */ +void afs_prioritise_error(struct afs_error *e, int error, u32 abort_code) +{ + switch (error) { + case 0: + return; + default: + if (e->error == -ETIMEDOUT || + e->error == -ETIME) + return; + fallthrough; + case -ETIMEDOUT: + case -ETIME: + if (e->error == -ENOMEM || + e->error == -ENONET) + return; + fallthrough; + case -ENOMEM: + case -ENONET: + if (e->error == -ERFKILL) + return; + fallthrough; + case -ERFKILL: + if (e->error == -EADDRNOTAVAIL) + return; + fallthrough; + case -EADDRNOTAVAIL: + if (e->error == -ENETUNREACH) + return; + fallthrough; + case -ENETUNREACH: + if (e->error == -EHOSTUNREACH) + return; + fallthrough; + case -EHOSTUNREACH: + if (e->error == -EHOSTDOWN) + return; + fallthrough; + case -EHOSTDOWN: + if (e->error == -ECONNREFUSED) + return; + fallthrough; + case -ECONNREFUSED: + if (e->error == -ECONNRESET) + return; + fallthrough; + case -ECONNRESET: /* Responded, but call expired. */ + if (e->responded) + return; + e->error = error; + return; + + case -ECONNABORTED: + e->responded = true; + e->error = afs_abort_to_error(abort_code); + return; + } +} diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c new file mode 100644 index 000000000..bbb2c210d --- /dev/null +++ b/fs/afs/mntpt.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* mountpoint management + * + * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/pagemap.h> +#include <linux/mount.h> +#include <linux/namei.h> +#include <linux/gfp.h> +#include <linux/fs_context.h> +#include "internal.h" + + +static struct dentry *afs_mntpt_lookup(struct inode *dir, + struct dentry *dentry, + unsigned int flags); +static int afs_mntpt_open(struct inode *inode, struct file *file); +static void afs_mntpt_expiry_timed_out(struct work_struct *work); + +const struct file_operations afs_mntpt_file_operations = { + .open = afs_mntpt_open, + .llseek = noop_llseek, +}; + +const struct inode_operations afs_mntpt_inode_operations = { + .lookup = afs_mntpt_lookup, + .readlink = page_readlink, + .getattr = afs_getattr, +}; + +const struct inode_operations afs_autocell_inode_operations = { + .getattr = afs_getattr, +}; + +static LIST_HEAD(afs_vfsmounts); +static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out); + +static unsigned long afs_mntpt_expiry_timeout = 10 * 60; + +static const char afs_root_volume[] = "root.cell"; + +/* + * no valid lookup procedure on this sort of dir + */ +static struct dentry *afs_mntpt_lookup(struct inode *dir, + struct dentry *dentry, + unsigned int flags) +{ + _enter("%p,%p{%pd2}", dir, dentry, dentry); + return ERR_PTR(-EREMOTE); +} + +/* + * no valid open procedure on this sort of dir + */ +static int afs_mntpt_open(struct inode *inode, struct file *file) +{ + _enter("%p,%p{%pD2}", inode, file, file); + return -EREMOTE; +} + +/* + * Set the parameters for the proposed superblock. + */ +static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt) +{ + struct afs_fs_context *ctx = fc->fs_private; + struct afs_super_info *src_as = AFS_FS_S(mntpt->d_sb); + struct afs_vnode *vnode = AFS_FS_I(d_inode(mntpt)); + struct afs_cell *cell; + const char *p; + int ret; + + if (fc->net_ns != src_as->net_ns) { + put_net(fc->net_ns); + fc->net_ns = get_net(src_as->net_ns); + } + + if (src_as->volume && src_as->volume->type == AFSVL_RWVOL) { + ctx->type = AFSVL_RWVOL; + ctx->force = true; + } + if (ctx->cell) { + afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_mntpt); + ctx->cell = NULL; + } + if (test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags)) { + /* if the directory is a pseudo directory, use the d_name */ + unsigned size = mntpt->d_name.len; + + if (size < 2) + return -ENOENT; + + p = mntpt->d_name.name; + if (mntpt->d_name.name[0] == '.') { + size--; + p++; + ctx->type = AFSVL_RWVOL; + ctx->force = true; + } + if (size > AFS_MAXCELLNAME) + return -ENAMETOOLONG; + + cell = afs_lookup_cell(ctx->net, p, size, NULL, false); + if (IS_ERR(cell)) { + pr_err("kAFS: unable to lookup cell '%pd'\n", mntpt); + return PTR_ERR(cell); + } + ctx->cell = cell; + + ctx->volname = afs_root_volume; + ctx->volnamesz = sizeof(afs_root_volume) - 1; + } else { + /* read the contents of the AFS special symlink */ + struct page *page; + loff_t size = i_size_read(d_inode(mntpt)); + char *buf; + + if (src_as->cell) + ctx->cell = afs_use_cell(src_as->cell, afs_cell_trace_use_mntpt); + + if (size < 2 || size > PAGE_SIZE - 1) + return -EINVAL; + + page = read_mapping_page(d_inode(mntpt)->i_mapping, 0, NULL); + if (IS_ERR(page)) + return PTR_ERR(page); + + if (PageError(page)) { + ret = afs_bad(AFS_FS_I(d_inode(mntpt)), afs_file_error_mntpt); + put_page(page); + return ret; + } + + buf = kmap(page); + ret = -EINVAL; + if (buf[size - 1] == '.') + ret = vfs_parse_fs_string(fc, "source", buf, size - 1); + kunmap(page); + put_page(page); + if (ret < 0) + return ret; + } + + return 0; +} + +/* + * create a vfsmount to be automounted + */ +static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) +{ + struct fs_context *fc; + struct vfsmount *mnt; + int ret; + + BUG_ON(!d_inode(mntpt)); + + fc = fs_context_for_submount(&afs_fs_type, mntpt); + if (IS_ERR(fc)) + return ERR_CAST(fc); + + ret = afs_mntpt_set_params(fc, mntpt); + if (!ret) + mnt = fc_mount(fc); + else + mnt = ERR_PTR(ret); + + put_fs_context(fc); + return mnt; +} + +/* + * handle an automount point + */ +struct vfsmount *afs_d_automount(struct path *path) +{ + struct vfsmount *newmnt; + + _enter("{%pd}", path->dentry); + + newmnt = afs_mntpt_do_automount(path->dentry); + if (IS_ERR(newmnt)) + return newmnt; + + mntget(newmnt); /* prevent immediate expiration */ + mnt_set_expiry(newmnt, &afs_vfsmounts); + queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer, + afs_mntpt_expiry_timeout * HZ); + _leave(" = %p", newmnt); + return newmnt; +} + +/* + * handle mountpoint expiry timer going off + */ +static void afs_mntpt_expiry_timed_out(struct work_struct *work) +{ + _enter(""); + + if (!list_empty(&afs_vfsmounts)) { + mark_mounts_for_expiry(&afs_vfsmounts); + queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer, + afs_mntpt_expiry_timeout * HZ); + } + + _leave(""); +} + +/* + * kill the AFS mountpoint timer if it's still running + */ +void afs_mntpt_kill_timer(void) +{ + _enter(""); + + ASSERT(list_empty(&afs_vfsmounts)); + cancel_delayed_work_sync(&afs_mntpt_expiry_timer); +} diff --git a/fs/afs/proc.c b/fs/afs/proc.c new file mode 100644 index 000000000..065a28bfa --- /dev/null +++ b/fs/afs/proc.c @@ -0,0 +1,705 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* /proc interface for AFS + * + * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/sched.h> +#include <linux/uaccess.h> +#include "internal.h" + +struct afs_vl_seq_net_private { + struct seq_net_private seq; /* Must be first */ + struct afs_vlserver_list *vllist; +}; + +static inline struct afs_net *afs_seq2net(struct seq_file *m) +{ + return afs_net(seq_file_net(m)); +} + +static inline struct afs_net *afs_seq2net_single(struct seq_file *m) +{ + return afs_net(seq_file_single_net(m)); +} + +/* + * Display the list of cells known to the namespace. + */ +static int afs_proc_cells_show(struct seq_file *m, void *v) +{ + struct afs_vlserver_list *vllist; + struct afs_cell *cell; + + if (v == SEQ_START_TOKEN) { + /* display header on line 1 */ + seq_puts(m, "USE ACT TTL SV ST NAME\n"); + return 0; + } + + cell = list_entry(v, struct afs_cell, proc_link); + vllist = rcu_dereference(cell->vl_servers); + + /* display one cell per line on subsequent lines */ + seq_printf(m, "%3u %3u %6lld %2u %2u %s\n", + atomic_read(&cell->ref), + atomic_read(&cell->active), + cell->dns_expiry - ktime_get_real_seconds(), + vllist ? vllist->nr_servers : 0, + cell->state, + cell->name); + return 0; +} + +static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos) + __acquires(rcu) +{ + rcu_read_lock(); + return seq_hlist_start_head_rcu(&afs_seq2net(m)->proc_cells, *_pos); +} + +static void *afs_proc_cells_next(struct seq_file *m, void *v, loff_t *pos) +{ + return seq_hlist_next_rcu(v, &afs_seq2net(m)->proc_cells, pos); +} + +static void afs_proc_cells_stop(struct seq_file *m, void *v) + __releases(rcu) +{ + rcu_read_unlock(); +} + +static const struct seq_operations afs_proc_cells_ops = { + .start = afs_proc_cells_start, + .next = afs_proc_cells_next, + .stop = afs_proc_cells_stop, + .show = afs_proc_cells_show, +}; + +/* + * handle writes to /proc/fs/afs/cells + * - to add cells: echo "add <cellname> <IP>[:<IP>][:<IP>]" + */ +static int afs_proc_cells_write(struct file *file, char *buf, size_t size) +{ + struct seq_file *m = file->private_data; + struct afs_net *net = afs_seq2net(m); + char *name, *args; + int ret; + + /* trim to first NL */ + name = memchr(buf, '\n', size); + if (name) + *name = 0; + + /* split into command, name and argslist */ + name = strchr(buf, ' '); + if (!name) + goto inval; + do { + *name++ = 0; + } while(*name == ' '); + if (!*name) + goto inval; + + args = strchr(name, ' '); + if (args) { + do { + *args++ = 0; + } while(*args == ' '); + if (!*args) + goto inval; + } + + /* determine command to perform */ + _debug("cmd=%s name=%s args=%s", buf, name, args); + + if (strcmp(buf, "add") == 0) { + struct afs_cell *cell; + + cell = afs_lookup_cell(net, name, strlen(name), args, true); + if (IS_ERR(cell)) { + ret = PTR_ERR(cell); + goto done; + } + + if (test_and_set_bit(AFS_CELL_FL_NO_GC, &cell->flags)) + afs_unuse_cell(net, cell, afs_cell_trace_unuse_no_pin); + } else { + goto inval; + } + + ret = 0; + +done: + _leave(" = %d", ret); + return ret; + +inval: + ret = -EINVAL; + printk("kAFS: Invalid Command on /proc/fs/afs/cells file\n"); + goto done; +} + +/* + * Display the name of the current workstation cell. + */ +static int afs_proc_rootcell_show(struct seq_file *m, void *v) +{ + struct afs_cell *cell; + struct afs_net *net; + + net = afs_seq2net_single(m); + down_read(&net->cells_lock); + cell = net->ws_cell; + if (cell) + seq_printf(m, "%s\n", cell->name); + up_read(&net->cells_lock); + return 0; +} + +/* + * Set the current workstation cell and optionally supply its list of volume + * location servers. + * + * echo "cell.name:192.168.231.14" >/proc/fs/afs/rootcell + */ +static int afs_proc_rootcell_write(struct file *file, char *buf, size_t size) +{ + struct seq_file *m = file->private_data; + struct afs_net *net = afs_seq2net_single(m); + char *s; + int ret; + + ret = -EINVAL; + if (buf[0] == '.') + goto out; + if (memchr(buf, '/', size)) + goto out; + + /* trim to first NL */ + s = memchr(buf, '\n', size); + if (s) + *s = 0; + + /* determine command to perform */ + _debug("rootcell=%s", buf); + + ret = afs_cell_init(net, buf); + +out: + _leave(" = %d", ret); + return ret; +} + +static const char afs_vol_types[3][3] = { + [AFSVL_RWVOL] = "RW", + [AFSVL_ROVOL] = "RO", + [AFSVL_BACKVOL] = "BK", +}; + +/* + * Display the list of volumes known to a cell. + */ +static int afs_proc_cell_volumes_show(struct seq_file *m, void *v) +{ + struct afs_volume *vol = hlist_entry(v, struct afs_volume, proc_link); + + /* Display header on line 1 */ + if (v == SEQ_START_TOKEN) { + seq_puts(m, "USE VID TY NAME\n"); + return 0; + } + + seq_printf(m, "%3d %08llx %s %s\n", + atomic_read(&vol->usage), vol->vid, + afs_vol_types[vol->type], + vol->name); + + return 0; +} + +static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos) + __acquires(cell->proc_lock) +{ + struct afs_cell *cell = PDE_DATA(file_inode(m->file)); + + rcu_read_lock(); + return seq_hlist_start_head_rcu(&cell->proc_volumes, *_pos); +} + +static void *afs_proc_cell_volumes_next(struct seq_file *m, void *v, + loff_t *_pos) +{ + struct afs_cell *cell = PDE_DATA(file_inode(m->file)); + + return seq_hlist_next_rcu(v, &cell->proc_volumes, _pos); +} + +static void afs_proc_cell_volumes_stop(struct seq_file *m, void *v) + __releases(cell->proc_lock) +{ + rcu_read_unlock(); +} + +static const struct seq_operations afs_proc_cell_volumes_ops = { + .start = afs_proc_cell_volumes_start, + .next = afs_proc_cell_volumes_next, + .stop = afs_proc_cell_volumes_stop, + .show = afs_proc_cell_volumes_show, +}; + +static const char *const dns_record_sources[NR__dns_record_source + 1] = { + [DNS_RECORD_UNAVAILABLE] = "unav", + [DNS_RECORD_FROM_CONFIG] = "cfg", + [DNS_RECORD_FROM_DNS_A] = "A", + [DNS_RECORD_FROM_DNS_AFSDB] = "AFSDB", + [DNS_RECORD_FROM_DNS_SRV] = "SRV", + [DNS_RECORD_FROM_NSS] = "nss", + [NR__dns_record_source] = "[weird]" +}; + +static const char *const dns_lookup_statuses[NR__dns_lookup_status + 1] = { + [DNS_LOOKUP_NOT_DONE] = "no-lookup", + [DNS_LOOKUP_GOOD] = "good", + [DNS_LOOKUP_GOOD_WITH_BAD] = "good/bad", + [DNS_LOOKUP_BAD] = "bad", + [DNS_LOOKUP_GOT_NOT_FOUND] = "not-found", + [DNS_LOOKUP_GOT_LOCAL_FAILURE] = "local-failure", + [DNS_LOOKUP_GOT_TEMP_FAILURE] = "temp-failure", + [DNS_LOOKUP_GOT_NS_FAILURE] = "ns-failure", + [NR__dns_lookup_status] = "[weird]" +}; + +/* + * Display the list of Volume Location servers we're using for a cell. + */ +static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v) +{ + const struct afs_vl_seq_net_private *priv = m->private; + const struct afs_vlserver_list *vllist = priv->vllist; + const struct afs_vlserver_entry *entry; + const struct afs_vlserver *vlserver; + const struct afs_addr_list *alist; + int i; + + if (v == SEQ_START_TOKEN) { + seq_printf(m, "# source %s, status %s\n", + dns_record_sources[vllist ? vllist->source : 0], + dns_lookup_statuses[vllist ? vllist->status : 0]); + return 0; + } + + entry = v; + vlserver = entry->server; + alist = rcu_dereference(vlserver->addresses); + + seq_printf(m, "%s [p=%hu w=%hu s=%s,%s]:\n", + vlserver->name, entry->priority, entry->weight, + dns_record_sources[alist ? alist->source : entry->source], + dns_lookup_statuses[alist ? alist->status : entry->status]); + if (alist) { + for (i = 0; i < alist->nr_addrs; i++) + seq_printf(m, " %c %pISpc\n", + alist->preferred == i ? '>' : '-', + &alist->addrs[i].transport); + } + seq_printf(m, " info: fl=%lx rtt=%d\n", vlserver->flags, vlserver->rtt); + seq_printf(m, " probe: fl=%x e=%d ac=%d out=%d\n", + vlserver->probe.flags, vlserver->probe.error, + vlserver->probe.abort_code, + atomic_read(&vlserver->probe_outstanding)); + return 0; +} + +static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos) + __acquires(rcu) +{ + struct afs_vl_seq_net_private *priv = m->private; + struct afs_vlserver_list *vllist; + struct afs_cell *cell = PDE_DATA(file_inode(m->file)); + loff_t pos = *_pos; + + rcu_read_lock(); + + vllist = rcu_dereference(cell->vl_servers); + priv->vllist = vllist; + + if (pos < 0) + *_pos = pos = 0; + if (pos == 0) + return SEQ_START_TOKEN; + + if (pos - 1 >= vllist->nr_servers) + return NULL; + + return &vllist->servers[pos - 1]; +} + +static void *afs_proc_cell_vlservers_next(struct seq_file *m, void *v, + loff_t *_pos) +{ + struct afs_vl_seq_net_private *priv = m->private; + struct afs_vlserver_list *vllist = priv->vllist; + loff_t pos; + + pos = *_pos; + pos++; + *_pos = pos; + if (!vllist || pos - 1 >= vllist->nr_servers) + return NULL; + + return &vllist->servers[pos - 1]; +} + +static void afs_proc_cell_vlservers_stop(struct seq_file *m, void *v) + __releases(rcu) +{ + rcu_read_unlock(); +} + +static const struct seq_operations afs_proc_cell_vlservers_ops = { + .start = afs_proc_cell_vlservers_start, + .next = afs_proc_cell_vlservers_next, + .stop = afs_proc_cell_vlservers_stop, + .show = afs_proc_cell_vlservers_show, +}; + +/* + * Display the list of fileservers we're using within a namespace. + */ +static int afs_proc_servers_show(struct seq_file *m, void *v) +{ + struct afs_server *server; + struct afs_addr_list *alist; + int i; + + if (v == SEQ_START_TOKEN) { + seq_puts(m, "UUID REF ACT\n"); + return 0; + } + + server = list_entry(v, struct afs_server, proc_link); + alist = rcu_dereference(server->addresses); + seq_printf(m, "%pU %3d %3d\n", + &server->uuid, + atomic_read(&server->ref), + atomic_read(&server->active)); + seq_printf(m, " - info: fl=%lx rtt=%u brk=%x\n", + server->flags, server->rtt, server->cb_s_break); + seq_printf(m, " - probe: last=%d out=%d\n", + (int)(jiffies - server->probed_at) / HZ, + atomic_read(&server->probe_outstanding)); + seq_printf(m, " - ALIST v=%u rsp=%lx f=%lx\n", + alist->version, alist->responded, alist->failed); + for (i = 0; i < alist->nr_addrs; i++) + seq_printf(m, " [%x] %pISpc%s\n", + i, &alist->addrs[i].transport, + alist->preferred == i ? "*" : ""); + return 0; +} + +static void *afs_proc_servers_start(struct seq_file *m, loff_t *_pos) + __acquires(rcu) +{ + rcu_read_lock(); + return seq_hlist_start_head_rcu(&afs_seq2net(m)->fs_proc, *_pos); +} + +static void *afs_proc_servers_next(struct seq_file *m, void *v, loff_t *_pos) +{ + return seq_hlist_next_rcu(v, &afs_seq2net(m)->fs_proc, _pos); +} + +static void afs_proc_servers_stop(struct seq_file *m, void *v) + __releases(rcu) +{ + rcu_read_unlock(); +} + +static const struct seq_operations afs_proc_servers_ops = { + .start = afs_proc_servers_start, + .next = afs_proc_servers_next, + .stop = afs_proc_servers_stop, + .show = afs_proc_servers_show, +}; + +/* + * Display the list of strings that may be substituted for the @sys pathname + * macro. + */ +static int afs_proc_sysname_show(struct seq_file *m, void *v) +{ + struct afs_net *net = afs_seq2net(m); + struct afs_sysnames *sysnames = net->sysnames; + unsigned int i = (unsigned long)v - 1; + + if (i < sysnames->nr) + seq_printf(m, "%s\n", sysnames->subs[i]); + return 0; +} + +static void *afs_proc_sysname_start(struct seq_file *m, loff_t *pos) + __acquires(&net->sysnames_lock) +{ + struct afs_net *net = afs_seq2net(m); + struct afs_sysnames *names; + + read_lock(&net->sysnames_lock); + + names = net->sysnames; + if (*pos >= names->nr) + return NULL; + return (void *)(unsigned long)(*pos + 1); +} + +static void *afs_proc_sysname_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct afs_net *net = afs_seq2net(m); + struct afs_sysnames *names = net->sysnames; + + *pos += 1; + if (*pos >= names->nr) + return NULL; + return (void *)(unsigned long)(*pos + 1); +} + +static void afs_proc_sysname_stop(struct seq_file *m, void *v) + __releases(&net->sysnames_lock) +{ + struct afs_net *net = afs_seq2net(m); + + read_unlock(&net->sysnames_lock); +} + +static const struct seq_operations afs_proc_sysname_ops = { + .start = afs_proc_sysname_start, + .next = afs_proc_sysname_next, + .stop = afs_proc_sysname_stop, + .show = afs_proc_sysname_show, +}; + +/* + * Allow the @sys substitution to be configured. + */ +static int afs_proc_sysname_write(struct file *file, char *buf, size_t size) +{ + struct afs_sysnames *sysnames, *kill; + struct seq_file *m = file->private_data; + struct afs_net *net = afs_seq2net(m); + char *s, *p, *sub; + int ret, len; + + sysnames = kzalloc(sizeof(*sysnames), GFP_KERNEL); + if (!sysnames) + return -ENOMEM; + refcount_set(&sysnames->usage, 1); + kill = sysnames; + + p = buf; + while ((s = strsep(&p, " \t\n"))) { + len = strlen(s); + if (len == 0) + continue; + ret = -ENAMETOOLONG; + if (len >= AFSNAMEMAX) + goto error; + + if (len >= 4 && + s[len - 4] == '@' && + s[len - 3] == 's' && + s[len - 2] == 'y' && + s[len - 1] == 's') + /* Protect against recursion */ + goto invalid; + + if (s[0] == '.' && + (len < 2 || (len == 2 && s[1] == '.'))) + goto invalid; + + if (memchr(s, '/', len)) + goto invalid; + + ret = -EFBIG; + if (sysnames->nr >= AFS_NR_SYSNAME) + goto out; + + if (strcmp(s, afs_init_sysname) == 0) { + sub = (char *)afs_init_sysname; + } else { + ret = -ENOMEM; + sub = kmemdup(s, len + 1, GFP_KERNEL); + if (!sub) + goto out; + } + + sysnames->subs[sysnames->nr] = sub; + sysnames->nr++; + } + + if (sysnames->nr == 0) { + sysnames->subs[0] = sysnames->blank; + sysnames->nr++; + } + + write_lock(&net->sysnames_lock); + kill = net->sysnames; + net->sysnames = sysnames; + write_unlock(&net->sysnames_lock); + ret = 0; +out: + afs_put_sysnames(kill); + return ret; + +invalid: + ret = -EINVAL; +error: + goto out; +} + +void afs_put_sysnames(struct afs_sysnames *sysnames) +{ + int i; + + if (sysnames && refcount_dec_and_test(&sysnames->usage)) { + for (i = 0; i < sysnames->nr; i++) + if (sysnames->subs[i] != afs_init_sysname && + sysnames->subs[i] != sysnames->blank) + kfree(sysnames->subs[i]); + kfree(sysnames); + } +} + +/* + * Display general per-net namespace statistics + */ +static int afs_proc_stats_show(struct seq_file *m, void *v) +{ + struct afs_net *net = afs_seq2net_single(m); + + seq_puts(m, "kAFS statistics\n"); + + seq_printf(m, "dir-mgmt: look=%u reval=%u inval=%u relpg=%u\n", + atomic_read(&net->n_lookup), + atomic_read(&net->n_reval), + atomic_read(&net->n_inval), + atomic_read(&net->n_relpg)); + + seq_printf(m, "dir-data: rdpg=%u\n", + atomic_read(&net->n_read_dir)); + + seq_printf(m, "dir-edit: cr=%u rm=%u\n", + atomic_read(&net->n_dir_cr), + atomic_read(&net->n_dir_rm)); + + seq_printf(m, "file-rd : n=%u nb=%lu\n", + atomic_read(&net->n_fetches), + atomic_long_read(&net->n_fetch_bytes)); + seq_printf(m, "file-wr : n=%u nb=%lu\n", + atomic_read(&net->n_stores), + atomic_long_read(&net->n_store_bytes)); + return 0; +} + +/* + * initialise /proc/fs/afs/<cell>/ + */ +int afs_proc_cell_setup(struct afs_cell *cell) +{ + struct proc_dir_entry *dir; + struct afs_net *net = cell->net; + + _enter("%p{%s},%p", cell, cell->name, net->proc_afs); + + dir = proc_net_mkdir(net->net, cell->name, net->proc_afs); + if (!dir) + goto error_dir; + + if (!proc_create_net_data("vlservers", 0444, dir, + &afs_proc_cell_vlservers_ops, + sizeof(struct afs_vl_seq_net_private), + cell) || + !proc_create_net_data("volumes", 0444, dir, + &afs_proc_cell_volumes_ops, + sizeof(struct seq_net_private), + cell)) + goto error_tree; + + _leave(" = 0"); + return 0; + +error_tree: + remove_proc_subtree(cell->name, net->proc_afs); +error_dir: + _leave(" = -ENOMEM"); + return -ENOMEM; +} + +/* + * remove /proc/fs/afs/<cell>/ + */ +void afs_proc_cell_remove(struct afs_cell *cell) +{ + struct afs_net *net = cell->net; + + _enter(""); + remove_proc_subtree(cell->name, net->proc_afs); + _leave(""); +} + +/* + * initialise the /proc/fs/afs/ directory + */ +int afs_proc_init(struct afs_net *net) +{ + struct proc_dir_entry *p; + + _enter(""); + + p = proc_net_mkdir(net->net, "afs", net->net->proc_net); + if (!p) + goto error_dir; + + if (!proc_create_net_data_write("cells", 0644, p, + &afs_proc_cells_ops, + afs_proc_cells_write, + sizeof(struct seq_net_private), + NULL) || + !proc_create_net_single_write("rootcell", 0644, p, + afs_proc_rootcell_show, + afs_proc_rootcell_write, + NULL) || + !proc_create_net("servers", 0444, p, &afs_proc_servers_ops, + sizeof(struct seq_net_private)) || + !proc_create_net_single("stats", 0444, p, afs_proc_stats_show, NULL) || + !proc_create_net_data_write("sysname", 0644, p, + &afs_proc_sysname_ops, + afs_proc_sysname_write, + sizeof(struct seq_net_private), + NULL)) + goto error_tree; + + net->proc_afs = p; + _leave(" = 0"); + return 0; + +error_tree: + proc_remove(p); +error_dir: + _leave(" = -ENOMEM"); + return -ENOMEM; +} + +/* + * clean up the /proc/fs/afs/ directory + */ +void afs_proc_cleanup(struct afs_net *net) +{ + proc_remove(net->proc_afs); + net->proc_afs = NULL; +} diff --git a/fs/afs/protocol_uae.h b/fs/afs/protocol_uae.h new file mode 100644 index 000000000..1b3d1060b --- /dev/null +++ b/fs/afs/protocol_uae.h @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Universal AFS Error codes (UAE). + * + * Copyright (C) 2003, Daria Phoebe Brashear + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + */ + +enum { + UAEPERM = 0x2f6df00, /* Operation not permitted */ + UAENOENT = 0x2f6df01, /* No such file or directory */ + UAESRCH = 0x2f6df02, /* No such process */ + UAEINTR = 0x2f6df03, /* Interrupted system call */ + UAEIO = 0x2f6df04, /* I/O error */ + UAENXIO = 0x2f6df05, /* No such device or address */ + UAE2BIG = 0x2f6df06, /* Arg list too long */ + UAENOEXEC = 0x2f6df07, /* Exec format error */ + UAEBADF = 0x2f6df08, /* Bad file number */ + UAECHILD = 0x2f6df09, /* No child processes */ + UAEAGAIN = 0x2f6df0a, /* Try again */ + UAENOMEM = 0x2f6df0b, /* Out of memory */ + UAEACCES = 0x2f6df0c, /* Permission denied */ + UAEFAULT = 0x2f6df0d, /* Bad address */ + UAENOTBLK = 0x2f6df0e, /* Block device required */ + UAEBUSY = 0x2f6df0f, /* Device or resource busy */ + UAEEXIST = 0x2f6df10, /* File exists */ + UAEXDEV = 0x2f6df11, /* Cross-device link */ + UAENODEV = 0x2f6df12, /* No such device */ + UAENOTDIR = 0x2f6df13, /* Not a directory */ + UAEISDIR = 0x2f6df14, /* Is a directory */ + UAEINVAL = 0x2f6df15, /* Invalid argument */ + UAENFILE = 0x2f6df16, /* File table overflow */ + UAEMFILE = 0x2f6df17, /* Too many open files */ + UAENOTTY = 0x2f6df18, /* Not a typewriter */ + UAETXTBSY = 0x2f6df19, /* Text file busy */ + UAEFBIG = 0x2f6df1a, /* File too large */ + UAENOSPC = 0x2f6df1b, /* No space left on device */ + UAESPIPE = 0x2f6df1c, /* Illegal seek */ + UAEROFS = 0x2f6df1d, /* Read-only file system */ + UAEMLINK = 0x2f6df1e, /* Too many links */ + UAEPIPE = 0x2f6df1f, /* Broken pipe */ + UAEDOM = 0x2f6df20, /* Math argument out of domain of func */ + UAERANGE = 0x2f6df21, /* Math result not representable */ + UAEDEADLK = 0x2f6df22, /* Resource deadlock would occur */ + UAENAMETOOLONG = 0x2f6df23, /* File name too long */ + UAENOLCK = 0x2f6df24, /* No record locks available */ + UAENOSYS = 0x2f6df25, /* Function not implemented */ + UAENOTEMPTY = 0x2f6df26, /* Directory not empty */ + UAELOOP = 0x2f6df27, /* Too many symbolic links encountered */ + UAEWOULDBLOCK = 0x2f6df28, /* Operation would block */ + UAENOMSG = 0x2f6df29, /* No message of desired type */ + UAEIDRM = 0x2f6df2a, /* Identifier removed */ + UAECHRNG = 0x2f6df2b, /* Channel number out of range */ + UAEL2NSYNC = 0x2f6df2c, /* Level 2 not synchronized */ + UAEL3HLT = 0x2f6df2d, /* Level 3 halted */ + UAEL3RST = 0x2f6df2e, /* Level 3 reset */ + UAELNRNG = 0x2f6df2f, /* Link number out of range */ + UAEUNATCH = 0x2f6df30, /* Protocol driver not attached */ + UAENOCSI = 0x2f6df31, /* No CSI structure available */ + UAEL2HLT = 0x2f6df32, /* Level 2 halted */ + UAEBADE = 0x2f6df33, /* Invalid exchange */ + UAEBADR = 0x2f6df34, /* Invalid request descriptor */ + UAEXFULL = 0x2f6df35, /* Exchange full */ + UAENOANO = 0x2f6df36, /* No anode */ + UAEBADRQC = 0x2f6df37, /* Invalid request code */ + UAEBADSLT = 0x2f6df38, /* Invalid slot */ + UAEBFONT = 0x2f6df39, /* Bad font file format */ + UAENOSTR = 0x2f6df3a, /* Device not a stream */ + UAENODATA = 0x2f6df3b, /* No data available */ + UAETIME = 0x2f6df3c, /* Timer expired */ + UAENOSR = 0x2f6df3d, /* Out of streams resources */ + UAENONET = 0x2f6df3e, /* Machine is not on the network */ + UAENOPKG = 0x2f6df3f, /* Package not installed */ + UAEREMOTE = 0x2f6df40, /* Object is remote */ + UAENOLINK = 0x2f6df41, /* Link has been severed */ + UAEADV = 0x2f6df42, /* Advertise error */ + UAESRMNT = 0x2f6df43, /* Srmount error */ + UAECOMM = 0x2f6df44, /* Communication error on send */ + UAEPROTO = 0x2f6df45, /* Protocol error */ + UAEMULTIHOP = 0x2f6df46, /* Multihop attempted */ + UAEDOTDOT = 0x2f6df47, /* RFS specific error */ + UAEBADMSG = 0x2f6df48, /* Not a data message */ + UAEOVERFLOW = 0x2f6df49, /* Value too large for defined data type */ + UAENOTUNIQ = 0x2f6df4a, /* Name not unique on network */ + UAEBADFD = 0x2f6df4b, /* File descriptor in bad state */ + UAEREMCHG = 0x2f6df4c, /* Remote address changed */ + UAELIBACC = 0x2f6df4d, /* Can not access a needed shared library */ + UAELIBBAD = 0x2f6df4e, /* Accessing a corrupted shared library */ + UAELIBSCN = 0x2f6df4f, /* .lib section in a.out corrupted */ + UAELIBMAX = 0x2f6df50, /* Attempting to link in too many shared libraries */ + UAELIBEXEC = 0x2f6df51, /* Cannot exec a shared library directly */ + UAEILSEQ = 0x2f6df52, /* Illegal byte sequence */ + UAERESTART = 0x2f6df53, /* Interrupted system call should be restarted */ + UAESTRPIPE = 0x2f6df54, /* Streams pipe error */ + UAEUSERS = 0x2f6df55, /* Too many users */ + UAENOTSOCK = 0x2f6df56, /* Socket operation on non-socket */ + UAEDESTADDRREQ = 0x2f6df57, /* Destination address required */ + UAEMSGSIZE = 0x2f6df58, /* Message too long */ + UAEPROTOTYPE = 0x2f6df59, /* Protocol wrong type for socket */ + UAENOPROTOOPT = 0x2f6df5a, /* Protocol not available */ + UAEPROTONOSUPPORT = 0x2f6df5b, /* Protocol not supported */ + UAESOCKTNOSUPPORT = 0x2f6df5c, /* Socket type not supported */ + UAEOPNOTSUPP = 0x2f6df5d, /* Operation not supported on transport endpoint */ + UAEPFNOSUPPORT = 0x2f6df5e, /* Protocol family not supported */ + UAEAFNOSUPPORT = 0x2f6df5f, /* Address family not supported by protocol */ + UAEADDRINUSE = 0x2f6df60, /* Address already in use */ + UAEADDRNOTAVAIL = 0x2f6df61, /* Cannot assign requested address */ + UAENETDOWN = 0x2f6df62, /* Network is down */ + UAENETUNREACH = 0x2f6df63, /* Network is unreachable */ + UAENETRESET = 0x2f6df64, /* Network dropped connection because of reset */ + UAECONNABORTED = 0x2f6df65, /* Software caused connection abort */ + UAECONNRESET = 0x2f6df66, /* Connection reset by peer */ + UAENOBUFS = 0x2f6df67, /* No buffer space available */ + UAEISCONN = 0x2f6df68, /* Transport endpoint is already connected */ + UAENOTCONN = 0x2f6df69, /* Transport endpoint is not connected */ + UAESHUTDOWN = 0x2f6df6a, /* Cannot send after transport endpoint shutdown */ + UAETOOMANYREFS = 0x2f6df6b, /* Too many references: cannot splice */ + UAETIMEDOUT = 0x2f6df6c, /* Connection timed out */ + UAECONNREFUSED = 0x2f6df6d, /* Connection refused */ + UAEHOSTDOWN = 0x2f6df6e, /* Host is down */ + UAEHOSTUNREACH = 0x2f6df6f, /* No route to host */ + UAEALREADY = 0x2f6df70, /* Operation already in progress */ + UAEINPROGRESS = 0x2f6df71, /* Operation now in progress */ + UAESTALE = 0x2f6df72, /* Stale NFS file handle */ + UAEUCLEAN = 0x2f6df73, /* Structure needs cleaning */ + UAENOTNAM = 0x2f6df74, /* Not a XENIX named type file */ + UAENAVAIL = 0x2f6df75, /* No XENIX semaphores available */ + UAEISNAM = 0x2f6df76, /* Is a named type file */ + UAEREMOTEIO = 0x2f6df77, /* Remote I/O error */ + UAEDQUOT = 0x2f6df78, /* Quota exceeded */ + UAENOMEDIUM = 0x2f6df79, /* No medium found */ + UAEMEDIUMTYPE = 0x2f6df7a, /* Wrong medium type */ +}; diff --git a/fs/afs/protocol_yfs.h b/fs/afs/protocol_yfs.h new file mode 100644 index 000000000..b5bd03b1d --- /dev/null +++ b/fs/afs/protocol_yfs.h @@ -0,0 +1,170 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* YFS protocol bits + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#define YFS_FS_SERVICE 2500 +#define YFS_CM_SERVICE 2501 + +#define YFSCBMAX 1024 + +enum YFS_CM_Operations { + YFSCBProbe = 206, /* probe client */ + YFSCBGetLock = 207, /* get contents of CM lock table */ + YFSCBXStatsVersion = 209, /* get version of extended statistics */ + YFSCBGetXStats = 210, /* get contents of extended statistics data */ + YFSCBInitCallBackState3 = 213, /* initialise callback state, version 3 */ + YFSCBProbeUuid = 214, /* check the client hasn't rebooted */ + YFSCBGetServerPrefs = 215, + YFSCBGetCellServDV = 216, + YFSCBGetLocalCell = 217, + YFSCBGetCacheConfig = 218, + YFSCBGetCellByNum = 65537, + YFSCBTellMeAboutYourself = 65538, /* get client capabilities */ + YFSCBCallBack = 64204, +}; + +enum YFS_FS_Operations { + YFSFETCHACL = 64131, /* YFS Fetch file AFS3 ACL */ + YFSFETCHSTATUS = 64132, /* YFS Fetch file status */ + YFSSTOREACL = 64134, /* YFS Store file AFS3 ACL */ + YFSSTORESTATUS = 64135, /* YFS Store file status */ + YFSREMOVEFILE = 64136, /* YFS Remove a file */ + YFSCREATEFILE = 64137, /* YFS Create a file */ + YFSRENAME = 64138, /* YFS Rename or move a file or directory */ + YFSSYMLINK = 64139, /* YFS Create a symbolic link */ + YFSLINK = 64140, /* YFS Create a hard link */ + YFSMAKEDIR = 64141, /* YFS Create a directory */ + YFSREMOVEDIR = 64142, /* YFS Remove a directory */ + YFSGETVOLUMESTATUS = 64149, /* YFS Get volume status information */ + YFSSETVOLUMESTATUS = 64150, /* YFS Set volume status information */ + YFSSETLOCK = 64156, /* YFS Request a file lock */ + YFSEXTENDLOCK = 64157, /* YFS Extend a file lock */ + YFSRELEASELOCK = 64158, /* YFS Release a file lock */ + YFSLOOKUP = 64161, /* YFS lookup file in directory */ + YFSFLUSHCPS = 64165, + YFSFETCHOPAQUEACL = 64168, /* YFS Fetch file YFS ACL */ + YFSWHOAMI = 64170, + YFSREMOVEACL = 64171, + YFSREMOVEFILE2 = 64173, + YFSSTOREOPAQUEACL2 = 64174, + YFSINLINEBULKSTATUS = 64536, /* YFS Fetch multiple file statuses with errors */ + YFSFETCHDATA64 = 64537, /* YFS Fetch file data */ + YFSSTOREDATA64 = 64538, /* YFS Store file data */ + YFSUPDATESYMLINK = 64540, +}; + +struct yfs_xdr_u64 { + __be32 msw; + __be32 lsw; +} __packed; + +static inline u64 xdr_to_u64(const struct yfs_xdr_u64 x) +{ + return ((u64)ntohl(x.msw) << 32) | ntohl(x.lsw); +} + +static inline struct yfs_xdr_u64 u64_to_xdr(const u64 x) +{ + return (struct yfs_xdr_u64){ .msw = htonl(x >> 32), .lsw = htonl(x) }; +} + +struct yfs_xdr_vnode { + struct yfs_xdr_u64 lo; + __be32 hi; + __be32 unique; +} __packed; + +struct yfs_xdr_YFSFid { + struct yfs_xdr_u64 volume; + struct yfs_xdr_vnode vnode; +} __packed; + + +struct yfs_xdr_YFSFetchStatus { + __be32 type; + __be32 nlink; + struct yfs_xdr_u64 size; + struct yfs_xdr_u64 data_version; + struct yfs_xdr_u64 author; + struct yfs_xdr_u64 owner; + struct yfs_xdr_u64 group; + __be32 mode; + __be32 caller_access; + __be32 anon_access; + struct yfs_xdr_vnode parent; + __be32 data_access_protocol; + struct yfs_xdr_u64 mtime_client; + struct yfs_xdr_u64 mtime_server; + __be32 lock_count; + __be32 abort_code; +} __packed; + +struct yfs_xdr_YFSCallBack { + __be32 version; + struct yfs_xdr_u64 expiration_time; + __be32 type; +} __packed; + +struct yfs_xdr_YFSStoreStatus { + __be32 mask; + __be32 mode; + struct yfs_xdr_u64 mtime_client; + struct yfs_xdr_u64 owner; + struct yfs_xdr_u64 group; +} __packed; + +struct yfs_xdr_RPCFlags { + __be32 rpc_flags; +} __packed; + +struct yfs_xdr_YFSVolSync { + struct yfs_xdr_u64 vol_creation_date; + struct yfs_xdr_u64 vol_update_date; + struct yfs_xdr_u64 max_quota; + struct yfs_xdr_u64 blocks_in_use; + struct yfs_xdr_u64 blocks_avail; +} __packed; + +enum yfs_volume_type { + yfs_volume_type_ro = 0, + yfs_volume_type_rw = 1, +}; + +#define yfs_FVSOnline 0x1 +#define yfs_FVSInservice 0x2 +#define yfs_FVSBlessed 0x4 +#define yfs_FVSNeedsSalvage 0x8 + +struct yfs_xdr_YFSFetchVolumeStatus { + struct yfs_xdr_u64 vid; + struct yfs_xdr_u64 parent_id; + __be32 flags; + __be32 type; + struct yfs_xdr_u64 max_quota; + struct yfs_xdr_u64 blocks_in_use; + struct yfs_xdr_u64 part_blocks_avail; + struct yfs_xdr_u64 part_max_blocks; + struct yfs_xdr_u64 vol_copy_date; + struct yfs_xdr_u64 vol_backup_date; +} __packed; + +struct yfs_xdr_YFSStoreVolumeStatus { + __be32 mask; + struct yfs_xdr_u64 min_quota; + struct yfs_xdr_u64 max_quota; + struct yfs_xdr_u64 file_quota; +} __packed; + +enum yfs_lock_type { + yfs_LockNone = -1, + yfs_LockRead = 0, + yfs_LockWrite = 1, + yfs_LockExtend = 2, + yfs_LockRelease = 3, + yfs_LockMandatoryRead = 0x100, + yfs_LockMandatoryWrite = 0x101, + yfs_LockMandatoryExtend = 0x102, +}; diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c new file mode 100644 index 000000000..d83f13c44 --- /dev/null +++ b/fs/afs/rotate.c @@ -0,0 +1,513 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Handle fileserver selection and rotation. + * + * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/sched/signal.h> +#include "internal.h" +#include "afs_fs.h" + +/* + * Begin iteration through a server list, starting with the vnode's last used + * server if possible, or the last recorded good server if not. + */ +static bool afs_start_fs_iteration(struct afs_operation *op, + struct afs_vnode *vnode) +{ + struct afs_server *server; + void *cb_server; + int i; + + read_lock(&op->volume->servers_lock); + op->server_list = afs_get_serverlist( + rcu_dereference_protected(op->volume->servers, + lockdep_is_held(&op->volume->servers_lock))); + read_unlock(&op->volume->servers_lock); + + op->untried = (1UL << op->server_list->nr_servers) - 1; + op->index = READ_ONCE(op->server_list->preferred); + + cb_server = vnode->cb_server; + if (cb_server) { + /* See if the vnode's preferred record is still available */ + for (i = 0; i < op->server_list->nr_servers; i++) { + server = op->server_list->servers[i].server; + if (server == cb_server) { + op->index = i; + goto found_interest; + } + } + + /* If we have a lock outstanding on a server that's no longer + * serving this vnode, then we can't switch to another server + * and have to return an error. + */ + if (op->flags & AFS_OPERATION_CUR_ONLY) { + op->error = -ESTALE; + return false; + } + + /* Note that the callback promise is effectively broken */ + write_seqlock(&vnode->cb_lock); + ASSERTCMP(cb_server, ==, vnode->cb_server); + vnode->cb_server = NULL; + if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) + vnode->cb_break++; + write_sequnlock(&vnode->cb_lock); + } + +found_interest: + return true; +} + +/* + * Post volume busy note. + */ +static void afs_busy(struct afs_volume *volume, u32 abort_code) +{ + const char *m; + + switch (abort_code) { + case VOFFLINE: m = "offline"; break; + case VRESTARTING: m = "restarting"; break; + case VSALVAGING: m = "being salvaged"; break; + default: m = "busy"; break; + } + + pr_notice("kAFS: Volume %llu '%s' is %s\n", volume->vid, volume->name, m); +} + +/* + * Sleep and retry the operation to the same fileserver. + */ +static bool afs_sleep_and_retry(struct afs_operation *op) +{ + if (!(op->flags & AFS_OPERATION_UNINTR)) { + msleep_interruptible(1000); + if (signal_pending(current)) { + op->error = -ERESTARTSYS; + return false; + } + } else { + msleep(1000); + } + + return true; +} + +/* + * Select the fileserver to use. May be called multiple times to rotate + * through the fileservers. + */ +bool afs_select_fileserver(struct afs_operation *op) +{ + struct afs_addr_list *alist; + struct afs_server *server; + struct afs_vnode *vnode = op->file[0].vnode; + struct afs_error e; + u32 rtt; + int error = op->ac.error, i; + + _enter("%lx[%d],%lx[%d],%d,%d", + op->untried, op->index, + op->ac.tried, op->ac.index, + error, op->ac.abort_code); + + if (op->flags & AFS_OPERATION_STOP) { + _leave(" = f [stopped]"); + return false; + } + + op->nr_iterations++; + + /* Evaluate the result of the previous operation, if there was one. */ + switch (error) { + case SHRT_MAX: + goto start; + + case 0: + default: + /* Success or local failure. Stop. */ + op->error = error; + op->flags |= AFS_OPERATION_STOP; + _leave(" = f [okay/local %d]", error); + return false; + + case -ECONNABORTED: + /* The far side rejected the operation on some grounds. This + * might involve the server being busy or the volume having been moved. + */ + switch (op->ac.abort_code) { + case VNOVOL: + /* This fileserver doesn't know about the volume. + * - May indicate that the VL is wrong - retry once and compare + * the results. + * - May indicate that the fileserver couldn't attach to the vol. + */ + if (op->flags & AFS_OPERATION_VNOVOL) { + op->error = -EREMOTEIO; + goto next_server; + } + + write_lock(&op->volume->servers_lock); + op->server_list->vnovol_mask |= 1 << op->index; + write_unlock(&op->volume->servers_lock); + + set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags); + error = afs_check_volume_status(op->volume, op); + if (error < 0) + goto failed_set_error; + + if (test_bit(AFS_VOLUME_DELETED, &op->volume->flags)) { + op->error = -ENOMEDIUM; + goto failed; + } + + /* If the server list didn't change, then assume that + * it's the fileserver having trouble. + */ + if (rcu_access_pointer(op->volume->servers) == op->server_list) { + op->error = -EREMOTEIO; + goto next_server; + } + + /* Try again */ + op->flags |= AFS_OPERATION_VNOVOL; + _leave(" = t [vnovol]"); + return true; + + case VSALVAGE: /* TODO: Should this return an error or iterate? */ + case VVOLEXISTS: + case VNOSERVICE: + case VONLINE: + case VDISKFULL: + case VOVERQUOTA: + op->error = afs_abort_to_error(op->ac.abort_code); + goto next_server; + + case VOFFLINE: + if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &op->volume->flags)) { + afs_busy(op->volume, op->ac.abort_code); + clear_bit(AFS_VOLUME_BUSY, &op->volume->flags); + } + if (op->flags & AFS_OPERATION_NO_VSLEEP) { + op->error = -EADV; + goto failed; + } + if (op->flags & AFS_OPERATION_CUR_ONLY) { + op->error = -ESTALE; + goto failed; + } + goto busy; + + case VSALVAGING: + case VRESTARTING: + case VBUSY: + /* Retry after going round all the servers unless we + * have a file lock we need to maintain. + */ + if (op->flags & AFS_OPERATION_NO_VSLEEP) { + op->error = -EBUSY; + goto failed; + } + if (!test_and_set_bit(AFS_VOLUME_BUSY, &op->volume->flags)) { + afs_busy(op->volume, op->ac.abort_code); + clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags); + } + busy: + if (op->flags & AFS_OPERATION_CUR_ONLY) { + if (!afs_sleep_and_retry(op)) + goto failed; + + /* Retry with same server & address */ + _leave(" = t [vbusy]"); + return true; + } + + op->flags |= AFS_OPERATION_VBUSY; + goto next_server; + + case VMOVED: + /* The volume migrated to another server. We consider + * consider all locks and callbacks broken and request + * an update from the VLDB. + * + * We also limit the number of VMOVED hops we will + * honour, just in case someone sets up a loop. + */ + if (op->flags & AFS_OPERATION_VMOVED) { + op->error = -EREMOTEIO; + goto failed; + } + op->flags |= AFS_OPERATION_VMOVED; + + set_bit(AFS_VOLUME_WAIT, &op->volume->flags); + set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags); + error = afs_check_volume_status(op->volume, op); + if (error < 0) + goto failed_set_error; + + /* If the server list didn't change, then the VLDB is + * out of sync with the fileservers. This is hopefully + * a temporary condition, however, so we don't want to + * permanently block access to the file. + * + * TODO: Try other fileservers if we can. + * + * TODO: Retry a few times with sleeps. + */ + if (rcu_access_pointer(op->volume->servers) == op->server_list) { + op->error = -ENOMEDIUM; + goto failed; + } + + goto restart_from_beginning; + + default: + clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags); + clear_bit(AFS_VOLUME_BUSY, &op->volume->flags); + op->error = afs_abort_to_error(op->ac.abort_code); + goto failed; + } + + case -ETIMEDOUT: + case -ETIME: + if (op->error != -EDESTADDRREQ) + goto iterate_address; + fallthrough; + case -ERFKILL: + case -EADDRNOTAVAIL: + case -ENETUNREACH: + case -EHOSTUNREACH: + case -EHOSTDOWN: + case -ECONNREFUSED: + _debug("no conn"); + op->error = error; + goto iterate_address; + + case -ECONNRESET: + _debug("call reset"); + op->error = error; + goto failed; + } + +restart_from_beginning: + _debug("restart"); + afs_end_cursor(&op->ac); + op->server = NULL; + afs_put_serverlist(op->net, op->server_list); + op->server_list = NULL; +start: + _debug("start"); + /* See if we need to do an update of the volume record. Note that the + * volume may have moved or even have been deleted. + */ + error = afs_check_volume_status(op->volume, op); + if (error < 0) + goto failed_set_error; + + if (!afs_start_fs_iteration(op, vnode)) + goto failed; + + _debug("__ VOL %llx __", op->volume->vid); + +pick_server: + _debug("pick [%lx]", op->untried); + + error = afs_wait_for_fs_probes(op->server_list, op->untried); + if (error < 0) + goto failed_set_error; + + /* Pick the untried server with the lowest RTT. If we have outstanding + * callbacks, we stick with the server we're already using if we can. + */ + if (op->server) { + _debug("server %u", op->index); + if (test_bit(op->index, &op->untried)) + goto selected_server; + op->server = NULL; + _debug("no server"); + } + + op->index = -1; + rtt = U32_MAX; + for (i = 0; i < op->server_list->nr_servers; i++) { + struct afs_server *s = op->server_list->servers[i].server; + + if (!test_bit(i, &op->untried) || + !test_bit(AFS_SERVER_FL_RESPONDING, &s->flags)) + continue; + if (s->probe.rtt < rtt) { + op->index = i; + rtt = s->probe.rtt; + } + } + + if (op->index == -1) + goto no_more_servers; + +selected_server: + _debug("use %d", op->index); + __clear_bit(op->index, &op->untried); + + /* We're starting on a different fileserver from the list. We need to + * check it, create a callback intercept, find its address list and + * probe its capabilities before we use it. + */ + ASSERTCMP(op->ac.alist, ==, NULL); + server = op->server_list->servers[op->index].server; + + if (!afs_check_server_record(op, server)) + goto failed; + + _debug("USING SERVER: %pU", &server->uuid); + + op->flags |= AFS_OPERATION_RETRY_SERVER; + op->server = server; + if (vnode->cb_server != server) { + vnode->cb_server = server; + vnode->cb_s_break = server->cb_s_break; + vnode->cb_v_break = vnode->volume->cb_v_break; + clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + } + + read_lock(&server->fs_lock); + alist = rcu_dereference_protected(server->addresses, + lockdep_is_held(&server->fs_lock)); + afs_get_addrlist(alist); + read_unlock(&server->fs_lock); + +retry_server: + memset(&op->ac, 0, sizeof(op->ac)); + + if (!op->ac.alist) + op->ac.alist = alist; + else + afs_put_addrlist(alist); + + op->ac.index = -1; + +iterate_address: + ASSERT(op->ac.alist); + /* Iterate over the current server's address list to try and find an + * address on which it will respond to us. + */ + if (!afs_iterate_addresses(&op->ac)) + goto out_of_addresses; + + _debug("address [%u] %u/%u %pISp", + op->index, op->ac.index, op->ac.alist->nr_addrs, + &op->ac.alist->addrs[op->ac.index].transport); + + _leave(" = t"); + return true; + +out_of_addresses: + /* We've now had a failure to respond on all of a server's addresses - + * immediately probe them again and consider retrying the server. + */ + afs_probe_fileserver(op->net, op->server); + if (op->flags & AFS_OPERATION_RETRY_SERVER) { + alist = op->ac.alist; + error = afs_wait_for_one_fs_probe( + op->server, !(op->flags & AFS_OPERATION_UNINTR)); + switch (error) { + case 0: + op->flags &= ~AFS_OPERATION_RETRY_SERVER; + goto retry_server; + case -ERESTARTSYS: + goto failed_set_error; + case -ETIME: + case -EDESTADDRREQ: + goto next_server; + } + } + +next_server: + _debug("next"); + afs_end_cursor(&op->ac); + goto pick_server; + +no_more_servers: + /* That's all the servers poked to no good effect. Try again if some + * of them were busy. + */ + if (op->flags & AFS_OPERATION_VBUSY) + goto restart_from_beginning; + + e.error = -EDESTADDRREQ; + e.responded = false; + for (i = 0; i < op->server_list->nr_servers; i++) { + struct afs_server *s = op->server_list->servers[i].server; + + afs_prioritise_error(&e, READ_ONCE(s->probe.error), + s->probe.abort_code); + } + + error = e.error; + +failed_set_error: + op->error = error; +failed: + op->flags |= AFS_OPERATION_STOP; + afs_end_cursor(&op->ac); + _leave(" = f [failed %d]", op->error); + return false; +} + +/* + * Dump cursor state in the case of the error being EDESTADDRREQ. + */ +void afs_dump_edestaddrreq(const struct afs_operation *op) +{ + static int count; + int i; + + if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3) + return; + count++; + + rcu_read_lock(); + + pr_notice("EDESTADDR occurred\n"); + pr_notice("FC: cbb=%x cbb2=%x fl=%x err=%hd\n", + op->file[0].cb_break_before, + op->file[1].cb_break_before, op->flags, op->error); + pr_notice("FC: ut=%lx ix=%d ni=%u\n", + op->untried, op->index, op->nr_iterations); + + if (op->server_list) { + const struct afs_server_list *sl = op->server_list; + pr_notice("FC: SL nr=%u pr=%u vnov=%hx\n", + sl->nr_servers, sl->preferred, sl->vnovol_mask); + for (i = 0; i < sl->nr_servers; i++) { + const struct afs_server *s = sl->servers[i].server; + pr_notice("FC: server fl=%lx av=%u %pU\n", + s->flags, s->addr_version, &s->uuid); + if (s->addresses) { + const struct afs_addr_list *a = + rcu_dereference(s->addresses); + pr_notice("FC: - av=%u nr=%u/%u/%u pr=%u\n", + a->version, + a->nr_ipv4, a->nr_addrs, a->max_addrs, + a->preferred); + pr_notice("FC: - R=%lx F=%lx\n", + a->responded, a->failed); + if (a == op->ac.alist) + pr_notice("FC: - current\n"); + } + } + } + + pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n", + op->ac.tried, op->ac.index, op->ac.abort_code, op->ac.error, + op->ac.responded, op->ac.nr_iterations); + rcu_read_unlock(); +} diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c new file mode 100644 index 000000000..1820b5365 --- /dev/null +++ b/fs/afs/rxrpc.c @@ -0,0 +1,969 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Maintain an RxRPC server socket to do AFS communications through + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/slab.h> +#include <linux/sched/signal.h> + +#include <net/sock.h> +#include <net/af_rxrpc.h> +#include "internal.h" +#include "afs_cm.h" +#include "protocol_yfs.h" + +struct workqueue_struct *afs_async_calls; + +static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long); +static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long); +static void afs_process_async_call(struct work_struct *); +static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long); +static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long); +static int afs_deliver_cm_op_id(struct afs_call *); + +/* asynchronous incoming call initial processing */ +static const struct afs_call_type afs_RXCMxxxx = { + .name = "CB.xxxx", + .deliver = afs_deliver_cm_op_id, +}; + +/* + * open an RxRPC socket and bind it to be a server for callback notifications + * - the socket is left in blocking mode and non-blocking ops use MSG_DONTWAIT + */ +int afs_open_socket(struct afs_net *net) +{ + struct sockaddr_rxrpc srx; + struct socket *socket; + int ret; + + _enter(""); + + ret = sock_create_kern(net->net, AF_RXRPC, SOCK_DGRAM, PF_INET6, &socket); + if (ret < 0) + goto error_1; + + socket->sk->sk_allocation = GFP_NOFS; + + /* bind the callback manager's address to make this a server socket */ + memset(&srx, 0, sizeof(srx)); + srx.srx_family = AF_RXRPC; + srx.srx_service = CM_SERVICE; + srx.transport_type = SOCK_DGRAM; + srx.transport_len = sizeof(srx.transport.sin6); + srx.transport.sin6.sin6_family = AF_INET6; + srx.transport.sin6.sin6_port = htons(AFS_CM_PORT); + + ret = rxrpc_sock_set_min_security_level(socket->sk, + RXRPC_SECURITY_ENCRYPT); + if (ret < 0) + goto error_2; + + ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); + if (ret == -EADDRINUSE) { + srx.transport.sin6.sin6_port = 0; + ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); + } + if (ret < 0) + goto error_2; + + srx.srx_service = YFS_CM_SERVICE; + ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); + if (ret < 0) + goto error_2; + + /* Ideally, we'd turn on service upgrade here, but we can't because + * OpenAFS is buggy and leaks the userStatus field from packet to + * packet and between FS packets and CB packets - so if we try to do an + * upgrade on an FS packet, OpenAFS will leak that into the CB packet + * it sends back to us. + */ + + rxrpc_kernel_new_call_notification(socket, afs_rx_new_call, + afs_rx_discard_new_call); + + ret = kernel_listen(socket, INT_MAX); + if (ret < 0) + goto error_2; + + net->socket = socket; + afs_charge_preallocation(&net->charge_preallocation_work); + _leave(" = 0"); + return 0; + +error_2: + sock_release(socket); +error_1: + _leave(" = %d", ret); + return ret; +} + +/* + * close the RxRPC socket AFS was using + */ +void afs_close_socket(struct afs_net *net) +{ + _enter(""); + + kernel_listen(net->socket, 0); + flush_workqueue(afs_async_calls); + + if (net->spare_incoming_call) { + afs_put_call(net->spare_incoming_call); + net->spare_incoming_call = NULL; + } + + _debug("outstanding %u", atomic_read(&net->nr_outstanding_calls)); + wait_var_event(&net->nr_outstanding_calls, + !atomic_read(&net->nr_outstanding_calls)); + _debug("no outstanding calls"); + + kernel_sock_shutdown(net->socket, SHUT_RDWR); + flush_workqueue(afs_async_calls); + sock_release(net->socket); + + _debug("dework"); + _leave(""); +} + +/* + * Allocate a call. + */ +static struct afs_call *afs_alloc_call(struct afs_net *net, + const struct afs_call_type *type, + gfp_t gfp) +{ + struct afs_call *call; + int o; + + call = kzalloc(sizeof(*call), gfp); + if (!call) + return NULL; + + call->type = type; + call->net = net; + call->debug_id = atomic_inc_return(&rxrpc_debug_id); + atomic_set(&call->usage, 1); + INIT_WORK(&call->async_work, afs_process_async_call); + init_waitqueue_head(&call->waitq); + spin_lock_init(&call->state_lock); + call->iter = &call->def_iter; + + o = atomic_inc_return(&net->nr_outstanding_calls); + trace_afs_call(call, afs_call_trace_alloc, 1, o, + __builtin_return_address(0)); + return call; +} + +/* + * Dispose of a reference on a call. + */ +void afs_put_call(struct afs_call *call) +{ + struct afs_net *net = call->net; + int n = atomic_dec_return(&call->usage); + int o = atomic_read(&net->nr_outstanding_calls); + + trace_afs_call(call, afs_call_trace_put, n, o, + __builtin_return_address(0)); + + ASSERTCMP(n, >=, 0); + if (n == 0) { + ASSERT(!work_pending(&call->async_work)); + ASSERT(call->type->name != NULL); + + if (call->rxcall) { + rxrpc_kernel_end_call(net->socket, call->rxcall); + call->rxcall = NULL; + } + if (call->type->destructor) + call->type->destructor(call); + + afs_unuse_server_notime(call->net, call->server, afs_server_trace_put_call); + afs_put_addrlist(call->alist); + kfree(call->request); + + trace_afs_call(call, afs_call_trace_free, 0, o, + __builtin_return_address(0)); + kfree(call); + + o = atomic_dec_return(&net->nr_outstanding_calls); + if (o == 0) + wake_up_var(&net->nr_outstanding_calls); + } +} + +static struct afs_call *afs_get_call(struct afs_call *call, + enum afs_call_trace why) +{ + int u = atomic_inc_return(&call->usage); + + trace_afs_call(call, why, u, + atomic_read(&call->net->nr_outstanding_calls), + __builtin_return_address(0)); + return call; +} + +/* + * Queue the call for actual work. + */ +static void afs_queue_call_work(struct afs_call *call) +{ + if (call->type->work) { + INIT_WORK(&call->work, call->type->work); + + afs_get_call(call, afs_call_trace_work); + if (!queue_work(afs_wq, &call->work)) + afs_put_call(call); + } +} + +/* + * allocate a call with flat request and reply buffers + */ +struct afs_call *afs_alloc_flat_call(struct afs_net *net, + const struct afs_call_type *type, + size_t request_size, size_t reply_max) +{ + struct afs_call *call; + + call = afs_alloc_call(net, type, GFP_NOFS); + if (!call) + goto nomem_call; + + if (request_size) { + call->request_size = request_size; + call->request = kmalloc(request_size, GFP_NOFS); + if (!call->request) + goto nomem_free; + } + + if (reply_max) { + call->reply_max = reply_max; + call->buffer = kmalloc(reply_max, GFP_NOFS); + if (!call->buffer) + goto nomem_free; + } + + afs_extract_to_buf(call, call->reply_max); + call->operation_ID = type->op; + init_waitqueue_head(&call->waitq); + return call; + +nomem_free: + afs_put_call(call); +nomem_call: + return NULL; +} + +/* + * clean up a call with flat buffer + */ +void afs_flat_call_destructor(struct afs_call *call) +{ + _enter(""); + + kfree(call->request); + call->request = NULL; + kfree(call->buffer); + call->buffer = NULL; +} + +#define AFS_BVEC_MAX 8 + +/* + * Load the given bvec with the next few pages. + */ +static void afs_load_bvec(struct afs_call *call, struct msghdr *msg, + struct bio_vec *bv, pgoff_t first, pgoff_t last, + unsigned offset) +{ + struct afs_operation *op = call->op; + struct page *pages[AFS_BVEC_MAX]; + unsigned int nr, n, i, to, bytes = 0; + + nr = min_t(pgoff_t, last - first + 1, AFS_BVEC_MAX); + n = find_get_pages_contig(op->store.mapping, first, nr, pages); + ASSERTCMP(n, ==, nr); + + msg->msg_flags |= MSG_MORE; + for (i = 0; i < nr; i++) { + to = PAGE_SIZE; + if (first + i >= last) { + to = op->store.last_to; + msg->msg_flags &= ~MSG_MORE; + } + bv[i].bv_page = pages[i]; + bv[i].bv_len = to - offset; + bv[i].bv_offset = offset; + bytes += to - offset; + offset = 0; + } + + iov_iter_bvec(&msg->msg_iter, WRITE, bv, nr, bytes); +} + +/* + * Advance the AFS call state when the RxRPC call ends the transmit phase. + */ +static void afs_notify_end_request_tx(struct sock *sock, + struct rxrpc_call *rxcall, + unsigned long call_user_ID) +{ + struct afs_call *call = (struct afs_call *)call_user_ID; + + afs_set_call_state(call, AFS_CALL_CL_REQUESTING, AFS_CALL_CL_AWAIT_REPLY); +} + +/* + * attach the data from a bunch of pages on an inode to a call + */ +static int afs_send_pages(struct afs_call *call, struct msghdr *msg) +{ + struct afs_operation *op = call->op; + struct bio_vec bv[AFS_BVEC_MAX]; + unsigned int bytes, nr, loop, offset; + pgoff_t first = op->store.first, last = op->store.last; + int ret; + + offset = op->store.first_offset; + op->store.first_offset = 0; + + do { + afs_load_bvec(call, msg, bv, first, last, offset); + trace_afs_send_pages(call, msg, first, last, offset); + + offset = 0; + bytes = msg->msg_iter.count; + nr = msg->msg_iter.nr_segs; + + ret = rxrpc_kernel_send_data(op->net->socket, call->rxcall, msg, + bytes, afs_notify_end_request_tx); + for (loop = 0; loop < nr; loop++) + put_page(bv[loop].bv_page); + if (ret < 0) + break; + + first += nr; + } while (first <= last); + + trace_afs_sent_pages(call, op->store.first, last, first, ret); + return ret; +} + +/* + * Initiate a call and synchronously queue up the parameters for dispatch. Any + * error is stored into the call struct, which the caller must check for. + */ +void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) +{ + struct sockaddr_rxrpc *srx = &ac->alist->addrs[ac->index]; + struct rxrpc_call *rxcall; + struct msghdr msg; + struct kvec iov[1]; + s64 tx_total_len; + int ret; + + _enter(",{%pISp},", &srx->transport); + + ASSERT(call->type != NULL); + ASSERT(call->type->name != NULL); + + _debug("____MAKE %p{%s,%x} [%d]____", + call, call->type->name, key_serial(call->key), + atomic_read(&call->net->nr_outstanding_calls)); + + call->addr_ix = ac->index; + call->alist = afs_get_addrlist(ac->alist); + + /* Work out the length we're going to transmit. This is awkward for + * calls such as FS.StoreData where there's an extra injection of data + * after the initial fixed part. + */ + tx_total_len = call->request_size; + if (call->send_pages) { + struct afs_operation *op = call->op; + + if (op->store.last == op->store.first) { + tx_total_len += op->store.last_to - op->store.first_offset; + } else { + /* It looks mathematically like you should be able to + * combine the following lines with the ones above, but + * unsigned arithmetic is fun when it wraps... + */ + tx_total_len += PAGE_SIZE - op->store.first_offset; + tx_total_len += op->store.last_to; + tx_total_len += (op->store.last - op->store.first - 1) * PAGE_SIZE; + } + } + + /* If the call is going to be asynchronous, we need an extra ref for + * the call to hold itself so the caller need not hang on to its ref. + */ + if (call->async) { + afs_get_call(call, afs_call_trace_get); + call->drop_ref = true; + } + + /* create a call */ + rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key, + (unsigned long)call, + tx_total_len, gfp, + (call->async ? + afs_wake_up_async_call : + afs_wake_up_call_waiter), + call->upgrade, + (call->intr ? RXRPC_PREINTERRUPTIBLE : + RXRPC_UNINTERRUPTIBLE), + call->debug_id); + if (IS_ERR(rxcall)) { + ret = PTR_ERR(rxcall); + call->error = ret; + goto error_kill_call; + } + + call->rxcall = rxcall; + + if (call->max_lifespan) + rxrpc_kernel_set_max_life(call->net->socket, rxcall, + call->max_lifespan); + call->issue_time = ktime_get_real(); + + /* send the request */ + iov[0].iov_base = call->request; + iov[0].iov_len = call->request_size; + + msg.msg_name = NULL; + msg.msg_namelen = 0; + iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, call->request_size); + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = MSG_WAITALL | (call->send_pages ? MSG_MORE : 0); + + ret = rxrpc_kernel_send_data(call->net->socket, rxcall, + &msg, call->request_size, + afs_notify_end_request_tx); + if (ret < 0) + goto error_do_abort; + + if (call->send_pages) { + ret = afs_send_pages(call, &msg); + if (ret < 0) + goto error_do_abort; + } + + /* Note that at this point, we may have received the reply or an abort + * - and an asynchronous call may already have completed. + * + * afs_wait_for_call_to_complete(call, ac) + * must be called to synchronously clean up. + */ + return; + +error_do_abort: + if (ret != -ECONNABORTED) { + rxrpc_kernel_abort_call(call->net->socket, rxcall, + RX_USER_ABORT, ret, "KSD"); + } else { + iov_iter_kvec(&msg.msg_iter, READ, NULL, 0, 0); + rxrpc_kernel_recv_data(call->net->socket, rxcall, + &msg.msg_iter, false, + &call->abort_code, &call->service_id); + ac->abort_code = call->abort_code; + ac->responded = true; + } + call->error = ret; + trace_afs_call_done(call); +error_kill_call: + if (call->type->done) + call->type->done(call); + + /* We need to dispose of the extra ref we grabbed for an async call. + * The call, however, might be queued on afs_async_calls and we need to + * make sure we don't get any more notifications that might requeue it. + */ + if (call->rxcall) { + rxrpc_kernel_end_call(call->net->socket, call->rxcall); + call->rxcall = NULL; + } + if (call->async) { + if (cancel_work_sync(&call->async_work)) + afs_put_call(call); + afs_set_call_complete(call, ret, 0); + } + + ac->error = ret; + call->state = AFS_CALL_COMPLETE; + _leave(" = %d", ret); +} + +/* + * deliver messages to a call + */ +static void afs_deliver_to_call(struct afs_call *call) +{ + enum afs_call_state state; + u32 abort_code, remote_abort = 0; + int ret; + + _enter("%s", call->type->name); + + while (state = READ_ONCE(call->state), + state == AFS_CALL_CL_AWAIT_REPLY || + state == AFS_CALL_SV_AWAIT_OP_ID || + state == AFS_CALL_SV_AWAIT_REQUEST || + state == AFS_CALL_SV_AWAIT_ACK + ) { + if (state == AFS_CALL_SV_AWAIT_ACK) { + iov_iter_kvec(&call->def_iter, READ, NULL, 0, 0); + ret = rxrpc_kernel_recv_data(call->net->socket, + call->rxcall, &call->def_iter, + false, &remote_abort, + &call->service_id); + trace_afs_receive_data(call, &call->def_iter, false, ret); + + if (ret == -EINPROGRESS || ret == -EAGAIN) + return; + if (ret < 0 || ret == 1) { + if (ret == 1) + ret = 0; + goto call_complete; + } + return; + } + + ret = call->type->deliver(call); + state = READ_ONCE(call->state); + if (ret == 0 && call->unmarshalling_error) + ret = -EBADMSG; + switch (ret) { + case 0: + afs_queue_call_work(call); + if (state == AFS_CALL_CL_PROC_REPLY) { + if (call->op) + set_bit(AFS_SERVER_FL_MAY_HAVE_CB, + &call->op->server->flags); + goto call_complete; + } + ASSERTCMP(state, >, AFS_CALL_CL_PROC_REPLY); + goto done; + case -EINPROGRESS: + case -EAGAIN: + goto out; + case -ECONNABORTED: + ASSERTCMP(state, ==, AFS_CALL_COMPLETE); + goto done; + case -ENOTSUPP: + abort_code = RXGEN_OPCODE; + rxrpc_kernel_abort_call(call->net->socket, call->rxcall, + abort_code, ret, "KIV"); + goto local_abort; + case -EIO: + pr_err("kAFS: Call %u in bad state %u\n", + call->debug_id, state); + fallthrough; + case -ENODATA: + case -EBADMSG: + case -EMSGSIZE: + case -ENOMEM: + case -EFAULT: + abort_code = RXGEN_CC_UNMARSHAL; + if (state != AFS_CALL_CL_AWAIT_REPLY) + abort_code = RXGEN_SS_UNMARSHAL; + rxrpc_kernel_abort_call(call->net->socket, call->rxcall, + abort_code, ret, "KUM"); + goto local_abort; + default: + abort_code = RX_CALL_DEAD; + rxrpc_kernel_abort_call(call->net->socket, call->rxcall, + abort_code, ret, "KER"); + goto local_abort; + } + } + +done: + if (call->type->done) + call->type->done(call); +out: + _leave(""); + return; + +local_abort: + abort_code = 0; +call_complete: + afs_set_call_complete(call, ret, remote_abort); + state = AFS_CALL_COMPLETE; + goto done; +} + +/* + * Wait synchronously for a call to complete and clean up the call struct. + */ +long afs_wait_for_call_to_complete(struct afs_call *call, + struct afs_addr_cursor *ac) +{ + long ret; + bool rxrpc_complete = false; + + DECLARE_WAITQUEUE(myself, current); + + _enter(""); + + ret = call->error; + if (ret < 0) + goto out; + + add_wait_queue(&call->waitq, &myself); + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + + /* deliver any messages that are in the queue */ + if (!afs_check_call_state(call, AFS_CALL_COMPLETE) && + call->need_attention) { + call->need_attention = false; + __set_current_state(TASK_RUNNING); + afs_deliver_to_call(call); + continue; + } + + if (afs_check_call_state(call, AFS_CALL_COMPLETE)) + break; + + if (!rxrpc_kernel_check_life(call->net->socket, call->rxcall)) { + /* rxrpc terminated the call. */ + rxrpc_complete = true; + break; + } + + schedule(); + } + + remove_wait_queue(&call->waitq, &myself); + __set_current_state(TASK_RUNNING); + + if (!afs_check_call_state(call, AFS_CALL_COMPLETE)) { + if (rxrpc_complete) { + afs_set_call_complete(call, call->error, call->abort_code); + } else { + /* Kill off the call if it's still live. */ + _debug("call interrupted"); + if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall, + RX_USER_ABORT, -EINTR, "KWI")) + afs_set_call_complete(call, -EINTR, 0); + } + } + + spin_lock_bh(&call->state_lock); + ac->abort_code = call->abort_code; + ac->error = call->error; + spin_unlock_bh(&call->state_lock); + + ret = ac->error; + switch (ret) { + case 0: + ret = call->ret0; + call->ret0 = 0; + + fallthrough; + case -ECONNABORTED: + ac->responded = true; + break; + } + +out: + _debug("call complete"); + afs_put_call(call); + _leave(" = %p", (void *)ret); + return ret; +} + +/* + * wake up a waiting call + */ +static void afs_wake_up_call_waiter(struct sock *sk, struct rxrpc_call *rxcall, + unsigned long call_user_ID) +{ + struct afs_call *call = (struct afs_call *)call_user_ID; + + call->need_attention = true; + wake_up(&call->waitq); +} + +/* + * wake up an asynchronous call + */ +static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall, + unsigned long call_user_ID) +{ + struct afs_call *call = (struct afs_call *)call_user_ID; + int u; + + trace_afs_notify_call(rxcall, call); + call->need_attention = true; + + u = atomic_fetch_add_unless(&call->usage, 1, 0); + if (u != 0) { + trace_afs_call(call, afs_call_trace_wake, u + 1, + atomic_read(&call->net->nr_outstanding_calls), + __builtin_return_address(0)); + + if (!queue_work(afs_async_calls, &call->async_work)) + afs_put_call(call); + } +} + +/* + * Perform I/O processing on an asynchronous call. The work item carries a ref + * to the call struct that we either need to release or to pass on. + */ +static void afs_process_async_call(struct work_struct *work) +{ + struct afs_call *call = container_of(work, struct afs_call, async_work); + + _enter(""); + + if (call->state < AFS_CALL_COMPLETE && call->need_attention) { + call->need_attention = false; + afs_deliver_to_call(call); + } + + afs_put_call(call); + _leave(""); +} + +static void afs_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID) +{ + struct afs_call *call = (struct afs_call *)user_call_ID; + + call->rxcall = rxcall; +} + +/* + * Charge the incoming call preallocation. + */ +void afs_charge_preallocation(struct work_struct *work) +{ + struct afs_net *net = + container_of(work, struct afs_net, charge_preallocation_work); + struct afs_call *call = net->spare_incoming_call; + + for (;;) { + if (!call) { + call = afs_alloc_call(net, &afs_RXCMxxxx, GFP_KERNEL); + if (!call) + break; + + call->drop_ref = true; + call->async = true; + call->state = AFS_CALL_SV_AWAIT_OP_ID; + init_waitqueue_head(&call->waitq); + afs_extract_to_tmp(call); + } + + if (rxrpc_kernel_charge_accept(net->socket, + afs_wake_up_async_call, + afs_rx_attach, + (unsigned long)call, + GFP_KERNEL, + call->debug_id) < 0) + break; + call = NULL; + } + net->spare_incoming_call = call; +} + +/* + * Discard a preallocated call when a socket is shut down. + */ +static void afs_rx_discard_new_call(struct rxrpc_call *rxcall, + unsigned long user_call_ID) +{ + struct afs_call *call = (struct afs_call *)user_call_ID; + + call->rxcall = NULL; + afs_put_call(call); +} + +/* + * Notification of an incoming call. + */ +static void afs_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall, + unsigned long user_call_ID) +{ + struct afs_net *net = afs_sock2net(sk); + + queue_work(afs_wq, &net->charge_preallocation_work); +} + +/* + * Grab the operation ID from an incoming cache manager call. The socket + * buffer is discarded on error or if we don't yet have sufficient data. + */ +static int afs_deliver_cm_op_id(struct afs_call *call) +{ + int ret; + + _enter("{%zu}", iov_iter_count(call->iter)); + + /* the operation ID forms the first four bytes of the request data */ + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + call->operation_ID = ntohl(call->tmp); + afs_set_call_state(call, AFS_CALL_SV_AWAIT_OP_ID, AFS_CALL_SV_AWAIT_REQUEST); + + /* ask the cache manager to route the call (it'll change the call type + * if successful) */ + if (!afs_cm_incoming_call(call)) + return -ENOTSUPP; + + trace_afs_cb_call(call); + + /* pass responsibility for the remainer of this message off to the + * cache manager op */ + return call->type->deliver(call); +} + +/* + * Advance the AFS call state when an RxRPC service call ends the transmit + * phase. + */ +static void afs_notify_end_reply_tx(struct sock *sock, + struct rxrpc_call *rxcall, + unsigned long call_user_ID) +{ + struct afs_call *call = (struct afs_call *)call_user_ID; + + afs_set_call_state(call, AFS_CALL_SV_REPLYING, AFS_CALL_SV_AWAIT_ACK); +} + +/* + * send an empty reply + */ +void afs_send_empty_reply(struct afs_call *call) +{ + struct afs_net *net = call->net; + struct msghdr msg; + + _enter(""); + + rxrpc_kernel_set_tx_length(net->socket, call->rxcall, 0); + + msg.msg_name = NULL; + msg.msg_namelen = 0; + iov_iter_kvec(&msg.msg_iter, WRITE, NULL, 0, 0); + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + switch (rxrpc_kernel_send_data(net->socket, call->rxcall, &msg, 0, + afs_notify_end_reply_tx)) { + case 0: + _leave(" [replied]"); + return; + + case -ENOMEM: + _debug("oom"); + rxrpc_kernel_abort_call(net->socket, call->rxcall, + RXGEN_SS_MARSHAL, -ENOMEM, "KOO"); + fallthrough; + default: + _leave(" [error]"); + return; + } +} + +/* + * send a simple reply + */ +void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) +{ + struct afs_net *net = call->net; + struct msghdr msg; + struct kvec iov[1]; + int n; + + _enter(""); + + rxrpc_kernel_set_tx_length(net->socket, call->rxcall, len); + + iov[0].iov_base = (void *) buf; + iov[0].iov_len = len; + msg.msg_name = NULL; + msg.msg_namelen = 0; + iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len); + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + n = rxrpc_kernel_send_data(net->socket, call->rxcall, &msg, len, + afs_notify_end_reply_tx); + if (n >= 0) { + /* Success */ + _leave(" [replied]"); + return; + } + + if (n == -ENOMEM) { + _debug("oom"); + rxrpc_kernel_abort_call(net->socket, call->rxcall, + RXGEN_SS_MARSHAL, -ENOMEM, "KOO"); + } + _leave(" [error]"); +} + +/* + * Extract a piece of data from the received data socket buffers. + */ +int afs_extract_data(struct afs_call *call, bool want_more) +{ + struct afs_net *net = call->net; + struct iov_iter *iter = call->iter; + enum afs_call_state state; + u32 remote_abort = 0; + int ret; + + _enter("{%s,%zu},%d", call->type->name, iov_iter_count(iter), want_more); + + ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, iter, + want_more, &remote_abort, + &call->service_id); + if (ret == 0 || ret == -EAGAIN) + return ret; + + state = READ_ONCE(call->state); + if (ret == 1) { + switch (state) { + case AFS_CALL_CL_AWAIT_REPLY: + afs_set_call_state(call, state, AFS_CALL_CL_PROC_REPLY); + break; + case AFS_CALL_SV_AWAIT_REQUEST: + afs_set_call_state(call, state, AFS_CALL_SV_REPLYING); + break; + case AFS_CALL_COMPLETE: + kdebug("prem complete %d", call->error); + return afs_io_error(call, afs_io_error_extract); + default: + break; + } + return 0; + } + + afs_set_call_complete(call, ret, remote_abort); + return ret; +} + +/* + * Log protocol error production. + */ +noinline int afs_protocol_error(struct afs_call *call, + enum afs_eproto_cause cause) +{ + trace_afs_protocol_error(call, cause); + if (call) + call->unmarshalling_error = true; + return -EBADMSG; +} diff --git a/fs/afs/security.c b/fs/afs/security.c new file mode 100644 index 000000000..9cf3102f3 --- /dev/null +++ b/fs/afs/security.c @@ -0,0 +1,487 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* AFS security handling + * + * Copyright (C) 2007, 2017 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/ctype.h> +#include <linux/sched.h> +#include <linux/hashtable.h> +#include <keys/rxrpc-type.h> +#include "internal.h" + +static DEFINE_HASHTABLE(afs_permits_cache, 10); +static DEFINE_SPINLOCK(afs_permits_lock); + +/* + * get a key + */ +struct key *afs_request_key(struct afs_cell *cell) +{ + struct key *key; + + _enter("{%x}", key_serial(cell->anonymous_key)); + + _debug("key %s", cell->anonymous_key->description); + key = request_key_net(&key_type_rxrpc, cell->anonymous_key->description, + cell->net->net, NULL); + if (IS_ERR(key)) { + if (PTR_ERR(key) != -ENOKEY) { + _leave(" = %ld", PTR_ERR(key)); + return key; + } + + /* act as anonymous user */ + _leave(" = {%x} [anon]", key_serial(cell->anonymous_key)); + return key_get(cell->anonymous_key); + } else { + /* act as authorised user */ + _leave(" = {%x} [auth]", key_serial(key)); + return key; + } +} + +/* + * Get a key when pathwalk is in rcuwalk mode. + */ +struct key *afs_request_key_rcu(struct afs_cell *cell) +{ + struct key *key; + + _enter("{%x}", key_serial(cell->anonymous_key)); + + _debug("key %s", cell->anonymous_key->description); + key = request_key_net_rcu(&key_type_rxrpc, + cell->anonymous_key->description, + cell->net->net); + if (IS_ERR(key)) { + if (PTR_ERR(key) != -ENOKEY) { + _leave(" = %ld", PTR_ERR(key)); + return key; + } + + /* act as anonymous user */ + _leave(" = {%x} [anon]", key_serial(cell->anonymous_key)); + return key_get(cell->anonymous_key); + } else { + /* act as authorised user */ + _leave(" = {%x} [auth]", key_serial(key)); + return key; + } +} + +/* + * Dispose of a list of permits. + */ +static void afs_permits_rcu(struct rcu_head *rcu) +{ + struct afs_permits *permits = + container_of(rcu, struct afs_permits, rcu); + int i; + + for (i = 0; i < permits->nr_permits; i++) + key_put(permits->permits[i].key); + kfree(permits); +} + +/* + * Discard a permission cache. + */ +void afs_put_permits(struct afs_permits *permits) +{ + if (permits && refcount_dec_and_test(&permits->usage)) { + spin_lock(&afs_permits_lock); + hash_del_rcu(&permits->hash_node); + spin_unlock(&afs_permits_lock); + call_rcu(&permits->rcu, afs_permits_rcu); + } +} + +/* + * Clear a permit cache on callback break. + */ +void afs_clear_permits(struct afs_vnode *vnode) +{ + struct afs_permits *permits; + + spin_lock(&vnode->lock); + permits = rcu_dereference_protected(vnode->permit_cache, + lockdep_is_held(&vnode->lock)); + RCU_INIT_POINTER(vnode->permit_cache, NULL); + spin_unlock(&vnode->lock); + + afs_put_permits(permits); +} + +/* + * Hash a list of permits. Use simple addition to make it easy to add an extra + * one at an as-yet indeterminate position in the list. + */ +static void afs_hash_permits(struct afs_permits *permits) +{ + unsigned long h = permits->nr_permits; + int i; + + for (i = 0; i < permits->nr_permits; i++) { + h += (unsigned long)permits->permits[i].key / sizeof(void *); + h += permits->permits[i].access; + } + + permits->h = h; +} + +/* + * Cache the CallerAccess result obtained from doing a fileserver operation + * that returned a vnode status for a particular key. If a callback break + * occurs whilst the operation was in progress then we have to ditch the cache + * as the ACL *may* have changed. + */ +void afs_cache_permit(struct afs_vnode *vnode, struct key *key, + unsigned int cb_break, struct afs_status_cb *scb) +{ + struct afs_permits *permits, *xpermits, *replacement, *zap, *new = NULL; + afs_access_t caller_access = scb->status.caller_access; + size_t size = 0; + bool changed = false; + int i, j; + + _enter("{%llx:%llu},%x,%x", + vnode->fid.vid, vnode->fid.vnode, key_serial(key), caller_access); + + rcu_read_lock(); + + /* Check for the common case first: We got back the same access as last + * time we tried and already have it recorded. + */ + permits = rcu_dereference(vnode->permit_cache); + if (permits) { + if (!permits->invalidated) { + for (i = 0; i < permits->nr_permits; i++) { + if (permits->permits[i].key < key) + continue; + if (permits->permits[i].key > key) + break; + if (permits->permits[i].access != caller_access) { + changed = true; + break; + } + + if (afs_cb_is_broken(cb_break, vnode)) { + changed = true; + break; + } + + /* The cache is still good. */ + rcu_read_unlock(); + return; + } + } + + changed |= permits->invalidated; + size = permits->nr_permits; + + /* If this set of permits is now wrong, clear the permits + * pointer so that no one tries to use the stale information. + */ + if (changed) { + spin_lock(&vnode->lock); + if (permits != rcu_access_pointer(vnode->permit_cache)) + goto someone_else_changed_it_unlock; + RCU_INIT_POINTER(vnode->permit_cache, NULL); + spin_unlock(&vnode->lock); + + afs_put_permits(permits); + permits = NULL; + size = 0; + } + } + + if (afs_cb_is_broken(cb_break, vnode)) + goto someone_else_changed_it; + + /* We need a ref on any permits list we want to copy as we'll have to + * drop the lock to do memory allocation. + */ + if (permits && !refcount_inc_not_zero(&permits->usage)) + goto someone_else_changed_it; + + rcu_read_unlock(); + + /* Speculatively create a new list with the revised permission set. We + * discard this if we find an extant match already in the hash, but + * it's easier to compare with memcmp this way. + * + * We fill in the key pointers at this time, but we don't get the refs + * yet. + */ + size++; + new = kzalloc(sizeof(struct afs_permits) + + sizeof(struct afs_permit) * size, GFP_NOFS); + if (!new) + goto out_put; + + refcount_set(&new->usage, 1); + new->nr_permits = size; + i = j = 0; + if (permits) { + for (i = 0; i < permits->nr_permits; i++) { + if (j == i && permits->permits[i].key > key) { + new->permits[j].key = key; + new->permits[j].access = caller_access; + j++; + } + new->permits[j].key = permits->permits[i].key; + new->permits[j].access = permits->permits[i].access; + j++; + } + } + + if (j == i) { + new->permits[j].key = key; + new->permits[j].access = caller_access; + } + + afs_hash_permits(new); + + /* Now see if the permit list we want is actually already available */ + spin_lock(&afs_permits_lock); + + hash_for_each_possible(afs_permits_cache, xpermits, hash_node, new->h) { + if (xpermits->h != new->h || + xpermits->invalidated || + xpermits->nr_permits != new->nr_permits || + memcmp(xpermits->permits, new->permits, + new->nr_permits * sizeof(struct afs_permit)) != 0) + continue; + + if (refcount_inc_not_zero(&xpermits->usage)) { + replacement = xpermits; + goto found; + } + + break; + } + + for (i = 0; i < new->nr_permits; i++) + key_get(new->permits[i].key); + hash_add_rcu(afs_permits_cache, &new->hash_node, new->h); + replacement = new; + new = NULL; + +found: + spin_unlock(&afs_permits_lock); + + kfree(new); + + rcu_read_lock(); + spin_lock(&vnode->lock); + zap = rcu_access_pointer(vnode->permit_cache); + if (!afs_cb_is_broken(cb_break, vnode) && zap == permits) + rcu_assign_pointer(vnode->permit_cache, replacement); + else + zap = replacement; + spin_unlock(&vnode->lock); + rcu_read_unlock(); + afs_put_permits(zap); +out_put: + afs_put_permits(permits); + return; + +someone_else_changed_it_unlock: + spin_unlock(&vnode->lock); +someone_else_changed_it: + /* Someone else changed the cache under us - don't recheck at this + * time. + */ + rcu_read_unlock(); + return; +} + +static bool afs_check_permit_rcu(struct afs_vnode *vnode, struct key *key, + afs_access_t *_access) +{ + const struct afs_permits *permits; + int i; + + _enter("{%llx:%llu},%x", + vnode->fid.vid, vnode->fid.vnode, key_serial(key)); + + /* check the permits to see if we've got one yet */ + if (key == vnode->volume->cell->anonymous_key) { + *_access = vnode->status.anon_access; + _leave(" = t [anon %x]", *_access); + return true; + } + + permits = rcu_dereference(vnode->permit_cache); + if (permits) { + for (i = 0; i < permits->nr_permits; i++) { + if (permits->permits[i].key < key) + continue; + if (permits->permits[i].key > key) + break; + + *_access = permits->permits[i].access; + _leave(" = %u [perm %x]", !permits->invalidated, *_access); + return !permits->invalidated; + } + } + + _leave(" = f"); + return false; +} + +/* + * check with the fileserver to see if the directory or parent directory is + * permitted to be accessed with this authorisation, and if so, what access it + * is granted + */ +int afs_check_permit(struct afs_vnode *vnode, struct key *key, + afs_access_t *_access) +{ + struct afs_permits *permits; + bool valid = false; + int i, ret; + + _enter("{%llx:%llu},%x", + vnode->fid.vid, vnode->fid.vnode, key_serial(key)); + + /* check the permits to see if we've got one yet */ + if (key == vnode->volume->cell->anonymous_key) { + _debug("anon"); + *_access = vnode->status.anon_access; + valid = true; + } else { + rcu_read_lock(); + permits = rcu_dereference(vnode->permit_cache); + if (permits) { + for (i = 0; i < permits->nr_permits; i++) { + if (permits->permits[i].key < key) + continue; + if (permits->permits[i].key > key) + break; + + *_access = permits->permits[i].access; + valid = !permits->invalidated; + break; + } + } + rcu_read_unlock(); + } + + if (!valid) { + /* Check the status on the file we're actually interested in + * (the post-processing will cache the result). + */ + _debug("no valid permit"); + + ret = afs_fetch_status(vnode, key, false, _access); + if (ret < 0) { + *_access = 0; + _leave(" = %d", ret); + return ret; + } + } + + _leave(" = 0 [access %x]", *_access); + return 0; +} + +/* + * check the permissions on an AFS file + * - AFS ACLs are attached to directories only, and a file is controlled by its + * parent directory's ACL + */ +int afs_permission(struct inode *inode, int mask) +{ + struct afs_vnode *vnode = AFS_FS_I(inode); + afs_access_t access; + struct key *key; + int ret = 0; + + _enter("{{%llx:%llu},%lx},%x,", + vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask); + + if (mask & MAY_NOT_BLOCK) { + key = afs_request_key_rcu(vnode->volume->cell); + if (IS_ERR(key)) + return -ECHILD; + + ret = -ECHILD; + if (!afs_check_validity(vnode) || + !afs_check_permit_rcu(vnode, key, &access)) + goto error; + } else { + key = afs_request_key(vnode->volume->cell); + if (IS_ERR(key)) { + _leave(" = %ld [key]", PTR_ERR(key)); + return PTR_ERR(key); + } + + ret = afs_validate(vnode, key); + if (ret < 0) + goto error; + + /* check the permits to see if we've got one yet */ + ret = afs_check_permit(vnode, key, &access); + if (ret < 0) + goto error; + } + + /* interpret the access mask */ + _debug("REQ %x ACC %x on %s", + mask, access, S_ISDIR(inode->i_mode) ? "dir" : "file"); + + ret = 0; + if (S_ISDIR(inode->i_mode)) { + if (mask & (MAY_EXEC | MAY_READ | MAY_CHDIR)) { + if (!(access & AFS_ACE_LOOKUP)) + goto permission_denied; + } + if (mask & MAY_WRITE) { + if (!(access & (AFS_ACE_DELETE | /* rmdir, unlink, rename from */ + AFS_ACE_INSERT))) /* create, mkdir, symlink, rename to */ + goto permission_denied; + } + } else { + if (!(access & AFS_ACE_LOOKUP)) + goto permission_denied; + if ((mask & MAY_EXEC) && !(inode->i_mode & S_IXUSR)) + goto permission_denied; + if (mask & (MAY_EXEC | MAY_READ)) { + if (!(access & AFS_ACE_READ)) + goto permission_denied; + if (!(inode->i_mode & S_IRUSR)) + goto permission_denied; + } else if (mask & MAY_WRITE) { + if (!(access & AFS_ACE_WRITE)) + goto permission_denied; + if (!(inode->i_mode & S_IWUSR)) + goto permission_denied; + } + } + + key_put(key); + _leave(" = %d", ret); + return ret; + +permission_denied: + ret = -EACCES; +error: + key_put(key); + _leave(" = %d", ret); + return ret; +} + +void __exit afs_clean_up_permit_cache(void) +{ + int i; + + for (i = 0; i < HASH_SIZE(afs_permits_cache); i++) + WARN_ON_ONCE(!hlist_empty(&afs_permits_cache[i])); + +} diff --git a/fs/afs/server.c b/fs/afs/server.c new file mode 100644 index 000000000..684a2b02b --- /dev/null +++ b/fs/afs/server.c @@ -0,0 +1,713 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* AFS server record management + * + * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/sched.h> +#include <linux/slab.h> +#include "afs_fs.h" +#include "internal.h" +#include "protocol_yfs.h" + +static unsigned afs_server_gc_delay = 10; /* Server record timeout in seconds */ +static atomic_t afs_server_debug_id; + +static struct afs_server *afs_maybe_use_server(struct afs_server *, + enum afs_server_trace); +static void __afs_put_server(struct afs_net *, struct afs_server *); + +/* + * Find a server by one of its addresses. + */ +struct afs_server *afs_find_server(struct afs_net *net, + const struct sockaddr_rxrpc *srx) +{ + const struct afs_addr_list *alist; + struct afs_server *server = NULL; + unsigned int i; + int seq = 0, diff; + + rcu_read_lock(); + + do { + if (server) + afs_unuse_server_notime(net, server, afs_server_trace_put_find_rsq); + server = NULL; + read_seqbegin_or_lock(&net->fs_addr_lock, &seq); + + if (srx->transport.family == AF_INET6) { + const struct sockaddr_in6 *a = &srx->transport.sin6, *b; + hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) { + alist = rcu_dereference(server->addresses); + for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) { + b = &alist->addrs[i].transport.sin6; + diff = ((u16 __force)a->sin6_port - + (u16 __force)b->sin6_port); + if (diff == 0) + diff = memcmp(&a->sin6_addr, + &b->sin6_addr, + sizeof(struct in6_addr)); + if (diff == 0) + goto found; + } + } + } else { + const struct sockaddr_in *a = &srx->transport.sin, *b; + hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) { + alist = rcu_dereference(server->addresses); + for (i = 0; i < alist->nr_ipv4; i++) { + b = &alist->addrs[i].transport.sin; + diff = ((u16 __force)a->sin_port - + (u16 __force)b->sin_port); + if (diff == 0) + diff = ((u32 __force)a->sin_addr.s_addr - + (u32 __force)b->sin_addr.s_addr); + if (diff == 0) + goto found; + } + } + } + + server = NULL; + continue; + found: + server = afs_maybe_use_server(server, afs_server_trace_get_by_addr); + + } while (need_seqretry(&net->fs_addr_lock, seq)); + + done_seqretry(&net->fs_addr_lock, seq); + + rcu_read_unlock(); + return server; +} + +/* + * Look up a server by its UUID and mark it active. + */ +struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid) +{ + struct afs_server *server = NULL; + struct rb_node *p; + int diff, seq = 0; + + _enter("%pU", uuid); + + do { + /* Unfortunately, rbtree walking doesn't give reliable results + * under just the RCU read lock, so we have to check for + * changes. + */ + if (server) + afs_unuse_server(net, server, afs_server_trace_put_uuid_rsq); + server = NULL; + + read_seqbegin_or_lock(&net->fs_lock, &seq); + + p = net->fs_servers.rb_node; + while (p) { + server = rb_entry(p, struct afs_server, uuid_rb); + + diff = memcmp(uuid, &server->uuid, sizeof(*uuid)); + if (diff < 0) { + p = p->rb_left; + } else if (diff > 0) { + p = p->rb_right; + } else { + afs_use_server(server, afs_server_trace_get_by_uuid); + break; + } + + server = NULL; + } + } while (need_seqretry(&net->fs_lock, seq)); + + done_seqretry(&net->fs_lock, seq); + + _leave(" = %p", server); + return server; +} + +/* + * Install a server record in the namespace tree. If there's a clash, we stick + * it into a list anchored on whichever afs_server struct is actually in the + * tree. + */ +static struct afs_server *afs_install_server(struct afs_cell *cell, + struct afs_server *candidate) +{ + const struct afs_addr_list *alist; + struct afs_server *server, *next; + struct afs_net *net = cell->net; + struct rb_node **pp, *p; + int diff; + + _enter("%p", candidate); + + write_seqlock(&net->fs_lock); + + /* Firstly install the server in the UUID lookup tree */ + pp = &net->fs_servers.rb_node; + p = NULL; + while (*pp) { + p = *pp; + _debug("- consider %p", p); + server = rb_entry(p, struct afs_server, uuid_rb); + diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t)); + if (diff < 0) { + pp = &(*pp)->rb_left; + } else if (diff > 0) { + pp = &(*pp)->rb_right; + } else { + if (server->cell == cell) + goto exists; + + /* We have the same UUID representing servers in + * different cells. Append the new server to the list. + */ + for (;;) { + next = rcu_dereference_protected( + server->uuid_next, + lockdep_is_held(&net->fs_lock.lock)); + if (!next) + break; + server = next; + } + rcu_assign_pointer(server->uuid_next, candidate); + candidate->uuid_prev = server; + server = candidate; + goto added_dup; + } + } + + server = candidate; + rb_link_node(&server->uuid_rb, p, pp); + rb_insert_color(&server->uuid_rb, &net->fs_servers); + hlist_add_head_rcu(&server->proc_link, &net->fs_proc); + +added_dup: + write_seqlock(&net->fs_addr_lock); + alist = rcu_dereference_protected(server->addresses, + lockdep_is_held(&net->fs_addr_lock.lock)); + + /* Secondly, if the server has any IPv4 and/or IPv6 addresses, install + * it in the IPv4 and/or IPv6 reverse-map lists. + * + * TODO: For speed we want to use something other than a flat list + * here; even sorting the list in terms of lowest address would help a + * bit, but anything we might want to do gets messy and memory + * intensive. + */ + if (alist->nr_ipv4 > 0) + hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4); + if (alist->nr_addrs > alist->nr_ipv4) + hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6); + + write_sequnlock(&net->fs_addr_lock); + +exists: + afs_get_server(server, afs_server_trace_get_install); + write_sequnlock(&net->fs_lock); + return server; +} + +/* + * Allocate a new server record and mark it active. + */ +static struct afs_server *afs_alloc_server(struct afs_cell *cell, + const uuid_t *uuid, + struct afs_addr_list *alist) +{ + struct afs_server *server; + struct afs_net *net = cell->net; + + _enter(""); + + server = kzalloc(sizeof(struct afs_server), GFP_KERNEL); + if (!server) + goto enomem; + + atomic_set(&server->ref, 1); + atomic_set(&server->active, 1); + server->debug_id = atomic_inc_return(&afs_server_debug_id); + RCU_INIT_POINTER(server->addresses, alist); + server->addr_version = alist->version; + server->uuid = *uuid; + rwlock_init(&server->fs_lock); + init_waitqueue_head(&server->probe_wq); + INIT_LIST_HEAD(&server->probe_link); + spin_lock_init(&server->probe_lock); + server->cell = cell; + server->rtt = UINT_MAX; + + afs_inc_servers_outstanding(net); + trace_afs_server(server, 1, 1, afs_server_trace_alloc); + _leave(" = %p", server); + return server; + +enomem: + _leave(" = NULL [nomem]"); + return NULL; +} + +/* + * Look up an address record for a server + */ +static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell, + struct key *key, const uuid_t *uuid) +{ + struct afs_vl_cursor vc; + struct afs_addr_list *alist = NULL; + int ret; + + ret = -ERESTARTSYS; + if (afs_begin_vlserver_operation(&vc, cell, key)) { + while (afs_select_vlserver(&vc)) { + if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags)) + alist = afs_yfsvl_get_endpoints(&vc, uuid); + else + alist = afs_vl_get_addrs_u(&vc, uuid); + } + + ret = afs_end_vlserver_operation(&vc); + } + + return ret < 0 ? ERR_PTR(ret) : alist; +} + +/* + * Get or create a fileserver record. + */ +struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key, + const uuid_t *uuid, u32 addr_version) +{ + struct afs_addr_list *alist; + struct afs_server *server, *candidate; + + _enter("%p,%pU", cell->net, uuid); + + server = afs_find_server_by_uuid(cell->net, uuid); + if (server) { + if (server->addr_version != addr_version) + set_bit(AFS_SERVER_FL_NEEDS_UPDATE, &server->flags); + return server; + } + + alist = afs_vl_lookup_addrs(cell, key, uuid); + if (IS_ERR(alist)) + return ERR_CAST(alist); + + candidate = afs_alloc_server(cell, uuid, alist); + if (!candidate) { + afs_put_addrlist(alist); + return ERR_PTR(-ENOMEM); + } + + server = afs_install_server(cell, candidate); + if (server != candidate) { + afs_put_addrlist(alist); + kfree(candidate); + } else { + /* Immediately dispatch an asynchronous probe to each interface + * on the fileserver. This will make sure the repeat-probing + * service is started. + */ + afs_fs_probe_fileserver(cell->net, server, key, true); + } + + return server; +} + +/* + * Set the server timer to fire after a given delay, assuming it's not already + * set for an earlier time. + */ +static void afs_set_server_timer(struct afs_net *net, time64_t delay) +{ + if (net->live) { + afs_inc_servers_outstanding(net); + if (timer_reduce(&net->fs_timer, jiffies + delay * HZ)) + afs_dec_servers_outstanding(net); + } +} + +/* + * Server management timer. We have an increment on fs_outstanding that we + * need to pass along to the work item. + */ +void afs_servers_timer(struct timer_list *timer) +{ + struct afs_net *net = container_of(timer, struct afs_net, fs_timer); + + _enter(""); + if (!queue_work(afs_wq, &net->fs_manager)) + afs_dec_servers_outstanding(net); +} + +/* + * Get a reference on a server object. + */ +struct afs_server *afs_get_server(struct afs_server *server, + enum afs_server_trace reason) +{ + unsigned int u = atomic_inc_return(&server->ref); + + trace_afs_server(server, u, atomic_read(&server->active), reason); + return server; +} + +/* + * Try to get a reference on a server object. + */ +static struct afs_server *afs_maybe_use_server(struct afs_server *server, + enum afs_server_trace reason) +{ + unsigned int r = atomic_fetch_add_unless(&server->ref, 1, 0); + unsigned int a; + + if (r == 0) + return NULL; + + a = atomic_inc_return(&server->active); + trace_afs_server(server, r, a, reason); + return server; +} + +/* + * Get an active count on a server object. + */ +struct afs_server *afs_use_server(struct afs_server *server, enum afs_server_trace reason) +{ + unsigned int r = atomic_inc_return(&server->ref); + unsigned int a = atomic_inc_return(&server->active); + + trace_afs_server(server, r, a, reason); + return server; +} + +/* + * Release a reference on a server record. + */ +void afs_put_server(struct afs_net *net, struct afs_server *server, + enum afs_server_trace reason) +{ + unsigned int usage; + + if (!server) + return; + + usage = atomic_dec_return(&server->ref); + trace_afs_server(server, usage, atomic_read(&server->active), reason); + if (unlikely(usage == 0)) + __afs_put_server(net, server); +} + +/* + * Drop an active count on a server object without updating the last-unused + * time. + */ +void afs_unuse_server_notime(struct afs_net *net, struct afs_server *server, + enum afs_server_trace reason) +{ + if (server) { + unsigned int active = atomic_dec_return(&server->active); + + if (active == 0) + afs_set_server_timer(net, afs_server_gc_delay); + afs_put_server(net, server, reason); + } +} + +/* + * Drop an active count on a server object. + */ +void afs_unuse_server(struct afs_net *net, struct afs_server *server, + enum afs_server_trace reason) +{ + if (server) { + server->unuse_time = ktime_get_real_seconds(); + afs_unuse_server_notime(net, server, reason); + } +} + +static void afs_server_rcu(struct rcu_head *rcu) +{ + struct afs_server *server = container_of(rcu, struct afs_server, rcu); + + trace_afs_server(server, atomic_read(&server->ref), + atomic_read(&server->active), afs_server_trace_free); + afs_put_addrlist(rcu_access_pointer(server->addresses)); + kfree(server); +} + +static void __afs_put_server(struct afs_net *net, struct afs_server *server) +{ + call_rcu(&server->rcu, afs_server_rcu); + afs_dec_servers_outstanding(net); +} + +static void afs_give_up_callbacks(struct afs_net *net, struct afs_server *server) +{ + struct afs_addr_list *alist = rcu_access_pointer(server->addresses); + struct afs_addr_cursor ac = { + .alist = alist, + .index = alist->preferred, + .error = 0, + }; + + afs_fs_give_up_all_callbacks(net, server, &ac, NULL); +} + +/* + * destroy a dead server + */ +static void afs_destroy_server(struct afs_net *net, struct afs_server *server) +{ + if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags)) + afs_give_up_callbacks(net, server); + + afs_put_server(net, server, afs_server_trace_destroy); +} + +/* + * Garbage collect any expired servers. + */ +static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list) +{ + struct afs_server *server, *next, *prev; + int active; + + while ((server = gc_list)) { + gc_list = server->gc_next; + + write_seqlock(&net->fs_lock); + + active = atomic_read(&server->active); + if (active == 0) { + trace_afs_server(server, atomic_read(&server->ref), + active, afs_server_trace_gc); + next = rcu_dereference_protected( + server->uuid_next, lockdep_is_held(&net->fs_lock.lock)); + prev = server->uuid_prev; + if (!prev) { + /* The one at the front is in the tree */ + if (!next) { + rb_erase(&server->uuid_rb, &net->fs_servers); + } else { + rb_replace_node_rcu(&server->uuid_rb, + &next->uuid_rb, + &net->fs_servers); + next->uuid_prev = NULL; + } + } else { + /* This server is not at the front */ + rcu_assign_pointer(prev->uuid_next, next); + if (next) + next->uuid_prev = prev; + } + + list_del(&server->probe_link); + hlist_del_rcu(&server->proc_link); + if (!hlist_unhashed(&server->addr4_link)) + hlist_del_rcu(&server->addr4_link); + if (!hlist_unhashed(&server->addr6_link)) + hlist_del_rcu(&server->addr6_link); + } + write_sequnlock(&net->fs_lock); + + if (active == 0) + afs_destroy_server(net, server); + } +} + +/* + * Manage the records of servers known to be within a network namespace. This + * includes garbage collecting unused servers. + * + * Note also that we were given an increment on net->servers_outstanding by + * whoever queued us that we need to deal with before returning. + */ +void afs_manage_servers(struct work_struct *work) +{ + struct afs_net *net = container_of(work, struct afs_net, fs_manager); + struct afs_server *gc_list = NULL; + struct rb_node *cursor; + time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX; + bool purging = !net->live; + + _enter(""); + + /* Trawl the server list looking for servers that have expired from + * lack of use. + */ + read_seqlock_excl(&net->fs_lock); + + for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) { + struct afs_server *server = + rb_entry(cursor, struct afs_server, uuid_rb); + int active = atomic_read(&server->active); + + _debug("manage %pU %u", &server->uuid, active); + + if (purging) { + trace_afs_server(server, atomic_read(&server->ref), + active, afs_server_trace_purging); + if (active != 0) + pr_notice("Can't purge s=%08x\n", server->debug_id); + } + + if (active == 0) { + time64_t expire_at = server->unuse_time; + + if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) && + !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags)) + expire_at += afs_server_gc_delay; + if (purging || expire_at <= now) { + server->gc_next = gc_list; + gc_list = server; + } else if (expire_at < next_manage) { + next_manage = expire_at; + } + } + } + + read_sequnlock_excl(&net->fs_lock); + + /* Update the timer on the way out. We have to pass an increment on + * servers_outstanding in the namespace that we are in to the timer or + * the work scheduler. + */ + if (!purging && next_manage < TIME64_MAX) { + now = ktime_get_real_seconds(); + + if (next_manage - now <= 0) { + if (queue_work(afs_wq, &net->fs_manager)) + afs_inc_servers_outstanding(net); + } else { + afs_set_server_timer(net, next_manage - now); + } + } + + afs_gc_servers(net, gc_list); + + afs_dec_servers_outstanding(net); + _leave(" [%d]", atomic_read(&net->servers_outstanding)); +} + +static void afs_queue_server_manager(struct afs_net *net) +{ + afs_inc_servers_outstanding(net); + if (!queue_work(afs_wq, &net->fs_manager)) + afs_dec_servers_outstanding(net); +} + +/* + * Purge list of servers. + */ +void afs_purge_servers(struct afs_net *net) +{ + _enter(""); + + if (del_timer_sync(&net->fs_timer)) + afs_dec_servers_outstanding(net); + + afs_queue_server_manager(net); + + _debug("wait"); + atomic_dec(&net->servers_outstanding); + wait_var_event(&net->servers_outstanding, + !atomic_read(&net->servers_outstanding)); + _leave(""); +} + +/* + * Get an update for a server's address list. + */ +static noinline bool afs_update_server_record(struct afs_operation *op, + struct afs_server *server) +{ + struct afs_addr_list *alist, *discard; + + _enter(""); + + trace_afs_server(server, atomic_read(&server->ref), atomic_read(&server->active), + afs_server_trace_update); + + alist = afs_vl_lookup_addrs(op->volume->cell, op->key, &server->uuid); + if (IS_ERR(alist)) { + if ((PTR_ERR(alist) == -ERESTARTSYS || + PTR_ERR(alist) == -EINTR) && + (op->flags & AFS_OPERATION_UNINTR) && + server->addresses) { + _leave(" = t [intr]"); + return true; + } + op->error = PTR_ERR(alist); + _leave(" = f [%d]", op->error); + return false; + } + + discard = alist; + if (server->addr_version != alist->version) { + write_lock(&server->fs_lock); + discard = rcu_dereference_protected(server->addresses, + lockdep_is_held(&server->fs_lock)); + rcu_assign_pointer(server->addresses, alist); + server->addr_version = alist->version; + write_unlock(&server->fs_lock); + } + + afs_put_addrlist(discard); + _leave(" = t"); + return true; +} + +/* + * See if a server's address list needs updating. + */ +bool afs_check_server_record(struct afs_operation *op, struct afs_server *server) +{ + bool success; + int ret, retries = 0; + + _enter(""); + + ASSERT(server); + +retry: + if (test_bit(AFS_SERVER_FL_UPDATING, &server->flags)) + goto wait; + if (test_bit(AFS_SERVER_FL_NEEDS_UPDATE, &server->flags)) + goto update; + _leave(" = t [good]"); + return true; + +update: + if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) { + clear_bit(AFS_SERVER_FL_NEEDS_UPDATE, &server->flags); + success = afs_update_server_record(op, server); + clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags); + wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING); + _leave(" = %d", success); + return success; + } + +wait: + ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING, + (op->flags & AFS_OPERATION_UNINTR) ? + TASK_UNINTERRUPTIBLE : TASK_INTERRUPTIBLE); + if (ret == -ERESTARTSYS) { + op->error = ret; + _leave(" = f [intr]"); + return false; + } + + retries++; + if (retries == 4) { + _leave(" = f [stale]"); + ret = -ESTALE; + return false; + } + goto retry; +} diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c new file mode 100644 index 000000000..b59896b1d --- /dev/null +++ b/fs/afs/server_list.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* AFS fileserver list management. + * + * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include "internal.h" + +void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist) +{ + int i; + + if (slist && refcount_dec_and_test(&slist->usage)) { + for (i = 0; i < slist->nr_servers; i++) + afs_unuse_server(net, slist->servers[i].server, + afs_server_trace_put_slist); + kfree_rcu(slist, rcu); + } +} + +/* + * Build a server list from a VLDB record. + */ +struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell, + struct key *key, + struct afs_vldb_entry *vldb, + u8 type_mask) +{ + struct afs_server_list *slist; + struct afs_server *server; + int ret = -ENOMEM, nr_servers = 0, i, j; + + for (i = 0; i < vldb->nr_servers; i++) + if (vldb->fs_mask[i] & type_mask) + nr_servers++; + + slist = kzalloc(struct_size(slist, servers, nr_servers), GFP_KERNEL); + if (!slist) + goto error; + + refcount_set(&slist->usage, 1); + rwlock_init(&slist->lock); + + for (i = 0; i < AFS_MAXTYPES; i++) + slist->vids[i] = vldb->vid[i]; + + /* Make sure a records exists for each server in the list. */ + for (i = 0; i < vldb->nr_servers; i++) { + if (!(vldb->fs_mask[i] & type_mask)) + continue; + + server = afs_lookup_server(cell, key, &vldb->fs_server[i], + vldb->addr_version[i]); + if (IS_ERR(server)) { + ret = PTR_ERR(server); + if (ret == -ENOENT || + ret == -ENOMEDIUM) + continue; + goto error_2; + } + + /* Insertion-sort by UUID */ + for (j = 0; j < slist->nr_servers; j++) + if (memcmp(&slist->servers[j].server->uuid, + &server->uuid, + sizeof(server->uuid)) >= 0) + break; + if (j < slist->nr_servers) { + if (slist->servers[j].server == server) { + afs_put_server(cell->net, server, + afs_server_trace_put_slist_isort); + continue; + } + + memmove(slist->servers + j + 1, + slist->servers + j, + (slist->nr_servers - j) * sizeof(struct afs_server_entry)); + } + + slist->servers[j].server = server; + slist->nr_servers++; + } + + if (slist->nr_servers == 0) { + ret = -EDESTADDRREQ; + goto error_2; + } + + return slist; + +error_2: + afs_put_serverlist(cell->net, slist); +error: + return ERR_PTR(ret); +} + +/* + * Copy the annotations from an old server list to its potential replacement. + */ +bool afs_annotate_server_list(struct afs_server_list *new, + struct afs_server_list *old) +{ + struct afs_server *cur; + int i, j; + + if (old->nr_servers != new->nr_servers) + goto changed; + + for (i = 0; i < old->nr_servers; i++) + if (old->servers[i].server != new->servers[i].server) + goto changed; + + return false; + +changed: + /* Maintain the same preferred server as before if possible. */ + cur = old->servers[old->preferred].server; + for (j = 0; j < new->nr_servers; j++) { + if (new->servers[j].server == cur) { + new->preferred = j; + break; + } + } + + return true; +} diff --git a/fs/afs/super.c b/fs/afs/super.c new file mode 100644 index 000000000..1b62a99b3 --- /dev/null +++ b/fs/afs/super.c @@ -0,0 +1,777 @@ +/* AFS superblock handling + * + * Copyright (c) 2002, 2007, 2018 Red Hat, Inc. All rights reserved. + * + * This software may be freely redistributed under the terms of the + * GNU General Public License. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Authors: David Howells <dhowells@redhat.com> + * David Woodhouse <dwmw2@infradead.org> + * + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mount.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/pagemap.h> +#include <linux/fs_parser.h> +#include <linux/statfs.h> +#include <linux/sched.h> +#include <linux/nsproxy.h> +#include <linux/magic.h> +#include <net/net_namespace.h> +#include "internal.h" + +static void afs_i_init_once(void *foo); +static void afs_kill_super(struct super_block *sb); +static struct inode *afs_alloc_inode(struct super_block *sb); +static void afs_destroy_inode(struct inode *inode); +static void afs_free_inode(struct inode *inode); +static int afs_statfs(struct dentry *dentry, struct kstatfs *buf); +static int afs_show_devname(struct seq_file *m, struct dentry *root); +static int afs_show_options(struct seq_file *m, struct dentry *root); +static int afs_init_fs_context(struct fs_context *fc); +static const struct fs_parameter_spec afs_fs_parameters[]; + +struct file_system_type afs_fs_type = { + .owner = THIS_MODULE, + .name = "afs", + .init_fs_context = afs_init_fs_context, + .parameters = afs_fs_parameters, + .kill_sb = afs_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE, +}; +MODULE_ALIAS_FS("afs"); + +int afs_net_id; + +static const struct super_operations afs_super_ops = { + .statfs = afs_statfs, + .alloc_inode = afs_alloc_inode, + .drop_inode = afs_drop_inode, + .destroy_inode = afs_destroy_inode, + .free_inode = afs_free_inode, + .evict_inode = afs_evict_inode, + .show_devname = afs_show_devname, + .show_options = afs_show_options, +}; + +static struct kmem_cache *afs_inode_cachep; +static atomic_t afs_count_active_inodes; + +enum afs_param { + Opt_autocell, + Opt_dyn, + Opt_flock, + Opt_source, +}; + +static const struct constant_table afs_param_flock[] = { + {"local", afs_flock_mode_local }, + {"openafs", afs_flock_mode_openafs }, + {"strict", afs_flock_mode_strict }, + {"write", afs_flock_mode_write }, + {} +}; + +static const struct fs_parameter_spec afs_fs_parameters[] = { + fsparam_flag ("autocell", Opt_autocell), + fsparam_flag ("dyn", Opt_dyn), + fsparam_enum ("flock", Opt_flock, afs_param_flock), + fsparam_string("source", Opt_source), + {} +}; + +/* + * initialise the filesystem + */ +int __init afs_fs_init(void) +{ + int ret; + + _enter(""); + + /* create ourselves an inode cache */ + atomic_set(&afs_count_active_inodes, 0); + + ret = -ENOMEM; + afs_inode_cachep = kmem_cache_create("afs_inode_cache", + sizeof(struct afs_vnode), + 0, + SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT, + afs_i_init_once); + if (!afs_inode_cachep) { + printk(KERN_NOTICE "kAFS: Failed to allocate inode cache\n"); + return ret; + } + + /* now export our filesystem to lesser mortals */ + ret = register_filesystem(&afs_fs_type); + if (ret < 0) { + kmem_cache_destroy(afs_inode_cachep); + _leave(" = %d", ret); + return ret; + } + + _leave(" = 0"); + return 0; +} + +/* + * clean up the filesystem + */ +void afs_fs_exit(void) +{ + _enter(""); + + afs_mntpt_kill_timer(); + unregister_filesystem(&afs_fs_type); + + if (atomic_read(&afs_count_active_inodes) != 0) { + printk("kAFS: %d active inode objects still present\n", + atomic_read(&afs_count_active_inodes)); + BUG(); + } + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); + kmem_cache_destroy(afs_inode_cachep); + _leave(""); +} + +/* + * Display the mount device name in /proc/mounts. + */ +static int afs_show_devname(struct seq_file *m, struct dentry *root) +{ + struct afs_super_info *as = AFS_FS_S(root->d_sb); + struct afs_volume *volume = as->volume; + struct afs_cell *cell = as->cell; + const char *suf = ""; + char pref = '%'; + + if (as->dyn_root) { + seq_puts(m, "none"); + return 0; + } + + switch (volume->type) { + case AFSVL_RWVOL: + break; + case AFSVL_ROVOL: + pref = '#'; + if (volume->type_force) + suf = ".readonly"; + break; + case AFSVL_BACKVOL: + pref = '#'; + suf = ".backup"; + break; + } + + seq_printf(m, "%c%s:%s%s", pref, cell->name, volume->name, suf); + return 0; +} + +/* + * Display the mount options in /proc/mounts. + */ +static int afs_show_options(struct seq_file *m, struct dentry *root) +{ + struct afs_super_info *as = AFS_FS_S(root->d_sb); + const char *p = NULL; + + if (as->dyn_root) + seq_puts(m, ",dyn"); + if (test_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(d_inode(root))->flags)) + seq_puts(m, ",autocell"); + switch (as->flock_mode) { + case afs_flock_mode_unset: break; + case afs_flock_mode_local: p = "local"; break; + case afs_flock_mode_openafs: p = "openafs"; break; + case afs_flock_mode_strict: p = "strict"; break; + case afs_flock_mode_write: p = "write"; break; + } + if (p) + seq_printf(m, ",flock=%s", p); + + return 0; +} + +/* + * Parse the source name to get cell name, volume name, volume type and R/W + * selector. + * + * This can be one of the following: + * "%[cell:]volume[.]" R/W volume + * "#[cell:]volume[.]" R/O or R/W volume (R/O parent), + * or R/W (R/W parent) volume + * "%[cell:]volume.readonly" R/O volume + * "#[cell:]volume.readonly" R/O volume + * "%[cell:]volume.backup" Backup volume + * "#[cell:]volume.backup" Backup volume + */ +static int afs_parse_source(struct fs_context *fc, struct fs_parameter *param) +{ + struct afs_fs_context *ctx = fc->fs_private; + struct afs_cell *cell; + const char *cellname, *suffix, *name = param->string; + int cellnamesz; + + _enter(",%s", name); + + if (fc->source) + return invalf(fc, "kAFS: Multiple sources not supported"); + + if (!name) { + printk(KERN_ERR "kAFS: no volume name specified\n"); + return -EINVAL; + } + + if ((name[0] != '%' && name[0] != '#') || !name[1]) { + /* To use dynroot, we don't want to have to provide a source */ + if (strcmp(name, "none") == 0) { + ctx->no_cell = true; + return 0; + } + printk(KERN_ERR "kAFS: unparsable volume name\n"); + return -EINVAL; + } + + /* determine the type of volume we're looking for */ + if (name[0] == '%') { + ctx->type = AFSVL_RWVOL; + ctx->force = true; + } + name++; + + /* split the cell name out if there is one */ + ctx->volname = strchr(name, ':'); + if (ctx->volname) { + cellname = name; + cellnamesz = ctx->volname - name; + ctx->volname++; + } else { + ctx->volname = name; + cellname = NULL; + cellnamesz = 0; + } + + /* the volume type is further affected by a possible suffix */ + suffix = strrchr(ctx->volname, '.'); + if (suffix) { + if (strcmp(suffix, ".readonly") == 0) { + ctx->type = AFSVL_ROVOL; + ctx->force = true; + } else if (strcmp(suffix, ".backup") == 0) { + ctx->type = AFSVL_BACKVOL; + ctx->force = true; + } else if (suffix[1] == 0) { + } else { + suffix = NULL; + } + } + + ctx->volnamesz = suffix ? + suffix - ctx->volname : strlen(ctx->volname); + + _debug("cell %*.*s [%p]", + cellnamesz, cellnamesz, cellname ?: "", ctx->cell); + + /* lookup the cell record */ + if (cellname) { + cell = afs_lookup_cell(ctx->net, cellname, cellnamesz, + NULL, false); + if (IS_ERR(cell)) { + pr_err("kAFS: unable to lookup cell '%*.*s'\n", + cellnamesz, cellnamesz, cellname ?: ""); + return PTR_ERR(cell); + } + afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_parse); + afs_see_cell(cell, afs_cell_trace_see_source); + ctx->cell = cell; + } + + _debug("CELL:%s [%p] VOLUME:%*.*s SUFFIX:%s TYPE:%d%s", + ctx->cell->name, ctx->cell, + ctx->volnamesz, ctx->volnamesz, ctx->volname, + suffix ?: "-", ctx->type, ctx->force ? " FORCE" : ""); + + fc->source = param->string; + param->string = NULL; + return 0; +} + +/* + * Parse a single mount parameter. + */ +static int afs_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct fs_parse_result result; + struct afs_fs_context *ctx = fc->fs_private; + int opt; + + opt = fs_parse(fc, afs_fs_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_source: + return afs_parse_source(fc, param); + + case Opt_autocell: + ctx->autocell = true; + break; + + case Opt_dyn: + ctx->dyn_root = true; + break; + + case Opt_flock: + ctx->flock_mode = result.uint_32; + break; + + default: + return -EINVAL; + } + + _leave(" = 0"); + return 0; +} + +/* + * Validate the options, get the cell key and look up the volume. + */ +static int afs_validate_fc(struct fs_context *fc) +{ + struct afs_fs_context *ctx = fc->fs_private; + struct afs_volume *volume; + struct afs_cell *cell; + struct key *key; + int ret; + + if (!ctx->dyn_root) { + if (ctx->no_cell) { + pr_warn("kAFS: Can only specify source 'none' with -o dyn\n"); + return -EINVAL; + } + + if (!ctx->cell) { + pr_warn("kAFS: No cell specified\n"); + return -EDESTADDRREQ; + } + + reget_key: + /* We try to do the mount securely. */ + key = afs_request_key(ctx->cell); + if (IS_ERR(key)) + return PTR_ERR(key); + + ctx->key = key; + + if (ctx->volume) { + afs_put_volume(ctx->net, ctx->volume, + afs_volume_trace_put_validate_fc); + ctx->volume = NULL; + } + + if (test_bit(AFS_CELL_FL_CHECK_ALIAS, &ctx->cell->flags)) { + ret = afs_cell_detect_alias(ctx->cell, key); + if (ret < 0) + return ret; + if (ret == 1) { + _debug("switch to alias"); + key_put(ctx->key); + ctx->key = NULL; + cell = afs_use_cell(ctx->cell->alias_of, + afs_cell_trace_use_fc_alias); + afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_fc); + ctx->cell = cell; + goto reget_key; + } + } + + volume = afs_create_volume(ctx); + if (IS_ERR(volume)) + return PTR_ERR(volume); + + ctx->volume = volume; + if (volume->type != AFSVL_RWVOL) + ctx->flock_mode = afs_flock_mode_local; + } + + return 0; +} + +/* + * check a superblock to see if it's the one we're looking for + */ +static int afs_test_super(struct super_block *sb, struct fs_context *fc) +{ + struct afs_fs_context *ctx = fc->fs_private; + struct afs_super_info *as = AFS_FS_S(sb); + + return (as->net_ns == fc->net_ns && + as->volume && + as->volume->vid == ctx->volume->vid && + as->cell == ctx->cell && + !as->dyn_root); +} + +static int afs_dynroot_test_super(struct super_block *sb, struct fs_context *fc) +{ + struct afs_super_info *as = AFS_FS_S(sb); + + return (as->net_ns == fc->net_ns && + as->dyn_root); +} + +static int afs_set_super(struct super_block *sb, struct fs_context *fc) +{ + return set_anon_super(sb, NULL); +} + +/* + * fill in the superblock + */ +static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx) +{ + struct afs_super_info *as = AFS_FS_S(sb); + struct inode *inode = NULL; + int ret; + + _enter(""); + + /* fill in the superblock */ + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_magic = AFS_FS_MAGIC; + sb->s_op = &afs_super_ops; + if (!as->dyn_root) + sb->s_xattr = afs_xattr_handlers; + ret = super_setup_bdi(sb); + if (ret) + return ret; + + /* allocate the root inode and dentry */ + if (as->dyn_root) { + inode = afs_iget_pseudo_dir(sb, true); + } else { + sprintf(sb->s_id, "%llu", as->volume->vid); + afs_activate_volume(as->volume); + inode = afs_root_iget(sb, ctx->key); + } + + if (IS_ERR(inode)) + return PTR_ERR(inode); + + if (ctx->autocell || as->dyn_root) + set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags); + + ret = -ENOMEM; + sb->s_root = d_make_root(inode); + if (!sb->s_root) + goto error; + + if (as->dyn_root) { + sb->s_d_op = &afs_dynroot_dentry_operations; + ret = afs_dynroot_populate(sb); + if (ret < 0) + goto error; + } else { + sb->s_d_op = &afs_fs_dentry_operations; + rcu_assign_pointer(as->volume->sb, sb); + } + + _leave(" = 0"); + return 0; + +error: + _leave(" = %d", ret); + return ret; +} + +static struct afs_super_info *afs_alloc_sbi(struct fs_context *fc) +{ + struct afs_fs_context *ctx = fc->fs_private; + struct afs_super_info *as; + + as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL); + if (as) { + as->net_ns = get_net(fc->net_ns); + as->flock_mode = ctx->flock_mode; + if (ctx->dyn_root) { + as->dyn_root = true; + } else { + as->cell = afs_use_cell(ctx->cell, afs_cell_trace_use_sbi); + as->volume = afs_get_volume(ctx->volume, + afs_volume_trace_get_alloc_sbi); + } + } + return as; +} + +static void afs_destroy_sbi(struct afs_super_info *as) +{ + if (as) { + struct afs_net *net = afs_net(as->net_ns); + afs_put_volume(net, as->volume, afs_volume_trace_put_destroy_sbi); + afs_unuse_cell(net, as->cell, afs_cell_trace_unuse_sbi); + put_net(as->net_ns); + kfree(as); + } +} + +static void afs_kill_super(struct super_block *sb) +{ + struct afs_super_info *as = AFS_FS_S(sb); + + if (as->dyn_root) + afs_dynroot_depopulate(sb); + + /* Clear the callback interests (which will do ilookup5) before + * deactivating the superblock. + */ + if (as->volume) + rcu_assign_pointer(as->volume->sb, NULL); + kill_anon_super(sb); + if (as->volume) + afs_deactivate_volume(as->volume); + afs_destroy_sbi(as); +} + +/* + * Get an AFS superblock and root directory. + */ +static int afs_get_tree(struct fs_context *fc) +{ + struct afs_fs_context *ctx = fc->fs_private; + struct super_block *sb; + struct afs_super_info *as; + int ret; + + ret = afs_validate_fc(fc); + if (ret) + goto error; + + _enter(""); + + /* allocate a superblock info record */ + ret = -ENOMEM; + as = afs_alloc_sbi(fc); + if (!as) + goto error; + fc->s_fs_info = as; + + /* allocate a deviceless superblock */ + sb = sget_fc(fc, + as->dyn_root ? afs_dynroot_test_super : afs_test_super, + afs_set_super); + if (IS_ERR(sb)) { + ret = PTR_ERR(sb); + goto error; + } + + if (!sb->s_root) { + /* initial superblock/root creation */ + _debug("create"); + ret = afs_fill_super(sb, ctx); + if (ret < 0) + goto error_sb; + sb->s_flags |= SB_ACTIVE; + } else { + _debug("reuse"); + ASSERTCMP(sb->s_flags, &, SB_ACTIVE); + } + + fc->root = dget(sb->s_root); + trace_afs_get_tree(as->cell, as->volume); + _leave(" = 0 [%p]", sb); + return 0; + +error_sb: + deactivate_locked_super(sb); +error: + _leave(" = %d", ret); + return ret; +} + +static void afs_free_fc(struct fs_context *fc) +{ + struct afs_fs_context *ctx = fc->fs_private; + + afs_destroy_sbi(fc->s_fs_info); + afs_put_volume(ctx->net, ctx->volume, afs_volume_trace_put_free_fc); + afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_fc); + key_put(ctx->key); + kfree(ctx); +} + +static const struct fs_context_operations afs_context_ops = { + .free = afs_free_fc, + .parse_param = afs_parse_param, + .get_tree = afs_get_tree, +}; + +/* + * Set up the filesystem mount context. + */ +static int afs_init_fs_context(struct fs_context *fc) +{ + struct afs_fs_context *ctx; + struct afs_cell *cell; + + ctx = kzalloc(sizeof(struct afs_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->type = AFSVL_ROVOL; + ctx->net = afs_net(fc->net_ns); + + /* Default to the workstation cell. */ + cell = afs_find_cell(ctx->net, NULL, 0, afs_cell_trace_use_fc); + if (IS_ERR(cell)) + cell = NULL; + ctx->cell = cell; + + fc->fs_private = ctx; + fc->ops = &afs_context_ops; + return 0; +} + +/* + * Initialise an inode cache slab element prior to any use. Note that + * afs_alloc_inode() *must* reset anything that could incorrectly leak from one + * inode to another. + */ +static void afs_i_init_once(void *_vnode) +{ + struct afs_vnode *vnode = _vnode; + + memset(vnode, 0, sizeof(*vnode)); + inode_init_once(&vnode->vfs_inode); + mutex_init(&vnode->io_lock); + init_rwsem(&vnode->validate_lock); + spin_lock_init(&vnode->wb_lock); + spin_lock_init(&vnode->lock); + INIT_LIST_HEAD(&vnode->wb_keys); + INIT_LIST_HEAD(&vnode->pending_locks); + INIT_LIST_HEAD(&vnode->granted_locks); + INIT_DELAYED_WORK(&vnode->lock_work, afs_lock_work); + seqlock_init(&vnode->cb_lock); +} + +/* + * allocate an AFS inode struct from our slab cache + */ +static struct inode *afs_alloc_inode(struct super_block *sb) +{ + struct afs_vnode *vnode; + + vnode = kmem_cache_alloc(afs_inode_cachep, GFP_KERNEL); + if (!vnode) + return NULL; + + atomic_inc(&afs_count_active_inodes); + + /* Reset anything that shouldn't leak from one inode to the next. */ + memset(&vnode->fid, 0, sizeof(vnode->fid)); + memset(&vnode->status, 0, sizeof(vnode->status)); + + vnode->volume = NULL; + vnode->lock_key = NULL; + vnode->permit_cache = NULL; +#ifdef CONFIG_AFS_FSCACHE + vnode->cache = NULL; +#endif + + vnode->flags = 1 << AFS_VNODE_UNSET; + vnode->lock_state = AFS_VNODE_LOCK_NONE; + + init_rwsem(&vnode->rmdir_lock); + + _leave(" = %p", &vnode->vfs_inode); + return &vnode->vfs_inode; +} + +static void afs_free_inode(struct inode *inode) +{ + kmem_cache_free(afs_inode_cachep, AFS_FS_I(inode)); +} + +/* + * destroy an AFS inode struct + */ +static void afs_destroy_inode(struct inode *inode) +{ + struct afs_vnode *vnode = AFS_FS_I(inode); + + _enter("%p{%llx:%llu}", inode, vnode->fid.vid, vnode->fid.vnode); + + _debug("DESTROY INODE %p", inode); + + atomic_dec(&afs_count_active_inodes); +} + +static void afs_get_volume_status_success(struct afs_operation *op) +{ + struct afs_volume_status *vs = &op->volstatus.vs; + struct kstatfs *buf = op->volstatus.buf; + + if (vs->max_quota == 0) + buf->f_blocks = vs->part_max_blocks; + else + buf->f_blocks = vs->max_quota; + + if (buf->f_blocks > vs->blocks_in_use) + buf->f_bavail = buf->f_bfree = + buf->f_blocks - vs->blocks_in_use; +} + +static const struct afs_operation_ops afs_get_volume_status_operation = { + .issue_afs_rpc = afs_fs_get_volume_status, + .issue_yfs_rpc = yfs_fs_get_volume_status, + .success = afs_get_volume_status_success, +}; + +/* + * return information about an AFS volume + */ +static int afs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct afs_super_info *as = AFS_FS_S(dentry->d_sb); + struct afs_operation *op; + struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); + + buf->f_type = dentry->d_sb->s_magic; + buf->f_bsize = AFS_BLOCK_SIZE; + buf->f_namelen = AFSNAMEMAX - 1; + + if (as->dyn_root) { + buf->f_blocks = 1; + buf->f_bavail = 0; + buf->f_bfree = 0; + return 0; + } + + op = afs_alloc_operation(NULL, as->volume); + if (IS_ERR(op)) + return PTR_ERR(op); + + afs_op_set_vnode(op, 0, vnode); + op->nr_files = 1; + op->volstatus.buf = buf; + op->ops = &afs_get_volume_status_operation; + return afs_do_sync_operation(op); +} diff --git a/fs/afs/vl_alias.c b/fs/afs/vl_alias.c new file mode 100644 index 000000000..f04a80e4f --- /dev/null +++ b/fs/afs/vl_alias.c @@ -0,0 +1,383 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* AFS cell alias detection + * + * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/namei.h> +#include <keys/rxrpc-type.h> +#include "internal.h" + +/* + * Sample a volume. + */ +static struct afs_volume *afs_sample_volume(struct afs_cell *cell, struct key *key, + const char *name, unsigned int namelen) +{ + struct afs_volume *volume; + struct afs_fs_context fc = { + .type = 0, /* Explicitly leave it to the VLDB */ + .volnamesz = namelen, + .volname = name, + .net = cell->net, + .cell = cell, + .key = key, /* This might need to be something */ + }; + + volume = afs_create_volume(&fc); + _leave(" = %p", volume); + return volume; +} + +/* + * Compare two addresses. + */ +static int afs_compare_addrs(const struct sockaddr_rxrpc *srx_a, + const struct sockaddr_rxrpc *srx_b) +{ + short port_a, port_b; + int addr_a, addr_b, diff; + + diff = (short)srx_a->transport_type - (short)srx_b->transport_type; + if (diff) + goto out; + + switch (srx_a->transport_type) { + case AF_INET: { + const struct sockaddr_in *a = &srx_a->transport.sin; + const struct sockaddr_in *b = &srx_b->transport.sin; + addr_a = ntohl(a->sin_addr.s_addr); + addr_b = ntohl(b->sin_addr.s_addr); + diff = addr_a - addr_b; + if (diff == 0) { + port_a = ntohs(a->sin_port); + port_b = ntohs(b->sin_port); + diff = port_a - port_b; + } + break; + } + + case AF_INET6: { + const struct sockaddr_in6 *a = &srx_a->transport.sin6; + const struct sockaddr_in6 *b = &srx_b->transport.sin6; + diff = memcmp(&a->sin6_addr, &b->sin6_addr, 16); + if (diff == 0) { + port_a = ntohs(a->sin6_port); + port_b = ntohs(b->sin6_port); + diff = port_a - port_b; + } + break; + } + + default: + WARN_ON(1); + diff = 1; + } + +out: + return diff; +} + +/* + * Compare the address lists of a pair of fileservers. + */ +static int afs_compare_fs_alists(const struct afs_server *server_a, + const struct afs_server *server_b) +{ + const struct afs_addr_list *la, *lb; + int a = 0, b = 0, addr_matches = 0; + + la = rcu_dereference(server_a->addresses); + lb = rcu_dereference(server_b->addresses); + + while (a < la->nr_addrs && b < lb->nr_addrs) { + const struct sockaddr_rxrpc *srx_a = &la->addrs[a]; + const struct sockaddr_rxrpc *srx_b = &lb->addrs[b]; + int diff = afs_compare_addrs(srx_a, srx_b); + + if (diff < 0) { + a++; + } else if (diff > 0) { + b++; + } else { + addr_matches++; + a++; + b++; + } + } + + return addr_matches; +} + +/* + * Compare the fileserver lists of two volumes. The server lists are sorted in + * order of ascending UUID. + */ +static int afs_compare_volume_slists(const struct afs_volume *vol_a, + const struct afs_volume *vol_b) +{ + const struct afs_server_list *la, *lb; + int i, a = 0, b = 0, uuid_matches = 0, addr_matches = 0; + + la = rcu_dereference(vol_a->servers); + lb = rcu_dereference(vol_b->servers); + + for (i = 0; i < AFS_MAXTYPES; i++) + if (la->vids[i] != lb->vids[i]) + return 0; + + while (a < la->nr_servers && b < lb->nr_servers) { + const struct afs_server *server_a = la->servers[a].server; + const struct afs_server *server_b = lb->servers[b].server; + int diff = memcmp(&server_a->uuid, &server_b->uuid, sizeof(uuid_t)); + + if (diff < 0) { + a++; + } else if (diff > 0) { + b++; + } else { + uuid_matches++; + addr_matches += afs_compare_fs_alists(server_a, server_b); + a++; + b++; + } + } + + _leave(" = %d [um %d]", addr_matches, uuid_matches); + return addr_matches; +} + +/* + * Compare root.cell volumes. + */ +static int afs_compare_cell_roots(struct afs_cell *cell) +{ + struct afs_cell *p; + + _enter(""); + + rcu_read_lock(); + + hlist_for_each_entry_rcu(p, &cell->net->proc_cells, proc_link) { + if (p == cell || p->alias_of) + continue; + if (!p->root_volume) + continue; /* Ignore cells that don't have a root.cell volume. */ + + if (afs_compare_volume_slists(cell->root_volume, p->root_volume) != 0) + goto is_alias; + } + + rcu_read_unlock(); + _leave(" = 0"); + return 0; + +is_alias: + rcu_read_unlock(); + cell->alias_of = afs_use_cell(p, afs_cell_trace_use_alias); + return 1; +} + +/* + * Query the new cell for a volume from a cell we're already using. + */ +static int afs_query_for_alias_one(struct afs_cell *cell, struct key *key, + struct afs_cell *p) +{ + struct afs_volume *volume, *pvol = NULL; + int ret; + + /* Arbitrarily pick a volume from the list. */ + read_seqlock_excl(&p->volume_lock); + if (!RB_EMPTY_ROOT(&p->volumes)) + pvol = afs_get_volume(rb_entry(p->volumes.rb_node, + struct afs_volume, cell_node), + afs_volume_trace_get_query_alias); + read_sequnlock_excl(&p->volume_lock); + if (!pvol) + return 0; + + _enter("%s:%s", cell->name, pvol->name); + + /* And see if it's in the new cell. */ + volume = afs_sample_volume(cell, key, pvol->name, pvol->name_len); + if (IS_ERR(volume)) { + afs_put_volume(cell->net, pvol, afs_volume_trace_put_query_alias); + if (PTR_ERR(volume) != -ENOMEDIUM) + return PTR_ERR(volume); + /* That volume is not in the new cell, so not an alias */ + return 0; + } + + /* The new cell has a like-named volume also - compare volume ID, + * server and address lists. + */ + ret = 0; + if (pvol->vid == volume->vid) { + rcu_read_lock(); + if (afs_compare_volume_slists(volume, pvol)) + ret = 1; + rcu_read_unlock(); + } + + afs_put_volume(cell->net, volume, afs_volume_trace_put_query_alias); + afs_put_volume(cell->net, pvol, afs_volume_trace_put_query_alias); + return ret; +} + +/* + * Query the new cell for volumes we know exist in cells we're already using. + */ +static int afs_query_for_alias(struct afs_cell *cell, struct key *key) +{ + struct afs_cell *p; + + _enter("%s", cell->name); + + if (mutex_lock_interruptible(&cell->net->proc_cells_lock) < 0) + return -ERESTARTSYS; + + hlist_for_each_entry(p, &cell->net->proc_cells, proc_link) { + if (p == cell || p->alias_of) + continue; + if (RB_EMPTY_ROOT(&p->volumes)) + continue; + if (p->root_volume) + continue; /* Ignore cells that have a root.cell volume. */ + afs_use_cell(p, afs_cell_trace_use_check_alias); + mutex_unlock(&cell->net->proc_cells_lock); + + if (afs_query_for_alias_one(cell, key, p) != 0) + goto is_alias; + + if (mutex_lock_interruptible(&cell->net->proc_cells_lock) < 0) { + afs_unuse_cell(cell->net, p, afs_cell_trace_unuse_check_alias); + return -ERESTARTSYS; + } + + afs_unuse_cell(cell->net, p, afs_cell_trace_unuse_check_alias); + } + + mutex_unlock(&cell->net->proc_cells_lock); + _leave(" = 0"); + return 0; + +is_alias: + cell->alias_of = p; /* Transfer our ref */ + return 1; +} + +/* + * Look up a VLDB record for a volume. + */ +static char *afs_vl_get_cell_name(struct afs_cell *cell, struct key *key) +{ + struct afs_vl_cursor vc; + char *cell_name = ERR_PTR(-EDESTADDRREQ); + bool skipped = false, not_skipped = false; + int ret; + + if (!afs_begin_vlserver_operation(&vc, cell, key)) + return ERR_PTR(-ERESTARTSYS); + + while (afs_select_vlserver(&vc)) { + if (!test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags)) { + vc.ac.error = -EOPNOTSUPP; + skipped = true; + continue; + } + not_skipped = true; + cell_name = afs_yfsvl_get_cell_name(&vc); + } + + ret = afs_end_vlserver_operation(&vc); + if (skipped && !not_skipped) + ret = -EOPNOTSUPP; + return ret < 0 ? ERR_PTR(ret) : cell_name; +} + +static int yfs_check_canonical_cell_name(struct afs_cell *cell, struct key *key) +{ + struct afs_cell *master; + char *cell_name; + + cell_name = afs_vl_get_cell_name(cell, key); + if (IS_ERR(cell_name)) + return PTR_ERR(cell_name); + + if (strcmp(cell_name, cell->name) == 0) { + kfree(cell_name); + return 0; + } + + master = afs_lookup_cell(cell->net, cell_name, strlen(cell_name), + NULL, false); + kfree(cell_name); + if (IS_ERR(master)) + return PTR_ERR(master); + + cell->alias_of = master; /* Transfer our ref */ + return 1; +} + +static int afs_do_cell_detect_alias(struct afs_cell *cell, struct key *key) +{ + struct afs_volume *root_volume; + int ret; + + _enter("%s", cell->name); + + ret = yfs_check_canonical_cell_name(cell, key); + if (ret != -EOPNOTSUPP) + return ret; + + /* Try and get the root.cell volume for comparison with other cells */ + root_volume = afs_sample_volume(cell, key, "root.cell", 9); + if (!IS_ERR(root_volume)) { + cell->root_volume = root_volume; + return afs_compare_cell_roots(cell); + } + + if (PTR_ERR(root_volume) != -ENOMEDIUM) + return PTR_ERR(root_volume); + + /* Okay, this cell doesn't have an root.cell volume. We need to + * locate some other random volume and use that to check. + */ + return afs_query_for_alias(cell, key); +} + +/* + * Check to see if a new cell is an alias of a cell we already have. At this + * point we have the cell's volume server list. + * + * Returns 0 if we didn't detect an alias, 1 if we found an alias and an error + * if we had problems gathering the data required. In the case the we did + * detect an alias, cell->alias_of is set to point to the assumed master. + */ +int afs_cell_detect_alias(struct afs_cell *cell, struct key *key) +{ + struct afs_net *net = cell->net; + int ret; + + if (mutex_lock_interruptible(&net->cells_alias_lock) < 0) + return -ERESTARTSYS; + + if (test_bit(AFS_CELL_FL_CHECK_ALIAS, &cell->flags)) { + ret = afs_do_cell_detect_alias(cell, key); + if (ret >= 0) + clear_bit_unlock(AFS_CELL_FL_CHECK_ALIAS, &cell->flags); + } else { + ret = cell->alias_of ? 1 : 0; + } + + mutex_unlock(&net->cells_alias_lock); + + if (ret == 1) + pr_notice("kAFS: Cell %s is an alias of %s\n", + cell->name, cell->alias_of->name); + return ret; +} diff --git a/fs/afs/vl_list.c b/fs/afs/vl_list.c new file mode 100644 index 000000000..38b2ba1d9 --- /dev/null +++ b/fs/afs/vl_list.c @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* AFS vlserver list management. + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include "internal.h" + +struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len, + unsigned short port) +{ + struct afs_vlserver *vlserver; + + vlserver = kzalloc(struct_size(vlserver, name, name_len + 1), + GFP_KERNEL); + if (vlserver) { + atomic_set(&vlserver->usage, 1); + rwlock_init(&vlserver->lock); + init_waitqueue_head(&vlserver->probe_wq); + spin_lock_init(&vlserver->probe_lock); + vlserver->rtt = UINT_MAX; + vlserver->name_len = name_len; + vlserver->port = port; + memcpy(vlserver->name, name, name_len); + } + return vlserver; +} + +static void afs_vlserver_rcu(struct rcu_head *rcu) +{ + struct afs_vlserver *vlserver = container_of(rcu, struct afs_vlserver, rcu); + + afs_put_addrlist(rcu_access_pointer(vlserver->addresses)); + kfree_rcu(vlserver, rcu); +} + +void afs_put_vlserver(struct afs_net *net, struct afs_vlserver *vlserver) +{ + if (vlserver) { + unsigned int u = atomic_dec_return(&vlserver->usage); + //_debug("VL PUT %p{%u}", vlserver, u); + + if (u == 0) + call_rcu(&vlserver->rcu, afs_vlserver_rcu); + } +} + +struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int nr_servers) +{ + struct afs_vlserver_list *vllist; + + vllist = kzalloc(struct_size(vllist, servers, nr_servers), GFP_KERNEL); + if (vllist) { + atomic_set(&vllist->usage, 1); + rwlock_init(&vllist->lock); + } + + return vllist; +} + +void afs_put_vlserverlist(struct afs_net *net, struct afs_vlserver_list *vllist) +{ + if (vllist) { + unsigned int u = atomic_dec_return(&vllist->usage); + + //_debug("VLLS PUT %p{%u}", vllist, u); + if (u == 0) { + int i; + + for (i = 0; i < vllist->nr_servers; i++) { + afs_put_vlserver(net, vllist->servers[i].server); + } + kfree_rcu(vllist, rcu); + } + } +} + +static u16 afs_extract_le16(const u8 **_b) +{ + u16 val; + + val = (u16)*(*_b)++ << 0; + val |= (u16)*(*_b)++ << 8; + return val; +} + +/* + * Build a VL server address list from a DNS queried server list. + */ +static struct afs_addr_list *afs_extract_vl_addrs(const u8 **_b, const u8 *end, + u8 nr_addrs, u16 port) +{ + struct afs_addr_list *alist; + const u8 *b = *_b; + int ret = -EINVAL; + + alist = afs_alloc_addrlist(nr_addrs, VL_SERVICE, port); + if (!alist) + return ERR_PTR(-ENOMEM); + if (nr_addrs == 0) + return alist; + + for (; nr_addrs > 0 && end - b >= nr_addrs; nr_addrs--) { + struct dns_server_list_v1_address hdr; + __be32 x[4]; + + hdr.address_type = *b++; + + switch (hdr.address_type) { + case DNS_ADDRESS_IS_IPV4: + if (end - b < 4) { + _leave(" = -EINVAL [short inet]"); + goto error; + } + memcpy(x, b, 4); + afs_merge_fs_addr4(alist, x[0], port); + b += 4; + break; + + case DNS_ADDRESS_IS_IPV6: + if (end - b < 16) { + _leave(" = -EINVAL [short inet6]"); + goto error; + } + memcpy(x, b, 16); + afs_merge_fs_addr6(alist, x, port); + b += 16; + break; + + default: + _leave(" = -EADDRNOTAVAIL [unknown af %u]", + hdr.address_type); + ret = -EADDRNOTAVAIL; + goto error; + } + } + + /* Start with IPv6 if available. */ + if (alist->nr_ipv4 < alist->nr_addrs) + alist->preferred = alist->nr_ipv4; + + *_b = b; + return alist; + +error: + *_b = b; + afs_put_addrlist(alist); + return ERR_PTR(ret); +} + +/* + * Build a VL server list from a DNS queried server list. + */ +struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell, + const void *buffer, + size_t buffer_size) +{ + const struct dns_server_list_v1_header *hdr = buffer; + struct dns_server_list_v1_server bs; + struct afs_vlserver_list *vllist, *previous; + struct afs_addr_list *addrs; + struct afs_vlserver *server; + const u8 *b = buffer, *end = buffer + buffer_size; + int ret = -ENOMEM, nr_servers, i, j; + + _enter(""); + + /* Check that it's a server list, v1 */ + if (end - b < sizeof(*hdr) || + hdr->hdr.content != DNS_PAYLOAD_IS_SERVER_LIST || + hdr->hdr.version != 1) { + pr_notice("kAFS: Got DNS record [%u,%u] len %zu\n", + hdr->hdr.content, hdr->hdr.version, end - b); + ret = -EDESTADDRREQ; + goto dump; + } + + nr_servers = hdr->nr_servers; + + vllist = afs_alloc_vlserver_list(nr_servers); + if (!vllist) + return ERR_PTR(-ENOMEM); + + vllist->source = (hdr->source < NR__dns_record_source) ? + hdr->source : NR__dns_record_source; + vllist->status = (hdr->status < NR__dns_lookup_status) ? + hdr->status : NR__dns_lookup_status; + + read_lock(&cell->vl_servers_lock); + previous = afs_get_vlserverlist( + rcu_dereference_protected(cell->vl_servers, + lockdep_is_held(&cell->vl_servers_lock))); + read_unlock(&cell->vl_servers_lock); + + b += sizeof(*hdr); + while (end - b >= sizeof(bs)) { + bs.name_len = afs_extract_le16(&b); + bs.priority = afs_extract_le16(&b); + bs.weight = afs_extract_le16(&b); + bs.port = afs_extract_le16(&b); + bs.source = *b++; + bs.status = *b++; + bs.protocol = *b++; + bs.nr_addrs = *b++; + + _debug("extract %u %u %u %u %u %u %*.*s", + bs.name_len, bs.priority, bs.weight, + bs.port, bs.protocol, bs.nr_addrs, + bs.name_len, bs.name_len, b); + + if (end - b < bs.name_len) + break; + + ret = -EPROTONOSUPPORT; + if (bs.protocol == DNS_SERVER_PROTOCOL_UNSPECIFIED) { + bs.protocol = DNS_SERVER_PROTOCOL_UDP; + } else if (bs.protocol != DNS_SERVER_PROTOCOL_UDP) { + _leave(" = [proto %u]", bs.protocol); + goto error; + } + + if (bs.port == 0) + bs.port = AFS_VL_PORT; + if (bs.source > NR__dns_record_source) + bs.source = NR__dns_record_source; + if (bs.status > NR__dns_lookup_status) + bs.status = NR__dns_lookup_status; + + /* See if we can update an old server record */ + server = NULL; + for (i = 0; i < previous->nr_servers; i++) { + struct afs_vlserver *p = previous->servers[i].server; + + if (p->name_len == bs.name_len && + p->port == bs.port && + strncasecmp(b, p->name, bs.name_len) == 0) { + server = afs_get_vlserver(p); + break; + } + } + + if (!server) { + ret = -ENOMEM; + server = afs_alloc_vlserver(b, bs.name_len, bs.port); + if (!server) + goto error; + } + + b += bs.name_len; + + /* Extract the addresses - note that we can't skip this as we + * have to advance the payload pointer. + */ + addrs = afs_extract_vl_addrs(&b, end, bs.nr_addrs, bs.port); + if (IS_ERR(addrs)) { + ret = PTR_ERR(addrs); + goto error_2; + } + + if (vllist->nr_servers >= nr_servers) { + _debug("skip %u >= %u", vllist->nr_servers, nr_servers); + afs_put_addrlist(addrs); + afs_put_vlserver(cell->net, server); + continue; + } + + addrs->source = bs.source; + addrs->status = bs.status; + + if (addrs->nr_addrs == 0) { + afs_put_addrlist(addrs); + if (!rcu_access_pointer(server->addresses)) { + afs_put_vlserver(cell->net, server); + continue; + } + } else { + struct afs_addr_list *old = addrs; + + write_lock(&server->lock); + old = rcu_replace_pointer(server->addresses, old, + lockdep_is_held(&server->lock)); + write_unlock(&server->lock); + afs_put_addrlist(old); + } + + + /* TODO: Might want to check for duplicates */ + + /* Insertion-sort by priority and weight */ + for (j = 0; j < vllist->nr_servers; j++) { + if (bs.priority < vllist->servers[j].priority) + break; /* Lower preferable */ + if (bs.priority == vllist->servers[j].priority && + bs.weight > vllist->servers[j].weight) + break; /* Higher preferable */ + } + + if (j < vllist->nr_servers) { + memmove(vllist->servers + j + 1, + vllist->servers + j, + (vllist->nr_servers - j) * sizeof(struct afs_vlserver_entry)); + } + + clear_bit(AFS_VLSERVER_FL_PROBED, &server->flags); + + vllist->servers[j].priority = bs.priority; + vllist->servers[j].weight = bs.weight; + vllist->servers[j].server = server; + vllist->nr_servers++; + } + + if (b != end) { + _debug("parse error %zd", b - end); + goto error; + } + + afs_put_vlserverlist(cell->net, previous); + _leave(" = ok [%u]", vllist->nr_servers); + return vllist; + +error_2: + afs_put_vlserver(cell->net, server); +error: + afs_put_vlserverlist(cell->net, vllist); + afs_put_vlserverlist(cell->net, previous); +dump: + if (ret != -ENOMEM) { + printk(KERN_DEBUG "DNS: at %zu\n", (const void *)b - buffer); + print_hex_dump_bytes("DNS: ", DUMP_PREFIX_NONE, buffer, buffer_size); + } + return ERR_PTR(ret); +} diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c new file mode 100644 index 000000000..58452b86e --- /dev/null +++ b/fs/afs/vl_probe.c @@ -0,0 +1,292 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* AFS vlserver probing + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/sched.h> +#include <linux/slab.h> +#include "afs_fs.h" +#include "internal.h" +#include "protocol_yfs.h" + + +/* + * Handle the completion of a set of probes. + */ +static void afs_finished_vl_probe(struct afs_vlserver *server) +{ + if (!(server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED)) { + server->rtt = UINT_MAX; + clear_bit(AFS_VLSERVER_FL_RESPONDING, &server->flags); + } + + clear_bit_unlock(AFS_VLSERVER_FL_PROBING, &server->flags); + wake_up_bit(&server->flags, AFS_VLSERVER_FL_PROBING); +} + +/* + * Handle the completion of a probe RPC call. + */ +static void afs_done_one_vl_probe(struct afs_vlserver *server, bool wake_up) +{ + if (atomic_dec_and_test(&server->probe_outstanding)) { + afs_finished_vl_probe(server); + wake_up = true; + } + + if (wake_up) + wake_up_all(&server->probe_wq); +} + +/* + * Process the result of probing a vlserver. This is called after successful + * or failed delivery of an VL.GetCapabilities operation. + */ +void afs_vlserver_probe_result(struct afs_call *call) +{ + struct afs_addr_list *alist = call->alist; + struct afs_vlserver *server = call->vlserver; + unsigned int server_index = call->server_index; + unsigned int rtt_us = 0; + unsigned int index = call->addr_ix; + bool have_result = false; + int ret = call->error; + + _enter("%s,%u,%u,%d,%d", server->name, server_index, index, ret, call->abort_code); + + spin_lock(&server->probe_lock); + + switch (ret) { + case 0: + server->probe.error = 0; + goto responded; + case -ECONNABORTED: + if (!(server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED)) { + server->probe.abort_code = call->abort_code; + server->probe.error = ret; + } + goto responded; + case -ENOMEM: + case -ENONET: + case -EKEYEXPIRED: + case -EKEYREVOKED: + case -EKEYREJECTED: + server->probe.flags |= AFS_VLSERVER_PROBE_LOCAL_FAILURE; + if (server->probe.error == 0) + server->probe.error = ret; + trace_afs_io_error(call->debug_id, ret, afs_io_error_vl_probe_fail); + goto out; + case -ECONNRESET: /* Responded, but call expired. */ + case -ERFKILL: + case -EADDRNOTAVAIL: + case -ENETUNREACH: + case -EHOSTUNREACH: + case -EHOSTDOWN: + case -ECONNREFUSED: + case -ETIMEDOUT: + case -ETIME: + default: + clear_bit(index, &alist->responded); + set_bit(index, &alist->failed); + if (!(server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED) && + (server->probe.error == 0 || + server->probe.error == -ETIMEDOUT || + server->probe.error == -ETIME)) + server->probe.error = ret; + trace_afs_io_error(call->debug_id, ret, afs_io_error_vl_probe_fail); + goto out; + } + +responded: + set_bit(index, &alist->responded); + clear_bit(index, &alist->failed); + + if (call->service_id == YFS_VL_SERVICE) { + server->probe.flags |= AFS_VLSERVER_PROBE_IS_YFS; + set_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags); + alist->addrs[index].srx_service = call->service_id; + } else { + server->probe.flags |= AFS_VLSERVER_PROBE_NOT_YFS; + if (!(server->probe.flags & AFS_VLSERVER_PROBE_IS_YFS)) { + clear_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags); + alist->addrs[index].srx_service = call->service_id; + } + } + + rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us); + if (rtt_us < server->probe.rtt) { + server->probe.rtt = rtt_us; + server->rtt = rtt_us; + alist->preferred = index; + } + + smp_wmb(); /* Set rtt before responded. */ + server->probe.flags |= AFS_VLSERVER_PROBE_RESPONDED; + set_bit(AFS_VLSERVER_FL_PROBED, &server->flags); + set_bit(AFS_VLSERVER_FL_RESPONDING, &server->flags); + have_result = true; +out: + spin_unlock(&server->probe_lock); + + _debug("probe [%u][%u] %pISpc rtt=%u ret=%d", + server_index, index, &alist->addrs[index].transport, rtt_us, ret); + + afs_done_one_vl_probe(server, have_result); +} + +/* + * Probe all of a vlserver's addresses to find out the best route and to + * query its capabilities. + */ +static bool afs_do_probe_vlserver(struct afs_net *net, + struct afs_vlserver *server, + struct key *key, + unsigned int server_index, + struct afs_error *_e) +{ + struct afs_addr_cursor ac = { + .index = 0, + }; + struct afs_call *call; + bool in_progress = false; + + _enter("%s", server->name); + + read_lock(&server->lock); + ac.alist = rcu_dereference_protected(server->addresses, + lockdep_is_held(&server->lock)); + read_unlock(&server->lock); + + atomic_set(&server->probe_outstanding, ac.alist->nr_addrs); + memset(&server->probe, 0, sizeof(server->probe)); + server->probe.rtt = UINT_MAX; + + for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) { + call = afs_vl_get_capabilities(net, &ac, key, server, + server_index); + if (!IS_ERR(call)) { + afs_put_call(call); + in_progress = true; + } else { + afs_prioritise_error(_e, PTR_ERR(call), ac.abort_code); + afs_done_one_vl_probe(server, false); + } + } + + return in_progress; +} + +/* + * Send off probes to all unprobed servers. + */ +int afs_send_vl_probes(struct afs_net *net, struct key *key, + struct afs_vlserver_list *vllist) +{ + struct afs_vlserver *server; + struct afs_error e; + bool in_progress = false; + int i; + + e.error = 0; + e.responded = false; + for (i = 0; i < vllist->nr_servers; i++) { + server = vllist->servers[i].server; + if (test_bit(AFS_VLSERVER_FL_PROBED, &server->flags)) + continue; + + if (!test_and_set_bit_lock(AFS_VLSERVER_FL_PROBING, &server->flags) && + afs_do_probe_vlserver(net, server, key, i, &e)) + in_progress = true; + } + + return in_progress ? 0 : e.error; +} + +/* + * Wait for the first as-yet untried server to respond. + */ +int afs_wait_for_vl_probes(struct afs_vlserver_list *vllist, + unsigned long untried) +{ + struct wait_queue_entry *waits; + struct afs_vlserver *server; + unsigned int rtt = UINT_MAX, rtt_s; + bool have_responders = false; + int pref = -1, i; + + _enter("%u,%lx", vllist->nr_servers, untried); + + /* Only wait for servers that have a probe outstanding. */ + for (i = 0; i < vllist->nr_servers; i++) { + if (test_bit(i, &untried)) { + server = vllist->servers[i].server; + if (!test_bit(AFS_VLSERVER_FL_PROBING, &server->flags)) + __clear_bit(i, &untried); + if (server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED) + have_responders = true; + } + } + if (have_responders || !untried) + return 0; + + waits = kmalloc(array_size(vllist->nr_servers, sizeof(*waits)), GFP_KERNEL); + if (!waits) + return -ENOMEM; + + for (i = 0; i < vllist->nr_servers; i++) { + if (test_bit(i, &untried)) { + server = vllist->servers[i].server; + init_waitqueue_entry(&waits[i], current); + add_wait_queue(&server->probe_wq, &waits[i]); + } + } + + for (;;) { + bool still_probing = false; + + set_current_state(TASK_INTERRUPTIBLE); + for (i = 0; i < vllist->nr_servers; i++) { + if (test_bit(i, &untried)) { + server = vllist->servers[i].server; + if (server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED) + goto stop; + if (test_bit(AFS_VLSERVER_FL_PROBING, &server->flags)) + still_probing = true; + } + } + + if (!still_probing || signal_pending(current)) + goto stop; + schedule(); + } + +stop: + set_current_state(TASK_RUNNING); + + for (i = 0; i < vllist->nr_servers; i++) { + if (test_bit(i, &untried)) { + server = vllist->servers[i].server; + rtt_s = READ_ONCE(server->rtt); + if (test_bit(AFS_VLSERVER_FL_RESPONDING, &server->flags) && + rtt_s < rtt) { + pref = i; + rtt = rtt_s; + } + + remove_wait_queue(&server->probe_wq, &waits[i]); + } + } + + kfree(waits); + + if (pref == -1 && signal_pending(current)) + return -ERESTARTSYS; + + if (pref >= 0) + vllist->preferred = pref; + + _leave(" = 0 [%u]", pref); + return 0; +} diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c new file mode 100644 index 000000000..eb415ce56 --- /dev/null +++ b/fs/afs/vl_rotate.c @@ -0,0 +1,358 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Handle vlserver selection and rotation. + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/sched/signal.h> +#include "internal.h" +#include "afs_vl.h" + +/* + * Begin an operation on a volume location server. + */ +bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell, + struct key *key) +{ + memset(vc, 0, sizeof(*vc)); + vc->cell = cell; + vc->key = key; + vc->error = -EDESTADDRREQ; + vc->ac.error = SHRT_MAX; + + if (signal_pending(current)) { + vc->error = -EINTR; + vc->flags |= AFS_VL_CURSOR_STOP; + return false; + } + + return true; +} + +/* + * Begin iteration through a server list, starting with the last used server if + * possible, or the last recorded good server if not. + */ +static bool afs_start_vl_iteration(struct afs_vl_cursor *vc) +{ + struct afs_cell *cell = vc->cell; + unsigned int dns_lookup_count; + + if (cell->dns_source == DNS_RECORD_UNAVAILABLE || + cell->dns_expiry <= ktime_get_real_seconds()) { + dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count); + set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags); + afs_queue_cell(cell, afs_cell_trace_get_queue_dns); + + if (cell->dns_source == DNS_RECORD_UNAVAILABLE) { + if (wait_var_event_interruptible( + &cell->dns_lookup_count, + smp_load_acquire(&cell->dns_lookup_count) + != dns_lookup_count) < 0) { + vc->error = -ERESTARTSYS; + return false; + } + } + + /* Status load is ordered after lookup counter load */ + if (cell->dns_status == DNS_LOOKUP_GOT_NOT_FOUND) { + pr_warn("No record of cell %s\n", cell->name); + vc->error = -ENOENT; + return false; + } + + if (cell->dns_source == DNS_RECORD_UNAVAILABLE) { + vc->error = -EDESTADDRREQ; + return false; + } + } + + read_lock(&cell->vl_servers_lock); + vc->server_list = afs_get_vlserverlist( + rcu_dereference_protected(cell->vl_servers, + lockdep_is_held(&cell->vl_servers_lock))); + read_unlock(&cell->vl_servers_lock); + if (!vc->server_list->nr_servers) + return false; + + vc->untried = (1UL << vc->server_list->nr_servers) - 1; + vc->index = -1; + return true; +} + +/* + * Select the vlserver to use. May be called multiple times to rotate + * through the vlservers. + */ +bool afs_select_vlserver(struct afs_vl_cursor *vc) +{ + struct afs_addr_list *alist; + struct afs_vlserver *vlserver; + struct afs_error e; + u32 rtt; + int error = vc->ac.error, i; + + _enter("%lx[%d],%lx[%d],%d,%d", + vc->untried, vc->index, + vc->ac.tried, vc->ac.index, + error, vc->ac.abort_code); + + if (vc->flags & AFS_VL_CURSOR_STOP) { + _leave(" = f [stopped]"); + return false; + } + + vc->nr_iterations++; + + /* Evaluate the result of the previous operation, if there was one. */ + switch (error) { + case SHRT_MAX: + goto start; + + default: + case 0: + /* Success or local failure. Stop. */ + vc->error = error; + vc->flags |= AFS_VL_CURSOR_STOP; + _leave(" = f [okay/local %d]", vc->ac.error); + return false; + + case -ECONNABORTED: + /* The far side rejected the operation on some grounds. This + * might involve the server being busy or the volume having been moved. + */ + switch (vc->ac.abort_code) { + case AFSVL_IO: + case AFSVL_BADVOLOPER: + case AFSVL_NOMEM: + /* The server went weird. */ + vc->error = -EREMOTEIO; + //write_lock(&vc->cell->vl_servers_lock); + //vc->server_list->weird_mask |= 1 << vc->index; + //write_unlock(&vc->cell->vl_servers_lock); + goto next_server; + + default: + vc->error = afs_abort_to_error(vc->ac.abort_code); + goto failed; + } + + case -ERFKILL: + case -EADDRNOTAVAIL: + case -ENETUNREACH: + case -EHOSTUNREACH: + case -EHOSTDOWN: + case -ECONNREFUSED: + case -ETIMEDOUT: + case -ETIME: + _debug("no conn %d", error); + vc->error = error; + goto iterate_address; + + case -ECONNRESET: + _debug("call reset"); + vc->error = error; + vc->flags |= AFS_VL_CURSOR_RETRY; + goto next_server; + + case -EOPNOTSUPP: + _debug("notsupp"); + goto next_server; + } + +restart_from_beginning: + _debug("restart"); + afs_end_cursor(&vc->ac); + afs_put_vlserverlist(vc->cell->net, vc->server_list); + vc->server_list = NULL; + if (vc->flags & AFS_VL_CURSOR_RETRIED) + goto failed; + vc->flags |= AFS_VL_CURSOR_RETRIED; +start: + _debug("start"); + + if (!afs_start_vl_iteration(vc)) + goto failed; + + error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list); + if (error < 0) + goto failed_set_error; + +pick_server: + _debug("pick [%lx]", vc->untried); + + error = afs_wait_for_vl_probes(vc->server_list, vc->untried); + if (error < 0) + goto failed_set_error; + + /* Pick the untried server with the lowest RTT. */ + vc->index = vc->server_list->preferred; + if (test_bit(vc->index, &vc->untried)) + goto selected_server; + + vc->index = -1; + rtt = U32_MAX; + for (i = 0; i < vc->server_list->nr_servers; i++) { + struct afs_vlserver *s = vc->server_list->servers[i].server; + + if (!test_bit(i, &vc->untried) || + !test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags)) + continue; + if (s->probe.rtt < rtt) { + vc->index = i; + rtt = s->probe.rtt; + } + } + + if (vc->index == -1) + goto no_more_servers; + +selected_server: + _debug("use %d", vc->index); + __clear_bit(vc->index, &vc->untried); + + /* We're starting on a different vlserver from the list. We need to + * check it, find its address list and probe its capabilities before we + * use it. + */ + ASSERTCMP(vc->ac.alist, ==, NULL); + vlserver = vc->server_list->servers[vc->index].server; + vc->server = vlserver; + + _debug("USING VLSERVER: %s", vlserver->name); + + read_lock(&vlserver->lock); + alist = rcu_dereference_protected(vlserver->addresses, + lockdep_is_held(&vlserver->lock)); + afs_get_addrlist(alist); + read_unlock(&vlserver->lock); + + memset(&vc->ac, 0, sizeof(vc->ac)); + + if (!vc->ac.alist) + vc->ac.alist = alist; + else + afs_put_addrlist(alist); + + vc->ac.index = -1; + +iterate_address: + ASSERT(vc->ac.alist); + /* Iterate over the current server's address list to try and find an + * address on which it will respond to us. + */ + if (!afs_iterate_addresses(&vc->ac)) + goto next_server; + + _debug("VL address %d/%d", vc->ac.index, vc->ac.alist->nr_addrs); + + _leave(" = t %pISpc", &vc->ac.alist->addrs[vc->ac.index].transport); + return true; + +next_server: + _debug("next"); + afs_end_cursor(&vc->ac); + goto pick_server; + +no_more_servers: + /* That's all the servers poked to no good effect. Try again if some + * of them were busy. + */ + if (vc->flags & AFS_VL_CURSOR_RETRY) + goto restart_from_beginning; + + e.error = -EDESTADDRREQ; + e.responded = false; + for (i = 0; i < vc->server_list->nr_servers; i++) { + struct afs_vlserver *s = vc->server_list->servers[i].server; + + if (test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags)) + e.responded = true; + afs_prioritise_error(&e, READ_ONCE(s->probe.error), + s->probe.abort_code); + } + + error = e.error; + +failed_set_error: + vc->error = error; +failed: + vc->flags |= AFS_VL_CURSOR_STOP; + afs_end_cursor(&vc->ac); + _leave(" = f [failed %d]", vc->error); + return false; +} + +/* + * Dump cursor state in the case of the error being EDESTADDRREQ. + */ +static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc) +{ + struct afs_cell *cell = vc->cell; + static int count; + int i; + + if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3) + return; + count++; + + rcu_read_lock(); + pr_notice("EDESTADDR occurred\n"); + pr_notice("CELL: %s err=%d\n", cell->name, cell->error); + pr_notice("DNS: src=%u st=%u lc=%x\n", + cell->dns_source, cell->dns_status, cell->dns_lookup_count); + pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n", + vc->untried, vc->index, vc->nr_iterations, vc->flags, vc->error); + + if (vc->server_list) { + const struct afs_vlserver_list *sl = vc->server_list; + pr_notice("VC: SL nr=%u ix=%u\n", + sl->nr_servers, sl->index); + for (i = 0; i < sl->nr_servers; i++) { + const struct afs_vlserver *s = sl->servers[i].server; + pr_notice("VC: server %s+%hu fl=%lx E=%hd\n", + s->name, s->port, s->flags, s->probe.error); + if (s->addresses) { + const struct afs_addr_list *a = + rcu_dereference(s->addresses); + pr_notice("VC: - nr=%u/%u/%u pf=%u\n", + a->nr_ipv4, a->nr_addrs, a->max_addrs, + a->preferred); + pr_notice("VC: - R=%lx F=%lx\n", + a->responded, a->failed); + if (a == vc->ac.alist) + pr_notice("VC: - current\n"); + } + } + } + + pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n", + vc->ac.tried, vc->ac.index, vc->ac.abort_code, vc->ac.error, + vc->ac.responded, vc->ac.nr_iterations); + rcu_read_unlock(); +} + +/* + * Tidy up a volume location server cursor and unlock the vnode. + */ +int afs_end_vlserver_operation(struct afs_vl_cursor *vc) +{ + struct afs_net *net = vc->cell->net; + + if (vc->error == -EDESTADDRREQ || + vc->error == -EADDRNOTAVAIL || + vc->error == -ENETUNREACH || + vc->error == -EHOSTUNREACH) + afs_vl_dump_edestaddrreq(vc); + + afs_end_cursor(&vc->ac); + afs_put_vlserverlist(net, vc->server_list); + + if (vc->error == -ECONNABORTED) + vc->error = afs_abort_to_error(vc->ac.abort_code); + + return vc->error; +} diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c new file mode 100644 index 000000000..dc9327332 --- /dev/null +++ b/fs/afs/vlclient.c @@ -0,0 +1,758 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* AFS Volume Location Service client + * + * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/gfp.h> +#include <linux/init.h> +#include <linux/sched.h> +#include "afs_fs.h" +#include "internal.h" + +/* + * Deliver reply data to a VL.GetEntryByNameU call. + */ +static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call) +{ + struct afs_uvldbentry__xdr *uvldb; + struct afs_vldb_entry *entry; + bool new_only = false; + u32 tmp, nr_servers, vlflags; + int i, ret; + + _enter(""); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + uvldb = call->buffer; + entry = call->ret_vldb; + + nr_servers = ntohl(uvldb->nServers); + if (nr_servers > AFS_NMAXNSERVERS) + nr_servers = AFS_NMAXNSERVERS; + + for (i = 0; i < ARRAY_SIZE(uvldb->name) - 1; i++) + entry->name[i] = (u8)ntohl(uvldb->name[i]); + entry->name[i] = 0; + entry->name_len = strlen(entry->name); + + /* If there is a new replication site that we can use, ignore all the + * sites that aren't marked as new. + */ + for (i = 0; i < nr_servers; i++) { + tmp = ntohl(uvldb->serverFlags[i]); + if (!(tmp & AFS_VLSF_DONTUSE) && + (tmp & AFS_VLSF_NEWREPSITE)) + new_only = true; + } + + vlflags = ntohl(uvldb->flags); + for (i = 0; i < nr_servers; i++) { + struct afs_uuid__xdr *xdr; + struct afs_uuid *uuid; + int j; + int n = entry->nr_servers; + + tmp = ntohl(uvldb->serverFlags[i]); + if (tmp & AFS_VLSF_DONTUSE || + (new_only && !(tmp & AFS_VLSF_NEWREPSITE))) + continue; + if (tmp & AFS_VLSF_RWVOL) { + entry->fs_mask[n] |= AFS_VOL_VTM_RW; + if (vlflags & AFS_VLF_BACKEXISTS) + entry->fs_mask[n] |= AFS_VOL_VTM_BAK; + } + if (tmp & AFS_VLSF_ROVOL) + entry->fs_mask[n] |= AFS_VOL_VTM_RO; + if (!entry->fs_mask[n]) + continue; + + xdr = &uvldb->serverNumber[i]; + uuid = (struct afs_uuid *)&entry->fs_server[n]; + uuid->time_low = xdr->time_low; + uuid->time_mid = htons(ntohl(xdr->time_mid)); + uuid->time_hi_and_version = htons(ntohl(xdr->time_hi_and_version)); + uuid->clock_seq_hi_and_reserved = (u8)ntohl(xdr->clock_seq_hi_and_reserved); + uuid->clock_seq_low = (u8)ntohl(xdr->clock_seq_low); + for (j = 0; j < 6; j++) + uuid->node[j] = (u8)ntohl(xdr->node[j]); + + entry->addr_version[n] = ntohl(uvldb->serverUnique[i]); + entry->nr_servers++; + } + + for (i = 0; i < AFS_MAXTYPES; i++) + entry->vid[i] = ntohl(uvldb->volumeId[i]); + + if (vlflags & AFS_VLF_RWEXISTS) + __set_bit(AFS_VLDB_HAS_RW, &entry->flags); + if (vlflags & AFS_VLF_ROEXISTS) + __set_bit(AFS_VLDB_HAS_RO, &entry->flags); + if (vlflags & AFS_VLF_BACKEXISTS) + __set_bit(AFS_VLDB_HAS_BAK, &entry->flags); + + if (!(vlflags & (AFS_VLF_RWEXISTS | AFS_VLF_ROEXISTS | AFS_VLF_BACKEXISTS))) { + entry->error = -ENOMEDIUM; + __set_bit(AFS_VLDB_QUERY_ERROR, &entry->flags); + } + + __set_bit(AFS_VLDB_QUERY_VALID, &entry->flags); + _leave(" = 0 [done]"); + return 0; +} + +static void afs_destroy_vl_get_entry_by_name_u(struct afs_call *call) +{ + kfree(call->ret_vldb); + afs_flat_call_destructor(call); +} + +/* + * VL.GetEntryByNameU operation type. + */ +static const struct afs_call_type afs_RXVLGetEntryByNameU = { + .name = "VL.GetEntryByNameU", + .op = afs_VL_GetEntryByNameU, + .deliver = afs_deliver_vl_get_entry_by_name_u, + .destructor = afs_destroy_vl_get_entry_by_name_u, +}; + +/* + * Dispatch a get volume entry by name or ID operation (uuid variant). If the + * volname is a decimal number then it's a volume ID not a volume name. + */ +struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *vc, + const char *volname, + int volnamesz) +{ + struct afs_vldb_entry *entry; + struct afs_call *call; + struct afs_net *net = vc->cell->net; + size_t reqsz, padsz; + __be32 *bp; + + _enter(""); + + padsz = (4 - (volnamesz & 3)) & 3; + reqsz = 8 + volnamesz + padsz; + + entry = kzalloc(sizeof(struct afs_vldb_entry), GFP_KERNEL); + if (!entry) + return ERR_PTR(-ENOMEM); + + call = afs_alloc_flat_call(net, &afs_RXVLGetEntryByNameU, reqsz, + sizeof(struct afs_uvldbentry__xdr)); + if (!call) { + kfree(entry); + return ERR_PTR(-ENOMEM); + } + + call->key = vc->key; + call->ret_vldb = entry; + call->max_lifespan = AFS_VL_MAX_LIFESPAN; + + /* Marshall the parameters */ + bp = call->request; + *bp++ = htonl(VLGETENTRYBYNAMEU); + *bp++ = htonl(volnamesz); + memcpy(bp, volname, volnamesz); + if (padsz > 0) + memset((void *)bp + volnamesz, 0, padsz); + + trace_afs_make_vl_call(call); + afs_make_call(&vc->ac, call, GFP_KERNEL); + return (struct afs_vldb_entry *)afs_wait_for_call_to_complete(call, &vc->ac); +} + +/* + * Deliver reply data to a VL.GetAddrsU call. + * + * GetAddrsU(IN ListAddrByAttributes *inaddr, + * OUT afsUUID *uuidp1, + * OUT uint32_t *uniquifier, + * OUT uint32_t *nentries, + * OUT bulkaddrs *blkaddrs); + */ +static int afs_deliver_vl_get_addrs_u(struct afs_call *call) +{ + struct afs_addr_list *alist; + __be32 *bp; + u32 uniquifier, nentries, count; + int i, ret; + + _enter("{%u,%zu/%u}", + call->unmarshall, iov_iter_count(call->iter), call->count); + + switch (call->unmarshall) { + case 0: + afs_extract_to_buf(call, + sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32)); + call->unmarshall++; + + /* Extract the returned uuid, uniquifier, nentries and + * blkaddrs size */ + fallthrough; + case 1: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + bp = call->buffer + sizeof(struct afs_uuid__xdr); + uniquifier = ntohl(*bp++); + nentries = ntohl(*bp++); + count = ntohl(*bp); + + nentries = min(nentries, count); + alist = afs_alloc_addrlist(nentries, FS_SERVICE, AFS_FS_PORT); + if (!alist) + return -ENOMEM; + alist->version = uniquifier; + call->ret_alist = alist; + call->count = count; + call->count2 = nentries; + call->unmarshall++; + + more_entries: + count = min(call->count, 4U); + afs_extract_to_buf(call, count * sizeof(__be32)); + + fallthrough; /* and extract entries */ + case 2: + ret = afs_extract_data(call, call->count > 4); + if (ret < 0) + return ret; + + alist = call->ret_alist; + bp = call->buffer; + count = min(call->count, 4U); + for (i = 0; i < count; i++) + if (alist->nr_addrs < call->count2) + afs_merge_fs_addr4(alist, *bp++, AFS_FS_PORT); + + call->count -= count; + if (call->count > 0) + goto more_entries; + call->unmarshall++; + break; + } + + _leave(" = 0 [done]"); + return 0; +} + +static void afs_vl_get_addrs_u_destructor(struct afs_call *call) +{ + afs_put_addrlist(call->ret_alist); + return afs_flat_call_destructor(call); +} + +/* + * VL.GetAddrsU operation type. + */ +static const struct afs_call_type afs_RXVLGetAddrsU = { + .name = "VL.GetAddrsU", + .op = afs_VL_GetAddrsU, + .deliver = afs_deliver_vl_get_addrs_u, + .destructor = afs_vl_get_addrs_u_destructor, +}; + +/* + * Dispatch an operation to get the addresses for a server, where the server is + * nominated by UUID. + */ +struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *vc, + const uuid_t *uuid) +{ + struct afs_ListAddrByAttributes__xdr *r; + const struct afs_uuid *u = (const struct afs_uuid *)uuid; + struct afs_call *call; + struct afs_net *net = vc->cell->net; + __be32 *bp; + int i; + + _enter(""); + + call = afs_alloc_flat_call(net, &afs_RXVLGetAddrsU, + sizeof(__be32) + sizeof(struct afs_ListAddrByAttributes__xdr), + sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32)); + if (!call) + return ERR_PTR(-ENOMEM); + + call->key = vc->key; + call->ret_alist = NULL; + call->max_lifespan = AFS_VL_MAX_LIFESPAN; + + /* Marshall the parameters */ + bp = call->request; + *bp++ = htonl(VLGETADDRSU); + r = (struct afs_ListAddrByAttributes__xdr *)bp; + r->Mask = htonl(AFS_VLADDR_UUID); + r->ipaddr = 0; + r->index = 0; + r->spare = 0; + r->uuid.time_low = u->time_low; + r->uuid.time_mid = htonl(ntohs(u->time_mid)); + r->uuid.time_hi_and_version = htonl(ntohs(u->time_hi_and_version)); + r->uuid.clock_seq_hi_and_reserved = htonl(u->clock_seq_hi_and_reserved); + r->uuid.clock_seq_low = htonl(u->clock_seq_low); + for (i = 0; i < 6; i++) + r->uuid.node[i] = htonl(u->node[i]); + + trace_afs_make_vl_call(call); + afs_make_call(&vc->ac, call, GFP_KERNEL); + return (struct afs_addr_list *)afs_wait_for_call_to_complete(call, &vc->ac); +} + +/* + * Deliver reply data to an VL.GetCapabilities operation. + */ +static int afs_deliver_vl_get_capabilities(struct afs_call *call) +{ + u32 count; + int ret; + + _enter("{%u,%zu/%u}", + call->unmarshall, iov_iter_count(call->iter), call->count); + + switch (call->unmarshall) { + case 0: + afs_extract_to_tmp(call); + call->unmarshall++; + + fallthrough; /* and extract the capabilities word count */ + case 1: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + count = ntohl(call->tmp); + call->count = count; + call->count2 = count; + + call->unmarshall++; + afs_extract_discard(call, count * sizeof(__be32)); + + fallthrough; /* and extract capabilities words */ + case 2: + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; + + /* TODO: Examine capabilities */ + + call->unmarshall++; + break; + } + + _leave(" = 0 [done]"); + return 0; +} + +static void afs_destroy_vl_get_capabilities(struct afs_call *call) +{ + afs_put_vlserver(call->net, call->vlserver); + afs_flat_call_destructor(call); +} + +/* + * VL.GetCapabilities operation type + */ +static const struct afs_call_type afs_RXVLGetCapabilities = { + .name = "VL.GetCapabilities", + .op = afs_VL_GetCapabilities, + .deliver = afs_deliver_vl_get_capabilities, + .done = afs_vlserver_probe_result, + .destructor = afs_destroy_vl_get_capabilities, +}; + +/* + * Probe a volume server for the capabilities that it supports. This can + * return up to 196 words. + * + * We use this to probe for service upgrade to determine what the server at the + * other end supports. + */ +struct afs_call *afs_vl_get_capabilities(struct afs_net *net, + struct afs_addr_cursor *ac, + struct key *key, + struct afs_vlserver *server, + unsigned int server_index) +{ + struct afs_call *call; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(net, &afs_RXVLGetCapabilities, 1 * 4, 16 * 4); + if (!call) + return ERR_PTR(-ENOMEM); + + call->key = key; + call->vlserver = afs_get_vlserver(server); + call->server_index = server_index; + call->upgrade = true; + call->async = true; + call->max_lifespan = AFS_PROBE_MAX_LIFESPAN; + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(VLGETCAPABILITIES); + + /* Can't take a ref on server */ + trace_afs_make_vl_call(call); + afs_make_call(ac, call, GFP_KERNEL); + return call; +} + +/* + * Deliver reply data to a YFSVL.GetEndpoints call. + * + * GetEndpoints(IN yfsServerAttributes *attr, + * OUT opr_uuid *uuid, + * OUT afs_int32 *uniquifier, + * OUT endpoints *fsEndpoints, + * OUT endpoints *volEndpoints) + */ +static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call) +{ + struct afs_addr_list *alist; + __be32 *bp; + u32 uniquifier, size; + int ret; + + _enter("{%u,%zu,%u}", + call->unmarshall, iov_iter_count(call->iter), call->count2); + + switch (call->unmarshall) { + case 0: + afs_extract_to_buf(call, sizeof(uuid_t) + 3 * sizeof(__be32)); + call->unmarshall = 1; + + /* Extract the returned uuid, uniquifier, fsEndpoints count and + * either the first fsEndpoint type or the volEndpoints + * count if there are no fsEndpoints. */ + fallthrough; + case 1: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + bp = call->buffer + sizeof(uuid_t); + uniquifier = ntohl(*bp++); + call->count = ntohl(*bp++); + call->count2 = ntohl(*bp); /* Type or next count */ + + if (call->count > YFS_MAXENDPOINTS) + return afs_protocol_error(call, afs_eproto_yvl_fsendpt_num); + + alist = afs_alloc_addrlist(call->count, FS_SERVICE, AFS_FS_PORT); + if (!alist) + return -ENOMEM; + alist->version = uniquifier; + call->ret_alist = alist; + + if (call->count == 0) + goto extract_volendpoints; + + next_fsendpoint: + switch (call->count2) { + case YFS_ENDPOINT_IPV4: + size = sizeof(__be32) * (1 + 1 + 1); + break; + case YFS_ENDPOINT_IPV6: + size = sizeof(__be32) * (1 + 4 + 1); + break; + default: + return afs_protocol_error(call, afs_eproto_yvl_fsendpt_type); + } + + size += sizeof(__be32); + afs_extract_to_buf(call, size); + call->unmarshall = 2; + + fallthrough; /* and extract fsEndpoints[] entries */ + case 2: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + alist = call->ret_alist; + bp = call->buffer; + switch (call->count2) { + case YFS_ENDPOINT_IPV4: + if (ntohl(bp[0]) != sizeof(__be32) * 2) + return afs_protocol_error( + call, afs_eproto_yvl_fsendpt4_len); + afs_merge_fs_addr4(alist, bp[1], ntohl(bp[2])); + bp += 3; + break; + case YFS_ENDPOINT_IPV6: + if (ntohl(bp[0]) != sizeof(__be32) * 5) + return afs_protocol_error( + call, afs_eproto_yvl_fsendpt6_len); + afs_merge_fs_addr6(alist, bp + 1, ntohl(bp[5])); + bp += 6; + break; + default: + return afs_protocol_error(call, afs_eproto_yvl_fsendpt_type); + } + + /* Got either the type of the next entry or the count of + * volEndpoints if no more fsEndpoints. + */ + call->count2 = ntohl(*bp++); + + call->count--; + if (call->count > 0) + goto next_fsendpoint; + + extract_volendpoints: + /* Extract the list of volEndpoints. */ + call->count = call->count2; + if (!call->count) + goto end; + if (call->count > YFS_MAXENDPOINTS) + return afs_protocol_error(call, afs_eproto_yvl_vlendpt_type); + + afs_extract_to_buf(call, 1 * sizeof(__be32)); + call->unmarshall = 3; + + /* Extract the type of volEndpoints[0]. Normally we would + * extract the type of the next endpoint when we extract the + * data of the current one, but this is the first... + */ + fallthrough; + case 3: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + bp = call->buffer; + + next_volendpoint: + call->count2 = ntohl(*bp++); + switch (call->count2) { + case YFS_ENDPOINT_IPV4: + size = sizeof(__be32) * (1 + 1 + 1); + break; + case YFS_ENDPOINT_IPV6: + size = sizeof(__be32) * (1 + 4 + 1); + break; + default: + return afs_protocol_error(call, afs_eproto_yvl_vlendpt_type); + } + + if (call->count > 1) + size += sizeof(__be32); /* Get next type too */ + afs_extract_to_buf(call, size); + call->unmarshall = 4; + + fallthrough; /* and extract volEndpoints[] entries */ + case 4: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + bp = call->buffer; + switch (call->count2) { + case YFS_ENDPOINT_IPV4: + if (ntohl(bp[0]) != sizeof(__be32) * 2) + return afs_protocol_error( + call, afs_eproto_yvl_vlendpt4_len); + bp += 3; + break; + case YFS_ENDPOINT_IPV6: + if (ntohl(bp[0]) != sizeof(__be32) * 5) + return afs_protocol_error( + call, afs_eproto_yvl_vlendpt6_len); + bp += 6; + break; + default: + return afs_protocol_error(call, afs_eproto_yvl_vlendpt_type); + } + + /* Got either the type of the next entry or the count of + * volEndpoints if no more fsEndpoints. + */ + call->count--; + if (call->count > 0) + goto next_volendpoint; + + end: + afs_extract_discard(call, 0); + call->unmarshall = 5; + + fallthrough; /* Done */ + case 5: + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; + call->unmarshall = 6; + + case 6: + break; + } + + _leave(" = 0 [done]"); + return 0; +} + +/* + * YFSVL.GetEndpoints operation type. + */ +static const struct afs_call_type afs_YFSVLGetEndpoints = { + .name = "YFSVL.GetEndpoints", + .op = afs_YFSVL_GetEndpoints, + .deliver = afs_deliver_yfsvl_get_endpoints, + .destructor = afs_vl_get_addrs_u_destructor, +}; + +/* + * Dispatch an operation to get the addresses for a server, where the server is + * nominated by UUID. + */ +struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *vc, + const uuid_t *uuid) +{ + struct afs_call *call; + struct afs_net *net = vc->cell->net; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(net, &afs_YFSVLGetEndpoints, + sizeof(__be32) * 2 + sizeof(*uuid), + sizeof(struct in6_addr) + sizeof(__be32) * 3); + if (!call) + return ERR_PTR(-ENOMEM); + + call->key = vc->key; + call->ret_alist = NULL; + call->max_lifespan = AFS_VL_MAX_LIFESPAN; + + /* Marshall the parameters */ + bp = call->request; + *bp++ = htonl(YVLGETENDPOINTS); + *bp++ = htonl(YFS_SERVER_UUID); + memcpy(bp, uuid, sizeof(*uuid)); /* Type opr_uuid */ + + trace_afs_make_vl_call(call); + afs_make_call(&vc->ac, call, GFP_KERNEL); + return (struct afs_addr_list *)afs_wait_for_call_to_complete(call, &vc->ac); +} + +/* + * Deliver reply data to a YFSVL.GetCellName operation. + */ +static int afs_deliver_yfsvl_get_cell_name(struct afs_call *call) +{ + char *cell_name; + u32 namesz, paddedsz; + int ret; + + _enter("{%u,%zu/%u}", + call->unmarshall, iov_iter_count(call->iter), call->count); + + switch (call->unmarshall) { + case 0: + afs_extract_to_tmp(call); + call->unmarshall++; + + fallthrough; /* and extract the cell name length */ + case 1: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + namesz = ntohl(call->tmp); + if (namesz > AFS_MAXCELLNAME) + return afs_protocol_error(call, afs_eproto_cellname_len); + paddedsz = (namesz + 3) & ~3; + call->count = namesz; + call->count2 = paddedsz - namesz; + + cell_name = kmalloc(namesz + 1, GFP_KERNEL); + if (!cell_name) + return -ENOMEM; + cell_name[namesz] = 0; + call->ret_str = cell_name; + + afs_extract_begin(call, cell_name, namesz); + call->unmarshall++; + + fallthrough; /* and extract cell name */ + case 2: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + afs_extract_discard(call, call->count2); + call->unmarshall++; + + fallthrough; /* and extract padding */ + case 3: + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; + + call->unmarshall++; + break; + } + + _leave(" = 0 [done]"); + return 0; +} + +static void afs_destroy_yfsvl_get_cell_name(struct afs_call *call) +{ + kfree(call->ret_str); + afs_flat_call_destructor(call); +} + +/* + * VL.GetCapabilities operation type + */ +static const struct afs_call_type afs_YFSVLGetCellName = { + .name = "YFSVL.GetCellName", + .op = afs_YFSVL_GetCellName, + .deliver = afs_deliver_yfsvl_get_cell_name, + .destructor = afs_destroy_yfsvl_get_cell_name, +}; + +/* + * Probe a volume server for the capabilities that it supports. This can + * return up to 196 words. + * + * We use this to probe for service upgrade to determine what the server at the + * other end supports. + */ +char *afs_yfsvl_get_cell_name(struct afs_vl_cursor *vc) +{ + struct afs_call *call; + struct afs_net *net = vc->cell->net; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(net, &afs_YFSVLGetCellName, 1 * 4, 0); + if (!call) + return ERR_PTR(-ENOMEM); + + call->key = vc->key; + call->ret_str = NULL; + call->max_lifespan = AFS_VL_MAX_LIFESPAN; + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(YVLGETCELLNAME); + + /* Can't take a ref on server */ + trace_afs_make_vl_call(call); + afs_make_call(&vc->ac, call, GFP_KERNEL); + return (char *)afs_wait_for_call_to_complete(call, &vc->ac); +} diff --git a/fs/afs/volume.c b/fs/afs/volume.c new file mode 100644 index 000000000..f84194b79 --- /dev/null +++ b/fs/afs/volume.c @@ -0,0 +1,413 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* AFS volume management + * + * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include "internal.h" + +unsigned __read_mostly afs_volume_gc_delay = 10; +unsigned __read_mostly afs_volume_record_life = 60 * 60; + +/* + * Insert a volume into a cell. If there's an existing volume record, that is + * returned instead with a ref held. + */ +static struct afs_volume *afs_insert_volume_into_cell(struct afs_cell *cell, + struct afs_volume *volume) +{ + struct afs_volume *p; + struct rb_node *parent = NULL, **pp; + + write_seqlock(&cell->volume_lock); + + pp = &cell->volumes.rb_node; + while (*pp) { + parent = *pp; + p = rb_entry(parent, struct afs_volume, cell_node); + if (p->vid < volume->vid) { + pp = &(*pp)->rb_left; + } else if (p->vid > volume->vid) { + pp = &(*pp)->rb_right; + } else { + volume = afs_get_volume(p, afs_volume_trace_get_cell_insert); + goto found; + } + } + + rb_link_node_rcu(&volume->cell_node, parent, pp); + rb_insert_color(&volume->cell_node, &cell->volumes); + hlist_add_head_rcu(&volume->proc_link, &cell->proc_volumes); + +found: + write_sequnlock(&cell->volume_lock); + return volume; + +} + +static void afs_remove_volume_from_cell(struct afs_volume *volume) +{ + struct afs_cell *cell = volume->cell; + + if (!hlist_unhashed(&volume->proc_link)) { + trace_afs_volume(volume->vid, atomic_read(&volume->usage), + afs_volume_trace_remove); + write_seqlock(&cell->volume_lock); + hlist_del_rcu(&volume->proc_link); + rb_erase(&volume->cell_node, &cell->volumes); + write_sequnlock(&cell->volume_lock); + } +} + +/* + * Allocate a volume record and load it up from a vldb record. + */ +static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, + struct afs_vldb_entry *vldb, + unsigned long type_mask) +{ + struct afs_server_list *slist; + struct afs_volume *volume; + int ret = -ENOMEM, nr_servers = 0, i; + + for (i = 0; i < vldb->nr_servers; i++) + if (vldb->fs_mask[i] & type_mask) + nr_servers++; + + volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL); + if (!volume) + goto error_0; + + volume->vid = vldb->vid[params->type]; + volume->update_at = ktime_get_real_seconds() + afs_volume_record_life; + volume->cell = afs_get_cell(params->cell, afs_cell_trace_get_vol); + volume->type = params->type; + volume->type_force = params->force; + volume->name_len = vldb->name_len; + + atomic_set(&volume->usage, 1); + INIT_HLIST_NODE(&volume->proc_link); + rwlock_init(&volume->servers_lock); + rwlock_init(&volume->cb_v_break_lock); + memcpy(volume->name, vldb->name, vldb->name_len + 1); + + slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask); + if (IS_ERR(slist)) { + ret = PTR_ERR(slist); + goto error_1; + } + + refcount_set(&slist->usage, 1); + rcu_assign_pointer(volume->servers, slist); + trace_afs_volume(volume->vid, 1, afs_volume_trace_alloc); + return volume; + +error_1: + afs_put_cell(volume->cell, afs_cell_trace_put_vol); + kfree(volume); +error_0: + return ERR_PTR(ret); +} + +/* + * Look up or allocate a volume record. + */ +static struct afs_volume *afs_lookup_volume(struct afs_fs_context *params, + struct afs_vldb_entry *vldb, + unsigned long type_mask) +{ + struct afs_volume *candidate, *volume; + + candidate = afs_alloc_volume(params, vldb, type_mask); + if (IS_ERR(candidate)) + return candidate; + + volume = afs_insert_volume_into_cell(params->cell, candidate); + if (volume != candidate) + afs_put_volume(params->net, candidate, afs_volume_trace_put_cell_dup); + return volume; +} + +/* + * Look up a VLDB record for a volume. + */ +static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell, + struct key *key, + const char *volname, + size_t volnamesz) +{ + struct afs_vldb_entry *vldb = ERR_PTR(-EDESTADDRREQ); + struct afs_vl_cursor vc; + int ret; + + if (!afs_begin_vlserver_operation(&vc, cell, key)) + return ERR_PTR(-ERESTARTSYS); + + while (afs_select_vlserver(&vc)) { + vldb = afs_vl_get_entry_by_name_u(&vc, volname, volnamesz); + } + + ret = afs_end_vlserver_operation(&vc); + return ret < 0 ? ERR_PTR(ret) : vldb; +} + +/* + * Look up a volume in the VL server and create a candidate volume record for + * it. + * + * The volume name can be one of the following: + * "%[cell:]volume[.]" R/W volume + * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0), + * or R/W (rwparent=1) volume + * "%[cell:]volume.readonly" R/O volume + * "#[cell:]volume.readonly" R/O volume + * "%[cell:]volume.backup" Backup volume + * "#[cell:]volume.backup" Backup volume + * + * The cell name is optional, and defaults to the current cell. + * + * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin + * Guide + * - Rule 1: Explicit type suffix forces access of that type or nothing + * (no suffix, then use Rule 2 & 3) + * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W + * if not available + * - Rule 3: If parent volume is R/W, then only mount R/W volume unless + * explicitly told otherwise + */ +struct afs_volume *afs_create_volume(struct afs_fs_context *params) +{ + struct afs_vldb_entry *vldb; + struct afs_volume *volume; + unsigned long type_mask = 1UL << params->type; + + vldb = afs_vl_lookup_vldb(params->cell, params->key, + params->volname, params->volnamesz); + if (IS_ERR(vldb)) + return ERR_CAST(vldb); + + if (test_bit(AFS_VLDB_QUERY_ERROR, &vldb->flags)) { + volume = ERR_PTR(vldb->error); + goto error; + } + + /* Make the final decision on the type we want */ + volume = ERR_PTR(-ENOMEDIUM); + if (params->force) { + if (!(vldb->flags & type_mask)) + goto error; + } else if (test_bit(AFS_VLDB_HAS_RO, &vldb->flags)) { + params->type = AFSVL_ROVOL; + } else if (test_bit(AFS_VLDB_HAS_RW, &vldb->flags)) { + params->type = AFSVL_RWVOL; + } else { + goto error; + } + + type_mask = 1UL << params->type; + volume = afs_lookup_volume(params, vldb, type_mask); + +error: + kfree(vldb); + return volume; +} + +/* + * Destroy a volume record + */ +static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume) +{ + _enter("%p", volume); + +#ifdef CONFIG_AFS_FSCACHE + ASSERTCMP(volume->cache, ==, NULL); +#endif + + afs_remove_volume_from_cell(volume); + afs_put_serverlist(net, rcu_access_pointer(volume->servers)); + afs_put_cell(volume->cell, afs_cell_trace_put_vol); + trace_afs_volume(volume->vid, atomic_read(&volume->usage), + afs_volume_trace_free); + kfree_rcu(volume, rcu); + + _leave(" [destroyed]"); +} + +/* + * Get a reference on a volume record. + */ +struct afs_volume *afs_get_volume(struct afs_volume *volume, + enum afs_volume_trace reason) +{ + if (volume) { + int u = atomic_inc_return(&volume->usage); + trace_afs_volume(volume->vid, u, reason); + } + return volume; +} + + +/* + * Drop a reference on a volume record. + */ +void afs_put_volume(struct afs_net *net, struct afs_volume *volume, + enum afs_volume_trace reason) +{ + if (volume) { + afs_volid_t vid = volume->vid; + int u = atomic_dec_return(&volume->usage); + trace_afs_volume(vid, u, reason); + if (u == 0) + afs_destroy_volume(net, volume); + } +} + +/* + * Activate a volume. + */ +void afs_activate_volume(struct afs_volume *volume) +{ +#ifdef CONFIG_AFS_FSCACHE + volume->cache = fscache_acquire_cookie(volume->cell->cache, + &afs_volume_cache_index_def, + &volume->vid, sizeof(volume->vid), + NULL, 0, + volume, 0, true); +#endif +} + +/* + * Deactivate a volume. + */ +void afs_deactivate_volume(struct afs_volume *volume) +{ + _enter("%s", volume->name); + +#ifdef CONFIG_AFS_FSCACHE + fscache_relinquish_cookie(volume->cache, NULL, + test_bit(AFS_VOLUME_DELETED, &volume->flags)); + volume->cache = NULL; +#endif + + _leave(""); +} + +/* + * Query the VL service to update the volume status. + */ +static int afs_update_volume_status(struct afs_volume *volume, struct key *key) +{ + struct afs_server_list *new, *old, *discard; + struct afs_vldb_entry *vldb; + char idbuf[16]; + int ret, idsz; + + _enter(""); + + /* We look up an ID by passing it as a decimal string in the + * operation's name parameter. + */ + idsz = sprintf(idbuf, "%llu", volume->vid); + + vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz); + if (IS_ERR(vldb)) { + ret = PTR_ERR(vldb); + goto error; + } + + /* See if the volume got renamed. */ + if (vldb->name_len != volume->name_len || + memcmp(vldb->name, volume->name, vldb->name_len) != 0) { + /* TODO: Use RCU'd string. */ + memcpy(volume->name, vldb->name, AFS_MAXVOLNAME); + volume->name_len = vldb->name_len; + } + + /* See if the volume's server list got updated. */ + new = afs_alloc_server_list(volume->cell, key, + vldb, (1 << volume->type)); + if (IS_ERR(new)) { + ret = PTR_ERR(new); + goto error_vldb; + } + + write_lock(&volume->servers_lock); + + discard = new; + old = rcu_dereference_protected(volume->servers, + lockdep_is_held(&volume->servers_lock)); + if (afs_annotate_server_list(new, old)) { + new->seq = volume->servers_seq + 1; + rcu_assign_pointer(volume->servers, new); + smp_wmb(); + volume->servers_seq++; + discard = old; + } + + volume->update_at = ktime_get_real_seconds() + afs_volume_record_life; + write_unlock(&volume->servers_lock); + ret = 0; + + afs_put_serverlist(volume->cell->net, discard); +error_vldb: + kfree(vldb); +error: + _leave(" = %d", ret); + return ret; +} + +/* + * Make sure the volume record is up to date. + */ +int afs_check_volume_status(struct afs_volume *volume, struct afs_operation *op) +{ + int ret, retries = 0; + + _enter(""); + +retry: + if (test_bit(AFS_VOLUME_WAIT, &volume->flags)) + goto wait; + if (volume->update_at <= ktime_get_real_seconds() || + test_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags)) + goto update; + _leave(" = 0"); + return 0; + +update: + if (!test_and_set_bit_lock(AFS_VOLUME_UPDATING, &volume->flags)) { + clear_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags); + ret = afs_update_volume_status(volume, op->key); + if (ret < 0) + set_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags); + clear_bit_unlock(AFS_VOLUME_WAIT, &volume->flags); + clear_bit_unlock(AFS_VOLUME_UPDATING, &volume->flags); + wake_up_bit(&volume->flags, AFS_VOLUME_WAIT); + _leave(" = %d", ret); + return ret; + } + +wait: + if (!test_bit(AFS_VOLUME_WAIT, &volume->flags)) { + _leave(" = 0 [no wait]"); + return 0; + } + + ret = wait_on_bit(&volume->flags, AFS_VOLUME_WAIT, + (op->flags & AFS_OPERATION_UNINTR) ? + TASK_UNINTERRUPTIBLE : TASK_INTERRUPTIBLE); + if (ret == -ERESTARTSYS) { + _leave(" = %d", ret); + return ret; + } + + retries++; + if (retries == 4) { + _leave(" = -ESTALE"); + return -ESTALE; + } + goto retry; +} diff --git a/fs/afs/write.c b/fs/afs/write.c new file mode 100644 index 000000000..be60cf110 --- /dev/null +++ b/fs/afs/write.c @@ -0,0 +1,948 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* handling of writes to regular files and writing back to the server + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/backing-dev.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/pagemap.h> +#include <linux/writeback.h> +#include <linux/pagevec.h> +#include "internal.h" + +/* + * mark a page as having been made dirty and thus needing writeback + */ +int afs_set_page_dirty(struct page *page) +{ + _enter(""); + return __set_page_dirty_nobuffers(page); +} + +/* + * partly or wholly fill a page that's under preparation for writing + */ +static int afs_fill_page(struct afs_vnode *vnode, struct key *key, + loff_t pos, unsigned int len, struct page *page) +{ + struct afs_read *req; + size_t p; + void *data; + int ret; + + _enter(",,%llu", (unsigned long long)pos); + + if (pos >= vnode->vfs_inode.i_size) { + p = pos & ~PAGE_MASK; + ASSERTCMP(p + len, <=, PAGE_SIZE); + data = kmap(page); + memset(data + p, 0, len); + kunmap(page); + return 0; + } + + req = kzalloc(struct_size(req, array, 1), GFP_KERNEL); + if (!req) + return -ENOMEM; + + refcount_set(&req->usage, 1); + req->pos = pos; + req->len = len; + req->nr_pages = 1; + req->pages = req->array; + req->pages[0] = page; + get_page(page); + + ret = afs_fetch_data(vnode, key, req); + afs_put_read(req); + if (ret < 0) { + if (ret == -ENOENT) { + _debug("got NOENT from server" + " - marking file deleted and stale"); + set_bit(AFS_VNODE_DELETED, &vnode->flags); + ret = -ESTALE; + } + } + + _leave(" = %d", ret); + return ret; +} + +/* + * prepare to perform part of a write to a page + */ +int afs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **_page, void **fsdata) +{ + struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); + struct page *page; + struct key *key = afs_file_key(file); + unsigned long priv; + unsigned f, from = pos & (PAGE_SIZE - 1); + unsigned t, to = from + len; + pgoff_t index = pos >> PAGE_SHIFT; + int ret; + + _enter("{%llx:%llu},{%lx},%u,%u", + vnode->fid.vid, vnode->fid.vnode, index, from, to); + + page = grab_cache_page_write_begin(mapping, index, flags); + if (!page) + return -ENOMEM; + + if (!PageUptodate(page) && len != PAGE_SIZE) { + ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page); + if (ret < 0) { + unlock_page(page); + put_page(page); + _leave(" = %d [prep]", ret); + return ret; + } + SetPageUptodate(page); + } + +try_again: + /* See if this page is already partially written in a way that we can + * merge the new write with. + */ + t = f = 0; + if (PagePrivate(page)) { + priv = page_private(page); + f = afs_page_dirty_from(priv); + t = afs_page_dirty_to(priv); + ASSERTCMP(f, <=, t); + } + + if (f != t) { + if (PageWriteback(page)) { + trace_afs_page_dirty(vnode, tracepoint_string("alrdy"), + page->index, priv); + goto flush_conflicting_write; + } + /* If the file is being filled locally, allow inter-write + * spaces to be merged into writes. If it's not, only write + * back what the user gives us. + */ + if (!test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags) && + (to < f || from > t)) + goto flush_conflicting_write; + } + + *_page = page; + _leave(" = 0"); + return 0; + + /* The previous write and this write aren't adjacent or overlapping, so + * flush the page out. + */ +flush_conflicting_write: + _debug("flush conflict"); + ret = write_one_page(page); + if (ret < 0) + goto error; + + ret = lock_page_killable(page); + if (ret < 0) + goto error; + goto try_again; + +error: + put_page(page); + _leave(" = %d", ret); + return ret; +} + +/* + * finalise part of a write to a page + */ +int afs_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); + struct key *key = afs_file_key(file); + unsigned long priv; + unsigned int f, from = pos & (PAGE_SIZE - 1); + unsigned int t, to = from + copied; + loff_t i_size, maybe_i_size; + int ret = 0; + + _enter("{%llx:%llu},{%lx}", + vnode->fid.vid, vnode->fid.vnode, page->index); + + if (copied == 0) + goto out; + + maybe_i_size = pos + copied; + + i_size = i_size_read(&vnode->vfs_inode); + if (maybe_i_size > i_size) { + write_seqlock(&vnode->cb_lock); + i_size = i_size_read(&vnode->vfs_inode); + if (maybe_i_size > i_size) + afs_set_i_size(vnode, maybe_i_size); + write_sequnlock(&vnode->cb_lock); + } + + if (!PageUptodate(page)) { + if (copied < len) { + /* Try and load any missing data from the server. The + * unmarshalling routine will take care of clearing any + * bits that are beyond the EOF. + */ + ret = afs_fill_page(vnode, key, pos + copied, + len - copied, page); + if (ret < 0) + goto out; + } + SetPageUptodate(page); + } + + if (PagePrivate(page)) { + priv = page_private(page); + f = afs_page_dirty_from(priv); + t = afs_page_dirty_to(priv); + if (from < f) + f = from; + if (to > t) + t = to; + priv = afs_page_dirty(f, t); + set_page_private(page, priv); + trace_afs_page_dirty(vnode, tracepoint_string("dirty+"), + page->index, priv); + } else { + priv = afs_page_dirty(from, to); + attach_page_private(page, (void *)priv); + trace_afs_page_dirty(vnode, tracepoint_string("dirty"), + page->index, priv); + } + + set_page_dirty(page); + if (PageDirty(page)) + _debug("dirtied"); + ret = copied; + +out: + unlock_page(page); + put_page(page); + return ret; +} + +/* + * kill all the pages in the given range + */ +static void afs_kill_pages(struct address_space *mapping, + pgoff_t first, pgoff_t last) +{ + struct afs_vnode *vnode = AFS_FS_I(mapping->host); + struct pagevec pv; + unsigned count, loop; + + _enter("{%llx:%llu},%lx-%lx", + vnode->fid.vid, vnode->fid.vnode, first, last); + + pagevec_init(&pv); + + do { + _debug("kill %lx-%lx", first, last); + + count = last - first + 1; + if (count > PAGEVEC_SIZE) + count = PAGEVEC_SIZE; + pv.nr = find_get_pages_contig(mapping, first, count, pv.pages); + ASSERTCMP(pv.nr, ==, count); + + for (loop = 0; loop < count; loop++) { + struct page *page = pv.pages[loop]; + ClearPageUptodate(page); + SetPageError(page); + end_page_writeback(page); + if (page->index >= first) + first = page->index + 1; + lock_page(page); + generic_error_remove_page(mapping, page); + unlock_page(page); + } + + __pagevec_release(&pv); + } while (first <= last); + + _leave(""); +} + +/* + * Redirty all the pages in a given range. + */ +static void afs_redirty_pages(struct writeback_control *wbc, + struct address_space *mapping, + pgoff_t first, pgoff_t last) +{ + struct afs_vnode *vnode = AFS_FS_I(mapping->host); + struct pagevec pv; + unsigned count, loop; + + _enter("{%llx:%llu},%lx-%lx", + vnode->fid.vid, vnode->fid.vnode, first, last); + + pagevec_init(&pv); + + do { + _debug("redirty %lx-%lx", first, last); + + count = last - first + 1; + if (count > PAGEVEC_SIZE) + count = PAGEVEC_SIZE; + pv.nr = find_get_pages_contig(mapping, first, count, pv.pages); + ASSERTCMP(pv.nr, ==, count); + + for (loop = 0; loop < count; loop++) { + struct page *page = pv.pages[loop]; + + redirty_page_for_writepage(wbc, page); + end_page_writeback(page); + if (page->index >= first) + first = page->index + 1; + } + + __pagevec_release(&pv); + } while (first <= last); + + _leave(""); +} + +/* + * completion of write to server + */ +static void afs_pages_written_back(struct afs_vnode *vnode, + pgoff_t first, pgoff_t last) +{ + struct pagevec pv; + unsigned long priv; + unsigned count, loop; + + _enter("{%llx:%llu},{%lx-%lx}", + vnode->fid.vid, vnode->fid.vnode, first, last); + + pagevec_init(&pv); + + do { + _debug("done %lx-%lx", first, last); + + count = last - first + 1; + if (count > PAGEVEC_SIZE) + count = PAGEVEC_SIZE; + pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping, + first, count, pv.pages); + ASSERTCMP(pv.nr, ==, count); + + for (loop = 0; loop < count; loop++) { + priv = (unsigned long)detach_page_private(pv.pages[loop]); + trace_afs_page_dirty(vnode, tracepoint_string("clear"), + pv.pages[loop]->index, priv); + end_page_writeback(pv.pages[loop]); + } + first += count; + __pagevec_release(&pv); + } while (first <= last); + + afs_prune_wb_keys(vnode); + _leave(""); +} + +/* + * Find a key to use for the writeback. We cached the keys used to author the + * writes on the vnode. *_wbk will contain the last writeback key used or NULL + * and we need to start from there if it's set. + */ +static int afs_get_writeback_key(struct afs_vnode *vnode, + struct afs_wb_key **_wbk) +{ + struct afs_wb_key *wbk = NULL; + struct list_head *p; + int ret = -ENOKEY, ret2; + + spin_lock(&vnode->wb_lock); + if (*_wbk) + p = (*_wbk)->vnode_link.next; + else + p = vnode->wb_keys.next; + + while (p != &vnode->wb_keys) { + wbk = list_entry(p, struct afs_wb_key, vnode_link); + _debug("wbk %u", key_serial(wbk->key)); + ret2 = key_validate(wbk->key); + if (ret2 == 0) { + refcount_inc(&wbk->usage); + _debug("USE WB KEY %u", key_serial(wbk->key)); + break; + } + + wbk = NULL; + if (ret == -ENOKEY) + ret = ret2; + p = p->next; + } + + spin_unlock(&vnode->wb_lock); + if (*_wbk) + afs_put_wb_key(*_wbk); + *_wbk = wbk; + return 0; +} + +static void afs_store_data_success(struct afs_operation *op) +{ + struct afs_vnode *vnode = op->file[0].vnode; + + op->ctime = op->file[0].scb.status.mtime_client; + afs_vnode_commit_status(op, &op->file[0]); + if (op->error == 0) { + if (!op->store.laundering) + afs_pages_written_back(vnode, op->store.first, op->store.last); + afs_stat_v(vnode, n_stores); + atomic_long_add((op->store.last * PAGE_SIZE + op->store.last_to) - + (op->store.first * PAGE_SIZE + op->store.first_offset), + &afs_v2net(vnode)->n_store_bytes); + } +} + +static const struct afs_operation_ops afs_store_data_operation = { + .issue_afs_rpc = afs_fs_store_data, + .issue_yfs_rpc = yfs_fs_store_data, + .success = afs_store_data_success, +}; + +/* + * write to a file + */ +static int afs_store_data(struct address_space *mapping, + pgoff_t first, pgoff_t last, + unsigned offset, unsigned to, bool laundering) +{ + struct afs_vnode *vnode = AFS_FS_I(mapping->host); + struct afs_operation *op; + struct afs_wb_key *wbk = NULL; + int ret; + + _enter("%s{%llx:%llu.%u},%lx,%lx,%x,%x", + vnode->volume->name, + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + first, last, offset, to); + + ret = afs_get_writeback_key(vnode, &wbk); + if (ret) { + _leave(" = %d [no keys]", ret); + return ret; + } + + op = afs_alloc_operation(wbk->key, vnode->volume); + if (IS_ERR(op)) { + afs_put_wb_key(wbk); + return -ENOMEM; + } + + afs_op_set_vnode(op, 0, vnode); + op->file[0].dv_delta = 1; + op->store.mapping = mapping; + op->file[0].modification = true; + op->store.first = first; + op->store.last = last; + op->store.first_offset = offset; + op->store.last_to = to; + op->store.laundering = laundering; + op->mtime = vnode->vfs_inode.i_mtime; + op->flags |= AFS_OPERATION_UNINTR; + op->ops = &afs_store_data_operation; + +try_next_key: + afs_begin_vnode_operation(op); + afs_wait_for_operation(op); + + switch (op->error) { + case -EACCES: + case -EPERM: + case -ENOKEY: + case -EKEYEXPIRED: + case -EKEYREJECTED: + case -EKEYREVOKED: + _debug("next"); + + ret = afs_get_writeback_key(vnode, &wbk); + if (ret == 0) { + key_put(op->key); + op->key = key_get(wbk->key); + goto try_next_key; + } + break; + } + + afs_put_wb_key(wbk); + _leave(" = %d", op->error); + return afs_put_operation(op); +} + +/* + * Synchronously write back the locked page and any subsequent non-locked dirty + * pages. + */ +static int afs_write_back_from_locked_page(struct address_space *mapping, + struct writeback_control *wbc, + struct page *primary_page, + pgoff_t final_page) +{ + struct afs_vnode *vnode = AFS_FS_I(mapping->host); + struct page *pages[8], *page; + unsigned long count, priv; + unsigned n, offset, to, f, t; + pgoff_t start, first, last; + loff_t i_size, end; + int loop, ret; + + _enter(",%lx", primary_page->index); + + count = 1; + if (test_set_page_writeback(primary_page)) + BUG(); + + /* Find all consecutive lockable dirty pages that have contiguous + * written regions, stopping when we find a page that is not + * immediately lockable, is not dirty or is missing, or we reach the + * end of the range. + */ + start = primary_page->index; + priv = page_private(primary_page); + offset = afs_page_dirty_from(priv); + to = afs_page_dirty_to(priv); + trace_afs_page_dirty(vnode, tracepoint_string("store"), + primary_page->index, priv); + + WARN_ON(offset == to); + if (offset == to) + trace_afs_page_dirty(vnode, tracepoint_string("WARN"), + primary_page->index, priv); + + if (start >= final_page || + (to < PAGE_SIZE && !test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags))) + goto no_more; + + start++; + do { + _debug("more %lx [%lx]", start, count); + n = final_page - start + 1; + if (n > ARRAY_SIZE(pages)) + n = ARRAY_SIZE(pages); + n = find_get_pages_contig(mapping, start, ARRAY_SIZE(pages), pages); + _debug("fgpc %u", n); + if (n == 0) + goto no_more; + if (pages[0]->index != start) { + do { + put_page(pages[--n]); + } while (n > 0); + goto no_more; + } + + for (loop = 0; loop < n; loop++) { + page = pages[loop]; + if (to != PAGE_SIZE && + !test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags)) + break; + if (page->index > final_page) + break; + if (!trylock_page(page)) + break; + if (!PageDirty(page) || PageWriteback(page)) { + unlock_page(page); + break; + } + + priv = page_private(page); + f = afs_page_dirty_from(priv); + t = afs_page_dirty_to(priv); + if (f != 0 && + !test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags)) { + unlock_page(page); + break; + } + to = t; + + trace_afs_page_dirty(vnode, tracepoint_string("store+"), + page->index, priv); + + if (!clear_page_dirty_for_io(page)) + BUG(); + if (test_set_page_writeback(page)) + BUG(); + unlock_page(page); + put_page(page); + } + count += loop; + if (loop < n) { + for (; loop < n; loop++) + put_page(pages[loop]); + goto no_more; + } + + start += loop; + } while (start <= final_page && count < 65536); + +no_more: + /* We now have a contiguous set of dirty pages, each with writeback + * set; the first page is still locked at this point, but all the rest + * have been unlocked. + */ + unlock_page(primary_page); + + first = primary_page->index; + last = first + count - 1; + + end = (loff_t)last * PAGE_SIZE + to; + i_size = i_size_read(&vnode->vfs_inode); + + _debug("write back %lx[%u..] to %lx[..%u]", first, offset, last, to); + if (end > i_size) + to = i_size & ~PAGE_MASK; + + ret = afs_store_data(mapping, first, last, offset, to, false); + switch (ret) { + case 0: + ret = count; + break; + + default: + pr_notice("kAFS: Unexpected error from FS.StoreData %d\n", ret); + fallthrough; + case -EACCES: + case -EPERM: + case -ENOKEY: + case -EKEYEXPIRED: + case -EKEYREJECTED: + case -EKEYREVOKED: + afs_redirty_pages(wbc, mapping, first, last); + mapping_set_error(mapping, ret); + break; + + case -EDQUOT: + case -ENOSPC: + afs_redirty_pages(wbc, mapping, first, last); + mapping_set_error(mapping, -ENOSPC); + break; + + case -EROFS: + case -EIO: + case -EREMOTEIO: + case -EFBIG: + case -ENOENT: + case -ENOMEDIUM: + case -ENXIO: + trace_afs_file_error(vnode, ret, afs_file_error_writeback_fail); + afs_kill_pages(mapping, first, last); + mapping_set_error(mapping, ret); + break; + } + + _leave(" = %d", ret); + return ret; +} + +/* + * write a page back to the server + * - the caller locked the page for us + */ +int afs_writepage(struct page *page, struct writeback_control *wbc) +{ + int ret; + + _enter("{%lx},", page->index); + + ret = afs_write_back_from_locked_page(page->mapping, wbc, page, + wbc->range_end >> PAGE_SHIFT); + if (ret < 0) { + _leave(" = %d", ret); + return 0; + } + + wbc->nr_to_write -= ret; + + _leave(" = 0"); + return 0; +} + +/* + * write a region of pages back to the server + */ +static int afs_writepages_region(struct address_space *mapping, + struct writeback_control *wbc, + pgoff_t index, pgoff_t end, pgoff_t *_next) +{ + struct page *page; + int ret, n; + + _enter(",,%lx,%lx,", index, end); + + do { + n = find_get_pages_range_tag(mapping, &index, end, + PAGECACHE_TAG_DIRTY, 1, &page); + if (!n) + break; + + _debug("wback %lx", page->index); + + /* + * at this point we hold neither the i_pages lock nor the + * page lock: the page may be truncated or invalidated + * (changing page->mapping to NULL), or even swizzled + * back from swapper_space to tmpfs file mapping + */ + ret = lock_page_killable(page); + if (ret < 0) { + put_page(page); + _leave(" = %d", ret); + return ret; + } + + if (page->mapping != mapping || !PageDirty(page)) { + unlock_page(page); + put_page(page); + continue; + } + + if (PageWriteback(page)) { + unlock_page(page); + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + put_page(page); + continue; + } + + if (!clear_page_dirty_for_io(page)) + BUG(); + ret = afs_write_back_from_locked_page(mapping, wbc, page, end); + put_page(page); + if (ret < 0) { + _leave(" = %d", ret); + return ret; + } + + wbc->nr_to_write -= ret; + + cond_resched(); + } while (index < end && wbc->nr_to_write > 0); + + *_next = index; + _leave(" = 0 [%lx]", *_next); + return 0; +} + +/* + * write some of the pending data back to the server + */ +int afs_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct afs_vnode *vnode = AFS_FS_I(mapping->host); + pgoff_t start, end, next; + int ret; + + _enter(""); + + /* We have to be careful as we can end up racing with setattr() + * truncating the pagecache since the caller doesn't take a lock here + * to prevent it. + */ + if (wbc->sync_mode == WB_SYNC_ALL) + down_read(&vnode->validate_lock); + else if (!down_read_trylock(&vnode->validate_lock)) + return 0; + + if (wbc->range_cyclic) { + start = mapping->writeback_index; + end = -1; + ret = afs_writepages_region(mapping, wbc, start, end, &next); + if (start > 0 && wbc->nr_to_write > 0 && ret == 0) + ret = afs_writepages_region(mapping, wbc, 0, start, + &next); + mapping->writeback_index = next; + } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { + end = (pgoff_t)(LLONG_MAX >> PAGE_SHIFT); + ret = afs_writepages_region(mapping, wbc, 0, end, &next); + if (wbc->nr_to_write > 0) + mapping->writeback_index = next; + } else { + start = wbc->range_start >> PAGE_SHIFT; + end = wbc->range_end >> PAGE_SHIFT; + ret = afs_writepages_region(mapping, wbc, start, end, &next); + } + + up_read(&vnode->validate_lock); + _leave(" = %d", ret); + return ret; +} + +/* + * write to an AFS file + */ +ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from) +{ + struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp)); + ssize_t result; + size_t count = iov_iter_count(from); + + _enter("{%llx:%llu},{%zu},", + vnode->fid.vid, vnode->fid.vnode, count); + + if (IS_SWAPFILE(&vnode->vfs_inode)) { + printk(KERN_INFO + "AFS: Attempt to write to active swap file!\n"); + return -EBUSY; + } + + if (!count) + return 0; + + result = generic_file_write_iter(iocb, from); + + _leave(" = %zd", result); + return result; +} + +/* + * flush any dirty pages for this process, and check for write errors. + * - the return status from this call provides a reliable indication of + * whether any write errors occurred for this process. + */ +int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync) +{ + struct inode *inode = file_inode(file); + struct afs_vnode *vnode = AFS_FS_I(inode); + + _enter("{%llx:%llu},{n=%pD},%d", + vnode->fid.vid, vnode->fid.vnode, file, + datasync); + + return file_write_and_wait_range(file, start, end); +} + +/* + * notification that a previously read-only page is about to become writable + * - if it returns an error, the caller will deliver a bus error signal + */ +vm_fault_t afs_page_mkwrite(struct vm_fault *vmf) +{ + struct file *file = vmf->vma->vm_file; + struct inode *inode = file_inode(file); + struct afs_vnode *vnode = AFS_FS_I(inode); + unsigned long priv; + + _enter("{{%llx:%llu}},{%lx}", + vnode->fid.vid, vnode->fid.vnode, vmf->page->index); + + sb_start_pagefault(inode->i_sb); + + /* Wait for the page to be written to the cache before we allow it to + * be modified. We then assume the entire page will need writing back. + */ +#ifdef CONFIG_AFS_FSCACHE + fscache_wait_on_page_write(vnode->cache, vmf->page); +#endif + + if (PageWriteback(vmf->page) && + wait_on_page_bit_killable(vmf->page, PG_writeback) < 0) + return VM_FAULT_RETRY; + + if (lock_page_killable(vmf->page) < 0) + return VM_FAULT_RETRY; + + /* We mustn't change page->private until writeback is complete as that + * details the portion of the page we need to write back and we might + * need to redirty the page if there's a problem. + */ + wait_on_page_writeback(vmf->page); + + priv = afs_page_dirty(0, PAGE_SIZE); + priv = afs_page_dirty_mmapped(priv); + trace_afs_page_dirty(vnode, tracepoint_string("mkwrite"), + vmf->page->index, priv); + if (PagePrivate(vmf->page)) + set_page_private(vmf->page, priv); + else + attach_page_private(vmf->page, (void *)priv); + file_update_time(file); + + sb_end_pagefault(inode->i_sb); + return VM_FAULT_LOCKED; +} + +/* + * Prune the keys cached for writeback. The caller must hold vnode->wb_lock. + */ +void afs_prune_wb_keys(struct afs_vnode *vnode) +{ + LIST_HEAD(graveyard); + struct afs_wb_key *wbk, *tmp; + + /* Discard unused keys */ + spin_lock(&vnode->wb_lock); + + if (!mapping_tagged(&vnode->vfs_inode.i_data, PAGECACHE_TAG_WRITEBACK) && + !mapping_tagged(&vnode->vfs_inode.i_data, PAGECACHE_TAG_DIRTY)) { + list_for_each_entry_safe(wbk, tmp, &vnode->wb_keys, vnode_link) { + if (refcount_read(&wbk->usage) == 1) + list_move(&wbk->vnode_link, &graveyard); + } + } + + spin_unlock(&vnode->wb_lock); + + while (!list_empty(&graveyard)) { + wbk = list_entry(graveyard.next, struct afs_wb_key, vnode_link); + list_del(&wbk->vnode_link); + afs_put_wb_key(wbk); + } +} + +/* + * Clean up a page during invalidation. + */ +int afs_launder_page(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct afs_vnode *vnode = AFS_FS_I(mapping->host); + unsigned long priv; + unsigned int f, t; + int ret = 0; + + _enter("{%lx}", page->index); + + priv = page_private(page); + if (clear_page_dirty_for_io(page)) { + f = 0; + t = PAGE_SIZE; + if (PagePrivate(page)) { + f = afs_page_dirty_from(priv); + t = afs_page_dirty_to(priv); + } + + trace_afs_page_dirty(vnode, tracepoint_string("launder"), + page->index, priv); + ret = afs_store_data(mapping, page->index, page->index, t, f, true); + } + + priv = (unsigned long)detach_page_private(page); + trace_afs_page_dirty(vnode, tracepoint_string("laundered"), + page->index, priv); + +#ifdef CONFIG_AFS_FSCACHE + if (PageFsCache(page)) { + fscache_wait_on_page_write(vnode->cache, page); + fscache_uncache_page(vnode->cache, page); + } +#endif + return ret; +} diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c new file mode 100644 index 000000000..6a29337bd --- /dev/null +++ b/fs/afs/xattr.c @@ -0,0 +1,361 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Extended attribute handling for AFS. We use xattrs to get and set metadata + * instead of providing pioctl(). + * + * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/xattr.h> +#include "internal.h" + +/* + * Deal with the result of a successful fetch ACL operation. + */ +static void afs_acl_success(struct afs_operation *op) +{ + afs_vnode_commit_status(op, &op->file[0]); +} + +static void afs_acl_put(struct afs_operation *op) +{ + kfree(op->acl); +} + +static const struct afs_operation_ops afs_fetch_acl_operation = { + .issue_afs_rpc = afs_fs_fetch_acl, + .success = afs_acl_success, + .put = afs_acl_put, +}; + +/* + * Get a file's ACL. + */ +static int afs_xattr_get_acl(const struct xattr_handler *handler, + struct dentry *dentry, + struct inode *inode, const char *name, + void *buffer, size_t size) +{ + struct afs_operation *op; + struct afs_vnode *vnode = AFS_FS_I(inode); + struct afs_acl *acl = NULL; + int ret; + + op = afs_alloc_operation(NULL, vnode->volume); + if (IS_ERR(op)) + return -ENOMEM; + + afs_op_set_vnode(op, 0, vnode); + op->ops = &afs_fetch_acl_operation; + + afs_begin_vnode_operation(op); + afs_wait_for_operation(op); + acl = op->acl; + op->acl = NULL; + ret = afs_put_operation(op); + + if (ret == 0) { + ret = acl->size; + if (size > 0) { + if (acl->size <= size) + memcpy(buffer, acl->data, acl->size); + else + ret = -ERANGE; + } + } + + kfree(acl); + return ret; +} + +static bool afs_make_acl(struct afs_operation *op, + const void *buffer, size_t size) +{ + struct afs_acl *acl; + + acl = kmalloc(sizeof(*acl) + size, GFP_KERNEL); + if (!acl) { + afs_op_nomem(op); + return false; + } + + acl->size = size; + memcpy(acl->data, buffer, size); + op->acl = acl; + return true; +} + +static const struct afs_operation_ops afs_store_acl_operation = { + .issue_afs_rpc = afs_fs_store_acl, + .success = afs_acl_success, + .put = afs_acl_put, +}; + +/* + * Set a file's AFS3 ACL. + */ +static int afs_xattr_set_acl(const struct xattr_handler *handler, + struct dentry *dentry, + struct inode *inode, const char *name, + const void *buffer, size_t size, int flags) +{ + struct afs_operation *op; + struct afs_vnode *vnode = AFS_FS_I(inode); + + if (flags == XATTR_CREATE) + return -EINVAL; + + op = afs_alloc_operation(NULL, vnode->volume); + if (IS_ERR(op)) + return -ENOMEM; + + afs_op_set_vnode(op, 0, vnode); + if (!afs_make_acl(op, buffer, size)) + return afs_put_operation(op); + + op->ops = &afs_store_acl_operation; + return afs_do_sync_operation(op); +} + +static const struct xattr_handler afs_xattr_afs_acl_handler = { + .name = "afs.acl", + .get = afs_xattr_get_acl, + .set = afs_xattr_set_acl, +}; + +static const struct afs_operation_ops yfs_fetch_opaque_acl_operation = { + .issue_yfs_rpc = yfs_fs_fetch_opaque_acl, + .success = afs_acl_success, + /* Don't free op->yacl in .put here */ +}; + +/* + * Get a file's YFS ACL. + */ +static int afs_xattr_get_yfs(const struct xattr_handler *handler, + struct dentry *dentry, + struct inode *inode, const char *name, + void *buffer, size_t size) +{ + struct afs_operation *op; + struct afs_vnode *vnode = AFS_FS_I(inode); + struct yfs_acl *yacl = NULL; + char buf[16], *data; + int which = 0, dsize, ret = -ENOMEM; + + if (strcmp(name, "acl") == 0) + which = 0; + else if (strcmp(name, "acl_inherited") == 0) + which = 1; + else if (strcmp(name, "acl_num_cleaned") == 0) + which = 2; + else if (strcmp(name, "vol_acl") == 0) + which = 3; + else + return -EOPNOTSUPP; + + yacl = kzalloc(sizeof(struct yfs_acl), GFP_KERNEL); + if (!yacl) + goto error; + + if (which == 0) + yacl->flags |= YFS_ACL_WANT_ACL; + else if (which == 3) + yacl->flags |= YFS_ACL_WANT_VOL_ACL; + + op = afs_alloc_operation(NULL, vnode->volume); + if (IS_ERR(op)) + goto error_yacl; + + afs_op_set_vnode(op, 0, vnode); + op->yacl = yacl; + op->ops = &yfs_fetch_opaque_acl_operation; + + afs_begin_vnode_operation(op); + afs_wait_for_operation(op); + ret = afs_put_operation(op); + + if (ret == 0) { + switch (which) { + case 0: + data = yacl->acl->data; + dsize = yacl->acl->size; + break; + case 1: + data = buf; + dsize = scnprintf(buf, sizeof(buf), "%u", yacl->inherit_flag); + break; + case 2: + data = buf; + dsize = scnprintf(buf, sizeof(buf), "%u", yacl->num_cleaned); + break; + case 3: + data = yacl->vol_acl->data; + dsize = yacl->vol_acl->size; + break; + default: + ret = -EOPNOTSUPP; + goto error_yacl; + } + + ret = dsize; + if (size > 0) { + if (dsize <= size) + memcpy(buffer, data, dsize); + else + ret = -ERANGE; + } + } else if (ret == -ENOTSUPP) { + ret = -ENODATA; + } + +error_yacl: + yfs_free_opaque_acl(yacl); +error: + return ret; +} + +static const struct afs_operation_ops yfs_store_opaque_acl2_operation = { + .issue_yfs_rpc = yfs_fs_store_opaque_acl2, + .success = afs_acl_success, + .put = afs_acl_put, +}; + +/* + * Set a file's YFS ACL. + */ +static int afs_xattr_set_yfs(const struct xattr_handler *handler, + struct dentry *dentry, + struct inode *inode, const char *name, + const void *buffer, size_t size, int flags) +{ + struct afs_operation *op; + struct afs_vnode *vnode = AFS_FS_I(inode); + int ret; + + if (flags == XATTR_CREATE || + strcmp(name, "acl") != 0) + return -EINVAL; + + op = afs_alloc_operation(NULL, vnode->volume); + if (IS_ERR(op)) + return -ENOMEM; + + afs_op_set_vnode(op, 0, vnode); + if (!afs_make_acl(op, buffer, size)) + return afs_put_operation(op); + + op->ops = &yfs_store_opaque_acl2_operation; + ret = afs_do_sync_operation(op); + if (ret == -ENOTSUPP) + ret = -ENODATA; + return ret; +} + +static const struct xattr_handler afs_xattr_yfs_handler = { + .prefix = "afs.yfs.", + .get = afs_xattr_get_yfs, + .set = afs_xattr_set_yfs, +}; + +/* + * Get the name of the cell on which a file resides. + */ +static int afs_xattr_get_cell(const struct xattr_handler *handler, + struct dentry *dentry, + struct inode *inode, const char *name, + void *buffer, size_t size) +{ + struct afs_vnode *vnode = AFS_FS_I(inode); + struct afs_cell *cell = vnode->volume->cell; + size_t namelen; + + namelen = cell->name_len; + if (size == 0) + return namelen; + if (namelen > size) + return -ERANGE; + memcpy(buffer, cell->name, namelen); + return namelen; +} + +static const struct xattr_handler afs_xattr_afs_cell_handler = { + .name = "afs.cell", + .get = afs_xattr_get_cell, +}; + +/* + * Get the volume ID, vnode ID and vnode uniquifier of a file as a sequence of + * hex numbers separated by colons. + */ +static int afs_xattr_get_fid(const struct xattr_handler *handler, + struct dentry *dentry, + struct inode *inode, const char *name, + void *buffer, size_t size) +{ + struct afs_vnode *vnode = AFS_FS_I(inode); + char text[16 + 1 + 24 + 1 + 8 + 1]; + size_t len; + + /* The volume ID is 64-bit, the vnode ID is 96-bit and the + * uniquifier is 32-bit. + */ + len = scnprintf(text, sizeof(text), "%llx:", vnode->fid.vid); + if (vnode->fid.vnode_hi) + len += scnprintf(text + len, sizeof(text) - len, "%x%016llx", + vnode->fid.vnode_hi, vnode->fid.vnode); + else + len += scnprintf(text + len, sizeof(text) - len, "%llx", + vnode->fid.vnode); + len += scnprintf(text + len, sizeof(text) - len, ":%x", + vnode->fid.unique); + + if (size == 0) + return len; + if (len > size) + return -ERANGE; + memcpy(buffer, text, len); + return len; +} + +static const struct xattr_handler afs_xattr_afs_fid_handler = { + .name = "afs.fid", + .get = afs_xattr_get_fid, +}; + +/* + * Get the name of the volume on which a file resides. + */ +static int afs_xattr_get_volume(const struct xattr_handler *handler, + struct dentry *dentry, + struct inode *inode, const char *name, + void *buffer, size_t size) +{ + struct afs_vnode *vnode = AFS_FS_I(inode); + const char *volname = vnode->volume->name; + size_t namelen; + + namelen = strlen(volname); + if (size == 0) + return namelen; + if (namelen > size) + return -ERANGE; + memcpy(buffer, volname, namelen); + return namelen; +} + +static const struct xattr_handler afs_xattr_afs_volume_handler = { + .name = "afs.volume", + .get = afs_xattr_get_volume, +}; + +const struct xattr_handler *afs_xattr_handlers[] = { + &afs_xattr_afs_acl_handler, + &afs_xattr_afs_cell_handler, + &afs_xattr_afs_fid_handler, + &afs_xattr_afs_volume_handler, + &afs_xattr_yfs_handler, /* afs.yfs. prefix */ + NULL +}; diff --git a/fs/afs/xdr_fs.h b/fs/afs/xdr_fs.h new file mode 100644 index 000000000..94f1f398e --- /dev/null +++ b/fs/afs/xdr_fs.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* AFS fileserver XDR types + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#ifndef XDR_FS_H +#define XDR_FS_H + +struct afs_xdr_AFSFetchStatus { + __be32 if_version; +#define AFS_FSTATUS_VERSION 1 + __be32 type; + __be32 nlink; + __be32 size_lo; + __be32 data_version_lo; + __be32 author; + __be32 owner; + __be32 caller_access; + __be32 anon_access; + __be32 mode; + __be32 parent_vnode; + __be32 parent_unique; + __be32 seg_size; + __be32 mtime_client; + __be32 mtime_server; + __be32 group; + __be32 sync_counter; + __be32 data_version_hi; + __be32 lock_count; + __be32 size_hi; + __be32 abort_code; +} __packed; + +#define AFS_DIR_HASHTBL_SIZE 128 +#define AFS_DIR_DIRENT_SIZE 32 +#define AFS_DIR_SLOTS_PER_BLOCK 64 +#define AFS_DIR_BLOCK_SIZE 2048 +#define AFS_DIR_BLOCKS_PER_PAGE (PAGE_SIZE / AFS_DIR_BLOCK_SIZE) +#define AFS_DIR_MAX_SLOTS 65536 +#define AFS_DIR_BLOCKS_WITH_CTR 128 +#define AFS_DIR_MAX_BLOCKS 1023 +#define AFS_DIR_RESV_BLOCKS 1 +#define AFS_DIR_RESV_BLOCKS0 13 + +/* + * Directory entry structure. + */ +union afs_xdr_dirent { + struct { + u8 valid; + u8 unused[1]; + __be16 hash_next; + __be32 vnode; + __be32 unique; + u8 name[16]; + u8 overflow[4]; /* if any char of the name (inc + * NUL) reaches here, consume + * the next dirent too */ + } u; + u8 extended_name[32]; +} __packed; + +/* + * Directory block header (one at the beginning of every 2048-byte block). + */ +struct afs_xdr_dir_hdr { + __be16 npages; + __be16 magic; +#define AFS_DIR_MAGIC htons(1234) + u8 reserved; + u8 bitmap[8]; + u8 pad[19]; +} __packed; + +/* + * Directory block layout + */ +union afs_xdr_dir_block { + struct afs_xdr_dir_hdr hdr; + + struct { + struct afs_xdr_dir_hdr hdr; + u8 alloc_ctrs[AFS_DIR_MAX_BLOCKS]; + __be16 hashtable[AFS_DIR_HASHTBL_SIZE]; + } meta; + + union afs_xdr_dirent dirents[AFS_DIR_SLOTS_PER_BLOCK]; +} __packed; + +/* + * Directory layout on a linux VM page. + */ +struct afs_xdr_dir_page { + union afs_xdr_dir_block blocks[AFS_DIR_BLOCKS_PER_PAGE]; +}; + +#endif /* XDR_FS_H */ diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c new file mode 100644 index 000000000..5b2ef5ffd --- /dev/null +++ b/fs/afs/yfsclient.c @@ -0,0 +1,1997 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* YFS File Server client stubs + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/circ_buf.h> +#include <linux/iversion.h> +#include "internal.h" +#include "afs_fs.h" +#include "xdr_fs.h" +#include "protocol_yfs.h" + +#define xdr_size(x) (sizeof(*x) / sizeof(__be32)) + +static void xdr_decode_YFSFid(const __be32 **_bp, struct afs_fid *fid) +{ + const struct yfs_xdr_YFSFid *x = (const void *)*_bp; + + fid->vid = xdr_to_u64(x->volume); + fid->vnode = xdr_to_u64(x->vnode.lo); + fid->vnode_hi = ntohl(x->vnode.hi); + fid->unique = ntohl(x->vnode.unique); + *_bp += xdr_size(x); +} + +static __be32 *xdr_encode_u32(__be32 *bp, u32 n) +{ + *bp++ = htonl(n); + return bp; +} + +static __be32 *xdr_encode_u64(__be32 *bp, u64 n) +{ + struct yfs_xdr_u64 *x = (void *)bp; + + *x = u64_to_xdr(n); + return bp + xdr_size(x); +} + +static __be32 *xdr_encode_YFSFid(__be32 *bp, struct afs_fid *fid) +{ + struct yfs_xdr_YFSFid *x = (void *)bp; + + x->volume = u64_to_xdr(fid->vid); + x->vnode.lo = u64_to_xdr(fid->vnode); + x->vnode.hi = htonl(fid->vnode_hi); + x->vnode.unique = htonl(fid->unique); + return bp + xdr_size(x); +} + +static size_t xdr_strlen(unsigned int len) +{ + return sizeof(__be32) + round_up(len, sizeof(__be32)); +} + +static __be32 *xdr_encode_string(__be32 *bp, const char *p, unsigned int len) +{ + bp = xdr_encode_u32(bp, len); + bp = memcpy(bp, p, len); + if (len & 3) { + unsigned int pad = 4 - (len & 3); + + memset((u8 *)bp + len, 0, pad); + len += pad; + } + + return bp + len / sizeof(__be32); +} + +static __be32 *xdr_encode_name(__be32 *bp, const struct qstr *p) +{ + return xdr_encode_string(bp, p->name, p->len); +} + +static s64 linux_to_yfs_time(const struct timespec64 *t) +{ + /* Convert to 100ns intervals. */ + return (u64)t->tv_sec * 10000000 + t->tv_nsec/100; +} + +static __be32 *xdr_encode_YFSStoreStatus_mode(__be32 *bp, mode_t mode) +{ + struct yfs_xdr_YFSStoreStatus *x = (void *)bp; + + x->mask = htonl(AFS_SET_MODE); + x->mode = htonl(mode & S_IALLUGO); + x->mtime_client = u64_to_xdr(0); + x->owner = u64_to_xdr(0); + x->group = u64_to_xdr(0); + return bp + xdr_size(x); +} + +static __be32 *xdr_encode_YFSStoreStatus_mtime(__be32 *bp, const struct timespec64 *t) +{ + struct yfs_xdr_YFSStoreStatus *x = (void *)bp; + s64 mtime = linux_to_yfs_time(t); + + x->mask = htonl(AFS_SET_MTIME); + x->mode = htonl(0); + x->mtime_client = u64_to_xdr(mtime); + x->owner = u64_to_xdr(0); + x->group = u64_to_xdr(0); + return bp + xdr_size(x); +} + +/* + * Convert a signed 100ns-resolution 64-bit time into a timespec. + */ +static struct timespec64 yfs_time_to_linux(s64 t) +{ + struct timespec64 ts; + u64 abs_t; + + /* + * Unfortunately can not use normal 64 bit division on 32 bit arch, but + * the alternative, do_div, does not work with negative numbers so have + * to special case them + */ + if (t < 0) { + abs_t = -t; + ts.tv_nsec = (time64_t)(do_div(abs_t, 10000000) * 100); + ts.tv_nsec = -ts.tv_nsec; + ts.tv_sec = -abs_t; + } else { + abs_t = t; + ts.tv_nsec = (time64_t)do_div(abs_t, 10000000) * 100; + ts.tv_sec = abs_t; + } + + return ts; +} + +static struct timespec64 xdr_to_time(const struct yfs_xdr_u64 xdr) +{ + s64 t = xdr_to_u64(xdr); + + return yfs_time_to_linux(t); +} + +static void yfs_check_req(struct afs_call *call, __be32 *bp) +{ + size_t len = (void *)bp - call->request; + + if (len > call->request_size) + pr_err("kAFS: %s: Request buffer overflow (%zu>%u)\n", + call->type->name, len, call->request_size); + else if (len < call->request_size) + pr_warn("kAFS: %s: Request buffer underflow (%zu<%u)\n", + call->type->name, len, call->request_size); +} + +/* + * Dump a bad file status record. + */ +static void xdr_dump_bad(const __be32 *bp) +{ + __be32 x[4]; + int i; + + pr_notice("YFS XDR: Bad status record\n"); + for (i = 0; i < 6 * 4 * 4; i += 16) { + memcpy(x, bp, 16); + bp += 4; + pr_notice("%03x: %08x %08x %08x %08x\n", + i, ntohl(x[0]), ntohl(x[1]), ntohl(x[2]), ntohl(x[3])); + } + + memcpy(x, bp, 8); + pr_notice("0x60: %08x %08x\n", ntohl(x[0]), ntohl(x[1])); +} + +/* + * Decode a YFSFetchStatus block + */ +static void xdr_decode_YFSFetchStatus(const __be32 **_bp, + struct afs_call *call, + struct afs_status_cb *scb) +{ + const struct yfs_xdr_YFSFetchStatus *xdr = (const void *)*_bp; + struct afs_file_status *status = &scb->status; + u32 type; + + status->abort_code = ntohl(xdr->abort_code); + if (status->abort_code != 0) { + if (status->abort_code == VNOVNODE) + status->nlink = 0; + scb->have_error = true; + goto advance; + } + + type = ntohl(xdr->type); + switch (type) { + case AFS_FTYPE_FILE: + case AFS_FTYPE_DIR: + case AFS_FTYPE_SYMLINK: + status->type = type; + break; + default: + goto bad; + } + + status->nlink = ntohl(xdr->nlink); + status->author = xdr_to_u64(xdr->author); + status->owner = xdr_to_u64(xdr->owner); + status->caller_access = ntohl(xdr->caller_access); /* Ticket dependent */ + status->anon_access = ntohl(xdr->anon_access); + status->mode = ntohl(xdr->mode) & S_IALLUGO; + status->group = xdr_to_u64(xdr->group); + status->lock_count = ntohl(xdr->lock_count); + + status->mtime_client = xdr_to_time(xdr->mtime_client); + status->mtime_server = xdr_to_time(xdr->mtime_server); + status->size = xdr_to_u64(xdr->size); + status->data_version = xdr_to_u64(xdr->data_version); + scb->have_status = true; +advance: + *_bp += xdr_size(xdr); + return; + +bad: + xdr_dump_bad(*_bp); + afs_protocol_error(call, afs_eproto_bad_status); + goto advance; +} + +/* + * Decode a YFSCallBack block + */ +static void xdr_decode_YFSCallBack(const __be32 **_bp, + struct afs_call *call, + struct afs_status_cb *scb) +{ + struct yfs_xdr_YFSCallBack *x = (void *)*_bp; + struct afs_callback *cb = &scb->callback; + ktime_t cb_expiry; + + cb_expiry = ktime_add(call->issue_time, xdr_to_u64(x->expiration_time) * 100); + cb->expires_at = ktime_divns(cb_expiry, NSEC_PER_SEC); + scb->have_cb = true; + *_bp += xdr_size(x); +} + +/* + * Decode a YFSVolSync block + */ +static void xdr_decode_YFSVolSync(const __be32 **_bp, + struct afs_volsync *volsync) +{ + struct yfs_xdr_YFSVolSync *x = (void *)*_bp; + u64 creation; + + if (volsync) { + creation = xdr_to_u64(x->vol_creation_date); + do_div(creation, 10 * 1000 * 1000); + volsync->creation = creation; + } + + *_bp += xdr_size(x); +} + +/* + * Encode the requested attributes into a YFSStoreStatus block + */ +static __be32 *xdr_encode_YFS_StoreStatus(__be32 *bp, struct iattr *attr) +{ + struct yfs_xdr_YFSStoreStatus *x = (void *)bp; + s64 mtime = 0, owner = 0, group = 0; + u32 mask = 0, mode = 0; + + mask = 0; + if (attr->ia_valid & ATTR_MTIME) { + mask |= AFS_SET_MTIME; + mtime = linux_to_yfs_time(&attr->ia_mtime); + } + + if (attr->ia_valid & ATTR_UID) { + mask |= AFS_SET_OWNER; + owner = from_kuid(&init_user_ns, attr->ia_uid); + } + + if (attr->ia_valid & ATTR_GID) { + mask |= AFS_SET_GROUP; + group = from_kgid(&init_user_ns, attr->ia_gid); + } + + if (attr->ia_valid & ATTR_MODE) { + mask |= AFS_SET_MODE; + mode = attr->ia_mode & S_IALLUGO; + } + + x->mask = htonl(mask); + x->mode = htonl(mode); + x->mtime_client = u64_to_xdr(mtime); + x->owner = u64_to_xdr(owner); + x->group = u64_to_xdr(group); + return bp + xdr_size(x); +} + +/* + * Decode a YFSFetchVolumeStatus block. + */ +static void xdr_decode_YFSFetchVolumeStatus(const __be32 **_bp, + struct afs_volume_status *vs) +{ + const struct yfs_xdr_YFSFetchVolumeStatus *x = (const void *)*_bp; + u32 flags; + + vs->vid = xdr_to_u64(x->vid); + vs->parent_id = xdr_to_u64(x->parent_id); + flags = ntohl(x->flags); + vs->online = flags & yfs_FVSOnline; + vs->in_service = flags & yfs_FVSInservice; + vs->blessed = flags & yfs_FVSBlessed; + vs->needs_salvage = flags & yfs_FVSNeedsSalvage; + vs->type = ntohl(x->type); + vs->min_quota = 0; + vs->max_quota = xdr_to_u64(x->max_quota); + vs->blocks_in_use = xdr_to_u64(x->blocks_in_use); + vs->part_blocks_avail = xdr_to_u64(x->part_blocks_avail); + vs->part_max_blocks = xdr_to_u64(x->part_max_blocks); + vs->vol_copy_date = xdr_to_u64(x->vol_copy_date); + vs->vol_backup_date = xdr_to_u64(x->vol_backup_date); + *_bp += sizeof(*x) / sizeof(__be32); +} + +/* + * Deliver reply data to operations that just return a file status and a volume + * sync record. + */ +static int yfs_deliver_status_and_volsync(struct afs_call *call) +{ + struct afs_operation *op = call->op; + const __be32 *bp; + int ret; + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + bp = call->buffer; + xdr_decode_YFSFetchStatus(&bp, call, &op->file[0].scb); + xdr_decode_YFSVolSync(&bp, &op->volsync); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * Deliver reply data to an YFS.FetchData64. + */ +static int yfs_deliver_fs_fetch_data64(struct afs_call *call) +{ + struct afs_operation *op = call->op; + struct afs_vnode_param *vp = &op->file[0]; + struct afs_read *req = op->fetch.req; + const __be32 *bp; + unsigned int size; + int ret; + + _enter("{%u,%zu/%llu}", + call->unmarshall, iov_iter_count(call->iter), req->actual_len); + + switch (call->unmarshall) { + case 0: + req->actual_len = 0; + req->index = 0; + req->offset = req->pos & (PAGE_SIZE - 1); + afs_extract_to_tmp64(call); + call->unmarshall++; + fallthrough; + + /* extract the returned data length */ + case 1: + _debug("extract data length"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + req->actual_len = be64_to_cpu(call->tmp64); + _debug("DATA length: %llu", req->actual_len); + req->remain = min(req->len, req->actual_len); + if (req->remain == 0) + goto no_more_data; + + call->unmarshall++; + + begin_page: + ASSERTCMP(req->index, <, req->nr_pages); + if (req->remain > PAGE_SIZE - req->offset) + size = PAGE_SIZE - req->offset; + else + size = req->remain; + call->bvec[0].bv_len = size; + call->bvec[0].bv_offset = req->offset; + call->bvec[0].bv_page = req->pages[req->index]; + iov_iter_bvec(&call->def_iter, READ, call->bvec, 1, size); + ASSERTCMP(size, <=, PAGE_SIZE); + fallthrough; + + /* extract the returned data */ + case 2: + _debug("extract data %zu/%llu", + iov_iter_count(call->iter), req->remain); + + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + req->remain -= call->bvec[0].bv_len; + req->offset += call->bvec[0].bv_len; + ASSERTCMP(req->offset, <=, PAGE_SIZE); + if (req->offset == PAGE_SIZE) { + req->offset = 0; + req->index++; + if (req->remain > 0) + goto begin_page; + } + + ASSERTCMP(req->remain, ==, 0); + if (req->actual_len <= req->len) + goto no_more_data; + + /* Discard any excess data the server gave us */ + afs_extract_discard(call, req->actual_len - req->len); + call->unmarshall = 3; + fallthrough; + + case 3: + _debug("extract discard %zu/%llu", + iov_iter_count(call->iter), req->actual_len - req->len); + + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + no_more_data: + call->unmarshall = 4; + afs_extract_to_buf(call, + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSCallBack) + + sizeof(struct yfs_xdr_YFSVolSync)); + fallthrough; + + /* extract the metadata */ + case 4: + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; + + bp = call->buffer; + xdr_decode_YFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_YFSCallBack(&bp, call, &vp->scb); + xdr_decode_YFSVolSync(&bp, &op->volsync); + + req->data_version = vp->scb.status.data_version; + req->file_size = vp->scb.status.size; + + call->unmarshall++; + fallthrough; + + case 5: + break; + } + + for (; req->index < req->nr_pages; req->index++) { + if (req->offset < PAGE_SIZE) + zero_user_segment(req->pages[req->index], + req->offset, PAGE_SIZE); + req->offset = 0; + } + + if (req->page_done) + for (req->index = 0; req->index < req->nr_pages; req->index++) + req->page_done(req); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * YFS.FetchData64 operation type + */ +static const struct afs_call_type yfs_RXYFSFetchData64 = { + .name = "YFS.FetchData64", + .op = yfs_FS_FetchData64, + .deliver = yfs_deliver_fs_fetch_data64, + .destructor = afs_flat_call_destructor, +}; + +/* + * Fetch data from a file. + */ +void yfs_fs_fetch_data(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct afs_read *req = op->fetch.req; + struct afs_call *call; + __be32 *bp; + + _enter(",%x,{%llx:%llu},%llx,%llx", + key_serial(op->key), vp->fid.vid, vp->fid.vnode, + req->pos, req->len); + + call = afs_alloc_flat_call(op->net, &yfs_RXYFSFetchData64, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_YFSFid) + + sizeof(struct yfs_xdr_u64) * 2, + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSCallBack) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSFETCHDATA64); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vp->fid); + bp = xdr_encode_u64(bp, req->pos); + bp = xdr_encode_u64(bp, req->len); + yfs_check_req(call, bp); + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * Deliver reply data for YFS.CreateFile or YFS.MakeDir. + */ +static int yfs_deliver_fs_create_vnode(struct afs_call *call) +{ + struct afs_operation *op = call->op; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[1]; + const __be32 *bp; + int ret; + + _enter("{%u}", call->unmarshall); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + xdr_decode_YFSFid(&bp, &op->file[1].fid); + xdr_decode_YFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_YFSFetchStatus(&bp, call, &dvp->scb); + xdr_decode_YFSCallBack(&bp, call, &vp->scb); + xdr_decode_YFSVolSync(&bp, &op->volsync); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.CreateFile and FS.MakeDir operation type + */ +static const struct afs_call_type afs_RXFSCreateFile = { + .name = "YFS.CreateFile", + .op = yfs_FS_CreateFile, + .deliver = yfs_deliver_fs_create_vnode, + .destructor = afs_flat_call_destructor, +}; + +/* + * Create a file. + */ +void yfs_fs_create_file(struct afs_operation *op) +{ + const struct qstr *name = &op->dentry->d_name; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_call *call; + size_t reqsz, rplsz; + __be32 *bp; + + _enter(""); + + reqsz = (sizeof(__be32) + + sizeof(__be32) + + sizeof(struct yfs_xdr_YFSFid) + + xdr_strlen(name->len) + + sizeof(struct yfs_xdr_YFSStoreStatus) + + sizeof(__be32)); + rplsz = (sizeof(struct yfs_xdr_YFSFid) + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSCallBack) + + sizeof(struct yfs_xdr_YFSVolSync)); + + call = afs_alloc_flat_call(op->net, &afs_RXFSCreateFile, reqsz, rplsz); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSCREATEFILE); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &dvp->fid); + bp = xdr_encode_name(bp, name); + bp = xdr_encode_YFSStoreStatus_mode(bp, op->create.mode); + bp = xdr_encode_u32(bp, yfs_LockNone); /* ViceLockType */ + yfs_check_req(call, bp); + + trace_afs_make_fs_call1(call, &dvp->fid, name); + afs_make_op_call(op, call, GFP_NOFS); +} + +static const struct afs_call_type yfs_RXFSMakeDir = { + .name = "YFS.MakeDir", + .op = yfs_FS_MakeDir, + .deliver = yfs_deliver_fs_create_vnode, + .destructor = afs_flat_call_destructor, +}; + +/* + * Make a directory. + */ +void yfs_fs_make_dir(struct afs_operation *op) +{ + const struct qstr *name = &op->dentry->d_name; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_call *call; + size_t reqsz, rplsz; + __be32 *bp; + + _enter(""); + + reqsz = (sizeof(__be32) + + sizeof(struct yfs_xdr_RPCFlags) + + sizeof(struct yfs_xdr_YFSFid) + + xdr_strlen(name->len) + + sizeof(struct yfs_xdr_YFSStoreStatus)); + rplsz = (sizeof(struct yfs_xdr_YFSFid) + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSCallBack) + + sizeof(struct yfs_xdr_YFSVolSync)); + + call = afs_alloc_flat_call(op->net, &yfs_RXFSMakeDir, reqsz, rplsz); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSMAKEDIR); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &dvp->fid); + bp = xdr_encode_name(bp, name); + bp = xdr_encode_YFSStoreStatus_mode(bp, op->create.mode); + yfs_check_req(call, bp); + + trace_afs_make_fs_call1(call, &dvp->fid, name); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * Deliver reply data to a YFS.RemoveFile2 operation. + */ +static int yfs_deliver_fs_remove_file2(struct afs_call *call) +{ + struct afs_operation *op = call->op; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[1]; + struct afs_fid fid; + const __be32 *bp; + int ret; + + _enter("{%u}", call->unmarshall); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + bp = call->buffer; + xdr_decode_YFSFetchStatus(&bp, call, &dvp->scb); + xdr_decode_YFSFid(&bp, &fid); + xdr_decode_YFSFetchStatus(&bp, call, &vp->scb); + /* Was deleted if vnode->status.abort_code == VNOVNODE. */ + + xdr_decode_YFSVolSync(&bp, &op->volsync); + return 0; +} + +static void yfs_done_fs_remove_file2(struct afs_call *call) +{ + if (call->error == -ECONNABORTED && + call->abort_code == RX_INVALID_OPERATION) { + set_bit(AFS_SERVER_FL_NO_RM2, &call->server->flags); + call->op->flags |= AFS_OPERATION_DOWNGRADE; + } +} + +/* + * YFS.RemoveFile2 operation type. + */ +static const struct afs_call_type yfs_RXYFSRemoveFile2 = { + .name = "YFS.RemoveFile2", + .op = yfs_FS_RemoveFile2, + .deliver = yfs_deliver_fs_remove_file2, + .done = yfs_done_fs_remove_file2, + .destructor = afs_flat_call_destructor, +}; + +/* + * Remove a file and retrieve new file status. + */ +void yfs_fs_remove_file2(struct afs_operation *op) +{ + struct afs_vnode_param *dvp = &op->file[0]; + const struct qstr *name = &op->dentry->d_name; + struct afs_call *call; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(op->net, &yfs_RXYFSRemoveFile2, + sizeof(__be32) + + sizeof(struct yfs_xdr_RPCFlags) + + sizeof(struct yfs_xdr_YFSFid) + + xdr_strlen(name->len), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSFid) + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSREMOVEFILE2); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &dvp->fid); + bp = xdr_encode_name(bp, name); + yfs_check_req(call, bp); + + trace_afs_make_fs_call1(call, &dvp->fid, name); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * Deliver reply data to a YFS.RemoveFile or YFS.RemoveDir operation. + */ +static int yfs_deliver_fs_remove(struct afs_call *call) +{ + struct afs_operation *op = call->op; + struct afs_vnode_param *dvp = &op->file[0]; + const __be32 *bp; + int ret; + + _enter("{%u}", call->unmarshall); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + bp = call->buffer; + xdr_decode_YFSFetchStatus(&bp, call, &dvp->scb); + xdr_decode_YFSVolSync(&bp, &op->volsync); + return 0; +} + +/* + * FS.RemoveDir and FS.RemoveFile operation types. + */ +static const struct afs_call_type yfs_RXYFSRemoveFile = { + .name = "YFS.RemoveFile", + .op = yfs_FS_RemoveFile, + .deliver = yfs_deliver_fs_remove, + .destructor = afs_flat_call_destructor, +}; + +/* + * Remove a file. + */ +void yfs_fs_remove_file(struct afs_operation *op) +{ + const struct qstr *name = &op->dentry->d_name; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_call *call; + __be32 *bp; + + _enter(""); + + if (!test_bit(AFS_SERVER_FL_NO_RM2, &op->server->flags)) + return yfs_fs_remove_file2(op); + + call = afs_alloc_flat_call(op->net, &yfs_RXYFSRemoveFile, + sizeof(__be32) + + sizeof(struct yfs_xdr_RPCFlags) + + sizeof(struct yfs_xdr_YFSFid) + + xdr_strlen(name->len), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSREMOVEFILE); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &dvp->fid); + bp = xdr_encode_name(bp, name); + yfs_check_req(call, bp); + + trace_afs_make_fs_call1(call, &dvp->fid, name); + afs_make_op_call(op, call, GFP_NOFS); +} + +static const struct afs_call_type yfs_RXYFSRemoveDir = { + .name = "YFS.RemoveDir", + .op = yfs_FS_RemoveDir, + .deliver = yfs_deliver_fs_remove, + .destructor = afs_flat_call_destructor, +}; + +/* + * Remove a directory. + */ +void yfs_fs_remove_dir(struct afs_operation *op) +{ + const struct qstr *name = &op->dentry->d_name; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_call *call; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(op->net, &yfs_RXYFSRemoveDir, + sizeof(__be32) + + sizeof(struct yfs_xdr_RPCFlags) + + sizeof(struct yfs_xdr_YFSFid) + + xdr_strlen(name->len), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSREMOVEDIR); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &dvp->fid); + bp = xdr_encode_name(bp, name); + yfs_check_req(call, bp); + + trace_afs_make_fs_call1(call, &dvp->fid, name); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * Deliver reply data to a YFS.Link operation. + */ +static int yfs_deliver_fs_link(struct afs_call *call) +{ + struct afs_operation *op = call->op; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[1]; + const __be32 *bp; + int ret; + + _enter("{%u}", call->unmarshall); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + bp = call->buffer; + xdr_decode_YFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_YFSFetchStatus(&bp, call, &dvp->scb); + xdr_decode_YFSVolSync(&bp, &op->volsync); + _leave(" = 0 [done]"); + return 0; +} + +/* + * YFS.Link operation type. + */ +static const struct afs_call_type yfs_RXYFSLink = { + .name = "YFS.Link", + .op = yfs_FS_Link, + .deliver = yfs_deliver_fs_link, + .destructor = afs_flat_call_destructor, +}; + +/* + * Make a hard link. + */ +void yfs_fs_link(struct afs_operation *op) +{ + const struct qstr *name = &op->dentry->d_name; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[1]; + struct afs_call *call; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(op->net, &yfs_RXYFSLink, + sizeof(__be32) + + sizeof(struct yfs_xdr_RPCFlags) + + sizeof(struct yfs_xdr_YFSFid) + + xdr_strlen(name->len) + + sizeof(struct yfs_xdr_YFSFid), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSLINK); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &dvp->fid); + bp = xdr_encode_name(bp, name); + bp = xdr_encode_YFSFid(bp, &vp->fid); + yfs_check_req(call, bp); + + trace_afs_make_fs_call1(call, &vp->fid, name); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * Deliver reply data to a YFS.Symlink operation. + */ +static int yfs_deliver_fs_symlink(struct afs_call *call) +{ + struct afs_operation *op = call->op; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[1]; + const __be32 *bp; + int ret; + + _enter("{%u}", call->unmarshall); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + xdr_decode_YFSFid(&bp, &vp->fid); + xdr_decode_YFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_YFSFetchStatus(&bp, call, &dvp->scb); + xdr_decode_YFSVolSync(&bp, &op->volsync); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * YFS.Symlink operation type + */ +static const struct afs_call_type yfs_RXYFSSymlink = { + .name = "YFS.Symlink", + .op = yfs_FS_Symlink, + .deliver = yfs_deliver_fs_symlink, + .destructor = afs_flat_call_destructor, +}; + +/* + * Create a symbolic link. + */ +void yfs_fs_symlink(struct afs_operation *op) +{ + const struct qstr *name = &op->dentry->d_name; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_call *call; + size_t contents_sz; + __be32 *bp; + + _enter(""); + + contents_sz = strlen(op->create.symlink); + call = afs_alloc_flat_call(op->net, &yfs_RXYFSSymlink, + sizeof(__be32) + + sizeof(struct yfs_xdr_RPCFlags) + + sizeof(struct yfs_xdr_YFSFid) + + xdr_strlen(name->len) + + xdr_strlen(contents_sz) + + sizeof(struct yfs_xdr_YFSStoreStatus), + sizeof(struct yfs_xdr_YFSFid) + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSSYMLINK); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &dvp->fid); + bp = xdr_encode_name(bp, name); + bp = xdr_encode_string(bp, op->create.symlink, contents_sz); + bp = xdr_encode_YFSStoreStatus_mode(bp, S_IRWXUGO); + yfs_check_req(call, bp); + + trace_afs_make_fs_call1(call, &dvp->fid, name); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * Deliver reply data to a YFS.Rename operation. + */ +static int yfs_deliver_fs_rename(struct afs_call *call) +{ + struct afs_operation *op = call->op; + struct afs_vnode_param *orig_dvp = &op->file[0]; + struct afs_vnode_param *new_dvp = &op->file[1]; + const __be32 *bp; + int ret; + + _enter("{%u}", call->unmarshall); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + bp = call->buffer; + /* If the two dirs are the same, we have two copies of the same status + * report, so we just decode it twice. + */ + xdr_decode_YFSFetchStatus(&bp, call, &orig_dvp->scb); + xdr_decode_YFSFetchStatus(&bp, call, &new_dvp->scb); + xdr_decode_YFSVolSync(&bp, &op->volsync); + _leave(" = 0 [done]"); + return 0; +} + +/* + * YFS.Rename operation type + */ +static const struct afs_call_type yfs_RXYFSRename = { + .name = "FS.Rename", + .op = yfs_FS_Rename, + .deliver = yfs_deliver_fs_rename, + .destructor = afs_flat_call_destructor, +}; + +/* + * Rename a file or directory. + */ +void yfs_fs_rename(struct afs_operation *op) +{ + struct afs_vnode_param *orig_dvp = &op->file[0]; + struct afs_vnode_param *new_dvp = &op->file[1]; + const struct qstr *orig_name = &op->dentry->d_name; + const struct qstr *new_name = &op->dentry_2->d_name; + struct afs_call *call; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(op->net, &yfs_RXYFSRename, + sizeof(__be32) + + sizeof(struct yfs_xdr_RPCFlags) + + sizeof(struct yfs_xdr_YFSFid) + + xdr_strlen(orig_name->len) + + sizeof(struct yfs_xdr_YFSFid) + + xdr_strlen(new_name->len), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSRENAME); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &orig_dvp->fid); + bp = xdr_encode_name(bp, orig_name); + bp = xdr_encode_YFSFid(bp, &new_dvp->fid); + bp = xdr_encode_name(bp, new_name); + yfs_check_req(call, bp); + + trace_afs_make_fs_call2(call, &orig_dvp->fid, orig_name, new_name); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * YFS.StoreData64 operation type. + */ +static const struct afs_call_type yfs_RXYFSStoreData64 = { + .name = "YFS.StoreData64", + .op = yfs_FS_StoreData64, + .deliver = yfs_deliver_status_and_volsync, + .destructor = afs_flat_call_destructor, +}; + +/* + * Store a set of pages to a large file. + */ +void yfs_fs_store_data(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct afs_call *call; + loff_t size, pos, i_size; + __be32 *bp; + + _enter(",%x,{%llx:%llu},,", + key_serial(op->key), vp->fid.vid, vp->fid.vnode); + + size = (loff_t)op->store.last_to - (loff_t)op->store.first_offset; + if (op->store.first != op->store.last) + size += (loff_t)(op->store.last - op->store.first) << PAGE_SHIFT; + pos = (loff_t)op->store.first << PAGE_SHIFT; + pos += op->store.first_offset; + + i_size = i_size_read(&vp->vnode->vfs_inode); + if (pos + size > i_size) + i_size = size + pos; + + _debug("size %llx, at %llx, i_size %llx", + (unsigned long long)size, (unsigned long long)pos, + (unsigned long long)i_size); + + call = afs_alloc_flat_call(op->net, &yfs_RXYFSStoreData64, + sizeof(__be32) + + sizeof(__be32) + + sizeof(struct yfs_xdr_YFSFid) + + sizeof(struct yfs_xdr_YFSStoreStatus) + + sizeof(struct yfs_xdr_u64) * 3, + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return afs_op_nomem(op); + + call->key = op->key; + call->send_pages = true; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSSTOREDATA64); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vp->fid); + bp = xdr_encode_YFSStoreStatus_mtime(bp, &op->mtime); + bp = xdr_encode_u64(bp, pos); + bp = xdr_encode_u64(bp, size); + bp = xdr_encode_u64(bp, i_size); + yfs_check_req(call, bp); + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * YFS.StoreStatus operation type + */ +static const struct afs_call_type yfs_RXYFSStoreStatus = { + .name = "YFS.StoreStatus", + .op = yfs_FS_StoreStatus, + .deliver = yfs_deliver_status_and_volsync, + .destructor = afs_flat_call_destructor, +}; + +static const struct afs_call_type yfs_RXYFSStoreData64_as_Status = { + .name = "YFS.StoreData64", + .op = yfs_FS_StoreData64, + .deliver = yfs_deliver_status_and_volsync, + .destructor = afs_flat_call_destructor, +}; + +/* + * Set the attributes on a file, using YFS.StoreData64 rather than + * YFS.StoreStatus so as to alter the file size also. + */ +static void yfs_fs_setattr_size(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct afs_call *call; + struct iattr *attr = op->setattr.attr; + __be32 *bp; + + _enter(",%x,{%llx:%llu},,", + key_serial(op->key), vp->fid.vid, vp->fid.vnode); + + call = afs_alloc_flat_call(op->net, &yfs_RXYFSStoreData64_as_Status, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_YFSFid) + + sizeof(struct yfs_xdr_YFSStoreStatus) + + sizeof(struct yfs_xdr_u64) * 3, + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSSTOREDATA64); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vp->fid); + bp = xdr_encode_YFS_StoreStatus(bp, attr); + bp = xdr_encode_u64(bp, attr->ia_size); /* position of start of write */ + bp = xdr_encode_u64(bp, 0); /* size of write */ + bp = xdr_encode_u64(bp, attr->ia_size); /* new file length */ + yfs_check_req(call, bp); + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * Set the attributes on a file, using YFS.StoreData64 if there's a change in + * file size, and YFS.StoreStatus otherwise. + */ +void yfs_fs_setattr(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct afs_call *call; + struct iattr *attr = op->setattr.attr; + __be32 *bp; + + if (attr->ia_valid & ATTR_SIZE) + return yfs_fs_setattr_size(op); + + _enter(",%x,{%llx:%llu},,", + key_serial(op->key), vp->fid.vid, vp->fid.vnode); + + call = afs_alloc_flat_call(op->net, &yfs_RXYFSStoreStatus, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_YFSFid) + + sizeof(struct yfs_xdr_YFSStoreStatus), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSSTORESTATUS); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vp->fid); + bp = xdr_encode_YFS_StoreStatus(bp, attr); + yfs_check_req(call, bp); + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * Deliver reply data to a YFS.GetVolumeStatus operation. + */ +static int yfs_deliver_fs_get_volume_status(struct afs_call *call) +{ + struct afs_operation *op = call->op; + const __be32 *bp; + char *p; + u32 size; + int ret; + + _enter("{%u}", call->unmarshall); + + switch (call->unmarshall) { + case 0: + call->unmarshall++; + afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchVolumeStatus)); + fallthrough; + + /* extract the returned status record */ + case 1: + _debug("extract status"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + bp = call->buffer; + xdr_decode_YFSFetchVolumeStatus(&bp, &op->volstatus.vs); + call->unmarshall++; + afs_extract_to_tmp(call); + fallthrough; + + /* extract the volume name length */ + case 2: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + call->count = ntohl(call->tmp); + _debug("volname length: %u", call->count); + if (call->count >= AFSNAMEMAX) + return afs_protocol_error(call, afs_eproto_volname_len); + size = (call->count + 3) & ~3; /* It's padded */ + afs_extract_to_buf(call, size); + call->unmarshall++; + fallthrough; + + /* extract the volume name */ + case 3: + _debug("extract volname"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + p = call->buffer; + p[call->count] = 0; + _debug("volname '%s'", p); + afs_extract_to_tmp(call); + call->unmarshall++; + fallthrough; + + /* extract the offline message length */ + case 4: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + call->count = ntohl(call->tmp); + _debug("offline msg length: %u", call->count); + if (call->count >= AFSNAMEMAX) + return afs_protocol_error(call, afs_eproto_offline_msg_len); + size = (call->count + 3) & ~3; /* It's padded */ + afs_extract_to_buf(call, size); + call->unmarshall++; + fallthrough; + + /* extract the offline message */ + case 5: + _debug("extract offline"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + p = call->buffer; + p[call->count] = 0; + _debug("offline '%s'", p); + + afs_extract_to_tmp(call); + call->unmarshall++; + fallthrough; + + /* extract the message of the day length */ + case 6: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + call->count = ntohl(call->tmp); + _debug("motd length: %u", call->count); + if (call->count >= AFSNAMEMAX) + return afs_protocol_error(call, afs_eproto_motd_len); + size = (call->count + 3) & ~3; /* It's padded */ + afs_extract_to_buf(call, size); + call->unmarshall++; + fallthrough; + + /* extract the message of the day */ + case 7: + _debug("extract motd"); + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; + + p = call->buffer; + p[call->count] = 0; + _debug("motd '%s'", p); + + call->unmarshall++; + fallthrough; + + case 8: + break; + } + + _leave(" = 0 [done]"); + return 0; +} + +/* + * YFS.GetVolumeStatus operation type + */ +static const struct afs_call_type yfs_RXYFSGetVolumeStatus = { + .name = "YFS.GetVolumeStatus", + .op = yfs_FS_GetVolumeStatus, + .deliver = yfs_deliver_fs_get_volume_status, + .destructor = afs_flat_call_destructor, +}; + +/* + * fetch the status of a volume + */ +void yfs_fs_get_volume_status(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct afs_call *call; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(op->net, &yfs_RXYFSGetVolumeStatus, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_u64), + max_t(size_t, + sizeof(struct yfs_xdr_YFSFetchVolumeStatus) + + sizeof(__be32), + AFSOPAQUEMAX + 1)); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSGETVOLUMESTATUS); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_u64(bp, vp->fid.vid); + yfs_check_req(call, bp); + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * YFS.SetLock operation type + */ +static const struct afs_call_type yfs_RXYFSSetLock = { + .name = "YFS.SetLock", + .op = yfs_FS_SetLock, + .deliver = yfs_deliver_status_and_volsync, + .done = afs_lock_op_done, + .destructor = afs_flat_call_destructor, +}; + +/* + * YFS.ExtendLock operation type + */ +static const struct afs_call_type yfs_RXYFSExtendLock = { + .name = "YFS.ExtendLock", + .op = yfs_FS_ExtendLock, + .deliver = yfs_deliver_status_and_volsync, + .done = afs_lock_op_done, + .destructor = afs_flat_call_destructor, +}; + +/* + * YFS.ReleaseLock operation type + */ +static const struct afs_call_type yfs_RXYFSReleaseLock = { + .name = "YFS.ReleaseLock", + .op = yfs_FS_ReleaseLock, + .deliver = yfs_deliver_status_and_volsync, + .destructor = afs_flat_call_destructor, +}; + +/* + * Set a lock on a file + */ +void yfs_fs_set_lock(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct afs_call *call; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(op->net, &yfs_RXYFSSetLock, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_YFSFid) + + sizeof(__be32), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSSETLOCK); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vp->fid); + bp = xdr_encode_u32(bp, op->lock.type); + yfs_check_req(call, bp); + + trace_afs_make_fs_calli(call, &vp->fid, op->lock.type); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * extend a lock on a file + */ +void yfs_fs_extend_lock(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct afs_call *call; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(op->net, &yfs_RXYFSExtendLock, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_YFSFid), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSEXTENDLOCK); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vp->fid); + yfs_check_req(call, bp); + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * release a lock on a file + */ +void yfs_fs_release_lock(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct afs_call *call; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(op->net, &yfs_RXYFSReleaseLock, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_YFSFid), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSRELEASELOCK); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vp->fid); + yfs_check_req(call, bp); + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * Deliver a reply to YFS.FetchStatus + */ +static int yfs_deliver_fs_fetch_status(struct afs_call *call) +{ + struct afs_operation *op = call->op; + struct afs_vnode_param *vp = &op->file[op->fetch_status.which]; + const __be32 *bp; + int ret; + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + xdr_decode_YFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_YFSCallBack(&bp, call, &vp->scb); + xdr_decode_YFSVolSync(&bp, &op->volsync); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * YFS.FetchStatus operation type + */ +static const struct afs_call_type yfs_RXYFSFetchStatus = { + .name = "YFS.FetchStatus", + .op = yfs_FS_FetchStatus, + .deliver = yfs_deliver_fs_fetch_status, + .destructor = afs_flat_call_destructor, +}; + +/* + * Fetch the status information for a fid without needing a vnode handle. + */ +void yfs_fs_fetch_status(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[op->fetch_status.which]; + struct afs_call *call; + __be32 *bp; + + _enter(",%x,{%llx:%llu},,", + key_serial(op->key), vp->fid.vid, vp->fid.vnode); + + call = afs_alloc_flat_call(op->net, &yfs_RXYFSFetchStatus, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_YFSFid), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSCallBack) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSFETCHSTATUS); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vp->fid); + yfs_check_req(call, bp); + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * Deliver reply data to an YFS.InlineBulkStatus call + */ +static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call) +{ + struct afs_operation *op = call->op; + struct afs_status_cb *scb; + const __be32 *bp; + u32 tmp; + int ret; + + _enter("{%u}", call->unmarshall); + + switch (call->unmarshall) { + case 0: + afs_extract_to_tmp(call); + call->unmarshall++; + fallthrough; + + /* Extract the file status count and array in two steps */ + case 1: + _debug("extract status count"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + tmp = ntohl(call->tmp); + _debug("status count: %u/%u", tmp, op->nr_files); + if (tmp != op->nr_files) + return afs_protocol_error(call, afs_eproto_ibulkst_count); + + call->count = 0; + call->unmarshall++; + more_counts: + afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchStatus)); + fallthrough; + + case 2: + _debug("extract status array %u", call->count); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + switch (call->count) { + case 0: + scb = &op->file[0].scb; + break; + case 1: + scb = &op->file[1].scb; + break; + default: + scb = &op->more_files[call->count - 2].scb; + break; + } + + bp = call->buffer; + xdr_decode_YFSFetchStatus(&bp, call, scb); + + call->count++; + if (call->count < op->nr_files) + goto more_counts; + + call->count = 0; + call->unmarshall++; + afs_extract_to_tmp(call); + fallthrough; + + /* Extract the callback count and array in two steps */ + case 3: + _debug("extract CB count"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + tmp = ntohl(call->tmp); + _debug("CB count: %u", tmp); + if (tmp != op->nr_files) + return afs_protocol_error(call, afs_eproto_ibulkst_cb_count); + call->count = 0; + call->unmarshall++; + more_cbs: + afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSCallBack)); + fallthrough; + + case 4: + _debug("extract CB array"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + _debug("unmarshall CB array"); + switch (call->count) { + case 0: + scb = &op->file[0].scb; + break; + case 1: + scb = &op->file[1].scb; + break; + default: + scb = &op->more_files[call->count - 2].scb; + break; + } + + bp = call->buffer; + xdr_decode_YFSCallBack(&bp, call, scb); + call->count++; + if (call->count < op->nr_files) + goto more_cbs; + + afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSVolSync)); + call->unmarshall++; + fallthrough; + + case 5: + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; + + bp = call->buffer; + xdr_decode_YFSVolSync(&bp, &op->volsync); + + call->unmarshall++; + fallthrough; + + case 6: + break; + } + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.InlineBulkStatus operation type + */ +static const struct afs_call_type yfs_RXYFSInlineBulkStatus = { + .name = "YFS.InlineBulkStatus", + .op = yfs_FS_InlineBulkStatus, + .deliver = yfs_deliver_fs_inline_bulk_status, + .destructor = afs_flat_call_destructor, +}; + +/* + * Fetch the status information for up to 1024 files + */ +void yfs_fs_inline_bulk_status(struct afs_operation *op) +{ + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[1]; + struct afs_call *call; + __be32 *bp; + int i; + + _enter(",%x,{%llx:%llu},%u", + key_serial(op->key), vp->fid.vid, vp->fid.vnode, op->nr_files); + + call = afs_alloc_flat_call(op->net, &yfs_RXYFSInlineBulkStatus, + sizeof(__be32) + + sizeof(__be32) + + sizeof(__be32) + + sizeof(struct yfs_xdr_YFSFid) * op->nr_files, + sizeof(struct yfs_xdr_YFSFetchStatus)); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSINLINEBULKSTATUS); + bp = xdr_encode_u32(bp, 0); /* RPCFlags */ + bp = xdr_encode_u32(bp, op->nr_files); + bp = xdr_encode_YFSFid(bp, &dvp->fid); + bp = xdr_encode_YFSFid(bp, &vp->fid); + for (i = 0; i < op->nr_files - 2; i++) + bp = xdr_encode_YFSFid(bp, &op->more_files[i].fid); + yfs_check_req(call, bp); + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * Deliver reply data to an YFS.FetchOpaqueACL. + */ +static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call) +{ + struct afs_operation *op = call->op; + struct afs_vnode_param *vp = &op->file[0]; + struct yfs_acl *yacl = op->yacl; + struct afs_acl *acl; + const __be32 *bp; + unsigned int size; + int ret; + + _enter("{%u}", call->unmarshall); + + switch (call->unmarshall) { + case 0: + afs_extract_to_tmp(call); + call->unmarshall++; + fallthrough; + + /* Extract the file ACL length */ + case 1: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + size = call->count2 = ntohl(call->tmp); + size = round_up(size, 4); + + if (yacl->flags & YFS_ACL_WANT_ACL) { + acl = kmalloc(struct_size(acl, data, size), GFP_KERNEL); + if (!acl) + return -ENOMEM; + yacl->acl = acl; + acl->size = call->count2; + afs_extract_begin(call, acl->data, size); + } else { + afs_extract_discard(call, size); + } + call->unmarshall++; + fallthrough; + + /* Extract the file ACL */ + case 2: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + afs_extract_to_tmp(call); + call->unmarshall++; + fallthrough; + + /* Extract the volume ACL length */ + case 3: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + size = call->count2 = ntohl(call->tmp); + size = round_up(size, 4); + + if (yacl->flags & YFS_ACL_WANT_VOL_ACL) { + acl = kmalloc(struct_size(acl, data, size), GFP_KERNEL); + if (!acl) + return -ENOMEM; + yacl->vol_acl = acl; + acl->size = call->count2; + afs_extract_begin(call, acl->data, size); + } else { + afs_extract_discard(call, size); + } + call->unmarshall++; + fallthrough; + + /* Extract the volume ACL */ + case 4: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + afs_extract_to_buf(call, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + call->unmarshall++; + fallthrough; + + /* extract the metadata */ + case 5: + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; + + bp = call->buffer; + yacl->inherit_flag = ntohl(*bp++); + yacl->num_cleaned = ntohl(*bp++); + xdr_decode_YFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_YFSVolSync(&bp, &op->volsync); + + call->unmarshall++; + fallthrough; + + case 6: + break; + } + + _leave(" = 0 [done]"); + return 0; +} + +void yfs_free_opaque_acl(struct yfs_acl *yacl) +{ + if (yacl) { + kfree(yacl->acl); + kfree(yacl->vol_acl); + kfree(yacl); + } +} + +/* + * YFS.FetchOpaqueACL operation type + */ +static const struct afs_call_type yfs_RXYFSFetchOpaqueACL = { + .name = "YFS.FetchOpaqueACL", + .op = yfs_FS_FetchOpaqueACL, + .deliver = yfs_deliver_fs_fetch_opaque_acl, + .destructor = afs_flat_call_destructor, +}; + +/* + * Fetch the YFS advanced ACLs for a file. + */ +void yfs_fs_fetch_opaque_acl(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct afs_call *call; + __be32 *bp; + + _enter(",%x,{%llx:%llu},,", + key_serial(op->key), vp->fid.vid, vp->fid.vnode); + + call = afs_alloc_flat_call(op->net, &yfs_RXYFSFetchOpaqueACL, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_YFSFid), + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSFETCHOPAQUEACL); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vp->fid); + yfs_check_req(call, bp); + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_KERNEL); +} + +/* + * YFS.StoreOpaqueACL2 operation type + */ +static const struct afs_call_type yfs_RXYFSStoreOpaqueACL2 = { + .name = "YFS.StoreOpaqueACL2", + .op = yfs_FS_StoreOpaqueACL2, + .deliver = yfs_deliver_status_and_volsync, + .destructor = afs_flat_call_destructor, +}; + +/* + * Fetch the YFS ACL for a file. + */ +void yfs_fs_store_opaque_acl2(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct afs_call *call; + struct afs_acl *acl = op->acl; + size_t size; + __be32 *bp; + + _enter(",%x,{%llx:%llu},,", + key_serial(op->key), vp->fid.vid, vp->fid.vnode); + + size = round_up(acl->size, 4); + call = afs_alloc_flat_call(op->net, &yfs_RXYFSStoreOpaqueACL2, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_YFSFid) + + sizeof(__be32) + size, + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSSTOREOPAQUEACL2); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vp->fid); + bp = xdr_encode_u32(bp, acl->size); + memcpy(bp, acl->data, acl->size); + if (acl->size != size) + memset((void *)bp + acl->size, 0, size - acl->size); + bp += size / sizeof(__be32); + yfs_check_req(call, bp); + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_KERNEL); +} |