summaryrefslogtreecommitdiffstats
path: root/fs/nfsd/nfs4recover.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfsd/nfs4recover.c')
-rw-r--r--fs/nfsd/nfs4recover.c2169
1 files changed, 2169 insertions, 0 deletions
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
new file mode 100644
index 0000000000..3509e73abe
--- /dev/null
+++ b/fs/nfsd/nfs4recover.c
@@ -0,0 +1,2169 @@
+/*
+* Copyright (c) 2004 The Regents of the University of Michigan.
+* Copyright (c) 2012 Jeff Layton <jlayton@redhat.com>
+* All rights reserved.
+*
+* Andy Adamson <andros@citi.umich.edu>
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* 1. Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+* 2. Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the distribution.
+* 3. Neither the name of the University nor the names of its
+* contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#include <crypto/hash.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/namei.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <net/net_namespace.h>
+#include <linux/sunrpc/rpc_pipe_fs.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfsd/cld.h>
+
+#include "nfsd.h"
+#include "state.h"
+#include "vfs.h"
+#include "netns.h"
+
+#define NFSDDBG_FACILITY NFSDDBG_PROC
+
+/* Declarations */
+struct nfsd4_client_tracking_ops {
+ int (*init)(struct net *);
+ void (*exit)(struct net *);
+ void (*create)(struct nfs4_client *);
+ void (*remove)(struct nfs4_client *);
+ int (*check)(struct nfs4_client *);
+ void (*grace_done)(struct nfsd_net *);
+ uint8_t version;
+ size_t msglen;
+};
+
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops;
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v2;
+
+/* Globals */
+static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
+
+static int
+nfs4_save_creds(const struct cred **original_creds)
+{
+ struct cred *new;
+
+ new = prepare_creds();
+ if (!new)
+ return -ENOMEM;
+
+ new->fsuid = GLOBAL_ROOT_UID;
+ new->fsgid = GLOBAL_ROOT_GID;
+ *original_creds = override_creds(new);
+ put_cred(new);
+ return 0;
+}
+
+static void
+nfs4_reset_creds(const struct cred *original)
+{
+ revert_creds(original);
+}
+
+static void
+md5_to_hex(char *out, char *md5)
+{
+ int i;
+
+ for (i=0; i<16; i++) {
+ unsigned char c = md5[i];
+
+ *out++ = '0' + ((c&0xf0)>>4) + (c>=0xa0)*('a'-'9'-1);
+ *out++ = '0' + (c&0x0f) + ((c&0x0f)>=0x0a)*('a'-'9'-1);
+ }
+ *out = '\0';
+}
+
+static int
+nfs4_make_rec_clidname(char *dname, const struct xdr_netobj *clname)
+{
+ struct xdr_netobj cksum;
+ struct crypto_shash *tfm;
+ int status;
+
+ dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
+ clname->len, clname->data);
+ tfm = crypto_alloc_shash("md5", 0, 0);
+ if (IS_ERR(tfm)) {
+ status = PTR_ERR(tfm);
+ goto out_no_tfm;
+ }
+
+ cksum.len = crypto_shash_digestsize(tfm);
+ cksum.data = kmalloc(cksum.len, GFP_KERNEL);
+ if (cksum.data == NULL) {
+ status = -ENOMEM;
+ goto out;
+ }
+
+ status = crypto_shash_tfm_digest(tfm, clname->data, clname->len,
+ cksum.data);
+ if (status)
+ goto out;
+
+ md5_to_hex(dname, cksum.data);
+
+ status = 0;
+out:
+ kfree(cksum.data);
+ crypto_free_shash(tfm);
+out_no_tfm:
+ return status;
+}
+
+/*
+ * If we had an error generating the recdir name for the legacy tracker
+ * then warn the admin. If the error doesn't appear to be transient,
+ * then disable recovery tracking.
+ */
+static void
+legacy_recdir_name_error(struct nfs4_client *clp, int error)
+{
+ printk(KERN_ERR "NFSD: unable to generate recoverydir "
+ "name (%d).\n", error);
+
+ /*
+ * if the algorithm just doesn't exist, then disable the recovery
+ * tracker altogether. The crypto libs will generally return this if
+ * FIPS is enabled as well.
+ */
+ if (error == -ENOENT) {
+ printk(KERN_ERR "NFSD: disabling legacy clientid tracking. "
+ "Reboot recovery will not function correctly!\n");
+ nfsd4_client_tracking_exit(clp->net);
+ }
+}
+
+static void
+__nfsd4_create_reclaim_record_grace(struct nfs4_client *clp,
+ const char *dname, int len, struct nfsd_net *nn)
+{
+ struct xdr_netobj name;
+ struct xdr_netobj princhash = { .len = 0, .data = NULL };
+ struct nfs4_client_reclaim *crp;
+
+ name.data = kmemdup(dname, len, GFP_KERNEL);
+ if (!name.data) {
+ dprintk("%s: failed to allocate memory for name.data!\n",
+ __func__);
+ return;
+ }
+ name.len = len;
+ crp = nfs4_client_to_reclaim(name, princhash, nn);
+ if (!crp) {
+ kfree(name.data);
+ return;
+ }
+ crp->cr_clp = clp;
+}
+
+static void
+nfsd4_create_clid_dir(struct nfs4_client *clp)
+{
+ const struct cred *original_cred;
+ char dname[HEXDIR_LEN];
+ struct dentry *dir, *dentry;
+ int status;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+ if (test_and_set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+ return;
+ if (!nn->rec_file)
+ return;
+
+ status = nfs4_make_rec_clidname(dname, &clp->cl_name);
+ if (status)
+ return legacy_recdir_name_error(clp, status);
+
+ status = nfs4_save_creds(&original_cred);
+ if (status < 0)
+ return;
+
+ status = mnt_want_write_file(nn->rec_file);
+ if (status)
+ goto out_creds;
+
+ dir = nn->rec_file->f_path.dentry;
+ /* lock the parent */
+ inode_lock(d_inode(dir));
+
+ dentry = lookup_one_len(dname, dir, HEXDIR_LEN-1);
+ if (IS_ERR(dentry)) {
+ status = PTR_ERR(dentry);
+ goto out_unlock;
+ }
+ if (d_really_is_positive(dentry))
+ /*
+ * In the 4.1 case, where we're called from
+ * reclaim_complete(), records from the previous reboot
+ * may still be left, so this is OK.
+ *
+ * In the 4.0 case, we should never get here; but we may
+ * as well be forgiving and just succeed silently.
+ */
+ goto out_put;
+ status = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU);
+out_put:
+ dput(dentry);
+out_unlock:
+ inode_unlock(d_inode(dir));
+ if (status == 0) {
+ if (nn->in_grace)
+ __nfsd4_create_reclaim_record_grace(clp, dname,
+ HEXDIR_LEN, nn);
+ vfs_fsync(nn->rec_file, 0);
+ } else {
+ printk(KERN_ERR "NFSD: failed to write recovery record"
+ " (err %d); please check that %s exists"
+ " and is writeable", status,
+ user_recovery_dirname);
+ }
+ mnt_drop_write_file(nn->rec_file);
+out_creds:
+ nfs4_reset_creds(original_cred);
+}
+
+typedef int (recdir_func)(struct dentry *, struct dentry *, struct nfsd_net *);
+
+struct name_list {
+ char name[HEXDIR_LEN];
+ struct list_head list;
+};
+
+struct nfs4_dir_ctx {
+ struct dir_context ctx;
+ struct list_head names;
+};
+
+static bool
+nfsd4_build_namelist(struct dir_context *__ctx, const char *name, int namlen,
+ loff_t offset, u64 ino, unsigned int d_type)
+{
+ struct nfs4_dir_ctx *ctx =
+ container_of(__ctx, struct nfs4_dir_ctx, ctx);
+ struct name_list *entry;
+
+ if (namlen != HEXDIR_LEN - 1)
+ return true;
+ entry = kmalloc(sizeof(struct name_list), GFP_KERNEL);
+ if (entry == NULL)
+ return false;
+ memcpy(entry->name, name, HEXDIR_LEN - 1);
+ entry->name[HEXDIR_LEN - 1] = '\0';
+ list_add(&entry->list, &ctx->names);
+ return true;
+}
+
+static int
+nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
+{
+ const struct cred *original_cred;
+ struct dentry *dir = nn->rec_file->f_path.dentry;
+ struct nfs4_dir_ctx ctx = {
+ .ctx.actor = nfsd4_build_namelist,
+ .names = LIST_HEAD_INIT(ctx.names)
+ };
+ struct name_list *entry, *tmp;
+ int status;
+
+ status = nfs4_save_creds(&original_cred);
+ if (status < 0)
+ return status;
+
+ status = vfs_llseek(nn->rec_file, 0, SEEK_SET);
+ if (status < 0) {
+ nfs4_reset_creds(original_cred);
+ return status;
+ }
+
+ status = iterate_dir(nn->rec_file, &ctx.ctx);
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
+
+ list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
+ if (!status) {
+ struct dentry *dentry;
+ dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1);
+ if (IS_ERR(dentry)) {
+ status = PTR_ERR(dentry);
+ break;
+ }
+ status = f(dir, dentry, nn);
+ dput(dentry);
+ }
+ list_del(&entry->list);
+ kfree(entry);
+ }
+ inode_unlock(d_inode(dir));
+ nfs4_reset_creds(original_cred);
+
+ list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
+ dprintk("NFSD: %s. Left entry %s\n", __func__, entry->name);
+ list_del(&entry->list);
+ kfree(entry);
+ }
+ return status;
+}
+
+static int
+nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
+{
+ struct dentry *dir, *dentry;
+ int status;
+
+ dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
+
+ dir = nn->rec_file->f_path.dentry;
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
+ dentry = lookup_one_len(name, dir, namlen);
+ if (IS_ERR(dentry)) {
+ status = PTR_ERR(dentry);
+ goto out_unlock;
+ }
+ status = -ENOENT;
+ if (d_really_is_negative(dentry))
+ goto out;
+ status = vfs_rmdir(&nop_mnt_idmap, d_inode(dir), dentry);
+out:
+ dput(dentry);
+out_unlock:
+ inode_unlock(d_inode(dir));
+ return status;
+}
+
+static void
+__nfsd4_remove_reclaim_record_grace(const char *dname, int len,
+ struct nfsd_net *nn)
+{
+ struct xdr_netobj name;
+ struct nfs4_client_reclaim *crp;
+
+ name.data = kmemdup(dname, len, GFP_KERNEL);
+ if (!name.data) {
+ dprintk("%s: failed to allocate memory for name.data!\n",
+ __func__);
+ return;
+ }
+ name.len = len;
+ crp = nfsd4_find_reclaim_client(name, nn);
+ kfree(name.data);
+ if (crp)
+ nfs4_remove_reclaim_record(crp, nn);
+}
+
+static void
+nfsd4_remove_clid_dir(struct nfs4_client *clp)
+{
+ const struct cred *original_cred;
+ char dname[HEXDIR_LEN];
+ int status;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+ if (!nn->rec_file || !test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+ return;
+
+ status = nfs4_make_rec_clidname(dname, &clp->cl_name);
+ if (status)
+ return legacy_recdir_name_error(clp, status);
+
+ status = mnt_want_write_file(nn->rec_file);
+ if (status)
+ goto out;
+ clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+
+ status = nfs4_save_creds(&original_cred);
+ if (status < 0)
+ goto out_drop_write;
+
+ status = nfsd4_unlink_clid_dir(dname, HEXDIR_LEN-1, nn);
+ nfs4_reset_creds(original_cred);
+ if (status == 0) {
+ vfs_fsync(nn->rec_file, 0);
+ if (nn->in_grace)
+ __nfsd4_remove_reclaim_record_grace(dname,
+ HEXDIR_LEN, nn);
+ }
+out_drop_write:
+ mnt_drop_write_file(nn->rec_file);
+out:
+ if (status)
+ printk("NFSD: Failed to remove expired client state directory"
+ " %.*s\n", HEXDIR_LEN, dname);
+}
+
+static int
+purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
+{
+ int status;
+ struct xdr_netobj name;
+
+ if (child->d_name.len != HEXDIR_LEN - 1) {
+ printk("%s: illegal name %pd in recovery directory\n",
+ __func__, child);
+ /* Keep trying; maybe the others are OK: */
+ return 0;
+ }
+ name.data = kmemdup_nul(child->d_name.name, child->d_name.len, GFP_KERNEL);
+ if (!name.data) {
+ dprintk("%s: failed to allocate memory for name.data!\n",
+ __func__);
+ goto out;
+ }
+ name.len = HEXDIR_LEN;
+ if (nfs4_has_reclaimed_state(name, nn))
+ goto out_free;
+
+ status = vfs_rmdir(&nop_mnt_idmap, d_inode(parent), child);
+ if (status)
+ printk("failed to remove client recovery directory %pd\n",
+ child);
+out_free:
+ kfree(name.data);
+out:
+ /* Keep trying, success or failure: */
+ return 0;
+}
+
+static void
+nfsd4_recdir_purge_old(struct nfsd_net *nn)
+{
+ int status;
+
+ nn->in_grace = false;
+ if (!nn->rec_file)
+ return;
+ status = mnt_want_write_file(nn->rec_file);
+ if (status)
+ goto out;
+ status = nfsd4_list_rec_dir(purge_old, nn);
+ if (status == 0)
+ vfs_fsync(nn->rec_file, 0);
+ mnt_drop_write_file(nn->rec_file);
+out:
+ nfs4_release_reclaim(nn);
+ if (status)
+ printk("nfsd4: failed to purge old clients from recovery"
+ " directory %pD\n", nn->rec_file);
+}
+
+static int
+load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
+{
+ struct xdr_netobj name;
+ struct xdr_netobj princhash = { .len = 0, .data = NULL };
+
+ if (child->d_name.len != HEXDIR_LEN - 1) {
+ printk("%s: illegal name %pd in recovery directory\n",
+ __func__, child);
+ /* Keep trying; maybe the others are OK: */
+ return 0;
+ }
+ name.data = kmemdup_nul(child->d_name.name, child->d_name.len, GFP_KERNEL);
+ if (!name.data) {
+ dprintk("%s: failed to allocate memory for name.data!\n",
+ __func__);
+ goto out;
+ }
+ name.len = HEXDIR_LEN;
+ if (!nfs4_client_to_reclaim(name, princhash, nn))
+ kfree(name.data);
+out:
+ return 0;
+}
+
+static int
+nfsd4_recdir_load(struct net *net) {
+ int status;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ if (!nn->rec_file)
+ return 0;
+
+ status = nfsd4_list_rec_dir(load_recdir, nn);
+ if (status)
+ printk("nfsd4: failed loading clients from recovery"
+ " directory %pD\n", nn->rec_file);
+ return status;
+}
+
+/*
+ * Hold reference to the recovery directory.
+ */
+
+static int
+nfsd4_init_recdir(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ const struct cred *original_cred;
+ int status;
+
+ printk("NFSD: Using %s as the NFSv4 state recovery directory\n",
+ user_recovery_dirname);
+
+ BUG_ON(nn->rec_file);
+
+ status = nfs4_save_creds(&original_cred);
+ if (status < 0) {
+ printk("NFSD: Unable to change credentials to find recovery"
+ " directory: error %d\n",
+ status);
+ return status;
+ }
+
+ nn->rec_file = filp_open(user_recovery_dirname, O_RDONLY | O_DIRECTORY, 0);
+ if (IS_ERR(nn->rec_file)) {
+ printk("NFSD: unable to find recovery directory %s\n",
+ user_recovery_dirname);
+ status = PTR_ERR(nn->rec_file);
+ nn->rec_file = NULL;
+ }
+
+ nfs4_reset_creds(original_cred);
+ if (!status)
+ nn->in_grace = true;
+ return status;
+}
+
+static void
+nfsd4_shutdown_recdir(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ if (!nn->rec_file)
+ return;
+ fput(nn->rec_file);
+ nn->rec_file = NULL;
+}
+
+static int
+nfs4_legacy_state_init(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ int i;
+
+ nn->reclaim_str_hashtbl = kmalloc_array(CLIENT_HASH_SIZE,
+ sizeof(struct list_head),
+ GFP_KERNEL);
+ if (!nn->reclaim_str_hashtbl)
+ return -ENOMEM;
+
+ for (i = 0; i < CLIENT_HASH_SIZE; i++)
+ INIT_LIST_HEAD(&nn->reclaim_str_hashtbl[i]);
+ nn->reclaim_str_hashtbl_size = 0;
+
+ return 0;
+}
+
+static void
+nfs4_legacy_state_shutdown(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ kfree(nn->reclaim_str_hashtbl);
+}
+
+static int
+nfsd4_load_reboot_recovery_data(struct net *net)
+{
+ int status;
+
+ status = nfsd4_init_recdir(net);
+ if (status)
+ return status;
+
+ status = nfsd4_recdir_load(net);
+ if (status)
+ nfsd4_shutdown_recdir(net);
+
+ return status;
+}
+
+static int
+nfsd4_legacy_tracking_init(struct net *net)
+{
+ int status;
+
+ /* XXX: The legacy code won't work in a container */
+ if (net != &init_net) {
+ pr_warn("NFSD: attempt to initialize legacy client tracking in a container ignored.\n");
+ return -EINVAL;
+ }
+
+ status = nfs4_legacy_state_init(net);
+ if (status)
+ return status;
+
+ status = nfsd4_load_reboot_recovery_data(net);
+ if (status)
+ goto err;
+ pr_info("NFSD: Using legacy client tracking operations.\n");
+ return 0;
+
+err:
+ nfs4_legacy_state_shutdown(net);
+ return status;
+}
+
+static void
+nfsd4_legacy_tracking_exit(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ nfs4_release_reclaim(nn);
+ nfsd4_shutdown_recdir(net);
+ nfs4_legacy_state_shutdown(net);
+}
+
+/*
+ * Change the NFSv4 recovery directory to recdir.
+ */
+int
+nfs4_reset_recoverydir(char *recdir)
+{
+ int status;
+ struct path path;
+
+ status = kern_path(recdir, LOOKUP_FOLLOW, &path);
+ if (status)
+ return status;
+ status = -ENOTDIR;
+ if (d_is_dir(path.dentry)) {
+ strcpy(user_recovery_dirname, recdir);
+ status = 0;
+ }
+ path_put(&path);
+ return status;
+}
+
+char *
+nfs4_recoverydir(void)
+{
+ return user_recovery_dirname;
+}
+
+static int
+nfsd4_check_legacy_client(struct nfs4_client *clp)
+{
+ int status;
+ char dname[HEXDIR_LEN];
+ struct nfs4_client_reclaim *crp;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ struct xdr_netobj name;
+
+ /* did we already find that this client is stable? */
+ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+ return 0;
+
+ status = nfs4_make_rec_clidname(dname, &clp->cl_name);
+ if (status) {
+ legacy_recdir_name_error(clp, status);
+ return status;
+ }
+
+ /* look for it in the reclaim hashtable otherwise */
+ name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL);
+ if (!name.data) {
+ dprintk("%s: failed to allocate memory for name.data!\n",
+ __func__);
+ goto out_enoent;
+ }
+ name.len = HEXDIR_LEN;
+ crp = nfsd4_find_reclaim_client(name, nn);
+ kfree(name.data);
+ if (crp) {
+ set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+ crp->cr_clp = clp;
+ return 0;
+ }
+
+out_enoent:
+ return -ENOENT;
+}
+
+static const struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = {
+ .init = nfsd4_legacy_tracking_init,
+ .exit = nfsd4_legacy_tracking_exit,
+ .create = nfsd4_create_clid_dir,
+ .remove = nfsd4_remove_clid_dir,
+ .check = nfsd4_check_legacy_client,
+ .grace_done = nfsd4_recdir_purge_old,
+ .version = 1,
+ .msglen = 0,
+};
+
+/* Globals */
+#define NFSD_PIPE_DIR "nfsd"
+#define NFSD_CLD_PIPE "cld"
+
+/* per-net-ns structure for holding cld upcall info */
+struct cld_net {
+ struct rpc_pipe *cn_pipe;
+ spinlock_t cn_lock;
+ struct list_head cn_list;
+ unsigned int cn_xid;
+ bool cn_has_legacy;
+ struct crypto_shash *cn_tfm;
+};
+
+struct cld_upcall {
+ struct list_head cu_list;
+ struct cld_net *cu_net;
+ struct completion cu_done;
+ union {
+ struct cld_msg_hdr cu_hdr;
+ struct cld_msg cu_msg;
+ struct cld_msg_v2 cu_msg_v2;
+ } cu_u;
+};
+
+static int
+__cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg, struct nfsd_net *nn)
+{
+ int ret;
+ struct rpc_pipe_msg msg;
+ struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_u);
+
+ memset(&msg, 0, sizeof(msg));
+ msg.data = cmsg;
+ msg.len = nn->client_tracking_ops->msglen;
+
+ ret = rpc_queue_upcall(pipe, &msg);
+ if (ret < 0) {
+ goto out;
+ }
+
+ wait_for_completion(&cup->cu_done);
+
+ if (msg.errno < 0)
+ ret = msg.errno;
+out:
+ return ret;
+}
+
+static int
+cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg, struct nfsd_net *nn)
+{
+ int ret;
+
+ /*
+ * -EAGAIN occurs when pipe is closed and reopened while there are
+ * upcalls queued.
+ */
+ do {
+ ret = __cld_pipe_upcall(pipe, cmsg, nn);
+ } while (ret == -EAGAIN);
+
+ return ret;
+}
+
+static ssize_t
+__cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg,
+ struct nfsd_net *nn)
+{
+ uint8_t cmd, princhashlen;
+ struct xdr_netobj name, princhash = { .len = 0, .data = NULL };
+ uint16_t namelen;
+ struct cld_net *cn = nn->cld_net;
+
+ if (get_user(cmd, &cmsg->cm_cmd)) {
+ dprintk("%s: error when copying cmd from userspace", __func__);
+ return -EFAULT;
+ }
+ if (cmd == Cld_GraceStart) {
+ if (nn->client_tracking_ops->version >= 2) {
+ const struct cld_clntinfo __user *ci;
+
+ ci = &cmsg->cm_u.cm_clntinfo;
+ if (get_user(namelen, &ci->cc_name.cn_len))
+ return -EFAULT;
+ name.data = memdup_user(&ci->cc_name.cn_id, namelen);
+ if (IS_ERR(name.data))
+ return PTR_ERR(name.data);
+ name.len = namelen;
+ get_user(princhashlen, &ci->cc_princhash.cp_len);
+ if (princhashlen > 0) {
+ princhash.data = memdup_user(
+ &ci->cc_princhash.cp_data,
+ princhashlen);
+ if (IS_ERR(princhash.data)) {
+ kfree(name.data);
+ return PTR_ERR(princhash.data);
+ }
+ princhash.len = princhashlen;
+ } else
+ princhash.len = 0;
+ } else {
+ const struct cld_name __user *cnm;
+
+ cnm = &cmsg->cm_u.cm_name;
+ if (get_user(namelen, &cnm->cn_len))
+ return -EFAULT;
+ name.data = memdup_user(&cnm->cn_id, namelen);
+ if (IS_ERR(name.data))
+ return PTR_ERR(name.data);
+ name.len = namelen;
+ }
+ if (name.len > 5 && memcmp(name.data, "hash:", 5) == 0) {
+ name.len = name.len - 5;
+ memmove(name.data, name.data + 5, name.len);
+ cn->cn_has_legacy = true;
+ }
+ if (!nfs4_client_to_reclaim(name, princhash, nn)) {
+ kfree(name.data);
+ kfree(princhash.data);
+ return -EFAULT;
+ }
+ return nn->client_tracking_ops->msglen;
+ }
+ return -EFAULT;
+}
+
+static ssize_t
+cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
+{
+ struct cld_upcall *tmp, *cup;
+ struct cld_msg_hdr __user *hdr = (struct cld_msg_hdr __user *)src;
+ struct cld_msg_v2 __user *cmsg = (struct cld_msg_v2 __user *)src;
+ uint32_t xid;
+ struct nfsd_net *nn = net_generic(file_inode(filp)->i_sb->s_fs_info,
+ nfsd_net_id);
+ struct cld_net *cn = nn->cld_net;
+ int16_t status;
+
+ if (mlen != nn->client_tracking_ops->msglen) {
+ dprintk("%s: got %zu bytes, expected %zu\n", __func__, mlen,
+ nn->client_tracking_ops->msglen);
+ return -EINVAL;
+ }
+
+ /* copy just the xid so we can try to find that */
+ if (copy_from_user(&xid, &hdr->cm_xid, sizeof(xid)) != 0) {
+ dprintk("%s: error when copying xid from userspace", __func__);
+ return -EFAULT;
+ }
+
+ /*
+ * copy the status so we know whether to remove the upcall from the
+ * list (for -EINPROGRESS, we just want to make sure the xid is
+ * valid, not remove the upcall from the list)
+ */
+ if (get_user(status, &hdr->cm_status)) {
+ dprintk("%s: error when copying status from userspace", __func__);
+ return -EFAULT;
+ }
+
+ /* walk the list and find corresponding xid */
+ cup = NULL;
+ spin_lock(&cn->cn_lock);
+ list_for_each_entry(tmp, &cn->cn_list, cu_list) {
+ if (get_unaligned(&tmp->cu_u.cu_hdr.cm_xid) == xid) {
+ cup = tmp;
+ if (status != -EINPROGRESS)
+ list_del_init(&cup->cu_list);
+ break;
+ }
+ }
+ spin_unlock(&cn->cn_lock);
+
+ /* couldn't find upcall? */
+ if (!cup) {
+ dprintk("%s: couldn't find upcall -- xid=%u\n", __func__, xid);
+ return -EINVAL;
+ }
+
+ if (status == -EINPROGRESS)
+ return __cld_pipe_inprogress_downcall(cmsg, nn);
+
+ if (copy_from_user(&cup->cu_u.cu_msg_v2, src, mlen) != 0)
+ return -EFAULT;
+
+ complete(&cup->cu_done);
+ return mlen;
+}
+
+static void
+cld_pipe_destroy_msg(struct rpc_pipe_msg *msg)
+{
+ struct cld_msg *cmsg = msg->data;
+ struct cld_upcall *cup = container_of(cmsg, struct cld_upcall,
+ cu_u.cu_msg);
+
+ /* errno >= 0 means we got a downcall */
+ if (msg->errno >= 0)
+ return;
+
+ complete(&cup->cu_done);
+}
+
+static const struct rpc_pipe_ops cld_upcall_ops = {
+ .upcall = rpc_pipe_generic_upcall,
+ .downcall = cld_pipe_downcall,
+ .destroy_msg = cld_pipe_destroy_msg,
+};
+
+static struct dentry *
+nfsd4_cld_register_sb(struct super_block *sb, struct rpc_pipe *pipe)
+{
+ struct dentry *dir, *dentry;
+
+ dir = rpc_d_lookup_sb(sb, NFSD_PIPE_DIR);
+ if (dir == NULL)
+ return ERR_PTR(-ENOENT);
+ dentry = rpc_mkpipe_dentry(dir, NFSD_CLD_PIPE, NULL, pipe);
+ dput(dir);
+ return dentry;
+}
+
+static void
+nfsd4_cld_unregister_sb(struct rpc_pipe *pipe)
+{
+ if (pipe->dentry)
+ rpc_unlink(pipe->dentry);
+}
+
+static struct dentry *
+nfsd4_cld_register_net(struct net *net, struct rpc_pipe *pipe)
+{
+ struct super_block *sb;
+ struct dentry *dentry;
+
+ sb = rpc_get_sb_net(net);
+ if (!sb)
+ return NULL;
+ dentry = nfsd4_cld_register_sb(sb, pipe);
+ rpc_put_sb_net(net);
+ return dentry;
+}
+
+static void
+nfsd4_cld_unregister_net(struct net *net, struct rpc_pipe *pipe)
+{
+ struct super_block *sb;
+
+ sb = rpc_get_sb_net(net);
+ if (sb) {
+ nfsd4_cld_unregister_sb(pipe);
+ rpc_put_sb_net(net);
+ }
+}
+
+/* Initialize rpc_pipefs pipe for communication with client tracking daemon */
+static int
+__nfsd4_init_cld_pipe(struct net *net)
+{
+ int ret;
+ struct dentry *dentry;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct cld_net *cn;
+
+ if (nn->cld_net)
+ return 0;
+
+ cn = kzalloc(sizeof(*cn), GFP_KERNEL);
+ if (!cn) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ cn->cn_pipe = rpc_mkpipe_data(&cld_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
+ if (IS_ERR(cn->cn_pipe)) {
+ ret = PTR_ERR(cn->cn_pipe);
+ goto err;
+ }
+ spin_lock_init(&cn->cn_lock);
+ INIT_LIST_HEAD(&cn->cn_list);
+
+ dentry = nfsd4_cld_register_net(net, cn->cn_pipe);
+ if (IS_ERR(dentry)) {
+ ret = PTR_ERR(dentry);
+ goto err_destroy_data;
+ }
+
+ cn->cn_pipe->dentry = dentry;
+ cn->cn_has_legacy = false;
+ nn->cld_net = cn;
+ return 0;
+
+err_destroy_data:
+ rpc_destroy_pipe_data(cn->cn_pipe);
+err:
+ kfree(cn);
+ printk(KERN_ERR "NFSD: unable to create nfsdcld upcall pipe (%d)\n",
+ ret);
+ return ret;
+}
+
+static int
+nfsd4_init_cld_pipe(struct net *net)
+{
+ int status;
+
+ status = __nfsd4_init_cld_pipe(net);
+ if (!status)
+ pr_info("NFSD: Using old nfsdcld client tracking operations.\n");
+ return status;
+}
+
+static void
+nfsd4_remove_cld_pipe(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct cld_net *cn = nn->cld_net;
+
+ nfsd4_cld_unregister_net(net, cn->cn_pipe);
+ rpc_destroy_pipe_data(cn->cn_pipe);
+ if (cn->cn_tfm)
+ crypto_free_shash(cn->cn_tfm);
+ kfree(nn->cld_net);
+ nn->cld_net = NULL;
+}
+
+static struct cld_upcall *
+alloc_cld_upcall(struct nfsd_net *nn)
+{
+ struct cld_upcall *new, *tmp;
+ struct cld_net *cn = nn->cld_net;
+
+ new = kzalloc(sizeof(*new), GFP_KERNEL);
+ if (!new)
+ return new;
+
+ /* FIXME: hard cap on number in flight? */
+restart_search:
+ spin_lock(&cn->cn_lock);
+ list_for_each_entry(tmp, &cn->cn_list, cu_list) {
+ if (tmp->cu_u.cu_msg.cm_xid == cn->cn_xid) {
+ cn->cn_xid++;
+ spin_unlock(&cn->cn_lock);
+ goto restart_search;
+ }
+ }
+ init_completion(&new->cu_done);
+ new->cu_u.cu_msg.cm_vers = nn->client_tracking_ops->version;
+ put_unaligned(cn->cn_xid++, &new->cu_u.cu_msg.cm_xid);
+ new->cu_net = cn;
+ list_add(&new->cu_list, &cn->cn_list);
+ spin_unlock(&cn->cn_lock);
+
+ dprintk("%s: allocated xid %u\n", __func__, new->cu_u.cu_msg.cm_xid);
+
+ return new;
+}
+
+static void
+free_cld_upcall(struct cld_upcall *victim)
+{
+ struct cld_net *cn = victim->cu_net;
+
+ spin_lock(&cn->cn_lock);
+ list_del(&victim->cu_list);
+ spin_unlock(&cn->cn_lock);
+ kfree(victim);
+}
+
+/* Ask daemon to create a new record */
+static void
+nfsd4_cld_create(struct nfs4_client *clp)
+{
+ int ret;
+ struct cld_upcall *cup;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ struct cld_net *cn = nn->cld_net;
+
+ /* Don't upcall if it's already stored */
+ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+ return;
+
+ cup = alloc_cld_upcall(nn);
+ if (!cup) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ cup->cu_u.cu_msg.cm_cmd = Cld_Create;
+ cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+ memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+ clp->cl_name.len);
+
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn);
+ if (!ret) {
+ ret = cup->cu_u.cu_msg.cm_status;
+ set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+ }
+
+ free_cld_upcall(cup);
+out_err:
+ if (ret)
+ printk(KERN_ERR "NFSD: Unable to create client "
+ "record on stable storage: %d\n", ret);
+}
+
+/* Ask daemon to create a new record */
+static void
+nfsd4_cld_create_v2(struct nfs4_client *clp)
+{
+ int ret;
+ struct cld_upcall *cup;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ struct cld_net *cn = nn->cld_net;
+ struct cld_msg_v2 *cmsg;
+ struct crypto_shash *tfm = cn->cn_tfm;
+ struct xdr_netobj cksum;
+ char *principal = NULL;
+
+ /* Don't upcall if it's already stored */
+ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+ return;
+
+ cup = alloc_cld_upcall(nn);
+ if (!cup) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ cmsg = &cup->cu_u.cu_msg_v2;
+ cmsg->cm_cmd = Cld_Create;
+ cmsg->cm_u.cm_clntinfo.cc_name.cn_len = clp->cl_name.len;
+ memcpy(cmsg->cm_u.cm_clntinfo.cc_name.cn_id, clp->cl_name.data,
+ clp->cl_name.len);
+ if (clp->cl_cred.cr_raw_principal)
+ principal = clp->cl_cred.cr_raw_principal;
+ else if (clp->cl_cred.cr_principal)
+ principal = clp->cl_cred.cr_principal;
+ if (principal) {
+ cksum.len = crypto_shash_digestsize(tfm);
+ cksum.data = kmalloc(cksum.len, GFP_KERNEL);
+ if (cksum.data == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = crypto_shash_tfm_digest(tfm, principal, strlen(principal),
+ cksum.data);
+ if (ret) {
+ kfree(cksum.data);
+ goto out;
+ }
+ cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = cksum.len;
+ memcpy(cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data,
+ cksum.data, cksum.len);
+ kfree(cksum.data);
+ } else
+ cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = 0;
+
+ ret = cld_pipe_upcall(cn->cn_pipe, cmsg, nn);
+ if (!ret) {
+ ret = cmsg->cm_status;
+ set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+ }
+
+out:
+ free_cld_upcall(cup);
+out_err:
+ if (ret)
+ pr_err("NFSD: Unable to create client record on stable storage: %d\n",
+ ret);
+}
+
+/* Ask daemon to create a new record */
+static void
+nfsd4_cld_remove(struct nfs4_client *clp)
+{
+ int ret;
+ struct cld_upcall *cup;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ struct cld_net *cn = nn->cld_net;
+
+ /* Don't upcall if it's already removed */
+ if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+ return;
+
+ cup = alloc_cld_upcall(nn);
+ if (!cup) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ cup->cu_u.cu_msg.cm_cmd = Cld_Remove;
+ cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+ memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+ clp->cl_name.len);
+
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn);
+ if (!ret) {
+ ret = cup->cu_u.cu_msg.cm_status;
+ clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+ }
+
+ free_cld_upcall(cup);
+out_err:
+ if (ret)
+ printk(KERN_ERR "NFSD: Unable to remove client "
+ "record from stable storage: %d\n", ret);
+}
+
+/*
+ * For older nfsdcld's that do not allow us to "slurp" the clients
+ * from the tracking database during startup.
+ *
+ * Check for presence of a record, and update its timestamp
+ */
+static int
+nfsd4_cld_check_v0(struct nfs4_client *clp)
+{
+ int ret;
+ struct cld_upcall *cup;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ struct cld_net *cn = nn->cld_net;
+
+ /* Don't upcall if one was already stored during this grace pd */
+ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+ return 0;
+
+ cup = alloc_cld_upcall(nn);
+ if (!cup) {
+ printk(KERN_ERR "NFSD: Unable to check client record on "
+ "stable storage: %d\n", -ENOMEM);
+ return -ENOMEM;
+ }
+
+ cup->cu_u.cu_msg.cm_cmd = Cld_Check;
+ cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+ memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+ clp->cl_name.len);
+
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn);
+ if (!ret) {
+ ret = cup->cu_u.cu_msg.cm_status;
+ set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+ }
+
+ free_cld_upcall(cup);
+ return ret;
+}
+
+/*
+ * For newer nfsdcld's that allow us to "slurp" the clients
+ * from the tracking database during startup.
+ *
+ * Check for presence of a record in the reclaim_str_hashtbl
+ */
+static int
+nfsd4_cld_check(struct nfs4_client *clp)
+{
+ struct nfs4_client_reclaim *crp;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ struct cld_net *cn = nn->cld_net;
+ int status;
+ char dname[HEXDIR_LEN];
+ struct xdr_netobj name;
+
+ /* did we already find that this client is stable? */
+ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+ return 0;
+
+ /* look for it in the reclaim hashtable otherwise */
+ crp = nfsd4_find_reclaim_client(clp->cl_name, nn);
+ if (crp)
+ goto found;
+
+ if (cn->cn_has_legacy) {
+ status = nfs4_make_rec_clidname(dname, &clp->cl_name);
+ if (status)
+ return -ENOENT;
+
+ name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL);
+ if (!name.data) {
+ dprintk("%s: failed to allocate memory for name.data!\n",
+ __func__);
+ return -ENOENT;
+ }
+ name.len = HEXDIR_LEN;
+ crp = nfsd4_find_reclaim_client(name, nn);
+ kfree(name.data);
+ if (crp)
+ goto found;
+
+ }
+ return -ENOENT;
+found:
+ crp->cr_clp = clp;
+ return 0;
+}
+
+static int
+nfsd4_cld_check_v2(struct nfs4_client *clp)
+{
+ struct nfs4_client_reclaim *crp;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ struct cld_net *cn = nn->cld_net;
+ int status;
+ char dname[HEXDIR_LEN];
+ struct xdr_netobj name;
+ struct crypto_shash *tfm = cn->cn_tfm;
+ struct xdr_netobj cksum;
+ char *principal = NULL;
+
+ /* did we already find that this client is stable? */
+ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+ return 0;
+
+ /* look for it in the reclaim hashtable otherwise */
+ crp = nfsd4_find_reclaim_client(clp->cl_name, nn);
+ if (crp)
+ goto found;
+
+ if (cn->cn_has_legacy) {
+ status = nfs4_make_rec_clidname(dname, &clp->cl_name);
+ if (status)
+ return -ENOENT;
+
+ name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL);
+ if (!name.data) {
+ dprintk("%s: failed to allocate memory for name.data\n",
+ __func__);
+ return -ENOENT;
+ }
+ name.len = HEXDIR_LEN;
+ crp = nfsd4_find_reclaim_client(name, nn);
+ kfree(name.data);
+ if (crp)
+ goto found;
+
+ }
+ return -ENOENT;
+found:
+ if (crp->cr_princhash.len) {
+ if (clp->cl_cred.cr_raw_principal)
+ principal = clp->cl_cred.cr_raw_principal;
+ else if (clp->cl_cred.cr_principal)
+ principal = clp->cl_cred.cr_principal;
+ if (principal == NULL)
+ return -ENOENT;
+ cksum.len = crypto_shash_digestsize(tfm);
+ cksum.data = kmalloc(cksum.len, GFP_KERNEL);
+ if (cksum.data == NULL)
+ return -ENOENT;
+ status = crypto_shash_tfm_digest(tfm, principal,
+ strlen(principal), cksum.data);
+ if (status) {
+ kfree(cksum.data);
+ return -ENOENT;
+ }
+ if (memcmp(crp->cr_princhash.data, cksum.data,
+ crp->cr_princhash.len)) {
+ kfree(cksum.data);
+ return -ENOENT;
+ }
+ kfree(cksum.data);
+ }
+ crp->cr_clp = clp;
+ return 0;
+}
+
+static int
+nfsd4_cld_grace_start(struct nfsd_net *nn)
+{
+ int ret;
+ struct cld_upcall *cup;
+ struct cld_net *cn = nn->cld_net;
+
+ cup = alloc_cld_upcall(nn);
+ if (!cup) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ cup->cu_u.cu_msg.cm_cmd = Cld_GraceStart;
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn);
+ if (!ret)
+ ret = cup->cu_u.cu_msg.cm_status;
+
+ free_cld_upcall(cup);
+out_err:
+ if (ret)
+ dprintk("%s: Unable to get clients from userspace: %d\n",
+ __func__, ret);
+ return ret;
+}
+
+/* For older nfsdcld's that need cm_gracetime */
+static void
+nfsd4_cld_grace_done_v0(struct nfsd_net *nn)
+{
+ int ret;
+ struct cld_upcall *cup;
+ struct cld_net *cn = nn->cld_net;
+
+ cup = alloc_cld_upcall(nn);
+ if (!cup) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone;
+ cup->cu_u.cu_msg.cm_u.cm_gracetime = nn->boot_time;
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn);
+ if (!ret)
+ ret = cup->cu_u.cu_msg.cm_status;
+
+ free_cld_upcall(cup);
+out_err:
+ if (ret)
+ printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret);
+}
+
+/*
+ * For newer nfsdcld's that do not need cm_gracetime. We also need to call
+ * nfs4_release_reclaim() to clear out the reclaim_str_hashtbl.
+ */
+static void
+nfsd4_cld_grace_done(struct nfsd_net *nn)
+{
+ int ret;
+ struct cld_upcall *cup;
+ struct cld_net *cn = nn->cld_net;
+
+ cup = alloc_cld_upcall(nn);
+ if (!cup) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone;
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn);
+ if (!ret)
+ ret = cup->cu_u.cu_msg.cm_status;
+
+ free_cld_upcall(cup);
+out_err:
+ nfs4_release_reclaim(nn);
+ if (ret)
+ printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret);
+}
+
+static int
+nfs4_cld_state_init(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ int i;
+
+ nn->reclaim_str_hashtbl = kmalloc_array(CLIENT_HASH_SIZE,
+ sizeof(struct list_head),
+ GFP_KERNEL);
+ if (!nn->reclaim_str_hashtbl)
+ return -ENOMEM;
+
+ for (i = 0; i < CLIENT_HASH_SIZE; i++)
+ INIT_LIST_HEAD(&nn->reclaim_str_hashtbl[i]);
+ nn->reclaim_str_hashtbl_size = 0;
+ nn->track_reclaim_completes = true;
+ atomic_set(&nn->nr_reclaim_complete, 0);
+
+ return 0;
+}
+
+static void
+nfs4_cld_state_shutdown(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ nn->track_reclaim_completes = false;
+ kfree(nn->reclaim_str_hashtbl);
+}
+
+static bool
+cld_running(struct nfsd_net *nn)
+{
+ struct cld_net *cn = nn->cld_net;
+ struct rpc_pipe *pipe = cn->cn_pipe;
+
+ return pipe->nreaders || pipe->nwriters;
+}
+
+static int
+nfsd4_cld_get_version(struct nfsd_net *nn)
+{
+ int ret = 0;
+ struct cld_upcall *cup;
+ struct cld_net *cn = nn->cld_net;
+ uint8_t version;
+
+ cup = alloc_cld_upcall(nn);
+ if (!cup) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+ cup->cu_u.cu_msg.cm_cmd = Cld_GetVersion;
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn);
+ if (!ret) {
+ ret = cup->cu_u.cu_msg.cm_status;
+ if (ret)
+ goto out_free;
+ version = cup->cu_u.cu_msg.cm_u.cm_version;
+ dprintk("%s: userspace returned version %u\n",
+ __func__, version);
+ if (version < 1)
+ version = 1;
+ else if (version > CLD_UPCALL_VERSION)
+ version = CLD_UPCALL_VERSION;
+
+ switch (version) {
+ case 1:
+ nn->client_tracking_ops = &nfsd4_cld_tracking_ops;
+ break;
+ case 2:
+ nn->client_tracking_ops = &nfsd4_cld_tracking_ops_v2;
+ break;
+ default:
+ break;
+ }
+ }
+out_free:
+ free_cld_upcall(cup);
+out_err:
+ if (ret)
+ dprintk("%s: Unable to get version from userspace: %d\n",
+ __func__, ret);
+ return ret;
+}
+
+static int
+nfsd4_cld_tracking_init(struct net *net)
+{
+ int status;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ bool running;
+ int retries = 10;
+ struct crypto_shash *tfm;
+
+ status = nfs4_cld_state_init(net);
+ if (status)
+ return status;
+
+ status = __nfsd4_init_cld_pipe(net);
+ if (status)
+ goto err_shutdown;
+
+ /*
+ * rpc pipe upcalls take 30 seconds to time out, so we don't want to
+ * queue an upcall unless we know that nfsdcld is running (because we
+ * want this to fail fast so that nfsd4_client_tracking_init() can try
+ * the next client tracking method). nfsdcld should already be running
+ * before nfsd is started, so the wait here is for nfsdcld to open the
+ * pipefs file we just created.
+ */
+ while (!(running = cld_running(nn)) && retries--)
+ msleep(100);
+
+ if (!running) {
+ status = -ETIMEDOUT;
+ goto err_remove;
+ }
+ tfm = crypto_alloc_shash("sha256", 0, 0);
+ if (IS_ERR(tfm)) {
+ status = PTR_ERR(tfm);
+ goto err_remove;
+ }
+ nn->cld_net->cn_tfm = tfm;
+
+ status = nfsd4_cld_get_version(nn);
+ if (status == -EOPNOTSUPP)
+ pr_warn("NFSD: nfsdcld GetVersion upcall failed. Please upgrade nfsdcld.\n");
+
+ status = nfsd4_cld_grace_start(nn);
+ if (status) {
+ if (status == -EOPNOTSUPP)
+ pr_warn("NFSD: nfsdcld GraceStart upcall failed. Please upgrade nfsdcld.\n");
+ nfs4_release_reclaim(nn);
+ goto err_remove;
+ } else
+ pr_info("NFSD: Using nfsdcld client tracking operations.\n");
+ return 0;
+
+err_remove:
+ nfsd4_remove_cld_pipe(net);
+err_shutdown:
+ nfs4_cld_state_shutdown(net);
+ return status;
+}
+
+static void
+nfsd4_cld_tracking_exit(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ nfs4_release_reclaim(nn);
+ nfsd4_remove_cld_pipe(net);
+ nfs4_cld_state_shutdown(net);
+}
+
+/* For older nfsdcld's */
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v0 = {
+ .init = nfsd4_init_cld_pipe,
+ .exit = nfsd4_remove_cld_pipe,
+ .create = nfsd4_cld_create,
+ .remove = nfsd4_cld_remove,
+ .check = nfsd4_cld_check_v0,
+ .grace_done = nfsd4_cld_grace_done_v0,
+ .version = 1,
+ .msglen = sizeof(struct cld_msg),
+};
+
+/* For newer nfsdcld's */
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = {
+ .init = nfsd4_cld_tracking_init,
+ .exit = nfsd4_cld_tracking_exit,
+ .create = nfsd4_cld_create,
+ .remove = nfsd4_cld_remove,
+ .check = nfsd4_cld_check,
+ .grace_done = nfsd4_cld_grace_done,
+ .version = 1,
+ .msglen = sizeof(struct cld_msg),
+};
+
+/* v2 create/check ops include the principal, if available */
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v2 = {
+ .init = nfsd4_cld_tracking_init,
+ .exit = nfsd4_cld_tracking_exit,
+ .create = nfsd4_cld_create_v2,
+ .remove = nfsd4_cld_remove,
+ .check = nfsd4_cld_check_v2,
+ .grace_done = nfsd4_cld_grace_done,
+ .version = 2,
+ .msglen = sizeof(struct cld_msg_v2),
+};
+
+/* upcall via usermodehelper */
+static char cltrack_prog[PATH_MAX] = "/sbin/nfsdcltrack";
+module_param_string(cltrack_prog, cltrack_prog, sizeof(cltrack_prog),
+ S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(cltrack_prog, "Path to the nfsdcltrack upcall program");
+
+static bool cltrack_legacy_disable;
+module_param(cltrack_legacy_disable, bool, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(cltrack_legacy_disable,
+ "Disable legacy recoverydir conversion. Default: false");
+
+#define LEGACY_TOPDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_TOPDIR="
+#define LEGACY_RECDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_RECDIR="
+#define HAS_SESSION_ENV_PREFIX "NFSDCLTRACK_CLIENT_HAS_SESSION="
+#define GRACE_START_ENV_PREFIX "NFSDCLTRACK_GRACE_START="
+
+static char *
+nfsd4_cltrack_legacy_topdir(void)
+{
+ int copied;
+ size_t len;
+ char *result;
+
+ if (cltrack_legacy_disable)
+ return NULL;
+
+ len = strlen(LEGACY_TOPDIR_ENV_PREFIX) +
+ strlen(nfs4_recoverydir()) + 1;
+
+ result = kmalloc(len, GFP_KERNEL);
+ if (!result)
+ return result;
+
+ copied = snprintf(result, len, LEGACY_TOPDIR_ENV_PREFIX "%s",
+ nfs4_recoverydir());
+ if (copied >= len) {
+ /* just return nothing if output was truncated */
+ kfree(result);
+ return NULL;
+ }
+
+ return result;
+}
+
+static char *
+nfsd4_cltrack_legacy_recdir(const struct xdr_netobj *name)
+{
+ int copied;
+ size_t len;
+ char *result;
+
+ if (cltrack_legacy_disable)
+ return NULL;
+
+ /* +1 is for '/' between "topdir" and "recdir" */
+ len = strlen(LEGACY_RECDIR_ENV_PREFIX) +
+ strlen(nfs4_recoverydir()) + 1 + HEXDIR_LEN;
+
+ result = kmalloc(len, GFP_KERNEL);
+ if (!result)
+ return result;
+
+ copied = snprintf(result, len, LEGACY_RECDIR_ENV_PREFIX "%s/",
+ nfs4_recoverydir());
+ if (copied > (len - HEXDIR_LEN)) {
+ /* just return nothing if output will be truncated */
+ kfree(result);
+ return NULL;
+ }
+
+ copied = nfs4_make_rec_clidname(result + copied, name);
+ if (copied) {
+ kfree(result);
+ return NULL;
+ }
+
+ return result;
+}
+
+static char *
+nfsd4_cltrack_client_has_session(struct nfs4_client *clp)
+{
+ int copied;
+ size_t len;
+ char *result;
+
+ /* prefix + Y/N character + terminating NULL */
+ len = strlen(HAS_SESSION_ENV_PREFIX) + 1 + 1;
+
+ result = kmalloc(len, GFP_KERNEL);
+ if (!result)
+ return result;
+
+ copied = snprintf(result, len, HAS_SESSION_ENV_PREFIX "%c",
+ clp->cl_minorversion ? 'Y' : 'N');
+ if (copied >= len) {
+ /* just return nothing if output was truncated */
+ kfree(result);
+ return NULL;
+ }
+
+ return result;
+}
+
+static char *
+nfsd4_cltrack_grace_start(time64_t grace_start)
+{
+ int copied;
+ size_t len;
+ char *result;
+
+ /* prefix + max width of int64_t string + terminating NULL */
+ len = strlen(GRACE_START_ENV_PREFIX) + 22 + 1;
+
+ result = kmalloc(len, GFP_KERNEL);
+ if (!result)
+ return result;
+
+ copied = snprintf(result, len, GRACE_START_ENV_PREFIX "%lld",
+ grace_start);
+ if (copied >= len) {
+ /* just return nothing if output was truncated */
+ kfree(result);
+ return NULL;
+ }
+
+ return result;
+}
+
+static int
+nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *env0, char *env1)
+{
+ char *envp[3];
+ char *argv[4];
+ int ret;
+
+ if (unlikely(!cltrack_prog[0])) {
+ dprintk("%s: cltrack_prog is disabled\n", __func__);
+ return -EACCES;
+ }
+
+ dprintk("%s: cmd: %s\n", __func__, cmd);
+ dprintk("%s: arg: %s\n", __func__, arg ? arg : "(null)");
+ dprintk("%s: env0: %s\n", __func__, env0 ? env0 : "(null)");
+ dprintk("%s: env1: %s\n", __func__, env1 ? env1 : "(null)");
+
+ envp[0] = env0;
+ envp[1] = env1;
+ envp[2] = NULL;
+
+ argv[0] = (char *)cltrack_prog;
+ argv[1] = cmd;
+ argv[2] = arg;
+ argv[3] = NULL;
+
+ ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
+ /*
+ * Disable the upcall mechanism if we're getting an ENOENT or EACCES
+ * error. The admin can re-enable it on the fly by using sysfs
+ * once the problem has been fixed.
+ */
+ if (ret == -ENOENT || ret == -EACCES) {
+ dprintk("NFSD: %s was not found or isn't executable (%d). "
+ "Setting cltrack_prog to blank string!",
+ cltrack_prog, ret);
+ cltrack_prog[0] = '\0';
+ }
+ dprintk("%s: %s return value: %d\n", __func__, cltrack_prog, ret);
+
+ return ret;
+}
+
+static char *
+bin_to_hex_dup(const unsigned char *src, int srclen)
+{
+ char *buf;
+
+ /* +1 for terminating NULL */
+ buf = kzalloc((srclen * 2) + 1, GFP_KERNEL);
+ if (!buf)
+ return buf;
+
+ bin2hex(buf, src, srclen);
+ return buf;
+}
+
+static int
+nfsd4_umh_cltrack_init(struct net *net)
+{
+ int ret;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ char *grace_start = nfsd4_cltrack_grace_start(nn->boot_time);
+
+ /* XXX: The usermode helper s not working in container yet. */
+ if (net != &init_net) {
+ pr_warn("NFSD: attempt to initialize umh client tracking in a container ignored.\n");
+ kfree(grace_start);
+ return -EINVAL;
+ }
+
+ ret = nfsd4_umh_cltrack_upcall("init", NULL, grace_start, NULL);
+ kfree(grace_start);
+ if (!ret)
+ pr_info("NFSD: Using UMH upcall client tracking operations.\n");
+ return ret;
+}
+
+static void
+nfsd4_cltrack_upcall_lock(struct nfs4_client *clp)
+{
+ wait_on_bit_lock(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK,
+ TASK_UNINTERRUPTIBLE);
+}
+
+static void
+nfsd4_cltrack_upcall_unlock(struct nfs4_client *clp)
+{
+ smp_mb__before_atomic();
+ clear_bit(NFSD4_CLIENT_UPCALL_LOCK, &clp->cl_flags);
+ smp_mb__after_atomic();
+ wake_up_bit(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK);
+}
+
+static void
+nfsd4_umh_cltrack_create(struct nfs4_client *clp)
+{
+ char *hexid, *has_session, *grace_start;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+ /*
+ * With v4.0 clients, there's little difference in outcome between a
+ * create and check operation, and we can end up calling into this
+ * function multiple times per client (once for each openowner). So,
+ * for v4.0 clients skip upcalling once the client has been recorded
+ * on stable storage.
+ *
+ * For v4.1+ clients, the outcome of the two operations is different,
+ * so we must ensure that we upcall for the create operation. v4.1+
+ * clients call this on RECLAIM_COMPLETE though, so we should only end
+ * up doing a single create upcall per client.
+ */
+ if (clp->cl_minorversion == 0 &&
+ test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+ return;
+
+ hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
+ if (!hexid) {
+ dprintk("%s: can't allocate memory for upcall!\n", __func__);
+ return;
+ }
+
+ has_session = nfsd4_cltrack_client_has_session(clp);
+ grace_start = nfsd4_cltrack_grace_start(nn->boot_time);
+
+ nfsd4_cltrack_upcall_lock(clp);
+ if (!nfsd4_umh_cltrack_upcall("create", hexid, has_session, grace_start))
+ set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+ nfsd4_cltrack_upcall_unlock(clp);
+
+ kfree(has_session);
+ kfree(grace_start);
+ kfree(hexid);
+}
+
+static void
+nfsd4_umh_cltrack_remove(struct nfs4_client *clp)
+{
+ char *hexid;
+
+ if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+ return;
+
+ hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
+ if (!hexid) {
+ dprintk("%s: can't allocate memory for upcall!\n", __func__);
+ return;
+ }
+
+ nfsd4_cltrack_upcall_lock(clp);
+ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags) &&
+ nfsd4_umh_cltrack_upcall("remove", hexid, NULL, NULL) == 0)
+ clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+ nfsd4_cltrack_upcall_unlock(clp);
+
+ kfree(hexid);
+}
+
+static int
+nfsd4_umh_cltrack_check(struct nfs4_client *clp)
+{
+ int ret;
+ char *hexid, *has_session, *legacy;
+
+ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+ return 0;
+
+ hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
+ if (!hexid) {
+ dprintk("%s: can't allocate memory for upcall!\n", __func__);
+ return -ENOMEM;
+ }
+
+ has_session = nfsd4_cltrack_client_has_session(clp);
+ legacy = nfsd4_cltrack_legacy_recdir(&clp->cl_name);
+
+ nfsd4_cltrack_upcall_lock(clp);
+ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) {
+ ret = 0;
+ } else {
+ ret = nfsd4_umh_cltrack_upcall("check", hexid, has_session, legacy);
+ if (ret == 0)
+ set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+ }
+ nfsd4_cltrack_upcall_unlock(clp);
+ kfree(has_session);
+ kfree(legacy);
+ kfree(hexid);
+
+ return ret;
+}
+
+static void
+nfsd4_umh_cltrack_grace_done(struct nfsd_net *nn)
+{
+ char *legacy;
+ char timestr[22]; /* FIXME: better way to determine max size? */
+
+ sprintf(timestr, "%lld", nn->boot_time);
+ legacy = nfsd4_cltrack_legacy_topdir();
+ nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy, NULL);
+ kfree(legacy);
+}
+
+static const struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = {
+ .init = nfsd4_umh_cltrack_init,
+ .exit = NULL,
+ .create = nfsd4_umh_cltrack_create,
+ .remove = nfsd4_umh_cltrack_remove,
+ .check = nfsd4_umh_cltrack_check,
+ .grace_done = nfsd4_umh_cltrack_grace_done,
+ .version = 1,
+ .msglen = 0,
+};
+
+int
+nfsd4_client_tracking_init(struct net *net)
+{
+ int status;
+ struct path path;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ /* just run the init if it the method is already decided */
+ if (nn->client_tracking_ops)
+ goto do_init;
+
+ /* First, try to use nfsdcld */
+ nn->client_tracking_ops = &nfsd4_cld_tracking_ops;
+ status = nn->client_tracking_ops->init(net);
+ if (!status)
+ return status;
+ if (status != -ETIMEDOUT) {
+ nn->client_tracking_ops = &nfsd4_cld_tracking_ops_v0;
+ status = nn->client_tracking_ops->init(net);
+ if (!status)
+ return status;
+ }
+
+ /*
+ * Next, try the UMH upcall.
+ */
+ nn->client_tracking_ops = &nfsd4_umh_tracking_ops;
+ status = nn->client_tracking_ops->init(net);
+ if (!status)
+ return status;
+
+ /*
+ * Finally, See if the recoverydir exists and is a directory.
+ * If it is, then use the legacy ops.
+ */
+ nn->client_tracking_ops = &nfsd4_legacy_tracking_ops;
+ status = kern_path(nfs4_recoverydir(), LOOKUP_FOLLOW, &path);
+ if (!status) {
+ status = d_is_dir(path.dentry);
+ path_put(&path);
+ if (!status) {
+ status = -EINVAL;
+ goto out;
+ }
+ }
+
+do_init:
+ status = nn->client_tracking_ops->init(net);
+out:
+ if (status) {
+ printk(KERN_WARNING "NFSD: Unable to initialize client "
+ "recovery tracking! (%d)\n", status);
+ nn->client_tracking_ops = NULL;
+ }
+ return status;
+}
+
+void
+nfsd4_client_tracking_exit(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ if (nn->client_tracking_ops) {
+ if (nn->client_tracking_ops->exit)
+ nn->client_tracking_ops->exit(net);
+ nn->client_tracking_ops = NULL;
+ }
+}
+
+void
+nfsd4_client_record_create(struct nfs4_client *clp)
+{
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+ if (nn->client_tracking_ops)
+ nn->client_tracking_ops->create(clp);
+}
+
+void
+nfsd4_client_record_remove(struct nfs4_client *clp)
+{
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+ if (nn->client_tracking_ops)
+ nn->client_tracking_ops->remove(clp);
+}
+
+int
+nfsd4_client_record_check(struct nfs4_client *clp)
+{
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+ if (nn->client_tracking_ops)
+ return nn->client_tracking_ops->check(clp);
+
+ return -EOPNOTSUPP;
+}
+
+void
+nfsd4_record_grace_done(struct nfsd_net *nn)
+{
+ if (nn->client_tracking_ops)
+ nn->client_tracking_ops->grace_done(nn);
+}
+
+static int
+rpc_pipefs_event(struct notifier_block *nb, unsigned long event, void *ptr)
+{
+ struct super_block *sb = ptr;
+ struct net *net = sb->s_fs_info;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct cld_net *cn = nn->cld_net;
+ struct dentry *dentry;
+ int ret = 0;
+
+ if (!try_module_get(THIS_MODULE))
+ return 0;
+
+ if (!cn) {
+ module_put(THIS_MODULE);
+ return 0;
+ }
+
+ switch (event) {
+ case RPC_PIPEFS_MOUNT:
+ dentry = nfsd4_cld_register_sb(sb, cn->cn_pipe);
+ if (IS_ERR(dentry)) {
+ ret = PTR_ERR(dentry);
+ break;
+ }
+ cn->cn_pipe->dentry = dentry;
+ break;
+ case RPC_PIPEFS_UMOUNT:
+ if (cn->cn_pipe->dentry)
+ nfsd4_cld_unregister_sb(cn->cn_pipe);
+ break;
+ default:
+ ret = -ENOTSUPP;
+ break;
+ }
+ module_put(THIS_MODULE);
+ return ret;
+}
+
+static struct notifier_block nfsd4_cld_block = {
+ .notifier_call = rpc_pipefs_event,
+};
+
+int
+register_cld_notifier(void)
+{
+ WARN_ON(!nfsd_net_id);
+ return rpc_pipefs_notifier_register(&nfsd4_cld_block);
+}
+
+void
+unregister_cld_notifier(void)
+{
+ rpc_pipefs_notifier_unregister(&nfsd4_cld_block);
+}