Adding upstream version 4.19.249.upstream/4.19.249

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-06 01:02:30 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-06 01:02:30 +0000
commit: 76cb841cb886eef6b3bee341a2266c76578724ad (patch)
tree: f5892e5ba6cc11949952a6ce4ecbe6d516d6ce58 /fs/overlayfs
parent: Initial commit. (diff)
download: linux-76cb841cb886eef6b3bee341a2266c76578724ad.tar.xz
linux-76cb841cb886eef6b3bee341a2266c76578724ad.zip
13 files changed, 10357 insertions, 0 deletions
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
new file mode 100644
index 000000000..2ef91be2a
--- /dev/null
+++ b/fs/overlayfs/Kconfig
@@ -0,0 +1,124 @@
+config OVERLAY_FS
+	tristate "Overlay filesystem support"
+	select EXPORTFS
+	help
+	  An overlay filesystem combines two filesystems - an 'upper' filesystem
+	  and a 'lower' filesystem.  When a name exists in both filesystems, the
+	  object in the 'upper' filesystem is visible while the object in the
+	  'lower' filesystem is either hidden or, in the case of directories,
+	  merged with the 'upper' object.
+
+	  For more information see Documentation/filesystems/overlayfs.txt
+
+config OVERLAY_FS_REDIRECT_DIR
+	bool "Overlayfs: turn on redirect directory feature by default"
+	depends on OVERLAY_FS
+	help
+	  If this config option is enabled then overlay filesystems will use
+	  redirects when renaming directories by default.  In this case it is
+	  still possible to turn off redirects globally with the
+	  "redirect_dir=off" module option or on a filesystem instance basis
+	  with the "redirect_dir=off" mount option.
+
+	  Note, that redirects are not backward compatible.  That is, mounting
+	  an overlay which has redirects on a kernel that doesn't support this
+	  feature will have unexpected results.
+
+	  If unsure, say N.
+
+config OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW
+	bool "Overlayfs: follow redirects even if redirects are turned off"
+	default y
+	depends on OVERLAY_FS
+	help
+	  Disable this to get a possibly more secure configuration, but that
+	  might not be backward compatible with previous kernels.
+
+	  If backward compatibility is not an issue, then it is safe and
+	  recommended to say N here.
+
+	  For more information, see Documentation/filesystems/overlayfs.txt
+
+	  If unsure, say Y.
+
+config OVERLAY_FS_INDEX
+	bool "Overlayfs: turn on inodes index feature by default"
+	depends on OVERLAY_FS
+	help
+	  If this config option is enabled then overlay filesystems will use
+	  the index directory to map lower inodes to upper inodes by default.
+	  In this case it is still possible to turn off index globally with the
+	  "index=off" module option or on a filesystem instance basis with the
+	  "index=off" mount option.
+
+	  The inodes index feature prevents breaking of lower hardlinks on copy
+	  up.
+
+	  Note, that the inodes index feature is not backward compatible.
+	  That is, mounting an overlay which has an inodes index on a kernel
+	  that doesn't support this feature will have unexpected results.
+
+	  If unsure, say N.
+
+config OVERLAY_FS_NFS_EXPORT
+	bool "Overlayfs: turn on NFS export feature by default"
+	depends on OVERLAY_FS
+	depends on OVERLAY_FS_INDEX
+	depends on !OVERLAY_FS_METACOPY
+	help
+	  If this config option is enabled then overlay filesystems will use
+	  the index directory to decode overlay NFS file handles by default.
+	  In this case, it is still possible to turn off NFS export support
+	  globally with the "nfs_export=off" module option or on a filesystem
+	  instance basis with the "nfs_export=off" mount option.
+
+	  The NFS export feature creates an index on copy up of every file and
+	  directory.  This full index is used to detect overlay filesystems
+	  inconsistencies on lookup, like redirect from multiple upper dirs to
+	  the same lower dir.  The full index may incur some overhead on mount
+	  time, especially when verifying that directory file handles are not
+	  stale.
+
+	  Note, that the NFS export feature is not backward compatible.
+	  That is, mounting an overlay which has a full index on a kernel
+	  that doesn't support this feature will have unexpected results.
+
+	  Most users should say N here and enable this feature on a case-by-
+	  case basis with the "nfs_export=on" mount option.
+
+	  Say N unless you fully understand the consequences.
+
+config OVERLAY_FS_XINO_AUTO
+	bool "Overlayfs: auto enable inode number mapping"
+	default n
+	depends on OVERLAY_FS
+	help
+	  If this config option is enabled then overlay filesystems will use
+	  unused high bits in undelying filesystem inode numbers to map all
+	  inodes to a unified address space.  The mapped 64bit inode numbers
+	  might not be compatible with applications that expect 32bit inodes.
+
+	  If compatibility with applications that expect 32bit inodes is not an
+	  issue, then it is safe and recommended to say Y here.
+
+	  For more information, see Documentation/filesystems/overlayfs.txt
+
+	  If unsure, say N.
+
+config OVERLAY_FS_METACOPY
+	bool "Overlayfs: turn on metadata only copy up feature by default"
+	depends on OVERLAY_FS
+	select OVERLAY_FS_REDIRECT_DIR
+	help
+	  If this config option is enabled then overlay filesystems will
+	  copy up only metadata where appropriate and data copy up will
+	  happen when a file is opened for WRITE operation. It is still
+	  possible to turn off this feature globally with the "metacopy=off"
+	  module option or on a filesystem instance basis with the
+	  "metacopy=off" mount option.
+
+	  Note, that this feature is not backward compatible.  That is,
+	  mounting an overlay which has metacopy only inodes on a kernel
+	  that doesn't support this feature will have unexpected results.
+
+	  If unsure, say N.
diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile
new file mode 100644
index 000000000..46e1ff8ac
--- /dev/null
+++ b/fs/overlayfs/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the overlay filesystem.
+#
+
+obj-$(CONFIG_OVERLAY_FS) += overlay.o
+
+overlay-objs := super.o namei.o util.o inode.o file.o dir.o readdir.o \
+		copy_up.o export.o
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
new file mode 100644
index 000000000..30abafcd4
--- /dev/null
+++ b/fs/overlayfs/copy_up.c
@@ -0,0 +1,906 @@
+/*
+ *
+ * Copyright (C) 2011 Novell Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/splice.h>
+#include <linux/xattr.h>
+#include <linux/security.h>
+#include <linux/uaccess.h>
+#include <linux/sched/signal.h>
+#include <linux/cred.h>
+#include <linux/namei.h>
+#include <linux/fdtable.h>
+#include <linux/ratelimit.h>
+#include <linux/exportfs.h>
+#include "overlayfs.h"
+
+#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
+
+static int ovl_ccup_set(const char *buf, const struct kernel_param *param)
+{
+	pr_warn("overlayfs: \"check_copy_up\" module option is obsolete\n");
+	return 0;
+}
+
+static int ovl_ccup_get(char *buf, const struct kernel_param *param)
+{
+	return sprintf(buf, "N\n");
+}
+
+module_param_call(check_copy_up, ovl_ccup_set, ovl_ccup_get, NULL, 0644);
+MODULE_PARM_DESC(ovl_check_copy_up, "Obsolete; does nothing");
+
+int ovl_copy_xattr(struct dentry *old, struct dentry *new)
+{
+	ssize_t list_size, size, value_size = 0;
+	char *buf, *name, *value = NULL;
+	int error = 0;
+	size_t slen;
+
+	if (!(old->d_inode->i_opflags & IOP_XATTR) ||
+	    !(new->d_inode->i_opflags & IOP_XATTR))
+		return 0;
+
+	list_size = vfs_listxattr(old, NULL, 0);
+	if (list_size <= 0) {
+		if (list_size == -EOPNOTSUPP)
+			return 0;
+		return list_size;
+	}
+
+	buf = kzalloc(list_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	list_size = vfs_listxattr(old, buf, list_size);
+	if (list_size <= 0) {
+		error = list_size;
+		goto out;
+	}
+
+	for (name = buf; list_size; name += slen) {
+		slen = strnlen(name, list_size) + 1;
+
+		/* underlying fs providing us with an broken xattr list? */
+		if (WARN_ON(slen > list_size)) {
+			error = -EIO;
+			break;
+		}
+		list_size -= slen;
+
+		if (ovl_is_private_xattr(name))
+			continue;
+
+		error = security_inode_copy_up_xattr(name);
+		if (error < 0 && error != -EOPNOTSUPP)
+			break;
+		if (error == 1) {
+			error = 0;
+			continue; /* Discard */
+		}
+retry:
+		size = vfs_getxattr(old, name, value, value_size);
+		if (size == -ERANGE)
+			size = vfs_getxattr(old, name, NULL, 0);
+
+		if (size < 0) {
+			error = size;
+			break;
+		}
+
+		if (size > value_size) {
+			void *new;
+
+			new = krealloc(value, size, GFP_KERNEL);
+			if (!new) {
+				error = -ENOMEM;
+				break;
+			}
+			value = new;
+			value_size = size;
+			goto retry;
+		}
+
+		error = vfs_setxattr(new, name, value, size, 0);
+		if (error)
+			break;
+	}
+	kfree(value);
+out:
+	kfree(buf);
+	return error;
+}
+
+static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
+{
+	struct file *old_file;
+	struct file *new_file;
+	loff_t old_pos = 0;
+	loff_t new_pos = 0;
+	int error = 0;
+
+	if (len == 0)
+		return 0;
+
+	old_file = ovl_path_open(old, O_LARGEFILE | O_RDONLY);
+	if (IS_ERR(old_file))
+		return PTR_ERR(old_file);
+
+	new_file = ovl_path_open(new, O_LARGEFILE | O_WRONLY);
+	if (IS_ERR(new_file)) {
+		error = PTR_ERR(new_file);
+		goto out_fput;
+	}
+
+	/* Try to use clone_file_range to clone up within the same fs */
+	error = do_clone_file_range(old_file, 0, new_file, 0, len);
+	if (!error)
+		goto out;
+	/* Couldn't clone, so now we try to copy the data */
+	error = 0;
+
+	/* FIXME: copy up sparse files efficiently */
+	while (len) {
+		size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
+		long bytes;
+
+		if (len < this_len)
+			this_len = len;
+
+		if (signal_pending_state(TASK_KILLABLE, current)) {
+			error = -EINTR;
+			break;
+		}
+
+		bytes = do_splice_direct(old_file, &old_pos,
+					 new_file, &new_pos,
+					 this_len, SPLICE_F_MOVE);
+		if (bytes <= 0) {
+			error = bytes;
+			break;
+		}
+		WARN_ON(old_pos != new_pos);
+
+		len -= bytes;
+	}
+out:
+	if (!error)
+		error = vfs_fsync(new_file, 0);
+	fput(new_file);
+out_fput:
+	fput(old_file);
+	return error;
+}
+
+static int ovl_set_size(struct dentry *upperdentry, struct kstat *stat)
+{
+	struct iattr attr = {
+		.ia_valid = ATTR_SIZE,
+		.ia_size = stat->size,
+	};
+
+	return notify_change(upperdentry, &attr, NULL);
+}
+
+static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
+{
+	struct iattr attr = {
+		.ia_valid =
+		     ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
+		.ia_atime = stat->atime,
+		.ia_mtime = stat->mtime,
+	};
+
+	return notify_change(upperdentry, &attr, NULL);
+}
+
+int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
+{
+	int err = 0;
+
+	if (!S_ISLNK(stat->mode)) {
+		struct iattr attr = {
+			.ia_valid = ATTR_MODE,
+			.ia_mode = stat->mode,
+		};
+		err = notify_change(upperdentry, &attr, NULL);
+	}
+	if (!err) {
+		struct iattr attr = {
+			.ia_valid = ATTR_UID | ATTR_GID,
+			.ia_uid = stat->uid,
+			.ia_gid = stat->gid,
+		};
+		err = notify_change(upperdentry, &attr, NULL);
+	}
+	if (!err)
+		ovl_set_timestamps(upperdentry, stat);
+
+	return err;
+}
+
+struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper)
+{
+	struct ovl_fh *fh;
+	int fh_type, fh_len, dwords;
+	void *buf;
+	int buflen = MAX_HANDLE_SZ;
+	uuid_t *uuid = &real->d_sb->s_uuid;
+
+	buf = kmalloc(buflen, GFP_KERNEL);
+	if (!buf)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * We encode a non-connectable file handle for non-dir, because we
+	 * only need to find the lower inode number and we don't want to pay
+	 * the price or reconnecting the dentry.
+	 */
+	dwords = buflen >> 2;
+	fh_type = exportfs_encode_fh(real, buf, &dwords, 0);
+	buflen = (dwords << 2);
+
+	fh = ERR_PTR(-EIO);
+	if (WARN_ON(fh_type < 0) ||
+	    WARN_ON(buflen > MAX_HANDLE_SZ) ||
+	    WARN_ON(fh_type == FILEID_INVALID))
+		goto out;
+
+	BUILD_BUG_ON(MAX_HANDLE_SZ + offsetof(struct ovl_fh, fid) > 255);
+	fh_len = offsetof(struct ovl_fh, fid) + buflen;
+	fh = kmalloc(fh_len, GFP_KERNEL);
+	if (!fh) {
+		fh = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	fh->version = OVL_FH_VERSION;
+	fh->magic = OVL_FH_MAGIC;
+	fh->type = fh_type;
+	fh->flags = OVL_FH_FLAG_CPU_ENDIAN;
+	/*
+	 * When we will want to decode an overlay dentry from this handle
+	 * and all layers are on the same fs, if we get a disconncted real
+	 * dentry when we decode fid, the only way to tell if we should assign
+	 * it to upperdentry or to lowerstack is by checking this flag.
+	 */
+	if (is_upper)
+		fh->flags |= OVL_FH_FLAG_PATH_UPPER;
+	fh->len = fh_len;
+	fh->uuid = *uuid;
+	memcpy(fh->fid, buf, buflen);
+
+out:
+	kfree(buf);
+	return fh;
+}
+
+int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
+		   struct dentry *upper)
+{
+	const struct ovl_fh *fh = NULL;
+	int err;
+
+	/*
+	 * When lower layer doesn't support export operations store a 'null' fh,
+	 * so we can use the overlay.origin xattr to distignuish between a copy
+	 * up and a pure upper inode.
+	 */
+	if (ovl_can_decode_fh(lower->d_sb)) {
+		fh = ovl_encode_real_fh(lower, false);
+		if (IS_ERR(fh))
+			return PTR_ERR(fh);
+	}
+
+	/*
+	 * Do not fail when upper doesn't support xattrs.
+	 */
+	err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh,
+				 fh ? fh->len : 0, 0);
+	kfree(fh);
+
+	return err;
+}
+
+/* Store file handle of @upper dir in @index dir entry */
+static int ovl_set_upper_fh(struct dentry *upper, struct dentry *index)
+{
+	const struct ovl_fh *fh;
+	int err;
+
+	fh = ovl_encode_real_fh(upper, true);
+	if (IS_ERR(fh))
+		return PTR_ERR(fh);
+
+	err = ovl_do_setxattr(index, OVL_XATTR_UPPER, fh, fh->len, 0);
+
+	kfree(fh);
+	return err;
+}
+
+/*
+ * Create and install index entry.
+ *
+ * Caller must hold i_mutex on indexdir.
+ */
+static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
+			    struct dentry *upper)
+{
+	struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
+	struct inode *dir = d_inode(indexdir);
+	struct dentry *index = NULL;
+	struct dentry *temp = NULL;
+	struct qstr name = { };
+	int err;
+
+	/*
+	 * For now this is only used for creating index entry for directories,
+	 * because non-dir are copied up directly to index and then hardlinked
+	 * to upper dir.
+	 *
+	 * TODO: implement create index for non-dir, so we can call it when
+	 * encoding file handle for non-dir in case index does not exist.
+	 */
+	if (WARN_ON(!d_is_dir(dentry)))
+		return -EIO;
+
+	/* Directory not expected to be indexed before copy up */
+	if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry))))
+		return -EIO;
+
+	err = ovl_get_index_name(origin, &name);
+	if (err)
+		return err;
+
+	temp = ovl_create_temp(indexdir, OVL_CATTR(S_IFDIR | 0));
+	err = PTR_ERR(temp);
+	if (IS_ERR(temp))
+		goto free_name;
+
+	err = ovl_set_upper_fh(upper, temp);
+	if (err)
+		goto out;
+
+	index = lookup_one_len(name.name, indexdir, name.len);
+	if (IS_ERR(index)) {
+		err = PTR_ERR(index);
+	} else {
+		err = ovl_do_rename(dir, temp, dir, index, 0);
+		dput(index);
+	}
+out:
+	if (err)
+		ovl_cleanup(dir, temp);
+	dput(temp);
+free_name:
+	kfree(name.name);
+	return err;
+}
+
+struct ovl_copy_up_ctx {
+	struct dentry *parent;
+	struct dentry *dentry;
+	struct path lowerpath;
+	struct kstat stat;
+	struct kstat pstat;
+	const char *link;
+	struct dentry *destdir;
+	struct qstr destname;
+	struct dentry *workdir;
+	bool tmpfile;
+	bool origin;
+	bool indexed;
+	bool metacopy;
+};
+
+static int ovl_link_up(struct ovl_copy_up_ctx *c)
+{
+	int err;
+	struct dentry *upper;
+	struct dentry *upperdir = ovl_dentry_upper(c->parent);
+	struct inode *udir = d_inode(upperdir);
+
+	/* Mark parent "impure" because it may now contain non-pure upper */
+	err = ovl_set_impure(c->parent, upperdir);
+	if (err)
+		return err;
+
+	err = ovl_set_nlink_lower(c->dentry);
+	if (err)
+		return err;
+
+	inode_lock_nested(udir, I_MUTEX_PARENT);
+	upper = lookup_one_len(c->dentry->d_name.name, upperdir,
+			       c->dentry->d_name.len);
+	err = PTR_ERR(upper);
+	if (!IS_ERR(upper)) {
+		err = ovl_do_link(ovl_dentry_upper(c->dentry), udir, upper);
+		dput(upper);
+
+		if (!err) {
+			/* Restore timestamps on parent (best effort) */
+			ovl_set_timestamps(upperdir, &c->pstat);
+			ovl_dentry_set_upper_alias(c->dentry);
+		}
+	}
+	inode_unlock(udir);
+	if (err)
+		return err;
+
+	err = ovl_set_nlink_upper(c->dentry);
+
+	return err;
+}
+
+static int ovl_install_temp(struct ovl_copy_up_ctx *c, struct dentry *temp,
+			    struct dentry **newdentry)
+{
+	int err;
+	struct dentry *upper;
+	struct inode *udir = d_inode(c->destdir);
+
+	upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
+	if (IS_ERR(upper))
+		return PTR_ERR(upper);
+
+	if (c->tmpfile)
+		err = ovl_do_link(temp, udir, upper);
+	else
+		err = ovl_do_rename(d_inode(c->workdir), temp, udir, upper, 0);
+
+	if (!err)
+		*newdentry = dget(c->tmpfile ? upper : temp);
+	dput(upper);
+
+	return err;
+}
+
+static struct dentry *ovl_get_tmpfile(struct ovl_copy_up_ctx *c)
+{
+	int err;
+	struct dentry *temp;
+	const struct cred *old_creds = NULL;
+	struct cred *new_creds = NULL;
+	struct ovl_cattr cattr = {
+		/* Can't properly set mode on creation because of the umask */
+		.mode = c->stat.mode & S_IFMT,
+		.rdev = c->stat.rdev,
+		.link = c->link
+	};
+
+	err = security_inode_copy_up(c->dentry, &new_creds);
+	temp = ERR_PTR(err);
+	if (err < 0)
+		goto out;
+
+	if (new_creds)
+		old_creds = override_creds(new_creds);
+
+	if (c->tmpfile)
+		temp = ovl_do_tmpfile(c->workdir, c->stat.mode);
+	else
+		temp = ovl_create_temp(c->workdir, &cattr);
+out:
+	if (new_creds) {
+		revert_creds(old_creds);
+		put_cred(new_creds);
+	}
+
+	return temp;
+}
+
+static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
+{
+	int err;
+
+	/*
+	 * Copy up data first and then xattrs. Writing data after
+	 * xattrs will remove security.capability xattr automatically.
+	 */
+	if (S_ISREG(c->stat.mode) && !c->metacopy) {
+		struct path upperpath, datapath;
+
+		ovl_path_upper(c->dentry, &upperpath);
+		if (WARN_ON(upperpath.dentry != NULL))
+			return -EIO;
+		upperpath.dentry = temp;
+
+		ovl_path_lowerdata(c->dentry, &datapath);
+		err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size);
+		if (err)
+			return err;
+	}
+
+	err = ovl_copy_xattr(c->lowerpath.dentry, temp);
+	if (err)
+		return err;
+
+	/*
+	 * Store identifier of lower inode in upper inode xattr to
+	 * allow lookup of the copy up origin inode.
+	 *
+	 * Don't set origin when we are breaking the association with a lower
+	 * hard link.
+	 */
+	if (c->origin) {
+		err = ovl_set_origin(c->dentry, c->lowerpath.dentry, temp);
+		if (err)
+			return err;
+	}
+
+	if (c->metacopy) {
+		err = ovl_check_setxattr(c->dentry, temp, OVL_XATTR_METACOPY,
+					 NULL, 0, -EOPNOTSUPP);
+		if (err)
+			return err;
+	}
+
+	inode_lock(temp->d_inode);
+	if (c->metacopy)
+		err = ovl_set_size(temp, &c->stat);
+	if (!err)
+		err = ovl_set_attr(temp, &c->stat);
+	inode_unlock(temp->d_inode);
+
+	return err;
+}
+
+static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
+{
+	struct inode *udir = c->destdir->d_inode;
+	struct inode *inode;
+	struct dentry *newdentry = NULL;
+	struct dentry *temp;
+	int err;
+
+	temp = ovl_get_tmpfile(c);
+	if (IS_ERR(temp))
+		return PTR_ERR(temp);
+
+	err = ovl_copy_up_inode(c, temp);
+	if (err)
+		goto out;
+
+	if (S_ISDIR(c->stat.mode) && c->indexed) {
+		err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp);
+		if (err)
+			goto out;
+	}
+
+	if (c->tmpfile) {
+		inode_lock_nested(udir, I_MUTEX_PARENT);
+		err = ovl_install_temp(c, temp, &newdentry);
+		inode_unlock(udir);
+	} else {
+		err = ovl_install_temp(c, temp, &newdentry);
+	}
+	if (err)
+		goto out;
+
+	if (!c->metacopy)
+		ovl_set_upperdata(d_inode(c->dentry));
+	inode = d_inode(c->dentry);
+	ovl_inode_update(inode, newdentry);
+	if (S_ISDIR(inode->i_mode))
+		ovl_set_flag(OVL_WHITEOUTS, inode);
+
+out:
+	if (err && !c->tmpfile)
+		ovl_cleanup(d_inode(c->workdir), temp);
+	dput(temp);
+	return err;
+
+}
+
+/*
+ * Copy up a single dentry
+ *
+ * All renames start with copy up of source if necessary.  The actual
+ * rename will only proceed once the copy up was successful.  Copy up uses
+ * upper parent i_mutex for exclusion.  Since rename can change d_parent it
+ * is possible that the copy up will lock the old parent.  At that point
+ * the file will have already been copied up anyway.
+ */
+static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
+{
+	int err;
+	struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info;
+	bool to_index = false;
+
+	/*
+	 * Indexed non-dir is copied up directly to the index entry and then
+	 * hardlinked to upper dir. Indexed dir is copied up to indexdir,
+	 * then index entry is created and then copied up dir installed.
+	 * Copying dir up to indexdir instead of workdir simplifies locking.
+	 */
+	if (ovl_need_index(c->dentry)) {
+		c->indexed = true;
+		if (S_ISDIR(c->stat.mode))
+			c->workdir = ovl_indexdir(c->dentry->d_sb);
+		else
+			to_index = true;
+	}
+
+	if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index)
+		c->origin = true;
+
+	if (to_index) {
+		c->destdir = ovl_indexdir(c->dentry->d_sb);
+		err = ovl_get_index_name(c->lowerpath.dentry, &c->destname);
+		if (err)
+			return err;
+	} else if (WARN_ON(!c->parent)) {
+		/* Disconnected dentry must be copied up to index dir */
+		return -EIO;
+	} else {
+		/*
+		 * Mark parent "impure" because it may now contain non-pure
+		 * upper
+		 */
+		err = ovl_set_impure(c->parent, c->destdir);
+		if (err)
+			return err;
+	}
+
+	/* Should we copyup with O_TMPFILE or with workdir? */
+	if (S_ISREG(c->stat.mode) && ofs->tmpfile) {
+		c->tmpfile = true;
+		err = ovl_copy_up_locked(c);
+	} else {
+		err = ovl_lock_rename_workdir(c->workdir, c->destdir);
+		if (!err) {
+			err = ovl_copy_up_locked(c);
+			unlock_rename(c->workdir, c->destdir);
+		}
+	}
+
+
+	if (err)
+		goto out;
+
+	if (c->indexed)
+		ovl_set_flag(OVL_INDEX, d_inode(c->dentry));
+
+	if (to_index) {
+		/* Initialize nlink for copy up of disconnected dentry */
+		err = ovl_set_nlink_upper(c->dentry);
+	} else {
+		struct inode *udir = d_inode(c->destdir);
+
+		/* Restore timestamps on parent (best effort) */
+		inode_lock(udir);
+		ovl_set_timestamps(c->destdir, &c->pstat);
+		inode_unlock(udir);
+
+		ovl_dentry_set_upper_alias(c->dentry);
+	}
+
+out:
+	if (to_index)
+		kfree(c->destname.name);
+	return err;
+}
+
+static bool ovl_need_meta_copy_up(struct dentry *dentry, umode_t mode,
+				  int flags)
+{
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+
+	if (!ofs->config.metacopy)
+		return false;
+
+	if (!S_ISREG(mode))
+		return false;
+
+	if (flags && ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC)))
+		return false;
+
+	return true;
+}
+
+/* Copy up data of an inode which was copied up metadata only in the past. */
+static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
+{
+	struct path upperpath, datapath;
+	int err;
+	char *capability = NULL;
+	ssize_t uninitialized_var(cap_size);
+
+	ovl_path_upper(c->dentry, &upperpath);
+	if (WARN_ON(upperpath.dentry == NULL))
+		return -EIO;
+
+	ovl_path_lowerdata(c->dentry, &datapath);
+	if (WARN_ON(datapath.dentry == NULL))
+		return -EIO;
+
+	if (c->stat.size) {
+		err = cap_size = ovl_getxattr(upperpath.dentry, XATTR_NAME_CAPS,
+					      &capability, 0);
+		if (err < 0 && err != -ENODATA)
+			goto out;
+	}
+
+	err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size);
+	if (err)
+		goto out_free;
+
+	/*
+	 * Writing to upper file will clear security.capability xattr. We
+	 * don't want that to happen for normal copy-up operation.
+	 */
+	if (capability) {
+		err = ovl_do_setxattr(upperpath.dentry, XATTR_NAME_CAPS,
+				      capability, cap_size, 0);
+		if (err)
+			goto out_free;
+	}
+
+
+	err = vfs_removexattr(upperpath.dentry, OVL_XATTR_METACOPY);
+	if (err)
+		goto out_free;
+
+	ovl_set_upperdata(d_inode(c->dentry));
+out_free:
+	kfree(capability);
+out:
+	return err;
+}
+
+static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
+			   int flags)
+{
+	int err;
+	DEFINE_DELAYED_CALL(done);
+	struct path parentpath;
+	struct ovl_copy_up_ctx ctx = {
+		.parent = parent,
+		.dentry = dentry,
+		.workdir = ovl_workdir(dentry),
+	};
+
+	if (WARN_ON(!ctx.workdir))
+		return -EROFS;
+
+	ovl_path_lower(dentry, &ctx.lowerpath);
+	err = vfs_getattr(&ctx.lowerpath, &ctx.stat,
+			  STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
+	if (err)
+		return err;
+
+	ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags);
+
+	if (parent) {
+		ovl_path_upper(parent, &parentpath);
+		ctx.destdir = parentpath.dentry;
+		ctx.destname = dentry->d_name;
+
+		err = vfs_getattr(&parentpath, &ctx.pstat,
+				  STATX_ATIME | STATX_MTIME,
+				  AT_STATX_SYNC_AS_STAT);
+		if (err)
+			return err;
+	}
+
+	/* maybe truncate regular file. this has no effect on dirs */
+	if (flags & O_TRUNC)
+		ctx.stat.size = 0;
+
+	if (S_ISLNK(ctx.stat.mode)) {
+		ctx.link = vfs_get_link(ctx.lowerpath.dentry, &done);
+		if (IS_ERR(ctx.link))
+			return PTR_ERR(ctx.link);
+	}
+
+	err = ovl_copy_up_start(dentry, flags);
+	/* err < 0: interrupted, err > 0: raced with another copy-up */
+	if (unlikely(err)) {
+		if (err > 0)
+			err = 0;
+	} else {
+		if (!ovl_dentry_upper(dentry))
+			err = ovl_do_copy_up(&ctx);
+		if (!err && parent && !ovl_dentry_has_upper_alias(dentry))
+			err = ovl_link_up(&ctx);
+		if (!err && ovl_dentry_needs_data_copy_up_locked(dentry, flags))
+			err = ovl_copy_up_meta_inode_data(&ctx);
+		ovl_copy_up_end(dentry);
+	}
+	do_delayed_call(&done);
+
+	return err;
+}
+
+int ovl_copy_up_flags(struct dentry *dentry, int flags)
+{
+	int err = 0;
+	const struct cred *old_cred;
+	bool disconnected = (dentry->d_flags & DCACHE_DISCONNECTED);
+
+	/*
+	 * With NFS export, copy up can get called for a disconnected non-dir.
+	 * In this case, we will copy up lower inode to index dir without
+	 * linking it to upper dir.
+	 */
+	if (WARN_ON(disconnected && d_is_dir(dentry)))
+		return -EIO;
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	while (!err) {
+		struct dentry *next;
+		struct dentry *parent = NULL;
+
+		if (ovl_already_copied_up(dentry, flags))
+			break;
+
+		next = dget(dentry);
+		/* find the topmost dentry not yet copied up */
+		for (; !disconnected;) {
+			parent = dget_parent(next);
+
+			if (ovl_dentry_upper(parent))
+				break;
+
+			dput(next);
+			next = parent;
+		}
+
+		err = ovl_copy_up_one(parent, next, flags);
+
+		dput(parent);
+		dput(next);
+	}
+	revert_creds(old_cred);
+
+	return err;
+}
+
+static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
+{
+	/* Copy up of disconnected dentry does not set upper alias */
+	if (ovl_already_copied_up(dentry, flags))
+		return false;
+
+	if (special_file(d_inode(dentry)->i_mode))
+		return false;
+
+	if (!ovl_open_flags_need_copy_up(flags))
+		return false;
+
+	return true;
+}
+
+int ovl_maybe_copy_up(struct dentry *dentry, int flags)
+{
+	int err = 0;
+
+	if (ovl_open_need_copy_up(dentry, flags)) {
+		err = ovl_want_write(dentry);
+		if (!err) {
+			err = ovl_copy_up_flags(dentry, flags);
+			ovl_drop_write(dentry);
+		}
+	}
+
+	return err;
+}
+
+int ovl_copy_up_with_data(struct dentry *dentry)
+{
+	return ovl_copy_up_flags(dentry, O_WRONLY);
+}
+
+int ovl_copy_up(struct dentry *dentry)
+{
+	return ovl_copy_up_flags(dentry, 0);
+}
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
new file mode 100644
index 000000000..06dc64962
--- /dev/null
+++ b/fs/overlayfs/dir.c
@@ -0,0 +1,1259 @@
+/*
+ *
+ * Copyright (C) 2011 Novell Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/xattr.h>
+#include <linux/security.h>
+#include <linux/cred.h>
+#include <linux/module.h>
+#include <linux/posix_acl.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/atomic.h>
+#include <linux/ratelimit.h>
+#include "overlayfs.h"
+
+static unsigned short ovl_redirect_max = 256;
+module_param_named(redirect_max, ovl_redirect_max, ushort, 0644);
+MODULE_PARM_DESC(ovl_redirect_max,
+		 "Maximum length of absolute redirect xattr value");
+
+static int ovl_set_redirect(struct dentry *dentry, bool samedir);
+
+int ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
+{
+	int err;
+
+	dget(wdentry);
+	if (d_is_dir(wdentry))
+		err = ovl_do_rmdir(wdir, wdentry);
+	else
+		err = ovl_do_unlink(wdir, wdentry);
+	dput(wdentry);
+
+	if (err) {
+		pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n",
+		       wdentry, err);
+	}
+
+	return err;
+}
+
+static struct dentry *ovl_lookup_temp(struct dentry *workdir)
+{
+	struct dentry *temp;
+	char name[20];
+	static atomic_t temp_id = ATOMIC_INIT(0);
+
+	/* counter is allowed to wrap, since temp dentries are ephemeral */
+	snprintf(name, sizeof(name), "#%x", atomic_inc_return(&temp_id));
+
+	temp = lookup_one_len(name, workdir, strlen(name));
+	if (!IS_ERR(temp) && temp->d_inode) {
+		pr_err("overlayfs: workdir/%s already exists\n", name);
+		dput(temp);
+		temp = ERR_PTR(-EIO);
+	}
+
+	return temp;
+}
+
+/* caller holds i_mutex on workdir */
+static struct dentry *ovl_whiteout(struct dentry *workdir)
+{
+	int err;
+	struct dentry *whiteout;
+	struct inode *wdir = workdir->d_inode;
+
+	whiteout = ovl_lookup_temp(workdir);
+	if (IS_ERR(whiteout))
+		return whiteout;
+
+	err = ovl_do_whiteout(wdir, whiteout);
+	if (err) {
+		dput(whiteout);
+		whiteout = ERR_PTR(err);
+	}
+
+	return whiteout;
+}
+
+/* Caller must hold i_mutex on both workdir and dir */
+int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
+			     struct dentry *dentry)
+{
+	struct inode *wdir = workdir->d_inode;
+	struct dentry *whiteout;
+	int err;
+	int flags = 0;
+
+	whiteout = ovl_whiteout(workdir);
+	err = PTR_ERR(whiteout);
+	if (IS_ERR(whiteout))
+		return err;
+
+	if (d_is_dir(dentry))
+		flags = RENAME_EXCHANGE;
+
+	err = ovl_do_rename(wdir, whiteout, dir, dentry, flags);
+	if (err)
+		goto kill_whiteout;
+	if (flags)
+		ovl_cleanup(wdir, dentry);
+
+out:
+	dput(whiteout);
+	return err;
+
+kill_whiteout:
+	ovl_cleanup(wdir, whiteout);
+	goto out;
+}
+
+int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry, umode_t mode)
+{
+	int err;
+	struct dentry *d, *dentry = *newdentry;
+
+	err = ovl_do_mkdir(dir, dentry, mode);
+	if (err)
+		return err;
+
+	if (likely(!d_unhashed(dentry)))
+		return 0;
+
+	/*
+	 * vfs_mkdir() may succeed and leave the dentry passed
+	 * to it unhashed and negative. If that happens, try to
+	 * lookup a new hashed and positive dentry.
+	 */
+	d = lookup_one_len(dentry->d_name.name, dentry->d_parent,
+			   dentry->d_name.len);
+	if (IS_ERR(d)) {
+		pr_warn("overlayfs: failed lookup after mkdir (%pd2, err=%i).\n",
+			dentry, err);
+		return PTR_ERR(d);
+	}
+	dput(dentry);
+	*newdentry = d;
+
+	return 0;
+}
+
+struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry,
+			       struct ovl_cattr *attr)
+{
+	int err;
+
+	if (IS_ERR(newdentry))
+		return newdentry;
+
+	err = -ESTALE;
+	if (newdentry->d_inode)
+		goto out;
+
+	if (attr->hardlink) {
+		err = ovl_do_link(attr->hardlink, dir, newdentry);
+	} else {
+		switch (attr->mode & S_IFMT) {
+		case S_IFREG:
+			err = ovl_do_create(dir, newdentry, attr->mode);
+			break;
+
+		case S_IFDIR:
+			/* mkdir is special... */
+			err =  ovl_mkdir_real(dir, &newdentry, attr->mode);
+			break;
+
+		case S_IFCHR:
+		case S_IFBLK:
+		case S_IFIFO:
+		case S_IFSOCK:
+			err = ovl_do_mknod(dir, newdentry, attr->mode,
+					   attr->rdev);
+			break;
+
+		case S_IFLNK:
+			err = ovl_do_symlink(dir, newdentry, attr->link);
+			break;
+
+		default:
+			err = -EPERM;
+		}
+	}
+	if (!err && WARN_ON(!newdentry->d_inode)) {
+		/*
+		 * Not quite sure if non-instantiated dentry is legal or not.
+		 * VFS doesn't seem to care so check and warn here.
+		 */
+		err = -EIO;
+	}
+out:
+	if (err) {
+		dput(newdentry);
+		return ERR_PTR(err);
+	}
+	return newdentry;
+}
+
+struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr)
+{
+	return ovl_create_real(d_inode(workdir), ovl_lookup_temp(workdir),
+			       attr);
+}
+
+static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper,
+			       int xerr)
+{
+	int err;
+
+	err = ovl_check_setxattr(dentry, upper, OVL_XATTR_OPAQUE, "y", 1, xerr);
+	if (!err)
+		ovl_dentry_set_opaque(dentry);
+
+	return err;
+}
+
+static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry)
+{
+	/*
+	 * Fail with -EIO when trying to create opaque dir and upper doesn't
+	 * support xattrs. ovl_rename() calls ovl_set_opaque_xerr(-EXDEV) to
+	 * return a specific error for noxattr case.
+	 */
+	return ovl_set_opaque_xerr(dentry, upperdentry, -EIO);
+}
+
+/*
+ * Common operations required to be done after creation of file on upper.
+ * If @hardlink is false, then @inode is a pre-allocated inode, we may or
+ * may not use to instantiate the new dentry.
+ */
+static int ovl_instantiate(struct dentry *dentry, struct inode *inode,
+			   struct dentry *newdentry, bool hardlink)
+{
+	struct ovl_inode_params oip = {
+		.upperdentry = newdentry,
+		.newinode = inode,
+	};
+
+	ovl_dir_modified(dentry->d_parent, false);
+	ovl_dentry_set_upper_alias(dentry);
+	if (!hardlink) {
+		/*
+		 * ovl_obtain_alias() can be called after ovl_create_real()
+		 * and before we get here, so we may get an inode from cache
+		 * with the same real upperdentry that is not the inode we
+		 * pre-allocated.  In this case we will use the cached inode
+		 * to instantiate the new dentry.
+		 *
+		 * XXX: if we ever use ovl_obtain_alias() to decode directory
+		 * file handles, need to use ovl_get_inode_locked() and
+		 * d_instantiate_new() here to prevent from creating two
+		 * hashed directory inode aliases.
+		 */
+		inode = ovl_get_inode(dentry->d_sb, &oip);
+		if (IS_ERR(inode))
+			return PTR_ERR(inode);
+	} else {
+		WARN_ON(ovl_inode_real(inode) != d_inode(newdentry));
+		dput(newdentry);
+		inc_nlink(inode);
+	}
+
+	d_instantiate(dentry, inode);
+	if (inode != oip.newinode) {
+		pr_warn_ratelimited("overlayfs: newly created inode found in cache (%pd2)\n",
+				    dentry);
+	}
+
+	/* Force lookup of new upper hardlink to find its lower */
+	if (hardlink)
+		d_drop(dentry);
+
+	return 0;
+}
+
+static bool ovl_type_merge(struct dentry *dentry)
+{
+	return OVL_TYPE_MERGE(ovl_path_type(dentry));
+}
+
+static bool ovl_type_origin(struct dentry *dentry)
+{
+	return OVL_TYPE_ORIGIN(ovl_path_type(dentry));
+}
+
+static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
+			    struct ovl_cattr *attr)
+{
+	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
+	struct inode *udir = upperdir->d_inode;
+	struct dentry *newdentry;
+	int err;
+
+	if (!attr->hardlink && !IS_POSIXACL(udir))
+		attr->mode &= ~current_umask();
+
+	inode_lock_nested(udir, I_MUTEX_PARENT);
+	newdentry = ovl_create_real(udir,
+				    lookup_one_len(dentry->d_name.name,
+						   upperdir,
+						   dentry->d_name.len),
+				    attr);
+	err = PTR_ERR(newdentry);
+	if (IS_ERR(newdentry))
+		goto out_unlock;
+
+	if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry)) {
+		/* Setting opaque here is just an optimization, allow to fail */
+		ovl_set_opaque(dentry, newdentry);
+	}
+
+	err = ovl_instantiate(dentry, inode, newdentry, !!attr->hardlink);
+	if (err)
+		goto out_cleanup;
+out_unlock:
+	inode_unlock(udir);
+	return err;
+
+out_cleanup:
+	ovl_cleanup(udir, newdentry);
+	dput(newdentry);
+	goto out_unlock;
+}
+
+static struct dentry *ovl_clear_empty(struct dentry *dentry,
+				      struct list_head *list)
+{
+	struct dentry *workdir = ovl_workdir(dentry);
+	struct inode *wdir = workdir->d_inode;
+	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
+	struct inode *udir = upperdir->d_inode;
+	struct path upperpath;
+	struct dentry *upper;
+	struct dentry *opaquedir;
+	struct kstat stat;
+	int err;
+
+	if (WARN_ON(!workdir))
+		return ERR_PTR(-EROFS);
+
+	err = ovl_lock_rename_workdir(workdir, upperdir);
+	if (err)
+		goto out;
+
+	ovl_path_upper(dentry, &upperpath);
+	err = vfs_getattr(&upperpath, &stat,
+			  STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
+	if (err)
+		goto out_unlock;
+
+	err = -ESTALE;
+	if (!S_ISDIR(stat.mode))
+		goto out_unlock;
+	upper = upperpath.dentry;
+	if (upper->d_parent->d_inode != udir)
+		goto out_unlock;
+
+	opaquedir = ovl_create_temp(workdir, OVL_CATTR(stat.mode));
+	err = PTR_ERR(opaquedir);
+	if (IS_ERR(opaquedir))
+		goto out_unlock;
+
+	err = ovl_copy_xattr(upper, opaquedir);
+	if (err)
+		goto out_cleanup;
+
+	err = ovl_set_opaque(dentry, opaquedir);
+	if (err)
+		goto out_cleanup;
+
+	inode_lock(opaquedir->d_inode);
+	err = ovl_set_attr(opaquedir, &stat);
+	inode_unlock(opaquedir->d_inode);
+	if (err)
+		goto out_cleanup;
+
+	err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE);
+	if (err)
+		goto out_cleanup;
+
+	ovl_cleanup_whiteouts(upper, list);
+	ovl_cleanup(wdir, upper);
+	unlock_rename(workdir, upperdir);
+
+	/* dentry's upper doesn't match now, get rid of it */
+	d_drop(dentry);
+
+	return opaquedir;
+
+out_cleanup:
+	ovl_cleanup(wdir, opaquedir);
+	dput(opaquedir);
+out_unlock:
+	unlock_rename(workdir, upperdir);
+out:
+	return ERR_PTR(err);
+}
+
+static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name,
+			     const struct posix_acl *acl)
+{
+	void *buffer;
+	size_t size;
+	int err;
+
+	if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !acl)
+		return 0;
+
+	size = posix_acl_to_xattr(NULL, acl, NULL, 0);
+	buffer = kmalloc(size, GFP_KERNEL);
+	if (!buffer)
+		return -ENOMEM;
+
+	size = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
+	err = size;
+	if (err < 0)
+		goto out_free;
+
+	err = vfs_setxattr(upperdentry, name, buffer, size, XATTR_CREATE);
+out_free:
+	kfree(buffer);
+	return err;
+}
+
+static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
+				    struct ovl_cattr *cattr)
+{
+	struct dentry *workdir = ovl_workdir(dentry);
+	struct inode *wdir = workdir->d_inode;
+	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
+	struct inode *udir = upperdir->d_inode;
+	struct dentry *upper;
+	struct dentry *newdentry;
+	int err;
+	struct posix_acl *acl, *default_acl;
+	bool hardlink = !!cattr->hardlink;
+
+	if (WARN_ON(!workdir))
+		return -EROFS;
+
+	if (!hardlink) {
+		err = posix_acl_create(dentry->d_parent->d_inode,
+				       &cattr->mode, &default_acl, &acl);
+		if (err)
+			return err;
+	}
+
+	err = ovl_lock_rename_workdir(workdir, upperdir);
+	if (err)
+		goto out;
+
+	upper = lookup_one_len(dentry->d_name.name, upperdir,
+			       dentry->d_name.len);
+	err = PTR_ERR(upper);
+	if (IS_ERR(upper))
+		goto out_unlock;
+
+	err = -ESTALE;
+	if (d_is_negative(upper) || !IS_WHITEOUT(d_inode(upper)))
+		goto out_dput;
+
+	newdentry = ovl_create_temp(workdir, cattr);
+	err = PTR_ERR(newdentry);
+	if (IS_ERR(newdentry))
+		goto out_dput;
+
+	/*
+	 * mode could have been mutilated due to umask (e.g. sgid directory)
+	 */
+	if (!hardlink &&
+	    !S_ISLNK(cattr->mode) &&
+	    newdentry->d_inode->i_mode != cattr->mode) {
+		struct iattr attr = {
+			.ia_valid = ATTR_MODE,
+			.ia_mode = cattr->mode,
+		};
+		inode_lock(newdentry->d_inode);
+		err = notify_change(newdentry, &attr, NULL);
+		inode_unlock(newdentry->d_inode);
+		if (err)
+			goto out_cleanup;
+	}
+	if (!hardlink) {
+		err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_ACCESS,
+					acl);
+		if (err)
+			goto out_cleanup;
+
+		err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_DEFAULT,
+					default_acl);
+		if (err)
+			goto out_cleanup;
+	}
+
+	if (!hardlink && S_ISDIR(cattr->mode)) {
+		err = ovl_set_opaque(dentry, newdentry);
+		if (err)
+			goto out_cleanup;
+
+		err = ovl_do_rename(wdir, newdentry, udir, upper,
+				    RENAME_EXCHANGE);
+		if (err)
+			goto out_cleanup;
+
+		ovl_cleanup(wdir, upper);
+	} else {
+		err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
+		if (err)
+			goto out_cleanup;
+	}
+	err = ovl_instantiate(dentry, inode, newdentry, hardlink);
+	if (err) {
+		ovl_cleanup(udir, newdentry);
+		dput(newdentry);
+	}
+out_dput:
+	dput(upper);
+out_unlock:
+	unlock_rename(workdir, upperdir);
+out:
+	if (!hardlink) {
+		posix_acl_release(acl);
+		posix_acl_release(default_acl);
+	}
+	return err;
+
+out_cleanup:
+	ovl_cleanup(wdir, newdentry);
+	dput(newdentry);
+	goto out_dput;
+}
+
+static int ovl_create_or_link(struct dentry *dentry, struct inode *inode,
+			      struct ovl_cattr *attr, bool origin)
+{
+	int err;
+	const struct cred *old_cred;
+	struct cred *override_cred;
+	struct dentry *parent = dentry->d_parent;
+
+	err = ovl_copy_up(parent);
+	if (err)
+		return err;
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+
+	/*
+	 * When linking a file with copy up origin into a new parent, mark the
+	 * new parent dir "impure".
+	 */
+	if (origin) {
+		err = ovl_set_impure(parent, ovl_dentry_upper(parent));
+		if (err)
+			goto out_revert_creds;
+	}
+
+	err = -ENOMEM;
+	override_cred = prepare_creds();
+	if (override_cred) {
+		override_cred->fsuid = inode->i_uid;
+		override_cred->fsgid = inode->i_gid;
+		if (!attr->hardlink) {
+			err = security_dentry_create_files_as(dentry,
+					attr->mode, &dentry->d_name, old_cred,
+					override_cred);
+			if (err) {
+				put_cred(override_cred);
+				goto out_revert_creds;
+			}
+		}
+		put_cred(override_creds(override_cred));
+		put_cred(override_cred);
+
+		if (!ovl_dentry_is_whiteout(dentry))
+			err = ovl_create_upper(dentry, inode, attr);
+		else
+			err = ovl_create_over_whiteout(dentry, inode, attr);
+	}
+out_revert_creds:
+	revert_creds(old_cred);
+	return err;
+}
+
+static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
+			     const char *link)
+{
+	int err;
+	struct inode *inode;
+	struct ovl_cattr attr = {
+		.rdev = rdev,
+		.link = link,
+	};
+
+	err = ovl_want_write(dentry);
+	if (err)
+		goto out;
+
+	/* Preallocate inode to be used by ovl_get_inode() */
+	err = -ENOMEM;
+	inode = ovl_new_inode(dentry->d_sb, mode, rdev);
+	if (!inode)
+		goto out_drop_write;
+
+	spin_lock(&inode->i_lock);
+	inode->i_state |= I_CREATING;
+	spin_unlock(&inode->i_lock);
+
+	inode_init_owner(inode, dentry->d_parent->d_inode, mode);
+	attr.mode = inode->i_mode;
+
+	err = ovl_create_or_link(dentry, inode, &attr, false);
+	/* Did we end up using the preallocated inode? */
+	if (inode != d_inode(dentry))
+		iput(inode);
+
+out_drop_write:
+	ovl_drop_write(dentry);
+out:
+	return err;
+}
+
+static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+		      bool excl)
+{
+	return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
+}
+
+static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL);
+}
+
+static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
+		     dev_t rdev)
+{
+	/* Don't allow creation of "whiteout" on overlay */
+	if (S_ISCHR(mode) && rdev == WHITEOUT_DEV)
+		return -EPERM;
+
+	return ovl_create_object(dentry, mode, rdev, NULL);
+}
+
+static int ovl_symlink(struct inode *dir, struct dentry *dentry,
+		       const char *link)
+{
+	return ovl_create_object(dentry, S_IFLNK, 0, link);
+}
+
+static int ovl_set_link_redirect(struct dentry *dentry)
+{
+	const struct cred *old_cred;
+	int err;
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	err = ovl_set_redirect(dentry, false);
+	revert_creds(old_cred);
+
+	return err;
+}
+
+static int ovl_link(struct dentry *old, struct inode *newdir,
+		    struct dentry *new)
+{
+	int err;
+	bool locked = false;
+	struct inode *inode;
+
+	err = ovl_want_write(old);
+	if (err)
+		goto out;
+
+	err = ovl_copy_up(old);
+	if (err)
+		goto out_drop_write;
+
+	err = ovl_copy_up(new->d_parent);
+	if (err)
+		goto out_drop_write;
+
+	if (ovl_is_metacopy_dentry(old)) {
+		err = ovl_set_link_redirect(old);
+		if (err)
+			goto out_drop_write;
+	}
+
+	err = ovl_nlink_start(old, &locked);
+	if (err)
+		goto out_drop_write;
+
+	inode = d_inode(old);
+	ihold(inode);
+
+	err = ovl_create_or_link(new, inode,
+			&(struct ovl_cattr) {.hardlink = ovl_dentry_upper(old)},
+			ovl_type_origin(old));
+	if (err)
+		iput(inode);
+
+	ovl_nlink_end(old, locked);
+out_drop_write:
+	ovl_drop_write(old);
+out:
+	return err;
+}
+
+static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper)
+{
+	return d_inode(ovl_dentry_upper(dentry)) == d_inode(upper);
+}
+
+static int ovl_remove_and_whiteout(struct dentry *dentry,
+				   struct list_head *list)
+{
+	struct dentry *workdir = ovl_workdir(dentry);
+	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
+	struct dentry *upper;
+	struct dentry *opaquedir = NULL;
+	int err;
+
+	if (WARN_ON(!workdir))
+		return -EROFS;
+
+	if (!list_empty(list)) {
+		opaquedir = ovl_clear_empty(dentry, list);
+		err = PTR_ERR(opaquedir);
+		if (IS_ERR(opaquedir))
+			goto out;
+	}
+
+	err = ovl_lock_rename_workdir(workdir, upperdir);
+	if (err)
+		goto out_dput;
+
+	upper = lookup_one_len(dentry->d_name.name, upperdir,
+			       dentry->d_name.len);
+	err = PTR_ERR(upper);
+	if (IS_ERR(upper))
+		goto out_unlock;
+
+	err = -ESTALE;
+	if ((opaquedir && upper != opaquedir) ||
+	    (!opaquedir && ovl_dentry_upper(dentry) &&
+	     !ovl_matches_upper(dentry, upper))) {
+		goto out_dput_upper;
+	}
+
+	err = ovl_cleanup_and_whiteout(workdir, d_inode(upperdir), upper);
+	if (err)
+		goto out_d_drop;
+
+	ovl_dir_modified(dentry->d_parent, true);
+out_d_drop:
+	d_drop(dentry);
+out_dput_upper:
+	dput(upper);
+out_unlock:
+	unlock_rename(workdir, upperdir);
+out_dput:
+	dput(opaquedir);
+out:
+	return err;
+}
+
+static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
+			    struct list_head *list)
+{
+	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
+	struct inode *dir = upperdir->d_inode;
+	struct dentry *upper;
+	struct dentry *opaquedir = NULL;
+	int err;
+
+	if (!list_empty(list)) {
+		opaquedir = ovl_clear_empty(dentry, list);
+		err = PTR_ERR(opaquedir);
+		if (IS_ERR(opaquedir))
+			goto out;
+	}
+
+	inode_lock_nested(dir, I_MUTEX_PARENT);
+	upper = lookup_one_len(dentry->d_name.name, upperdir,
+			       dentry->d_name.len);
+	err = PTR_ERR(upper);
+	if (IS_ERR(upper))
+		goto out_unlock;
+
+	err = -ESTALE;
+	if ((opaquedir && upper != opaquedir) ||
+	    (!opaquedir && !ovl_matches_upper(dentry, upper)))
+		goto out_dput_upper;
+
+	if (is_dir)
+		err = vfs_rmdir(dir, upper);
+	else
+		err = vfs_unlink(dir, upper, NULL);
+	ovl_dir_modified(dentry->d_parent, ovl_type_origin(dentry));
+
+	/*
+	 * Keeping this dentry hashed would mean having to release
+	 * upperpath/lowerpath, which could only be done if we are the
+	 * sole user of this dentry.  Too tricky...  Just unhash for
+	 * now.
+	 */
+	if (!err)
+		d_drop(dentry);
+out_dput_upper:
+	dput(upper);
+out_unlock:
+	inode_unlock(dir);
+	dput(opaquedir);
+out:
+	return err;
+}
+
+static bool ovl_pure_upper(struct dentry *dentry)
+{
+	return !ovl_dentry_lower(dentry) &&
+	       !ovl_test_flag(OVL_WHITEOUTS, d_inode(dentry));
+}
+
+static int ovl_do_remove(struct dentry *dentry, bool is_dir)
+{
+	int err;
+	bool locked = false;
+	const struct cred *old_cred;
+	struct dentry *upperdentry;
+	bool lower_positive = ovl_lower_positive(dentry);
+	LIST_HEAD(list);
+
+	/* No need to clean pure upper removed by vfs_rmdir() */
+	if (is_dir && (lower_positive || !ovl_pure_upper(dentry))) {
+		err = ovl_check_empty_dir(dentry, &list);
+		if (err)
+			goto out;
+	}
+
+	err = ovl_want_write(dentry);
+	if (err)
+		goto out;
+
+	err = ovl_copy_up(dentry->d_parent);
+	if (err)
+		goto out_drop_write;
+
+	err = ovl_nlink_start(dentry, &locked);
+	if (err)
+		goto out_drop_write;
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	if (!lower_positive)
+		err = ovl_remove_upper(dentry, is_dir, &list);
+	else
+		err = ovl_remove_and_whiteout(dentry, &list);
+	revert_creds(old_cred);
+	if (!err) {
+		if (is_dir)
+			clear_nlink(dentry->d_inode);
+		else
+			drop_nlink(dentry->d_inode);
+	}
+	ovl_nlink_end(dentry, locked);
+
+	/*
+	 * Copy ctime
+	 *
+	 * Note: we fail to update ctime if there was no copy-up, only a
+	 * whiteout
+	 */
+	upperdentry = ovl_dentry_upper(dentry);
+	if (upperdentry)
+		ovl_copyattr(d_inode(upperdentry), d_inode(dentry));
+
+out_drop_write:
+	ovl_drop_write(dentry);
+out:
+	ovl_cache_free(&list);
+	return err;
+}
+
+static int ovl_unlink(struct inode *dir, struct dentry *dentry)
+{
+	return ovl_do_remove(dentry, false);
+}
+
+static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	return ovl_do_remove(dentry, true);
+}
+
+static bool ovl_type_merge_or_lower(struct dentry *dentry)
+{
+	enum ovl_path_type type = ovl_path_type(dentry);
+
+	return OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type);
+}
+
+static bool ovl_can_move(struct dentry *dentry)
+{
+	return ovl_redirect_dir(dentry->d_sb) ||
+		!d_is_dir(dentry) || !ovl_type_merge_or_lower(dentry);
+}
+
+static char *ovl_get_redirect(struct dentry *dentry, bool abs_redirect)
+{
+	char *buf, *ret;
+	struct dentry *d, *tmp;
+	int buflen = ovl_redirect_max + 1;
+
+	if (!abs_redirect) {
+		ret = kstrndup(dentry->d_name.name, dentry->d_name.len,
+			       GFP_KERNEL);
+		goto out;
+	}
+
+	buf = ret = kmalloc(buflen, GFP_KERNEL);
+	if (!buf)
+		goto out;
+
+	buflen--;
+	buf[buflen] = '\0';
+	for (d = dget(dentry); !IS_ROOT(d);) {
+		const char *name;
+		int thislen;
+
+		spin_lock(&d->d_lock);
+		name = ovl_dentry_get_redirect(d);
+		if (name) {
+			thislen = strlen(name);
+		} else {
+			name = d->d_name.name;
+			thislen = d->d_name.len;
+		}
+
+		/* If path is too long, fall back to userspace move */
+		if (thislen + (name[0] != '/') > buflen) {
+			ret = ERR_PTR(-EXDEV);
+			spin_unlock(&d->d_lock);
+			goto out_put;
+		}
+
+		buflen -= thislen;
+		memcpy(&buf[buflen], name, thislen);
+		spin_unlock(&d->d_lock);
+		tmp = dget_parent(d);
+
+		dput(d);
+		d = tmp;
+
+		/* Absolute redirect: finished */
+		if (buf[buflen] == '/')
+			break;
+		buflen--;
+		buf[buflen] = '/';
+	}
+	ret = kstrdup(&buf[buflen], GFP_KERNEL);
+out_put:
+	dput(d);
+	kfree(buf);
+out:
+	return ret ? ret : ERR_PTR(-ENOMEM);
+}
+
+static bool ovl_need_absolute_redirect(struct dentry *dentry, bool samedir)
+{
+	struct dentry *lowerdentry;
+
+	if (!samedir)
+		return true;
+
+	if (d_is_dir(dentry))
+		return false;
+
+	/*
+	 * For non-dir hardlinked files, we need absolute redirects
+	 * in general as two upper hardlinks could be in different
+	 * dirs. We could put a relative redirect now and convert
+	 * it to absolute redirect later. But when nlink > 1 and
+	 * indexing is on, that means relative redirect needs to be
+	 * converted to absolute during copy up of another lower
+	 * hardllink as well.
+	 *
+	 * So without optimizing too much, just check if lower is
+	 * a hard link or not. If lower is hard link, put absolute
+	 * redirect.
+	 */
+	lowerdentry = ovl_dentry_lower(dentry);
+	return (d_inode(lowerdentry)->i_nlink > 1);
+}
+
+static int ovl_set_redirect(struct dentry *dentry, bool samedir)
+{
+	int err;
+	const char *redirect = ovl_dentry_get_redirect(dentry);
+	bool absolute_redirect = ovl_need_absolute_redirect(dentry, samedir);
+
+	if (redirect && (!absolute_redirect || redirect[0] == '/'))
+		return 0;
+
+	redirect = ovl_get_redirect(dentry, absolute_redirect);
+	if (IS_ERR(redirect))
+		return PTR_ERR(redirect);
+
+	err = ovl_check_setxattr(dentry, ovl_dentry_upper(dentry),
+				 OVL_XATTR_REDIRECT,
+				 redirect, strlen(redirect), -EXDEV);
+	if (!err) {
+		spin_lock(&dentry->d_lock);
+		ovl_dentry_set_redirect(dentry, redirect);
+		spin_unlock(&dentry->d_lock);
+	} else {
+		kfree(redirect);
+		pr_warn_ratelimited("overlayfs: failed to set redirect (%i)\n",
+				    err);
+		/* Fall back to userspace copy-up */
+		err = -EXDEV;
+	}
+	return err;
+}
+
+static int ovl_rename(struct inode *olddir, struct dentry *old,
+		      struct inode *newdir, struct dentry *new,
+		      unsigned int flags)
+{
+	int err;
+	bool locked = false;
+	struct dentry *old_upperdir;
+	struct dentry *new_upperdir;
+	struct dentry *olddentry;
+	struct dentry *newdentry;
+	struct dentry *trap;
+	bool old_opaque;
+	bool new_opaque;
+	bool cleanup_whiteout = false;
+	bool overwrite = !(flags & RENAME_EXCHANGE);
+	bool is_dir = d_is_dir(old);
+	bool new_is_dir = d_is_dir(new);
+	bool samedir = olddir == newdir;
+	struct dentry *opaquedir = NULL;
+	const struct cred *old_cred = NULL;
+	LIST_HEAD(list);
+
+	err = -EINVAL;
+	if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
+		goto out;
+
+	flags &= ~RENAME_NOREPLACE;
+
+	/* Don't copy up directory trees */
+	err = -EXDEV;
+	if (!ovl_can_move(old))
+		goto out;
+	if (!overwrite && !ovl_can_move(new))
+		goto out;
+
+	if (overwrite && new_is_dir && !ovl_pure_upper(new)) {
+		err = ovl_check_empty_dir(new, &list);
+		if (err)
+			goto out;
+	}
+
+	if (overwrite) {
+		if (ovl_lower_positive(old)) {
+			if (!ovl_dentry_is_whiteout(new)) {
+				/* Whiteout source */
+				flags |= RENAME_WHITEOUT;
+			} else {
+				/* Switch whiteouts */
+				flags |= RENAME_EXCHANGE;
+			}
+		} else if (is_dir && ovl_dentry_is_whiteout(new)) {
+			flags |= RENAME_EXCHANGE;
+			cleanup_whiteout = true;
+		}
+	}
+
+	err = ovl_want_write(old);
+	if (err)
+		goto out;
+
+	err = ovl_copy_up(old);
+	if (err)
+		goto out_drop_write;
+
+	err = ovl_copy_up(new->d_parent);
+	if (err)
+		goto out_drop_write;
+	if (!overwrite) {
+		err = ovl_copy_up(new);
+		if (err)
+			goto out_drop_write;
+	} else {
+		err = ovl_nlink_start(new, &locked);
+		if (err)
+			goto out_drop_write;
+	}
+
+	old_cred = ovl_override_creds(old->d_sb);
+
+	if (!list_empty(&list)) {
+		opaquedir = ovl_clear_empty(new, &list);
+		err = PTR_ERR(opaquedir);
+		if (IS_ERR(opaquedir)) {
+			opaquedir = NULL;
+			goto out_revert_creds;
+		}
+	}
+
+	old_upperdir = ovl_dentry_upper(old->d_parent);
+	new_upperdir = ovl_dentry_upper(new->d_parent);
+
+	if (!samedir) {
+		/*
+		 * When moving a merge dir or non-dir with copy up origin into
+		 * a new parent, we are marking the new parent dir "impure".
+		 * When ovl_iterate() iterates an "impure" upper dir, it will
+		 * lookup the origin inodes of the entries to fill d_ino.
+		 */
+		if (ovl_type_origin(old)) {
+			err = ovl_set_impure(new->d_parent, new_upperdir);
+			if (err)
+				goto out_revert_creds;
+		}
+		if (!overwrite && ovl_type_origin(new)) {
+			err = ovl_set_impure(old->d_parent, old_upperdir);
+			if (err)
+				goto out_revert_creds;
+		}
+	}
+
+	trap = lock_rename(new_upperdir, old_upperdir);
+
+	olddentry = lookup_one_len(old->d_name.name, old_upperdir,
+				   old->d_name.len);
+	err = PTR_ERR(olddentry);
+	if (IS_ERR(olddentry))
+		goto out_unlock;
+
+	err = -ESTALE;
+	if (!ovl_matches_upper(old, olddentry))
+		goto out_dput_old;
+
+	newdentry = lookup_one_len(new->d_name.name, new_upperdir,
+				   new->d_name.len);
+	err = PTR_ERR(newdentry);
+	if (IS_ERR(newdentry))
+		goto out_dput_old;
+
+	old_opaque = ovl_dentry_is_opaque(old);
+	new_opaque = ovl_dentry_is_opaque(new);
+
+	err = -ESTALE;
+	if (d_inode(new) && ovl_dentry_upper(new)) {
+		if (opaquedir) {
+			if (newdentry != opaquedir)
+				goto out_dput;
+		} else {
+			if (!ovl_matches_upper(new, newdentry))
+				goto out_dput;
+		}
+	} else {
+		if (!d_is_negative(newdentry)) {
+			if (!new_opaque || !ovl_is_whiteout(newdentry))
+				goto out_dput;
+		} else {
+			if (flags & RENAME_EXCHANGE)
+				goto out_dput;
+		}
+	}
+
+	if (olddentry == trap)
+		goto out_dput;
+	if (newdentry == trap)
+		goto out_dput;
+
+	if (olddentry->d_inode == newdentry->d_inode)
+		goto out_dput;
+
+	err = 0;
+	if (ovl_type_merge_or_lower(old))
+		err = ovl_set_redirect(old, samedir);
+	else if (is_dir && !old_opaque && ovl_type_merge(new->d_parent))
+		err = ovl_set_opaque_xerr(old, olddentry, -EXDEV);
+	if (err)
+		goto out_dput;
+
+	if (!overwrite && ovl_type_merge_or_lower(new))
+		err = ovl_set_redirect(new, samedir);
+	else if (!overwrite && new_is_dir && !new_opaque &&
+		 ovl_type_merge(old->d_parent))
+		err = ovl_set_opaque_xerr(new, newdentry, -EXDEV);
+	if (err)
+		goto out_dput;
+
+	err = ovl_do_rename(old_upperdir->d_inode, olddentry,
+			    new_upperdir->d_inode, newdentry, flags);
+	if (err)
+		goto out_dput;
+
+	if (cleanup_whiteout)
+		ovl_cleanup(old_upperdir->d_inode, newdentry);
+
+	if (overwrite && d_inode(new)) {
+		if (new_is_dir)
+			clear_nlink(d_inode(new));
+		else
+			drop_nlink(d_inode(new));
+	}
+
+	ovl_dir_modified(old->d_parent, ovl_type_origin(old) ||
+			 (!overwrite && ovl_type_origin(new)));
+	ovl_dir_modified(new->d_parent, ovl_type_origin(old) ||
+			 (d_inode(new) && ovl_type_origin(new)));
+
+	/* copy ctime: */
+	ovl_copyattr(d_inode(olddentry), d_inode(old));
+	if (d_inode(new) && ovl_dentry_upper(new))
+		ovl_copyattr(d_inode(newdentry), d_inode(new));
+
+out_dput:
+	dput(newdentry);
+out_dput_old:
+	dput(olddentry);
+out_unlock:
+	unlock_rename(new_upperdir, old_upperdir);
+out_revert_creds:
+	revert_creds(old_cred);
+	ovl_nlink_end(new, locked);
+out_drop_write:
+	ovl_drop_write(old);
+out:
+	dput(opaquedir);
+	ovl_cache_free(&list);
+	return err;
+}
+
+const struct inode_operations ovl_dir_inode_operations = {
+	.lookup		= ovl_lookup,
+	.mkdir		= ovl_mkdir,
+	.symlink	= ovl_symlink,
+	.unlink		= ovl_unlink,
+	.rmdir		= ovl_rmdir,
+	.rename		= ovl_rename,
+	.link		= ovl_link,
+	.setattr	= ovl_setattr,
+	.create		= ovl_create,
+	.mknod		= ovl_mknod,
+	.permission	= ovl_permission,
+	.getattr	= ovl_getattr,
+	.listxattr	= ovl_listxattr,
+	.get_acl	= ovl_get_acl,
+	.update_time	= ovl_update_time,
+};
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
new file mode 100644
index 000000000..ba6c7c592
--- /dev/null
+++ b/fs/overlayfs/export.c
@@ -0,0 +1,850 @@
+/*
+ * Overlayfs NFS export support.
+ *
+ * Amir Goldstein <amir73il@gmail.com>
+ *
+ * Copyright (C) 2017-2018 CTERA Networks. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/fs.h>
+#include <linux/cred.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/xattr.h>
+#include <linux/exportfs.h>
+#include <linux/ratelimit.h>
+#include "overlayfs.h"
+
+static int ovl_encode_maybe_copy_up(struct dentry *dentry)
+{
+	int err;
+
+	if (ovl_dentry_upper(dentry))
+		return 0;
+
+	err = ovl_want_write(dentry);
+	if (!err) {
+		err = ovl_copy_up(dentry);
+		ovl_drop_write(dentry);
+	}
+
+	if (err) {
+		pr_warn_ratelimited("overlayfs: failed to copy up on encode (%pd2, err=%i)\n",
+				    dentry, err);
+	}
+
+	return err;
+}
+
+/*
+ * Before encoding a non-upper directory file handle from real layer N, we need
+ * to check if it will be possible to reconnect an overlay dentry from the real
+ * lower decoded dentry. This is done by following the overlay ancestry up to a
+ * "layer N connected" ancestor and verifying that all parents along the way are
+ * "layer N connectable". If an ancestor that is NOT "layer N connectable" is
+ * found, we need to copy up an ancestor, which is "layer N connectable", thus
+ * making that ancestor "layer N connected". For example:
+ *
+ * layer 1: /a
+ * layer 2: /a/b/c
+ *
+ * The overlay dentry /a is NOT "layer 2 connectable", because if dir /a is
+ * copied up and renamed, upper dir /a will be indexed by lower dir /a from
+ * layer 1. The dir /a from layer 2 will never be indexed, so the algorithm (*)
+ * in ovl_lookup_real_ancestor() will not be able to lookup a connected overlay
+ * dentry from the connected lower dentry /a/b/c.
+ *
+ * To avoid this problem on decode time, we need to copy up an ancestor of
+ * /a/b/c, which is "layer 2 connectable", on encode time. That ancestor is
+ * /a/b. After copy up (and index) of /a/b, it will become "layer 2 connected"
+ * and when the time comes to decode the file handle from lower dentry /a/b/c,
+ * ovl_lookup_real_ancestor() will find the indexed ancestor /a/b and decoding
+ * a connected overlay dentry will be accomplished.
+ *
+ * (*) the algorithm in ovl_lookup_real_ancestor() can be improved to lookup an
+ * entry /a in the lower layers above layer N and find the indexed dir /a from
+ * layer 1. If that improvement is made, then the check for "layer N connected"
+ * will need to verify there are no redirects in lower layers above N. In the
+ * example above, /a will be "layer 2 connectable". However, if layer 2 dir /a
+ * is a target of a layer 1 redirect, then /a will NOT be "layer 2 connectable":
+ *
+ * layer 1: /A (redirect = /a)
+ * layer 2: /a/b/c
+ */
+
+/* Return the lowest layer for encoding a connectable file handle */
+static int ovl_connectable_layer(struct dentry *dentry)
+{
+	struct ovl_entry *oe = OVL_E(dentry);
+
+	/* We can get overlay root from root of any layer */
+	if (dentry == dentry->d_sb->s_root)
+		return oe->numlower;
+
+	/*
+	 * If it's an unindexed merge dir, then it's not connectable with any
+	 * lower layer
+	 */
+	if (ovl_dentry_upper(dentry) &&
+	    !ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+		return 0;
+
+	/* We can get upper/overlay path from indexed/lower dentry */
+	return oe->lowerstack[0].layer->idx;
+}
+
+/*
+ * @dentry is "connected" if all ancestors up to root or a "connected" ancestor
+ * have the same uppermost lower layer as the origin's layer. We may need to
+ * copy up a "connectable" ancestor to make it "connected". A "connected" dentry
+ * cannot become non "connected", so cache positive result in dentry flags.
+ *
+ * Return the connected origin layer or < 0 on error.
+ */
+static int ovl_connect_layer(struct dentry *dentry)
+{
+	struct dentry *next, *parent = NULL;
+	int origin_layer;
+	int err = 0;
+
+	if (WARN_ON(dentry == dentry->d_sb->s_root) ||
+	    WARN_ON(!ovl_dentry_lower(dentry)))
+		return -EIO;
+
+	origin_layer = OVL_E(dentry)->lowerstack[0].layer->idx;
+	if (ovl_dentry_test_flag(OVL_E_CONNECTED, dentry))
+		return origin_layer;
+
+	/* Find the topmost origin layer connectable ancestor of @dentry */
+	next = dget(dentry);
+	for (;;) {
+		parent = dget_parent(next);
+		if (WARN_ON(parent == next)) {
+			err = -EIO;
+			break;
+		}
+
+		/*
+		 * If @parent is not origin layer connectable, then copy up
+		 * @next which is origin layer connectable and we are done.
+		 */
+		if (ovl_connectable_layer(parent) < origin_layer) {
+			err = ovl_encode_maybe_copy_up(next);
+			break;
+		}
+
+		/* If @parent is connected or indexed we are done */
+		if (ovl_dentry_test_flag(OVL_E_CONNECTED, parent) ||
+		    ovl_test_flag(OVL_INDEX, d_inode(parent)))
+			break;
+
+		dput(next);
+		next = parent;
+	}
+
+	dput(parent);
+	dput(next);
+
+	if (!err)
+		ovl_dentry_set_flag(OVL_E_CONNECTED, dentry);
+
+	return err ?: origin_layer;
+}
+
+/*
+ * We only need to encode origin if there is a chance that the same object was
+ * encoded pre copy up and then we need to stay consistent with the same
+ * encoding also after copy up. If non-pure upper is not indexed, then it was
+ * copied up before NFS export was enabled. In that case we don't need to worry
+ * about staying consistent with pre copy up encoding and we encode an upper
+ * file handle. Overlay root dentry is a private case of non-indexed upper.
+ *
+ * The following table summarizes the different file handle encodings used for
+ * different overlay object types:
+ *
+ *  Object type		| Encoding
+ * --------------------------------
+ *  Pure upper		| U
+ *  Non-indexed upper	| U
+ *  Indexed upper	| L (*)
+ *  Non-upper		| L (*)
+ *
+ * U = upper file handle
+ * L = lower file handle
+ *
+ * (*) Connecting an overlay dir from real lower dentry is not always
+ * possible when there are redirects in lower layers and non-indexed merge dirs.
+ * To mitigate those case, we may copy up the lower dir ancestor before encode
+ * a lower dir file handle.
+ *
+ * Return 0 for upper file handle, > 0 for lower file handle or < 0 on error.
+ */
+static int ovl_check_encode_origin(struct dentry *dentry)
+{
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+
+	/* Upper file handle for pure upper */
+	if (!ovl_dentry_lower(dentry))
+		return 0;
+
+	/*
+	 * Upper file handle for non-indexed upper.
+	 *
+	 * Root is never indexed, so if there's an upper layer, encode upper for
+	 * root.
+	 */
+	if (ovl_dentry_upper(dentry) &&
+	    !ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+		return 0;
+
+	/*
+	 * Decoding a merge dir, whose origin's ancestor is under a redirected
+	 * lower dir or under a non-indexed upper is not always possible.
+	 * ovl_connect_layer() will try to make origin's layer "connected" by
+	 * copying up a "connectable" ancestor.
+	 */
+	if (d_is_dir(dentry) && ofs->upper_mnt)
+		return ovl_connect_layer(dentry);
+
+	/* Lower file handle for indexed and non-upper dir/non-dir */
+	return 1;
+}
+
+static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen)
+{
+	struct ovl_fh *fh = NULL;
+	int err, enc_lower;
+
+	/*
+	 * Check if we should encode a lower or upper file handle and maybe
+	 * copy up an ancestor to make lower file handle connectable.
+	 */
+	err = enc_lower = ovl_check_encode_origin(dentry);
+	if (enc_lower < 0)
+		goto fail;
+
+	/* Encode an upper or lower file handle */
+	fh = ovl_encode_real_fh(enc_lower ? ovl_dentry_lower(dentry) :
+				ovl_dentry_upper(dentry), !enc_lower);
+	if (IS_ERR(fh))
+		return PTR_ERR(fh);
+
+	err = -EOVERFLOW;
+	if (fh->len > buflen)
+		goto fail;
+
+	memcpy(buf, (char *)fh, fh->len);
+	err = fh->len;
+
+out:
+	kfree(fh);
+	return err;
+
+fail:
+	pr_warn_ratelimited("overlayfs: failed to encode file handle (%pd2, err=%i, buflen=%d, len=%d, type=%d)\n",
+			    dentry, err, buflen, fh ? (int)fh->len : 0,
+			    fh ? fh->type : 0);
+	goto out;
+}
+
+static int ovl_dentry_to_fh(struct dentry *dentry, u32 *fid, int *max_len)
+{
+	int res, len = *max_len << 2;
+
+	res = ovl_d_to_fh(dentry, (char *)fid, len);
+	if (res <= 0)
+		return FILEID_INVALID;
+
+	len = res;
+
+	/* Round up to dwords */
+	*max_len = (len + 3) >> 2;
+	return OVL_FILEID;
+}
+
+static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len,
+			 struct inode *parent)
+{
+	struct dentry *dentry;
+	int type;
+
+	/* TODO: encode connectable file handles */
+	if (parent)
+		return FILEID_INVALID;
+
+	dentry = d_find_any_alias(inode);
+	if (WARN_ON(!dentry))
+		return FILEID_INVALID;
+
+	type = ovl_dentry_to_fh(dentry, fid, max_len);
+
+	dput(dentry);
+	return type;
+}
+
+/*
+ * Find or instantiate an overlay dentry from real dentries and index.
+ */
+static struct dentry *ovl_obtain_alias(struct super_block *sb,
+				       struct dentry *upper_alias,
+				       struct ovl_path *lowerpath,
+				       struct dentry *index)
+{
+	struct dentry *lower = lowerpath ? lowerpath->dentry : NULL;
+	struct dentry *upper = upper_alias ?: index;
+	struct dentry *dentry;
+	struct inode *inode;
+	struct ovl_entry *oe;
+	struct ovl_inode_params oip = {
+		.lowerpath = lowerpath,
+		.index = index,
+		.numlower = !!lower
+	};
+
+	/* We get overlay directory dentries with ovl_lookup_real() */
+	if (d_is_dir(upper ?: lower))
+		return ERR_PTR(-EIO);
+
+	oip.upperdentry = dget(upper);
+	inode = ovl_get_inode(sb, &oip);
+	if (IS_ERR(inode)) {
+		dput(upper);
+		return ERR_CAST(inode);
+	}
+
+	if (upper)
+		ovl_set_flag(OVL_UPPERDATA, inode);
+
+	dentry = d_find_any_alias(inode);
+	if (!dentry) {
+		dentry = d_alloc_anon(inode->i_sb);
+		if (!dentry)
+			goto nomem;
+		oe = ovl_alloc_entry(lower ? 1 : 0);
+		if (!oe)
+			goto nomem;
+
+		if (lower) {
+			oe->lowerstack->dentry = dget(lower);
+			oe->lowerstack->layer = lowerpath->layer;
+		}
+		dentry->d_fsdata = oe;
+		if (upper_alias)
+			ovl_dentry_set_upper_alias(dentry);
+	}
+
+	return d_instantiate_anon(dentry, inode);
+
+nomem:
+	iput(inode);
+	dput(dentry);
+	return ERR_PTR(-ENOMEM);
+}
+
+/* Get the upper or lower dentry in stach whose on layer @idx */
+static struct dentry *ovl_dentry_real_at(struct dentry *dentry, int idx)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+	int i;
+
+	if (!idx)
+		return ovl_dentry_upper(dentry);
+
+	for (i = 0; i < oe->numlower; i++) {
+		if (oe->lowerstack[i].layer->idx == idx)
+			return oe->lowerstack[i].dentry;
+	}
+
+	return NULL;
+}
+
+/*
+ * Lookup a child overlay dentry to get a connected overlay dentry whose real
+ * dentry is @real. If @real is on upper layer, we lookup a child overlay
+ * dentry with the same name as the real dentry. Otherwise, we need to consult
+ * index for lookup.
+ */
+static struct dentry *ovl_lookup_real_one(struct dentry *connected,
+					  struct dentry *real,
+					  struct ovl_layer *layer)
+{
+	struct inode *dir = d_inode(connected);
+	struct dentry *this, *parent = NULL;
+	struct name_snapshot name;
+	int err;
+
+	/*
+	 * Lookup child overlay dentry by real name. The dir mutex protects us
+	 * from racing with overlay rename. If the overlay dentry that is above
+	 * real has already been moved to a parent that is not under the
+	 * connected overlay dir, we return -ECHILD and restart the lookup of
+	 * connected real path from the top.
+	 */
+	inode_lock_nested(dir, I_MUTEX_PARENT);
+	err = -ECHILD;
+	parent = dget_parent(real);
+	if (ovl_dentry_real_at(connected, layer->idx) != parent)
+		goto fail;
+
+	/*
+	 * We also need to take a snapshot of real dentry name to protect us
+	 * from racing with underlying layer rename. In this case, we don't
+	 * care about returning ESTALE, only from dereferencing a free name
+	 * pointer because we hold no lock on the real dentry.
+	 */
+	take_dentry_name_snapshot(&name, real);
+	this = lookup_one_len(name.name, connected, strlen(name.name));
+	err = PTR_ERR(this);
+	if (IS_ERR(this)) {
+		goto fail;
+	} else if (!this || !this->d_inode) {
+		dput(this);
+		err = -ENOENT;
+		goto fail;
+	} else if (ovl_dentry_real_at(this, layer->idx) != real) {
+		dput(this);
+		err = -ESTALE;
+		goto fail;
+	}
+
+out:
+	release_dentry_name_snapshot(&name);
+	dput(parent);
+	inode_unlock(dir);
+	return this;
+
+fail:
+	pr_warn_ratelimited("overlayfs: failed to lookup one by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
+			    real, layer->idx, connected, err);
+	this = ERR_PTR(err);
+	goto out;
+}
+
+static struct dentry *ovl_lookup_real(struct super_block *sb,
+				      struct dentry *real,
+				      struct ovl_layer *layer);
+
+/*
+ * Lookup an indexed or hashed overlay dentry by real inode.
+ */
+static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
+					    struct dentry *real,
+					    struct ovl_layer *layer)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+	struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt };
+	struct dentry *index = NULL;
+	struct dentry *this = NULL;
+	struct inode *inode;
+
+	/*
+	 * Decoding upper dir from index is expensive, so first try to lookup
+	 * overlay dentry in inode/dcache.
+	 */
+	inode = ovl_lookup_inode(sb, real, !layer->idx);
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
+	if (inode) {
+		this = d_find_any_alias(inode);
+		iput(inode);
+	}
+
+	/*
+	 * For decoded lower dir file handle, lookup index by origin to check
+	 * if lower dir was copied up and and/or removed.
+	 */
+	if (!this && layer->idx && ofs->indexdir && !WARN_ON(!d_is_dir(real))) {
+		index = ovl_lookup_index(ofs, NULL, real, false);
+		if (IS_ERR(index))
+			return index;
+	}
+
+	/* Get connected upper overlay dir from index */
+	if (index) {
+		struct dentry *upper = ovl_index_upper(ofs, index);
+
+		dput(index);
+		if (IS_ERR_OR_NULL(upper))
+			return upper;
+
+		/*
+		 * ovl_lookup_real() in lower layer may call recursively once to
+		 * ovl_lookup_real() in upper layer. The first level call walks
+		 * back lower parents to the topmost indexed parent. The second
+		 * recursive call walks back from indexed upper to the topmost
+		 * connected/hashed upper parent (or up to root).
+		 */
+		this = ovl_lookup_real(sb, upper, &upper_layer);
+		dput(upper);
+	}
+
+	if (IS_ERR_OR_NULL(this))
+		return this;
+
+	if (ovl_dentry_real_at(this, layer->idx) != real) {
+		dput(this);
+		this = ERR_PTR(-EIO);
+	}
+
+	return this;
+}
+
+/*
+ * Lookup an indexed or hashed overlay dentry, whose real dentry is an
+ * ancestor of @real.
+ */
+static struct dentry *ovl_lookup_real_ancestor(struct super_block *sb,
+					       struct dentry *real,
+					       struct ovl_layer *layer)
+{
+	struct dentry *next, *parent = NULL;
+	struct dentry *ancestor = ERR_PTR(-EIO);
+
+	if (real == layer->mnt->mnt_root)
+		return dget(sb->s_root);
+
+	/* Find the topmost indexed or hashed ancestor */
+	next = dget(real);
+	for (;;) {
+		parent = dget_parent(next);
+
+		/*
+		 * Lookup a matching overlay dentry in inode/dentry
+		 * cache or in index by real inode.
+		 */
+		ancestor = ovl_lookup_real_inode(sb, next, layer);
+		if (ancestor)
+			break;
+
+		if (parent == layer->mnt->mnt_root) {
+			ancestor = dget(sb->s_root);
+			break;
+		}
+
+		/*
+		 * If @real has been moved out of the layer root directory,
+		 * we will eventully hit the real fs root. This cannot happen
+		 * by legit overlay rename, so we return error in that case.
+		 */
+		if (parent == next) {
+			ancestor = ERR_PTR(-EXDEV);
+			break;
+		}
+
+		dput(next);
+		next = parent;
+	}
+
+	dput(parent);
+	dput(next);
+
+	return ancestor;
+}
+
+/*
+ * Lookup a connected overlay dentry whose real dentry is @real.
+ * If @real is on upper layer, we lookup a child overlay dentry with the same
+ * path the real dentry. Otherwise, we need to consult index for lookup.
+ */
+static struct dentry *ovl_lookup_real(struct super_block *sb,
+				      struct dentry *real,
+				      struct ovl_layer *layer)
+{
+	struct dentry *connected;
+	int err = 0;
+
+	connected = ovl_lookup_real_ancestor(sb, real, layer);
+	if (IS_ERR(connected))
+		return connected;
+
+	while (!err) {
+		struct dentry *next, *this;
+		struct dentry *parent = NULL;
+		struct dentry *real_connected = ovl_dentry_real_at(connected,
+								   layer->idx);
+
+		if (real_connected == real)
+			break;
+
+		/* Find the topmost dentry not yet connected */
+		next = dget(real);
+		for (;;) {
+			parent = dget_parent(next);
+
+			if (parent == real_connected)
+				break;
+
+			/*
+			 * If real has been moved out of 'real_connected',
+			 * we will not find 'real_connected' and hit the layer
+			 * root. In that case, we need to restart connecting.
+			 * This game can go on forever in the worst case. We
+			 * may want to consider taking s_vfs_rename_mutex if
+			 * this happens more than once.
+			 */
+			if (parent == layer->mnt->mnt_root) {
+				dput(connected);
+				connected = dget(sb->s_root);
+				break;
+			}
+
+			/*
+			 * If real file has been moved out of the layer root
+			 * directory, we will eventully hit the real fs root.
+			 * This cannot happen by legit overlay rename, so we
+			 * return error in that case.
+			 */
+			if (parent == next) {
+				err = -EXDEV;
+				break;
+			}
+
+			dput(next);
+			next = parent;
+		}
+
+		if (!err) {
+			this = ovl_lookup_real_one(connected, next, layer);
+			if (IS_ERR(this))
+				err = PTR_ERR(this);
+
+			/*
+			 * Lookup of child in overlay can fail when racing with
+			 * overlay rename of child away from 'connected' parent.
+			 * In this case, we need to restart the lookup from the
+			 * top, because we cannot trust that 'real_connected' is
+			 * still an ancestor of 'real'. There is a good chance
+			 * that the renamed overlay ancestor is now in cache, so
+			 * ovl_lookup_real_ancestor() will find it and we can
+			 * continue to connect exactly from where lookup failed.
+			 */
+			if (err == -ECHILD) {
+				this = ovl_lookup_real_ancestor(sb, real,
+								layer);
+				err = PTR_ERR_OR_ZERO(this);
+			}
+			if (!err) {
+				dput(connected);
+				connected = this;
+			}
+		}
+
+		dput(parent);
+		dput(next);
+	}
+
+	if (err)
+		goto fail;
+
+	return connected;
+
+fail:
+	pr_warn_ratelimited("overlayfs: failed to lookup by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
+			    real, layer->idx, connected, err);
+	dput(connected);
+	return ERR_PTR(err);
+}
+
+/*
+ * Get an overlay dentry from upper/lower real dentries and index.
+ */
+static struct dentry *ovl_get_dentry(struct super_block *sb,
+				     struct dentry *upper,
+				     struct ovl_path *lowerpath,
+				     struct dentry *index)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+	struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt };
+	struct ovl_layer *layer = upper ? &upper_layer : lowerpath->layer;
+	struct dentry *real = upper ?: (index ?: lowerpath->dentry);
+
+	/*
+	 * Obtain a disconnected overlay dentry from a non-dir real dentry
+	 * and index.
+	 */
+	if (!d_is_dir(real))
+		return ovl_obtain_alias(sb, upper, lowerpath, index);
+
+	/* Removed empty directory? */
+	if ((real->d_flags & DCACHE_DISCONNECTED) || d_unhashed(real))
+		return ERR_PTR(-ENOENT);
+
+	/*
+	 * If real dentry is connected and hashed, get a connected overlay
+	 * dentry whose real dentry is @real.
+	 */
+	return ovl_lookup_real(sb, real, layer);
+}
+
+static struct dentry *ovl_upper_fh_to_d(struct super_block *sb,
+					struct ovl_fh *fh)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+	struct dentry *dentry;
+	struct dentry *upper;
+
+	if (!ofs->upper_mnt)
+		return ERR_PTR(-EACCES);
+
+	upper = ovl_decode_real_fh(fh, ofs->upper_mnt, true);
+	if (IS_ERR_OR_NULL(upper))
+		return upper;
+
+	dentry = ovl_get_dentry(sb, upper, NULL, NULL);
+	dput(upper);
+
+	return dentry;
+}
+
+static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
+					struct ovl_fh *fh)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+	struct ovl_path origin = { };
+	struct ovl_path *stack = &origin;
+	struct dentry *dentry = NULL;
+	struct dentry *index = NULL;
+	struct inode *inode;
+	int err;
+
+	/* First lookup overlay inode in inode cache by origin fh */
+	err = ovl_check_origin_fh(ofs, fh, false, NULL, &stack);
+	if (err)
+		return ERR_PTR(err);
+
+	if (!d_is_dir(origin.dentry) ||
+	    !(origin.dentry->d_flags & DCACHE_DISCONNECTED)) {
+		inode = ovl_lookup_inode(sb, origin.dentry, false);
+		err = PTR_ERR(inode);
+		if (IS_ERR(inode))
+			goto out_err;
+		if (inode) {
+			dentry = d_find_any_alias(inode);
+			iput(inode);
+			if (dentry)
+				goto out;
+		}
+	}
+
+	/* Then lookup indexed upper/whiteout by origin fh */
+	if (ofs->indexdir) {
+		index = ovl_get_index_fh(ofs, fh);
+		err = PTR_ERR(index);
+		if (IS_ERR(index)) {
+			index = NULL;
+			goto out_err;
+		}
+	}
+
+	/* Then try to get a connected upper dir by index */
+	if (index && d_is_dir(index)) {
+		struct dentry *upper = ovl_index_upper(ofs, index);
+
+		err = PTR_ERR(upper);
+		if (IS_ERR_OR_NULL(upper))
+			goto out_err;
+
+		dentry = ovl_get_dentry(sb, upper, NULL, NULL);
+		dput(upper);
+		goto out;
+	}
+
+	/* Find origin.dentry again with ovl_acceptable() layer check */
+	if (d_is_dir(origin.dentry)) {
+		dput(origin.dentry);
+		origin.dentry = NULL;
+		err = ovl_check_origin_fh(ofs, fh, true, NULL, &stack);
+		if (err)
+			goto out_err;
+	}
+	if (index) {
+		err = ovl_verify_origin(index, origin.dentry, false);
+		if (err)
+			goto out_err;
+	}
+
+	/* Get a connected non-upper dir or disconnected non-dir */
+	dentry = ovl_get_dentry(sb, NULL, &origin, index);
+
+out:
+	dput(origin.dentry);
+	dput(index);
+	return dentry;
+
+out_err:
+	dentry = ERR_PTR(err);
+	goto out;
+}
+
+static struct dentry *ovl_fh_to_dentry(struct super_block *sb, struct fid *fid,
+				       int fh_len, int fh_type)
+{
+	struct dentry *dentry = NULL;
+	struct ovl_fh *fh = (struct ovl_fh *) fid;
+	int len = fh_len << 2;
+	unsigned int flags = 0;
+	int err;
+
+	err = -EINVAL;
+	if (fh_type != OVL_FILEID)
+		goto out_err;
+
+	err = ovl_check_fh_len(fh, len);
+	if (err)
+		goto out_err;
+
+	flags = fh->flags;
+	dentry = (flags & OVL_FH_FLAG_PATH_UPPER) ?
+		 ovl_upper_fh_to_d(sb, fh) :
+		 ovl_lower_fh_to_d(sb, fh);
+	err = PTR_ERR(dentry);
+	if (IS_ERR(dentry) && err != -ESTALE)
+		goto out_err;
+
+	return dentry;
+
+out_err:
+	pr_warn_ratelimited("overlayfs: failed to decode file handle (len=%d, type=%d, flags=%x, err=%i)\n",
+			    len, fh_type, flags, err);
+	return ERR_PTR(err);
+}
+
+static struct dentry *ovl_fh_to_parent(struct super_block *sb, struct fid *fid,
+				       int fh_len, int fh_type)
+{
+	pr_warn_ratelimited("overlayfs: connectable file handles not supported; use 'no_subtree_check' exportfs option.\n");
+	return ERR_PTR(-EACCES);
+}
+
+static int ovl_get_name(struct dentry *parent, char *name,
+			struct dentry *child)
+{
+	/*
+	 * ovl_fh_to_dentry() returns connected dir overlay dentries and
+	 * ovl_fh_to_parent() is not implemented, so we should not get here.
+	 */
+	WARN_ON_ONCE(1);
+	return -EIO;
+}
+
+static struct dentry *ovl_get_parent(struct dentry *dentry)
+{
+	/*
+	 * ovl_fh_to_dentry() returns connected dir overlay dentries, so we
+	 * should not get here.
+	 */
+	WARN_ON_ONCE(1);
+	return ERR_PTR(-EIO);
+}
+
+const struct export_operations ovl_export_operations = {
+	.encode_fh	= ovl_encode_fh,
+	.fh_to_dentry	= ovl_fh_to_dentry,
+	.fh_to_parent	= ovl_fh_to_parent,
+	.get_name	= ovl_get_name,
+	.get_parent	= ovl_get_parent,
+};
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
new file mode 100644
index 000000000..73c3e2c21
--- /dev/null
+++ b/fs/overlayfs/file.c
@@ -0,0 +1,653 @@
+/*
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/cred.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/xattr.h>
+#include <linux/uio.h>
+#include <linux/uaccess.h>
+#include "overlayfs.h"
+
+static char ovl_whatisit(struct inode *inode, struct inode *realinode)
+{
+	if (realinode != ovl_inode_upper(inode))
+		return 'l';
+	if (ovl_has_upperdata(inode))
+		return 'u';
+	else
+		return 'm';
+}
+
+/* No atime modificaton nor notify on underlying */
+#define OVL_OPEN_FLAGS (O_NOATIME | FMODE_NONOTIFY)
+
+static struct file *ovl_open_realfile(const struct file *file,
+				      struct inode *realinode)
+{
+	struct inode *inode = file_inode(file);
+	struct file *realfile;
+	const struct cred *old_cred;
+	int flags = file->f_flags | OVL_OPEN_FLAGS;
+
+	old_cred = ovl_override_creds(inode->i_sb);
+	realfile = open_with_fake_path(&file->f_path, flags, realinode,
+				       current_cred());
+	revert_creds(old_cred);
+
+	pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
+		 file, file, ovl_whatisit(inode, realinode), file->f_flags,
+		 realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
+
+	return realfile;
+}
+
+#define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
+
+static int ovl_change_flags(struct file *file, unsigned int flags)
+{
+	struct inode *inode = file_inode(file);
+	int err;
+
+	flags |= OVL_OPEN_FLAGS;
+
+	/* If some flag changed that cannot be changed then something's amiss */
+	if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK))
+		return -EIO;
+
+	flags &= OVL_SETFL_MASK;
+
+	if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
+		return -EPERM;
+
+	if (flags & O_DIRECT) {
+		if (!file->f_mapping->a_ops ||
+		    !file->f_mapping->a_ops->direct_IO)
+			return -EINVAL;
+	}
+
+	if (file->f_op->check_flags) {
+		err = file->f_op->check_flags(flags);
+		if (err)
+			return err;
+	}
+
+	spin_lock(&file->f_lock);
+	file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
+	spin_unlock(&file->f_lock);
+
+	return 0;
+}
+
+static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
+			       bool allow_meta)
+{
+	struct inode *inode = file_inode(file);
+	struct inode *realinode;
+
+	real->flags = 0;
+	real->file = file->private_data;
+
+	if (allow_meta)
+		realinode = ovl_inode_real(inode);
+	else
+		realinode = ovl_inode_realdata(inode);
+
+	/* Has it been copied up since we'd opened it? */
+	if (unlikely(file_inode(real->file) != realinode)) {
+		real->flags = FDPUT_FPUT;
+		real->file = ovl_open_realfile(file, realinode);
+
+		return PTR_ERR_OR_ZERO(real->file);
+	}
+
+	/* Did the flags change since open? */
+	if (unlikely((file->f_flags ^ real->file->f_flags) & ~OVL_OPEN_FLAGS))
+		return ovl_change_flags(real->file, file->f_flags);
+
+	return 0;
+}
+
+static int ovl_real_fdget(const struct file *file, struct fd *real)
+{
+	return ovl_real_fdget_meta(file, real, false);
+}
+
+static int ovl_open(struct inode *inode, struct file *file)
+{
+	struct file *realfile;
+	int err;
+
+	err = ovl_maybe_copy_up(file_dentry(file), file->f_flags);
+	if (err)
+		return err;
+
+	/* No longer need these flags, so don't pass them on to underlying fs */
+	file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
+
+	realfile = ovl_open_realfile(file, ovl_inode_realdata(inode));
+	if (IS_ERR(realfile))
+		return PTR_ERR(realfile);
+
+	file->private_data = realfile;
+
+	return 0;
+}
+
+static int ovl_release(struct inode *inode, struct file *file)
+{
+	fput(file->private_data);
+
+	return 0;
+}
+
+static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
+{
+	struct inode *inode = file_inode(file);
+	struct fd real;
+	const struct cred *old_cred;
+	loff_t ret;
+
+	/*
+	 * The two special cases below do not need to involve real fs,
+	 * so we can optimizing concurrent callers.
+	 */
+	if (offset == 0) {
+		if (whence == SEEK_CUR)
+			return file->f_pos;
+
+		if (whence == SEEK_SET)
+			return vfs_setpos(file, 0, 0);
+	}
+
+	ret = ovl_real_fdget(file, &real);
+	if (ret)
+		return ret;
+
+	/*
+	 * Overlay file f_pos is the master copy that is preserved
+	 * through copy up and modified on read/write, but only real
+	 * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
+	 * limitations that are more strict than ->s_maxbytes for specific
+	 * files, so we use the real file to perform seeks.
+	 */
+	inode_lock(inode);
+	real.file->f_pos = file->f_pos;
+
+	old_cred = ovl_override_creds(inode->i_sb);
+	ret = vfs_llseek(real.file, offset, whence);
+	revert_creds(old_cred);
+
+	file->f_pos = real.file->f_pos;
+	inode_unlock(inode);
+
+	fdput(real);
+
+	return ret;
+}
+
+static void ovl_file_accessed(struct file *file)
+{
+	struct inode *inode, *upperinode;
+
+	if (file->f_flags & O_NOATIME)
+		return;
+
+	inode = file_inode(file);
+	upperinode = ovl_inode_upper(inode);
+
+	if (!upperinode)
+		return;
+
+	if ((!timespec64_equal(&inode->i_mtime, &upperinode->i_mtime) ||
+	     !timespec64_equal(&inode->i_ctime, &upperinode->i_ctime))) {
+		inode->i_mtime = upperinode->i_mtime;
+		inode->i_ctime = upperinode->i_ctime;
+	}
+
+	touch_atime(&file->f_path);
+}
+
+static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
+{
+	int ifl = iocb->ki_flags;
+	rwf_t flags = 0;
+
+	if (ifl & IOCB_NOWAIT)
+		flags |= RWF_NOWAIT;
+	if (ifl & IOCB_HIPRI)
+		flags |= RWF_HIPRI;
+	if (ifl & IOCB_DSYNC)
+		flags |= RWF_DSYNC;
+	if (ifl & IOCB_SYNC)
+		flags |= RWF_SYNC;
+
+	return flags;
+}
+
+static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+	struct file *file = iocb->ki_filp;
+	struct fd real;
+	const struct cred *old_cred;
+	ssize_t ret;
+
+	if (!iov_iter_count(iter))
+		return 0;
+
+	ret = ovl_real_fdget(file, &real);
+	if (ret)
+		return ret;
+
+	old_cred = ovl_override_creds(file_inode(file)->i_sb);
+	ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
+			    ovl_iocb_to_rwf(iocb));
+	revert_creds(old_cred);
+
+	ovl_file_accessed(file);
+
+	fdput(real);
+
+	return ret;
+}
+
+static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file_inode(file);
+	struct fd real;
+	const struct cred *old_cred;
+	ssize_t ret;
+
+	if (!iov_iter_count(iter))
+		return 0;
+
+	inode_lock(inode);
+	/* Update mode */
+	ovl_copyattr(ovl_inode_real(inode), inode);
+	ret = file_remove_privs(file);
+	if (ret)
+		goto out_unlock;
+
+	ret = ovl_real_fdget(file, &real);
+	if (ret)
+		goto out_unlock;
+
+	old_cred = ovl_override_creds(file_inode(file)->i_sb);
+	file_start_write(real.file);
+	ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
+			     ovl_iocb_to_rwf(iocb));
+	file_end_write(real.file);
+	revert_creds(old_cred);
+
+	/* Update size */
+	ovl_copyattr(ovl_inode_real(inode), inode);
+
+	fdput(real);
+
+out_unlock:
+	inode_unlock(inode);
+
+	return ret;
+}
+
+static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+	struct fd real;
+	const struct cred *old_cred;
+	int ret;
+
+	ret = ovl_real_fdget_meta(file, &real, !datasync);
+	if (ret)
+		return ret;
+
+	/* Don't sync lower file for fear of receiving EROFS error */
+	if (file_inode(real.file) == ovl_inode_upper(file_inode(file))) {
+		old_cred = ovl_override_creds(file_inode(file)->i_sb);
+		ret = vfs_fsync_range(real.file, start, end, datasync);
+		revert_creds(old_cred);
+	}
+
+	fdput(real);
+
+	return ret;
+}
+
+static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct file *realfile = file->private_data;
+	const struct cred *old_cred;
+	int ret;
+
+	if (!realfile->f_op->mmap)
+		return -ENODEV;
+
+	if (WARN_ON(file != vma->vm_file))
+		return -EIO;
+
+	vma->vm_file = get_file(realfile);
+
+	old_cred = ovl_override_creds(file_inode(file)->i_sb);
+	ret = call_mmap(vma->vm_file, vma);
+	revert_creds(old_cred);
+
+	if (ret) {
+		/* Drop reference count from new vm_file value */
+		fput(realfile);
+	} else {
+		/* Drop reference count from previous vm_file value */
+		fput(file);
+	}
+
+	ovl_file_accessed(file);
+
+	return ret;
+}
+
+static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
+{
+	struct inode *inode = file_inode(file);
+	struct fd real;
+	const struct cred *old_cred;
+	int ret;
+
+	ret = ovl_real_fdget(file, &real);
+	if (ret)
+		return ret;
+
+	old_cred = ovl_override_creds(file_inode(file)->i_sb);
+	ret = vfs_fallocate(real.file, mode, offset, len);
+	revert_creds(old_cred);
+
+	/* Update size */
+	ovl_copyattr(ovl_inode_real(inode), inode);
+
+	fdput(real);
+
+	return ret;
+}
+
+static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
+{
+	struct fd real;
+	const struct cred *old_cred;
+	int ret;
+
+	ret = ovl_real_fdget(file, &real);
+	if (ret)
+		return ret;
+
+	old_cred = ovl_override_creds(file_inode(file)->i_sb);
+	ret = vfs_fadvise(real.file, offset, len, advice);
+	revert_creds(old_cred);
+
+	fdput(real);
+
+	return ret;
+}
+
+static long ovl_real_ioctl(struct file *file, unsigned int cmd,
+			   unsigned long arg)
+{
+	struct fd real;
+	const struct cred *old_cred;
+	long ret;
+
+	ret = ovl_real_fdget(file, &real);
+	if (ret)
+		return ret;
+
+	old_cred = ovl_override_creds(file_inode(file)->i_sb);
+	ret = vfs_ioctl(real.file, cmd, arg);
+	revert_creds(old_cred);
+
+	fdput(real);
+
+	return ret;
+}
+
+static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd,
+				unsigned long arg, unsigned int iflags)
+{
+	long ret;
+	struct inode *inode = file_inode(file);
+	unsigned int old_iflags;
+
+	if (!inode_owner_or_capable(inode))
+		return -EACCES;
+
+	ret = mnt_want_write_file(file);
+	if (ret)
+		return ret;
+
+	inode_lock(inode);
+
+	/* Check the capability before cred override */
+	ret = -EPERM;
+	old_iflags = READ_ONCE(inode->i_flags);
+	if (((iflags ^ old_iflags) & (S_APPEND | S_IMMUTABLE)) &&
+	    !capable(CAP_LINUX_IMMUTABLE))
+		goto unlock;
+
+	ret = ovl_maybe_copy_up(file_dentry(file), O_WRONLY);
+	if (ret)
+		goto unlock;
+
+	ret = ovl_real_ioctl(file, cmd, arg);
+
+	ovl_copyflags(ovl_inode_real(inode), inode);
+unlock:
+	inode_unlock(inode);
+
+	mnt_drop_write_file(file);
+
+	return ret;
+
+}
+
+static unsigned int ovl_fsflags_to_iflags(unsigned int flags)
+{
+	unsigned int iflags = 0;
+
+	if (flags & FS_SYNC_FL)
+		iflags |= S_SYNC;
+	if (flags & FS_APPEND_FL)
+		iflags |= S_APPEND;
+	if (flags & FS_IMMUTABLE_FL)
+		iflags |= S_IMMUTABLE;
+	if (flags & FS_NOATIME_FL)
+		iflags |= S_NOATIME;
+
+	return iflags;
+}
+
+static long ovl_ioctl_set_fsflags(struct file *file, unsigned int cmd,
+				  unsigned long arg)
+{
+	unsigned int flags;
+
+	if (get_user(flags, (int __user *) arg))
+		return -EFAULT;
+
+	return ovl_ioctl_set_flags(file, cmd, arg,
+				   ovl_fsflags_to_iflags(flags));
+}
+
+static unsigned int ovl_fsxflags_to_iflags(unsigned int xflags)
+{
+	unsigned int iflags = 0;
+
+	if (xflags & FS_XFLAG_SYNC)
+		iflags |= S_SYNC;
+	if (xflags & FS_XFLAG_APPEND)
+		iflags |= S_APPEND;
+	if (xflags & FS_XFLAG_IMMUTABLE)
+		iflags |= S_IMMUTABLE;
+	if (xflags & FS_XFLAG_NOATIME)
+		iflags |= S_NOATIME;
+
+	return iflags;
+}
+
+static long ovl_ioctl_set_fsxflags(struct file *file, unsigned int cmd,
+				   unsigned long arg)
+{
+	struct fsxattr fa;
+
+	memset(&fa, 0, sizeof(fa));
+	if (copy_from_user(&fa, (void __user *) arg, sizeof(fa)))
+		return -EFAULT;
+
+	return ovl_ioctl_set_flags(file, cmd, arg,
+				   ovl_fsxflags_to_iflags(fa.fsx_xflags));
+}
+
+static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	long ret;
+
+	switch (cmd) {
+	case FS_IOC_GETFLAGS:
+	case FS_IOC_FSGETXATTR:
+		ret = ovl_real_ioctl(file, cmd, arg);
+		break;
+
+	case FS_IOC_SETFLAGS:
+		ret = ovl_ioctl_set_fsflags(file, cmd, arg);
+		break;
+
+	case FS_IOC_FSSETXATTR:
+		ret = ovl_ioctl_set_fsxflags(file, cmd, arg);
+		break;
+
+	default:
+		ret = -ENOTTY;
+	}
+
+	return ret;
+}
+
+static long ovl_compat_ioctl(struct file *file, unsigned int cmd,
+			     unsigned long arg)
+{
+	switch (cmd) {
+	case FS_IOC32_GETFLAGS:
+		cmd = FS_IOC_GETFLAGS;
+		break;
+
+	case FS_IOC32_SETFLAGS:
+		cmd = FS_IOC_SETFLAGS;
+		break;
+
+	default:
+		return -ENOIOCTLCMD;
+	}
+
+	return ovl_ioctl(file, cmd, arg);
+}
+
+enum ovl_copyop {
+	OVL_COPY,
+	OVL_CLONE,
+	OVL_DEDUPE,
+};
+
+static ssize_t ovl_copyfile(struct file *file_in, loff_t pos_in,
+			    struct file *file_out, loff_t pos_out,
+			    u64 len, unsigned int flags, enum ovl_copyop op)
+{
+	struct inode *inode_out = file_inode(file_out);
+	struct fd real_in, real_out;
+	const struct cred *old_cred;
+	ssize_t ret;
+
+	ret = ovl_real_fdget(file_out, &real_out);
+	if (ret)
+		return ret;
+
+	ret = ovl_real_fdget(file_in, &real_in);
+	if (ret) {
+		fdput(real_out);
+		return ret;
+	}
+
+	old_cred = ovl_override_creds(file_inode(file_out)->i_sb);
+	switch (op) {
+	case OVL_COPY:
+		ret = vfs_copy_file_range(real_in.file, pos_in,
+					  real_out.file, pos_out, len, flags);
+		break;
+
+	case OVL_CLONE:
+		ret = vfs_clone_file_range(real_in.file, pos_in,
+					   real_out.file, pos_out, len);
+		break;
+
+	case OVL_DEDUPE:
+		ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
+						real_out.file, pos_out, len);
+		break;
+	}
+	revert_creds(old_cred);
+
+	/* Update size */
+	ovl_copyattr(ovl_inode_real(inode_out), inode_out);
+
+	fdput(real_in);
+	fdput(real_out);
+
+	return ret;
+}
+
+static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
+				   struct file *file_out, loff_t pos_out,
+				   size_t len, unsigned int flags)
+{
+	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
+			    OVL_COPY);
+}
+
+static int ovl_clone_file_range(struct file *file_in, loff_t pos_in,
+				struct file *file_out, loff_t pos_out, u64 len)
+{
+	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0,
+			    OVL_CLONE);
+}
+
+static int ovl_dedupe_file_range(struct file *file_in, loff_t pos_in,
+				 struct file *file_out, loff_t pos_out, u64 len)
+{
+	/*
+	 * Don't copy up because of a dedupe request, this wouldn't make sense
+	 * most of the time (data would be duplicated instead of deduplicated).
+	 */
+	if (!ovl_inode_upper(file_inode(file_in)) ||
+	    !ovl_inode_upper(file_inode(file_out)))
+		return -EPERM;
+
+	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0,
+			    OVL_DEDUPE);
+}
+
+const struct file_operations ovl_file_operations = {
+	.open		= ovl_open,
+	.release	= ovl_release,
+	.llseek		= ovl_llseek,
+	.read_iter	= ovl_read_iter,
+	.write_iter	= ovl_write_iter,
+	.fsync		= ovl_fsync,
+	.mmap		= ovl_mmap,
+	.fallocate	= ovl_fallocate,
+	.fadvise	= ovl_fadvise,
+	.unlocked_ioctl	= ovl_ioctl,
+	.compat_ioctl	= ovl_compat_ioctl,
+
+	.copy_file_range	= ovl_copy_file_range,
+	.clone_file_range	= ovl_clone_file_range,
+	.dedupe_file_range	= ovl_dedupe_file_range,
+};
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
new file mode 100644
index 000000000..08e60a6df
--- /dev/null
+++ b/fs/overlayfs/inode.c
@@ -0,0 +1,982 @@
+/*
+ *
+ * Copyright (C) 2011 Novell Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/cred.h>
+#include <linux/xattr.h>
+#include <linux/posix_acl.h>
+#include <linux/ratelimit.h>
+#include "overlayfs.h"
+
+
+int ovl_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	int err;
+	bool full_copy_up = false;
+	struct dentry *upperdentry;
+	const struct cred *old_cred;
+
+	err = setattr_prepare(dentry, attr);
+	if (err)
+		return err;
+
+	err = ovl_want_write(dentry);
+	if (err)
+		goto out;
+
+	if (attr->ia_valid & ATTR_SIZE) {
+		struct inode *realinode = d_inode(ovl_dentry_real(dentry));
+
+		err = -ETXTBSY;
+		if (atomic_read(&realinode->i_writecount) < 0)
+			goto out_drop_write;
+
+		/* Truncate should trigger data copy up as well */
+		full_copy_up = true;
+	}
+
+	if (!full_copy_up)
+		err = ovl_copy_up(dentry);
+	else
+		err = ovl_copy_up_with_data(dentry);
+	if (!err) {
+		struct inode *winode = NULL;
+
+		upperdentry = ovl_dentry_upper(dentry);
+
+		if (attr->ia_valid & ATTR_SIZE) {
+			winode = d_inode(upperdentry);
+			err = get_write_access(winode);
+			if (err)
+				goto out_drop_write;
+		}
+
+		if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
+			attr->ia_valid &= ~ATTR_MODE;
+
+		inode_lock(upperdentry->d_inode);
+		old_cred = ovl_override_creds(dentry->d_sb);
+		err = notify_change(upperdentry, attr, NULL);
+		revert_creds(old_cred);
+		if (!err)
+			ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
+		inode_unlock(upperdentry->d_inode);
+
+		if (winode)
+			put_write_access(winode);
+	}
+out_drop_write:
+	ovl_drop_write(dentry);
+out:
+	return err;
+}
+
+static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat,
+			   struct ovl_layer *lower_layer)
+{
+	bool samefs = ovl_same_sb(dentry->d_sb);
+	unsigned int xinobits = ovl_xino_bits(dentry->d_sb);
+
+	if (samefs) {
+		/*
+		 * When all layers are on the same fs, all real inode
+		 * number are unique, so we use the overlay st_dev,
+		 * which is friendly to du -x.
+		 */
+		stat->dev = dentry->d_sb->s_dev;
+		return 0;
+	} else if (xinobits) {
+		unsigned int shift = 64 - xinobits;
+		/*
+		 * All inode numbers of underlying fs should not be using the
+		 * high xinobits, so we use high xinobits to partition the
+		 * overlay st_ino address space. The high bits holds the fsid
+		 * (upper fsid is 0). This way overlay inode numbers are unique
+		 * and all inodes use overlay st_dev. Inode numbers are also
+		 * persistent for a given layer configuration.
+		 */
+		if (stat->ino >> shift) {
+			pr_warn_ratelimited("overlayfs: inode number too big (%pd2, ino=%llu, xinobits=%d)\n",
+					    dentry, stat->ino, xinobits);
+		} else {
+			if (lower_layer)
+				stat->ino |= ((u64)lower_layer->fsid) << shift;
+
+			stat->dev = dentry->d_sb->s_dev;
+			return 0;
+		}
+	}
+
+	/* The inode could not be mapped to a unified st_ino address space */
+	if (S_ISDIR(dentry->d_inode->i_mode)) {
+		/*
+		 * Always use the overlay st_dev for directories, so 'find
+		 * -xdev' will scan the entire overlay mount and won't cross the
+		 * overlay mount boundaries.
+		 *
+		 * If not all layers are on the same fs the pair {real st_ino;
+		 * overlay st_dev} is not unique, so use the non persistent
+		 * overlay st_ino for directories.
+		 */
+		stat->dev = dentry->d_sb->s_dev;
+		stat->ino = dentry->d_inode->i_ino;
+	} else if (lower_layer && lower_layer->fsid) {
+		/*
+		 * For non-samefs setup, if we cannot map all layers st_ino
+		 * to a unified address space, we need to make sure that st_dev
+		 * is unique per lower fs. Upper layer uses real st_dev and
+		 * lower layers use the unique anonymous bdev assigned to the
+		 * lower fs.
+		 */
+		stat->dev = lower_layer->fs->pseudo_dev;
+	}
+
+	return 0;
+}
+
+int ovl_getattr(const struct path *path, struct kstat *stat,
+		u32 request_mask, unsigned int flags)
+{
+	struct dentry *dentry = path->dentry;
+	enum ovl_path_type type;
+	struct path realpath;
+	const struct cred *old_cred;
+	bool is_dir = S_ISDIR(dentry->d_inode->i_mode);
+	bool samefs = ovl_same_sb(dentry->d_sb);
+	struct ovl_layer *lower_layer = NULL;
+	int err;
+	bool metacopy_blocks = false;
+
+	metacopy_blocks = ovl_is_metacopy_dentry(dentry);
+
+	type = ovl_path_real(dentry, &realpath);
+	old_cred = ovl_override_creds(dentry->d_sb);
+	err = vfs_getattr(&realpath, stat, request_mask, flags);
+	if (err)
+		goto out;
+
+	/*
+	 * For non-dir or same fs, we use st_ino of the copy up origin.
+	 * This guaranties constant st_dev/st_ino across copy up.
+	 * With xino feature and non-samefs, we use st_ino of the copy up
+	 * origin masked with high bits that represent the layer id.
+	 *
+	 * If lower filesystem supports NFS file handles, this also guaranties
+	 * persistent st_ino across mount cycle.
+	 */
+	if (!is_dir || samefs || ovl_xino_bits(dentry->d_sb)) {
+		if (!OVL_TYPE_UPPER(type)) {
+			lower_layer = ovl_layer_lower(dentry);
+		} else if (OVL_TYPE_ORIGIN(type)) {
+			struct kstat lowerstat;
+			u32 lowermask = STATX_INO | STATX_BLOCKS |
+					(!is_dir ? STATX_NLINK : 0);
+
+			ovl_path_lower(dentry, &realpath);
+			err = vfs_getattr(&realpath, &lowerstat,
+					  lowermask, flags);
+			if (err)
+				goto out;
+
+			/*
+			 * Lower hardlinks may be broken on copy up to different
+			 * upper files, so we cannot use the lower origin st_ino
+			 * for those different files, even for the same fs case.
+			 *
+			 * Similarly, several redirected dirs can point to the
+			 * same dir on a lower layer. With the "verify_lower"
+			 * feature, we do not use the lower origin st_ino, if
+			 * we haven't verified that this redirect is unique.
+			 *
+			 * With inodes index enabled, it is safe to use st_ino
+			 * of an indexed origin. The index validates that the
+			 * upper hardlink is not broken and that a redirected
+			 * dir is the only redirect to that origin.
+			 */
+			if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) ||
+			    (!ovl_verify_lower(dentry->d_sb) &&
+			     (is_dir || lowerstat.nlink == 1))) {
+				lower_layer = ovl_layer_lower(dentry);
+				/*
+				 * Cannot use origin st_dev;st_ino because
+				 * origin inode content may differ from overlay
+				 * inode content.
+				 */
+				if (samefs || lower_layer->fsid)
+					stat->ino = lowerstat.ino;
+			}
+
+			/*
+			 * If we are querying a metacopy dentry and lower
+			 * dentry is data dentry, then use the blocks we
+			 * queried just now. We don't have to do additional
+			 * vfs_getattr(). If lower itself is metacopy, then
+			 * additional vfs_getattr() is unavoidable.
+			 */
+			if (metacopy_blocks &&
+			    realpath.dentry == ovl_dentry_lowerdata(dentry)) {
+				stat->blocks = lowerstat.blocks;
+				metacopy_blocks = false;
+			}
+		}
+
+		if (metacopy_blocks) {
+			/*
+			 * If lower is not same as lowerdata or if there was
+			 * no origin on upper, we can end up here.
+			 */
+			struct kstat lowerdatastat;
+			u32 lowermask = STATX_BLOCKS;
+
+			ovl_path_lowerdata(dentry, &realpath);
+			err = vfs_getattr(&realpath, &lowerdatastat,
+					  lowermask, flags);
+			if (err)
+				goto out;
+			stat->blocks = lowerdatastat.blocks;
+		}
+	}
+
+	err = ovl_map_dev_ino(dentry, stat, lower_layer);
+	if (err)
+		goto out;
+
+	/*
+	 * It's probably not worth it to count subdirs to get the
+	 * correct link count.  nlink=1 seems to pacify 'find' and
+	 * other utilities.
+	 */
+	if (is_dir && OVL_TYPE_MERGE(type))
+		stat->nlink = 1;
+
+	/*
+	 * Return the overlay inode nlinks for indexed upper inodes.
+	 * Overlay inode nlink counts the union of the upper hardlinks
+	 * and non-covered lower hardlinks. It does not include the upper
+	 * index hardlink.
+	 */
+	if (!is_dir && ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+		stat->nlink = dentry->d_inode->i_nlink;
+
+out:
+	revert_creds(old_cred);
+
+	return err;
+}
+
+int ovl_permission(struct inode *inode, int mask)
+{
+	struct inode *upperinode = ovl_inode_upper(inode);
+	struct inode *realinode = upperinode ?: ovl_inode_lower(inode);
+	const struct cred *old_cred;
+	int err;
+
+	/* Careful in RCU walk mode */
+	if (!realinode) {
+		WARN_ON(!(mask & MAY_NOT_BLOCK));
+		return -ECHILD;
+	}
+
+	/*
+	 * Check overlay inode with the creds of task and underlying inode
+	 * with creds of mounter
+	 */
+	err = generic_permission(inode, mask);
+	if (err)
+		return err;
+
+	old_cred = ovl_override_creds(inode->i_sb);
+	if (!upperinode &&
+	    !special_file(realinode->i_mode) && mask & MAY_WRITE) {
+		mask &= ~(MAY_WRITE | MAY_APPEND);
+		/* Make sure mounter can read file for copy up later */
+		mask |= MAY_READ;
+	}
+	err = inode_permission(realinode, mask);
+	revert_creds(old_cred);
+
+	return err;
+}
+
+static const char *ovl_get_link(struct dentry *dentry,
+				struct inode *inode,
+				struct delayed_call *done)
+{
+	const struct cred *old_cred;
+	const char *p;
+
+	if (!dentry)
+		return ERR_PTR(-ECHILD);
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	p = vfs_get_link(ovl_dentry_real(dentry), done);
+	revert_creds(old_cred);
+	return p;
+}
+
+bool ovl_is_private_xattr(const char *name)
+{
+	return strncmp(name, OVL_XATTR_PREFIX,
+		       sizeof(OVL_XATTR_PREFIX) - 1) == 0;
+}
+
+int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
+		  const void *value, size_t size, int flags)
+{
+	int err;
+	struct dentry *upperdentry = ovl_i_dentry_upper(inode);
+	struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry);
+	const struct cred *old_cred;
+
+	err = ovl_want_write(dentry);
+	if (err)
+		goto out;
+
+	if (!value && !upperdentry) {
+		old_cred = ovl_override_creds(dentry->d_sb);
+		err = vfs_getxattr(realdentry, name, NULL, 0);
+		revert_creds(old_cred);
+		if (err < 0)
+			goto out_drop_write;
+	}
+
+	if (!upperdentry) {
+		err = ovl_copy_up(dentry);
+		if (err)
+			goto out_drop_write;
+
+		realdentry = ovl_dentry_upper(dentry);
+	}
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	if (value)
+		err = vfs_setxattr(realdentry, name, value, size, flags);
+	else {
+		WARN_ON(flags != XATTR_REPLACE);
+		err = vfs_removexattr(realdentry, name);
+	}
+	revert_creds(old_cred);
+
+	/* copy c/mtime */
+	ovl_copyattr(d_inode(realdentry), inode);
+
+out_drop_write:
+	ovl_drop_write(dentry);
+out:
+	return err;
+}
+
+int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
+		  void *value, size_t size)
+{
+	ssize_t res;
+	const struct cred *old_cred;
+	struct dentry *realdentry =
+		ovl_i_dentry_upper(inode) ?: ovl_dentry_lower(dentry);
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	res = vfs_getxattr(realdentry, name, value, size);
+	revert_creds(old_cred);
+	return res;
+}
+
+static bool ovl_can_list(const char *s)
+{
+	/* List all non-trusted xatts */
+	if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0)
+		return true;
+
+	/* Never list trusted.overlay, list other trusted for superuser only */
+	return !ovl_is_private_xattr(s) &&
+	       ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
+}
+
+ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
+{
+	struct dentry *realdentry = ovl_dentry_real(dentry);
+	ssize_t res;
+	size_t len;
+	char *s;
+	const struct cred *old_cred;
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	res = vfs_listxattr(realdentry, list, size);
+	revert_creds(old_cred);
+	if (res <= 0 || size == 0)
+		return res;
+
+	/* filter out private xattrs */
+	for (s = list, len = res; len;) {
+		size_t slen = strnlen(s, len) + 1;
+
+		/* underlying fs providing us with an broken xattr list? */
+		if (WARN_ON(slen > len))
+			return -EIO;
+
+		len -= slen;
+		if (!ovl_can_list(s)) {
+			res -= slen;
+			memmove(s, s + slen, len);
+		} else {
+			s += slen;
+		}
+	}
+
+	return res;
+}
+
+struct posix_acl *ovl_get_acl(struct inode *inode, int type)
+{
+	struct inode *realinode = ovl_inode_real(inode);
+	const struct cred *old_cred;
+	struct posix_acl *acl;
+
+	if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode))
+		return NULL;
+
+	old_cred = ovl_override_creds(inode->i_sb);
+	acl = get_acl(realinode, type);
+	revert_creds(old_cred);
+
+	return acl;
+}
+
+int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags)
+{
+	if (flags & S_ATIME) {
+		struct ovl_fs *ofs = inode->i_sb->s_fs_info;
+		struct path upperpath = {
+			.mnt = ofs->upper_mnt,
+			.dentry = ovl_upperdentry_dereference(OVL_I(inode)),
+		};
+
+		if (upperpath.dentry) {
+			touch_atime(&upperpath);
+			inode->i_atime = d_inode(upperpath.dentry)->i_atime;
+		}
+	}
+	return 0;
+}
+
+static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+		      u64 start, u64 len)
+{
+	int err;
+	struct inode *realinode = ovl_inode_real(inode);
+	const struct cred *old_cred;
+
+	if (!realinode->i_op->fiemap)
+		return -EOPNOTSUPP;
+
+	old_cred = ovl_override_creds(inode->i_sb);
+
+	if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC)
+		filemap_write_and_wait(realinode->i_mapping);
+
+	err = realinode->i_op->fiemap(realinode, fieinfo, start, len);
+	revert_creds(old_cred);
+
+	return err;
+}
+
+static const struct inode_operations ovl_file_inode_operations = {
+	.setattr	= ovl_setattr,
+	.permission	= ovl_permission,
+	.getattr	= ovl_getattr,
+	.listxattr	= ovl_listxattr,
+	.get_acl	= ovl_get_acl,
+	.update_time	= ovl_update_time,
+	.fiemap		= ovl_fiemap,
+};
+
+static const struct inode_operations ovl_symlink_inode_operations = {
+	.setattr	= ovl_setattr,
+	.get_link	= ovl_get_link,
+	.getattr	= ovl_getattr,
+	.listxattr	= ovl_listxattr,
+	.update_time	= ovl_update_time,
+};
+
+static const struct inode_operations ovl_special_inode_operations = {
+	.setattr	= ovl_setattr,
+	.permission	= ovl_permission,
+	.getattr	= ovl_getattr,
+	.listxattr	= ovl_listxattr,
+	.get_acl	= ovl_get_acl,
+	.update_time	= ovl_update_time,
+};
+
+static const struct address_space_operations ovl_aops = {
+	/* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
+	.direct_IO		= noop_direct_IO,
+};
+
+/*
+ * It is possible to stack overlayfs instance on top of another
+ * overlayfs instance as lower layer. We need to annonate the
+ * stackable i_mutex locks according to stack level of the super
+ * block instance. An overlayfs instance can never be in stack
+ * depth 0 (there is always a real fs below it).  An overlayfs
+ * inode lock will use the lockdep annotaion ovl_i_mutex_key[depth].
+ *
+ * For example, here is a snip from /proc/lockdep_chains after
+ * dir_iterate of nested overlayfs:
+ *
+ * [...] &ovl_i_mutex_dir_key[depth]   (stack_depth=2)
+ * [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1)
+ * [...] &type->i_mutex_dir_key        (stack_depth=0)
+ */
+#define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH
+
+static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode)
+{
+#ifdef CONFIG_LOCKDEP
+	static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING];
+	static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING];
+	static struct lock_class_key ovl_i_lock_key[OVL_MAX_NESTING];
+
+	int depth = inode->i_sb->s_stack_depth - 1;
+
+	if (WARN_ON_ONCE(depth < 0 || depth >= OVL_MAX_NESTING))
+		depth = 0;
+
+	if (S_ISDIR(inode->i_mode))
+		lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]);
+	else
+		lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]);
+
+	lockdep_set_class(&OVL_I(inode)->lock, &ovl_i_lock_key[depth]);
+#endif
+}
+
+static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
+			   unsigned long ino, int fsid)
+{
+	int xinobits = ovl_xino_bits(inode->i_sb);
+
+	/*
+	 * When d_ino is consistent with st_ino (samefs or i_ino has enough
+	 * bits to encode layer), set the same value used for st_ino to i_ino,
+	 * so inode number exposed via /proc/locks and a like will be
+	 * consistent with d_ino and st_ino values. An i_ino value inconsistent
+	 * with d_ino also causes nfsd readdirplus to fail.  When called from
+	 * ovl_new_inode(), ino arg is 0, so i_ino will be updated to real
+	 * upper inode i_ino on ovl_inode_init() or ovl_inode_update().
+	 */
+	if (ovl_same_sb(inode->i_sb) || xinobits) {
+		inode->i_ino = ino;
+		if (xinobits && fsid && !(ino >> (64 - xinobits)))
+			inode->i_ino |= (unsigned long)fsid << (64 - xinobits);
+	} else {
+		inode->i_ino = get_next_ino();
+	}
+	inode->i_mode = mode;
+	inode->i_flags |= S_NOCMTIME;
+#ifdef CONFIG_FS_POSIX_ACL
+	inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE;
+#endif
+
+	ovl_lockdep_annotate_inode_mutex_key(inode);
+
+	switch (mode & S_IFMT) {
+	case S_IFREG:
+		inode->i_op = &ovl_file_inode_operations;
+		inode->i_fop = &ovl_file_operations;
+		inode->i_mapping->a_ops = &ovl_aops;
+		break;
+
+	case S_IFDIR:
+		inode->i_op = &ovl_dir_inode_operations;
+		inode->i_fop = &ovl_dir_operations;
+		break;
+
+	case S_IFLNK:
+		inode->i_op = &ovl_symlink_inode_operations;
+		break;
+
+	default:
+		inode->i_op = &ovl_special_inode_operations;
+		init_special_inode(inode, mode, rdev);
+		break;
+	}
+}
+
+/*
+ * With inodes index enabled, an overlay inode nlink counts the union of upper
+ * hardlinks and non-covered lower hardlinks. During the lifetime of a non-pure
+ * upper inode, the following nlink modifying operations can happen:
+ *
+ * 1. Lower hardlink copy up
+ * 2. Upper hardlink created, unlinked or renamed over
+ * 3. Lower hardlink whiteout or renamed over
+ *
+ * For the first, copy up case, the union nlink does not change, whether the
+ * operation succeeds or fails, but the upper inode nlink may change.
+ * Therefore, before copy up, we store the union nlink value relative to the
+ * lower inode nlink in the index inode xattr trusted.overlay.nlink.
+ *
+ * For the second, upper hardlink case, the union nlink should be incremented
+ * or decremented IFF the operation succeeds, aligned with nlink change of the
+ * upper inode. Therefore, before link/unlink/rename, we store the union nlink
+ * value relative to the upper inode nlink in the index inode.
+ *
+ * For the last, lower cover up case, we simplify things by preceding the
+ * whiteout or cover up with copy up. This makes sure that there is an index
+ * upper inode where the nlink xattr can be stored before the copied up upper
+ * entry is unlink.
+ */
+#define OVL_NLINK_ADD_UPPER	(1 << 0)
+
+/*
+ * On-disk format for indexed nlink:
+ *
+ * nlink relative to the upper inode - "U[+-]NUM"
+ * nlink relative to the lower inode - "L[+-]NUM"
+ */
+
+static int ovl_set_nlink_common(struct dentry *dentry,
+				struct dentry *realdentry, const char *format)
+{
+	struct inode *inode = d_inode(dentry);
+	struct inode *realinode = d_inode(realdentry);
+	char buf[13];
+	int len;
+
+	len = snprintf(buf, sizeof(buf), format,
+		       (int) (inode->i_nlink - realinode->i_nlink));
+
+	if (WARN_ON(len >= sizeof(buf)))
+		return -EIO;
+
+	return ovl_do_setxattr(ovl_dentry_upper(dentry),
+			       OVL_XATTR_NLINK, buf, len, 0);
+}
+
+int ovl_set_nlink_upper(struct dentry *dentry)
+{
+	return ovl_set_nlink_common(dentry, ovl_dentry_upper(dentry), "U%+i");
+}
+
+int ovl_set_nlink_lower(struct dentry *dentry)
+{
+	return ovl_set_nlink_common(dentry, ovl_dentry_lower(dentry), "L%+i");
+}
+
+unsigned int ovl_get_nlink(struct dentry *lowerdentry,
+			   struct dentry *upperdentry,
+			   unsigned int fallback)
+{
+	int nlink_diff;
+	int nlink;
+	char buf[13];
+	int err;
+
+	if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1)
+		return fallback;
+
+	err = vfs_getxattr(upperdentry, OVL_XATTR_NLINK, &buf, sizeof(buf) - 1);
+	if (err < 0)
+		goto fail;
+
+	buf[err] = '\0';
+	if ((buf[0] != 'L' && buf[0] != 'U') ||
+	    (buf[1] != '+' && buf[1] != '-'))
+		goto fail;
+
+	err = kstrtoint(buf + 1, 10, &nlink_diff);
+	if (err < 0)
+		goto fail;
+
+	nlink = d_inode(buf[0] == 'L' ? lowerdentry : upperdentry)->i_nlink;
+	nlink += nlink_diff;
+
+	if (nlink <= 0)
+		goto fail;
+
+	return nlink;
+
+fail:
+	pr_warn_ratelimited("overlayfs: failed to get index nlink (%pd2, err=%i)\n",
+			    upperdentry, err);
+	return fallback;
+}
+
+struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev)
+{
+	struct inode *inode;
+
+	inode = new_inode(sb);
+	if (inode)
+		ovl_fill_inode(inode, mode, rdev, 0, 0);
+
+	return inode;
+}
+
+static int ovl_inode_test(struct inode *inode, void *data)
+{
+	return inode->i_private == data;
+}
+
+static int ovl_inode_set(struct inode *inode, void *data)
+{
+	inode->i_private = data;
+	return 0;
+}
+
+static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry,
+			     struct dentry *upperdentry, bool strict)
+{
+	/*
+	 * For directories, @strict verify from lookup path performs consistency
+	 * checks, so NULL lower/upper in dentry must match NULL lower/upper in
+	 * inode. Non @strict verify from NFS handle decode path passes NULL for
+	 * 'unknown' lower/upper.
+	 */
+	if (S_ISDIR(inode->i_mode) && strict) {
+		/* Real lower dir moved to upper layer under us? */
+		if (!lowerdentry && ovl_inode_lower(inode))
+			return false;
+
+		/* Lookup of an uncovered redirect origin? */
+		if (!upperdentry && ovl_inode_upper(inode))
+			return false;
+	}
+
+	/*
+	 * Allow non-NULL lower inode in ovl_inode even if lowerdentry is NULL.
+	 * This happens when finding a copied up overlay inode for a renamed
+	 * or hardlinked overlay dentry and lower dentry cannot be followed
+	 * by origin because lower fs does not support file handles.
+	 */
+	if (lowerdentry && ovl_inode_lower(inode) != d_inode(lowerdentry))
+		return false;
+
+	/*
+	 * Allow non-NULL __upperdentry in inode even if upperdentry is NULL.
+	 * This happens when finding a lower alias for a copied up hard link.
+	 */
+	if (upperdentry && ovl_inode_upper(inode) != d_inode(upperdentry))
+		return false;
+
+	return true;
+}
+
+struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
+			       bool is_upper)
+{
+	struct inode *inode, *key = d_inode(real);
+
+	inode = ilookup5(sb, (unsigned long) key, ovl_inode_test, key);
+	if (!inode)
+		return NULL;
+
+	if (!ovl_verify_inode(inode, is_upper ? NULL : real,
+			      is_upper ? real : NULL, false)) {
+		iput(inode);
+		return ERR_PTR(-ESTALE);
+	}
+
+	return inode;
+}
+
+bool ovl_lookup_trap_inode(struct super_block *sb, struct dentry *dir)
+{
+	struct inode *key = d_inode(dir);
+	struct inode *trap;
+	bool res;
+
+	trap = ilookup5(sb, (unsigned long) key, ovl_inode_test, key);
+	if (!trap)
+		return false;
+
+	res = IS_DEADDIR(trap) && !ovl_inode_upper(trap) &&
+				  !ovl_inode_lower(trap);
+
+	iput(trap);
+	return res;
+}
+
+/*
+ * Create an inode cache entry for layer root dir, that will intentionally
+ * fail ovl_verify_inode(), so any lookup that will find some layer root
+ * will fail.
+ */
+struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir)
+{
+	struct inode *key = d_inode(dir);
+	struct inode *trap;
+
+	if (!d_is_dir(dir))
+		return ERR_PTR(-ENOTDIR);
+
+	trap = iget5_locked(sb, (unsigned long) key, ovl_inode_test,
+			    ovl_inode_set, key);
+	if (!trap)
+		return ERR_PTR(-ENOMEM);
+
+	if (!(trap->i_state & I_NEW)) {
+		/* Conflicting layer roots? */
+		iput(trap);
+		return ERR_PTR(-ELOOP);
+	}
+
+	trap->i_mode = S_IFDIR;
+	trap->i_flags = S_DEAD;
+	unlock_new_inode(trap);
+
+	return trap;
+}
+
+/*
+ * Does overlay inode need to be hashed by lower inode?
+ */
+static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper,
+			     struct dentry *lower, struct dentry *index)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	/* No, if pure upper */
+	if (!lower)
+		return false;
+
+	/* Yes, if already indexed */
+	if (index)
+		return true;
+
+	/* Yes, if won't be copied up */
+	if (!ofs->upper_mnt)
+		return true;
+
+	/* No, if lower hardlink is or will be broken on copy up */
+	if ((upper || !ovl_indexdir(sb)) &&
+	    !d_is_dir(lower) && d_inode(lower)->i_nlink > 1)
+		return false;
+
+	/* No, if non-indexed upper with NFS export */
+	if (sb->s_export_op && upper)
+		return false;
+
+	/* Otherwise, hash by lower inode for fsnotify */
+	return true;
+}
+
+static struct inode *ovl_iget5(struct super_block *sb, struct inode *newinode,
+			       struct inode *key)
+{
+	return newinode ? inode_insert5(newinode, (unsigned long) key,
+					 ovl_inode_test, ovl_inode_set, key) :
+			  iget5_locked(sb, (unsigned long) key,
+				       ovl_inode_test, ovl_inode_set, key);
+}
+
+struct inode *ovl_get_inode(struct super_block *sb,
+			    struct ovl_inode_params *oip)
+{
+	struct dentry *upperdentry = oip->upperdentry;
+	struct ovl_path *lowerpath = oip->lowerpath;
+	struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL;
+	struct inode *inode;
+	struct dentry *lowerdentry = lowerpath ? lowerpath->dentry : NULL;
+	bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry,
+					oip->index);
+	int fsid = bylower ? lowerpath->layer->fsid : 0;
+	bool is_dir, metacopy = false;
+	unsigned long ino = 0;
+	int err = oip->newinode ? -EEXIST : -ENOMEM;
+
+	if (!realinode)
+		realinode = d_inode(lowerdentry);
+
+	/*
+	 * Copy up origin (lower) may exist for non-indexed upper, but we must
+	 * not use lower as hash key if this is a broken hardlink.
+	 */
+	is_dir = S_ISDIR(realinode->i_mode);
+	if (upperdentry || bylower) {
+		struct inode *key = d_inode(bylower ? lowerdentry :
+						      upperdentry);
+		unsigned int nlink = is_dir ? 1 : realinode->i_nlink;
+
+		inode = ovl_iget5(sb, oip->newinode, key);
+		if (!inode)
+			goto out_err;
+		if (!(inode->i_state & I_NEW)) {
+			/*
+			 * Verify that the underlying files stored in the inode
+			 * match those in the dentry.
+			 */
+			if (!ovl_verify_inode(inode, lowerdentry, upperdentry,
+					      true)) {
+				iput(inode);
+				err = -ESTALE;
+				goto out_err;
+			}
+
+			dput(upperdentry);
+			kfree(oip->redirect);
+			goto out;
+		}
+
+		/* Recalculate nlink for non-dir due to indexing */
+		if (!is_dir)
+			nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink);
+		set_nlink(inode, nlink);
+		ino = key->i_ino;
+	} else {
+		/* Lower hardlink that will be broken on copy up */
+		inode = new_inode(sb);
+		if (!inode) {
+			err = -ENOMEM;
+			goto out_err;
+		}
+		ino = realinode->i_ino;
+		fsid = lowerpath->layer->fsid;
+	}
+	ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev, ino, fsid);
+	ovl_inode_init(inode, upperdentry, lowerdentry, oip->lowerdata);
+
+	if (upperdentry && ovl_is_impuredir(upperdentry))
+		ovl_set_flag(OVL_IMPURE, inode);
+
+	if (oip->index)
+		ovl_set_flag(OVL_INDEX, inode);
+
+	if (upperdentry) {
+		err = ovl_check_metacopy_xattr(upperdentry);
+		if (err < 0)
+			goto out_err;
+		metacopy = err;
+		if (!metacopy)
+			ovl_set_flag(OVL_UPPERDATA, inode);
+	}
+
+	OVL_I(inode)->redirect = oip->redirect;
+
+	if (bylower)
+		ovl_set_flag(OVL_CONST_INO, inode);
+
+	/* Check for non-merge dir that may have whiteouts */
+	if (is_dir) {
+		if (((upperdentry && lowerdentry) || oip->numlower > 1) ||
+		    ovl_check_origin_xattr(upperdentry ?: lowerdentry)) {
+			ovl_set_flag(OVL_WHITEOUTS, inode);
+		}
+	}
+
+	if (inode->i_state & I_NEW)
+		unlock_new_inode(inode);
+out:
+	return inode;
+
+out_err:
+	pr_warn_ratelimited("overlayfs: failed to get inode (%i)\n", err);
+	inode = ERR_PTR(err);
+	goto out;
+}
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
new file mode 100644
index 000000000..badf03926
--- /dev/null
+++ b/fs/overlayfs/namei.c
@@ -0,0 +1,1164 @@
+/*
+ * Copyright (C) 2011 Novell Inc.
+ * Copyright (C) 2016 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/fs.h>
+#include <linux/cred.h>
+#include <linux/ctype.h>
+#include <linux/namei.h>
+#include <linux/xattr.h>
+#include <linux/ratelimit.h>
+#include <linux/mount.h>
+#include <linux/exportfs.h>
+#include "overlayfs.h"
+
+struct ovl_lookup_data {
+	struct super_block *sb;
+	struct qstr name;
+	bool is_dir;
+	bool opaque;
+	bool stop;
+	bool last;
+	char *redirect;
+	bool metacopy;
+};
+
+static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
+			      size_t prelen, const char *post)
+{
+	int res;
+	char *buf;
+
+	buf = ovl_get_redirect_xattr(dentry, prelen + strlen(post));
+	if (IS_ERR_OR_NULL(buf))
+		return PTR_ERR(buf);
+
+	if (buf[0] == '/') {
+		/*
+		 * One of the ancestor path elements in an absolute path
+		 * lookup in ovl_lookup_layer() could have been opaque and
+		 * that will stop further lookup in lower layers (d->stop=true)
+		 * But we have found an absolute redirect in decendant path
+		 * element and that should force continue lookup in lower
+		 * layers (reset d->stop).
+		 */
+		d->stop = false;
+	} else {
+		res = strlen(buf) + 1;
+		memmove(buf + prelen, buf, res);
+		memcpy(buf, d->name.name, prelen);
+	}
+
+	strcat(buf, post);
+	kfree(d->redirect);
+	d->redirect = buf;
+	d->name.name = d->redirect;
+	d->name.len = strlen(d->redirect);
+
+	return 0;
+}
+
+static int ovl_acceptable(void *ctx, struct dentry *dentry)
+{
+	/*
+	 * A non-dir origin may be disconnected, which is fine, because
+	 * we only need it for its unique inode number.
+	 */
+	if (!d_is_dir(dentry))
+		return 1;
+
+	/* Don't decode a deleted empty directory */
+	if (d_unhashed(dentry))
+		return 0;
+
+	/* Check if directory belongs to the layer we are decoding from */
+	return is_subdir(dentry, ((struct vfsmount *)ctx)->mnt_root);
+}
+
+/*
+ * Check validity of an overlay file handle buffer.
+ *
+ * Return 0 for a valid file handle.
+ * Return -ENODATA for "origin unknown".
+ * Return <0 for an invalid file handle.
+ */
+int ovl_check_fh_len(struct ovl_fh *fh, int fh_len)
+{
+	if (fh_len < sizeof(struct ovl_fh) || fh_len < fh->len)
+		return -EINVAL;
+
+	if (fh->magic != OVL_FH_MAGIC)
+		return -EINVAL;
+
+	/* Treat larger version and unknown flags as "origin unknown" */
+	if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL)
+		return -ENODATA;
+
+	/* Treat endianness mismatch as "origin unknown" */
+	if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
+	    (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
+		return -ENODATA;
+
+	return 0;
+}
+
+static struct ovl_fh *ovl_get_fh(struct dentry *dentry, const char *name)
+{
+	int res, err;
+	struct ovl_fh *fh = NULL;
+
+	res = vfs_getxattr(dentry, name, NULL, 0);
+	if (res < 0) {
+		if (res == -ENODATA || res == -EOPNOTSUPP)
+			return NULL;
+		goto fail;
+	}
+	/* Zero size value means "copied up but origin unknown" */
+	if (res == 0)
+		return NULL;
+
+	fh = kzalloc(res, GFP_KERNEL);
+	if (!fh)
+		return ERR_PTR(-ENOMEM);
+
+	res = vfs_getxattr(dentry, name, fh, res);
+	if (res < 0)
+		goto fail;
+
+	err = ovl_check_fh_len(fh, res);
+	if (err < 0) {
+		if (err == -ENODATA)
+			goto out;
+		goto invalid;
+	}
+
+	return fh;
+
+out:
+	kfree(fh);
+	return NULL;
+
+fail:
+	pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res);
+	goto out;
+invalid:
+	pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh);
+	goto out;
+}
+
+struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
+				  bool connected)
+{
+	struct dentry *real;
+	int bytes;
+
+	/*
+	 * Make sure that the stored uuid matches the uuid of the lower
+	 * layer where file handle will be decoded.
+	 */
+	if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid))
+		return NULL;
+
+	bytes = (fh->len - offsetof(struct ovl_fh, fid));
+	real = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
+				  bytes >> 2, (int)fh->type,
+				  connected ? ovl_acceptable : NULL, mnt);
+	if (IS_ERR(real)) {
+		/*
+		 * Treat stale file handle to lower file as "origin unknown".
+		 * upper file handle could become stale when upper file is
+		 * unlinked and this information is needed to handle stale
+		 * index entries correctly.
+		 */
+		if (real == ERR_PTR(-ESTALE) &&
+		    !(fh->flags & OVL_FH_FLAG_PATH_UPPER))
+			real = NULL;
+		return real;
+	}
+
+	if (ovl_dentry_weird(real)) {
+		dput(real);
+		return NULL;
+	}
+
+	return real;
+}
+
+static bool ovl_is_opaquedir(struct dentry *dentry)
+{
+	return ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE);
+}
+
+static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
+			     const char *name, unsigned int namelen,
+			     size_t prelen, const char *post,
+			     struct dentry **ret)
+{
+	struct dentry *this;
+	int err;
+	bool last_element = !post[0];
+
+	this = lookup_one_len_unlocked(name, base, namelen);
+	if (IS_ERR(this)) {
+		err = PTR_ERR(this);
+		this = NULL;
+		if (err == -ENOENT || err == -ENAMETOOLONG)
+			goto out;
+		goto out_err;
+	}
+	if (!this->d_inode)
+		goto put_and_out;
+
+	if (ovl_dentry_weird(this)) {
+		/* Don't support traversing automounts and other weirdness */
+		err = -EREMOTE;
+		goto out_err;
+	}
+	if (ovl_is_whiteout(this)) {
+		d->stop = d->opaque = true;
+		goto put_and_out;
+	}
+	/*
+	 * This dentry should be a regular file if previous layer lookup
+	 * found a metacopy dentry.
+	 */
+	if (last_element && d->metacopy && !d_is_reg(this)) {
+		d->stop = true;
+		goto put_and_out;
+	}
+	if (!d_can_lookup(this)) {
+		if (d->is_dir || !last_element) {
+			d->stop = true;
+			goto put_and_out;
+		}
+		err = ovl_check_metacopy_xattr(this);
+		if (err < 0)
+			goto out_err;
+
+		d->metacopy = err;
+		d->stop = !d->metacopy;
+		if (!d->metacopy || d->last)
+			goto out;
+	} else {
+		if (ovl_lookup_trap_inode(d->sb, this)) {
+			/* Caught in a trap of overlapping layers */
+			err = -ELOOP;
+			goto out_err;
+		}
+
+		if (last_element)
+			d->is_dir = true;
+		if (d->last)
+			goto out;
+
+		if (ovl_is_opaquedir(this)) {
+			d->stop = true;
+			if (last_element)
+				d->opaque = true;
+			goto out;
+		}
+	}
+	err = ovl_check_redirect(this, d, prelen, post);
+	if (err)
+		goto out_err;
+out:
+	*ret = this;
+	return 0;
+
+put_and_out:
+	dput(this);
+	this = NULL;
+	goto out;
+
+out_err:
+	dput(this);
+	return err;
+}
+
+static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
+			    struct dentry **ret)
+{
+	/* Counting down from the end, since the prefix can change */
+	size_t rem = d->name.len - 1;
+	struct dentry *dentry = NULL;
+	int err;
+
+	if (d->name.name[0] != '/')
+		return ovl_lookup_single(base, d, d->name.name, d->name.len,
+					 0, "", ret);
+
+	while (!IS_ERR_OR_NULL(base) && d_can_lookup(base)) {
+		const char *s = d->name.name + d->name.len - rem;
+		const char *next = strchrnul(s, '/');
+		size_t thislen = next - s;
+		bool end = !next[0];
+
+		/* Verify we did not go off the rails */
+		if (WARN_ON(s[-1] != '/'))
+			return -EIO;
+
+		err = ovl_lookup_single(base, d, s, thislen,
+					d->name.len - rem, next, &base);
+		dput(dentry);
+		if (err)
+			return err;
+		dentry = base;
+		if (end)
+			break;
+
+		rem -= thislen + 1;
+
+		if (WARN_ON(rem >= d->name.len))
+			return -EIO;
+	}
+	*ret = dentry;
+	return 0;
+}
+
+
+int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
+			struct dentry *upperdentry, struct ovl_path **stackp)
+{
+	struct dentry *origin = NULL;
+	int i;
+
+	for (i = 0; i < ofs->numlower; i++) {
+		origin = ovl_decode_real_fh(fh, ofs->lower_layers[i].mnt,
+					    connected);
+		if (origin)
+			break;
+	}
+
+	if (!origin)
+		return -ESTALE;
+	else if (IS_ERR(origin))
+		return PTR_ERR(origin);
+
+	if (upperdentry && !ovl_is_whiteout(upperdentry) &&
+	    ((d_inode(origin)->i_mode ^ d_inode(upperdentry)->i_mode) & S_IFMT))
+		goto invalid;
+
+	if (!*stackp)
+		*stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL);
+	if (!*stackp) {
+		dput(origin);
+		return -ENOMEM;
+	}
+	**stackp = (struct ovl_path){
+		.dentry = origin,
+		.layer = &ofs->lower_layers[i]
+	};
+
+	return 0;
+
+invalid:
+	pr_warn_ratelimited("overlayfs: invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
+			    upperdentry, d_inode(upperdentry)->i_mode & S_IFMT,
+			    d_inode(origin)->i_mode & S_IFMT);
+	dput(origin);
+	return -EIO;
+}
+
+static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
+			    struct ovl_path **stackp, unsigned int *ctrp)
+{
+	struct ovl_fh *fh = ovl_get_fh(upperdentry, OVL_XATTR_ORIGIN);
+	int err;
+
+	if (IS_ERR_OR_NULL(fh))
+		return PTR_ERR(fh);
+
+	err = ovl_check_origin_fh(ofs, fh, false, upperdentry, stackp);
+	kfree(fh);
+
+	if (err) {
+		if (err == -ESTALE)
+			return 0;
+		return err;
+	}
+
+	if (WARN_ON(*ctrp))
+		return -EIO;
+
+	*ctrp = 1;
+	return 0;
+}
+
+/*
+ * Verify that @fh matches the file handle stored in xattr @name.
+ * Return 0 on match, -ESTALE on mismatch, < 0 on error.
+ */
+static int ovl_verify_fh(struct dentry *dentry, const char *name,
+			 const struct ovl_fh *fh)
+{
+	struct ovl_fh *ofh = ovl_get_fh(dentry, name);
+	int err = 0;
+
+	if (!ofh)
+		return -ENODATA;
+
+	if (IS_ERR(ofh))
+		return PTR_ERR(ofh);
+
+	if (fh->len != ofh->len || memcmp(fh, ofh, fh->len))
+		err = -ESTALE;
+
+	kfree(ofh);
+	return err;
+}
+
+/*
+ * Verify that @real dentry matches the file handle stored in xattr @name.
+ *
+ * If @set is true and there is no stored file handle, encode @real and store
+ * file handle in xattr @name.
+ *
+ * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error.
+ */
+int ovl_verify_set_fh(struct dentry *dentry, const char *name,
+		      struct dentry *real, bool is_upper, bool set)
+{
+	struct inode *inode;
+	struct ovl_fh *fh;
+	int err;
+
+	fh = ovl_encode_real_fh(real, is_upper);
+	err = PTR_ERR(fh);
+	if (IS_ERR(fh)) {
+		fh = NULL;
+		goto fail;
+	}
+
+	err = ovl_verify_fh(dentry, name, fh);
+	if (set && err == -ENODATA)
+		err = ovl_do_setxattr(dentry, name, fh, fh->len, 0);
+	if (err)
+		goto fail;
+
+out:
+	kfree(fh);
+	return err;
+
+fail:
+	inode = d_inode(real);
+	pr_warn_ratelimited("overlayfs: failed to verify %s (%pd2, ino=%lu, err=%i)\n",
+			    is_upper ? "upper" : "origin", real,
+			    inode ? inode->i_ino : 0, err);
+	goto out;
+}
+
+/* Get upper dentry from index */
+struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
+{
+	struct ovl_fh *fh;
+	struct dentry *upper;
+
+	if (!d_is_dir(index))
+		return dget(index);
+
+	fh = ovl_get_fh(index, OVL_XATTR_UPPER);
+	if (IS_ERR_OR_NULL(fh))
+		return ERR_CAST(fh);
+
+	upper = ovl_decode_real_fh(fh, ofs->upper_mnt, true);
+	kfree(fh);
+
+	if (IS_ERR_OR_NULL(upper))
+		return upper ?: ERR_PTR(-ESTALE);
+
+	if (!d_is_dir(upper)) {
+		pr_warn_ratelimited("overlayfs: invalid index upper (%pd2, upper=%pd2).\n",
+				    index, upper);
+		dput(upper);
+		return ERR_PTR(-EIO);
+	}
+
+	return upper;
+}
+
+/* Is this a leftover from create/whiteout of directory index entry? */
+static bool ovl_is_temp_index(struct dentry *index)
+{
+	return index->d_name.name[0] == '#';
+}
+
+/*
+ * Verify that an index entry name matches the origin file handle stored in
+ * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
+ * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error.
+ */
+int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
+{
+	struct ovl_fh *fh = NULL;
+	size_t len;
+	struct ovl_path origin = { };
+	struct ovl_path *stack = &origin;
+	struct dentry *upper = NULL;
+	int err;
+
+	if (!d_inode(index))
+		return 0;
+
+	/* Cleanup leftover from index create/cleanup attempt */
+	err = -ESTALE;
+	if (ovl_is_temp_index(index))
+		goto fail;
+
+	err = -EINVAL;
+	if (index->d_name.len < sizeof(struct ovl_fh)*2)
+		goto fail;
+
+	err = -ENOMEM;
+	len = index->d_name.len / 2;
+	fh = kzalloc(len, GFP_KERNEL);
+	if (!fh)
+		goto fail;
+
+	err = -EINVAL;
+	if (hex2bin((u8 *)fh, index->d_name.name, len))
+		goto fail;
+
+	err = ovl_check_fh_len(fh, len);
+	if (err)
+		goto fail;
+
+	/*
+	 * Whiteout index entries are used as an indication that an exported
+	 * overlay file handle should be treated as stale (i.e. after unlink
+	 * of the overlay inode). These entries contain no origin xattr.
+	 */
+	if (ovl_is_whiteout(index))
+		goto out;
+
+	/*
+	 * Verifying directory index entries are not stale is expensive, so
+	 * only verify stale dir index if NFS export is enabled.
+	 */
+	if (d_is_dir(index) && !ofs->config.nfs_export)
+		goto out;
+
+	/*
+	 * Directory index entries should have 'upper' xattr pointing to the
+	 * real upper dir. Non-dir index entries are hardlinks to the upper
+	 * real inode. For non-dir index, we can read the copy up origin xattr
+	 * directly from the index dentry, but for dir index we first need to
+	 * decode the upper directory.
+	 */
+	upper = ovl_index_upper(ofs, index);
+	if (IS_ERR_OR_NULL(upper)) {
+		err = PTR_ERR(upper);
+		/*
+		 * Directory index entries with no 'upper' xattr need to be
+		 * removed. When dir index entry has a stale 'upper' xattr,
+		 * we assume that upper dir was removed and we treat the dir
+		 * index as orphan entry that needs to be whited out.
+		 */
+		if (err == -ESTALE)
+			goto orphan;
+		else if (!err)
+			err = -ESTALE;
+		goto fail;
+	}
+
+	err = ovl_verify_fh(upper, OVL_XATTR_ORIGIN, fh);
+	dput(upper);
+	if (err)
+		goto fail;
+
+	/* Check if non-dir index is orphan and don't warn before cleaning it */
+	if (!d_is_dir(index) && d_inode(index)->i_nlink == 1) {
+		err = ovl_check_origin_fh(ofs, fh, false, index, &stack);
+		if (err)
+			goto fail;
+
+		if (ovl_get_nlink(origin.dentry, index, 0) == 0)
+			goto orphan;
+	}
+
+out:
+	dput(origin.dentry);
+	kfree(fh);
+	return err;
+
+fail:
+	pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n",
+			    index, d_inode(index)->i_mode & S_IFMT, err);
+	goto out;
+
+orphan:
+	pr_warn_ratelimited("overlayfs: orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
+			    index, d_inode(index)->i_mode & S_IFMT,
+			    d_inode(index)->i_nlink);
+	err = -ENOENT;
+	goto out;
+}
+
+static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name)
+{
+	char *n, *s;
+
+	n = kcalloc(fh->len, 2, GFP_KERNEL);
+	if (!n)
+		return -ENOMEM;
+
+	s  = bin2hex(n, fh, fh->len);
+	*name = (struct qstr) QSTR_INIT(n, s - n);
+
+	return 0;
+
+}
+
+/*
+ * Lookup in indexdir for the index entry of a lower real inode or a copy up
+ * origin inode. The index entry name is the hex representation of the lower
+ * inode file handle.
+ *
+ * If the index dentry in negative, then either no lower aliases have been
+ * copied up yet, or aliases have been copied up in older kernels and are
+ * not indexed.
+ *
+ * If the index dentry for a copy up origin inode is positive, but points
+ * to an inode different than the upper inode, then either the upper inode
+ * has been copied up and not indexed or it was indexed, but since then
+ * index dir was cleared. Either way, that index cannot be used to indentify
+ * the overlay inode.
+ */
+int ovl_get_index_name(struct dentry *origin, struct qstr *name)
+{
+	struct ovl_fh *fh;
+	int err;
+
+	fh = ovl_encode_real_fh(origin, false);
+	if (IS_ERR(fh))
+		return PTR_ERR(fh);
+
+	err = ovl_get_index_name_fh(fh, name);
+
+	kfree(fh);
+	return err;
+}
+
+/* Lookup index by file handle for NFS export */
+struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
+{
+	struct dentry *index;
+	struct qstr name;
+	int err;
+
+	err = ovl_get_index_name_fh(fh, &name);
+	if (err)
+		return ERR_PTR(err);
+
+	index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
+	kfree(name.name);
+	if (IS_ERR(index)) {
+		if (PTR_ERR(index) == -ENOENT)
+			index = NULL;
+		return index;
+	}
+
+	if (d_is_negative(index))
+		err = 0;
+	else if (ovl_is_whiteout(index))
+		err = -ESTALE;
+	else if (ovl_dentry_weird(index))
+		err = -EIO;
+	else
+		return index;
+
+	dput(index);
+	return ERR_PTR(err);
+}
+
+struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
+				struct dentry *origin, bool verify)
+{
+	struct dentry *index;
+	struct inode *inode;
+	struct qstr name;
+	bool is_dir = d_is_dir(origin);
+	int err;
+
+	err = ovl_get_index_name(origin, &name);
+	if (err)
+		return ERR_PTR(err);
+
+	index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
+	if (IS_ERR(index)) {
+		err = PTR_ERR(index);
+		if (err == -ENOENT) {
+			index = NULL;
+			goto out;
+		}
+		pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
+				    "overlayfs: mount with '-o index=off' to disable inodes index.\n",
+				    d_inode(origin)->i_ino, name.len, name.name,
+				    err);
+		goto out;
+	}
+
+	inode = d_inode(index);
+	if (d_is_negative(index)) {
+		goto out_dput;
+	} else if (ovl_is_whiteout(index) && !verify) {
+		/*
+		 * When index lookup is called with !verify for decoding an
+		 * overlay file handle, a whiteout index implies that decode
+		 * should treat file handle as stale and no need to print a
+		 * warning about it.
+		 */
+		dput(index);
+		index = ERR_PTR(-ESTALE);
+		goto out;
+	} else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) ||
+		   ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) {
+		/*
+		 * Index should always be of the same file type as origin
+		 * except for the case of a whiteout index. A whiteout
+		 * index should only exist if all lower aliases have been
+		 * unlinked, which means that finding a lower origin on lookup
+		 * whose index is a whiteout should be treated as an error.
+		 */
+		pr_warn_ratelimited("overlayfs: bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
+				    index, d_inode(index)->i_mode & S_IFMT,
+				    d_inode(origin)->i_mode & S_IFMT);
+		goto fail;
+	} else if (is_dir && verify) {
+		if (!upper) {
+			pr_warn_ratelimited("overlayfs: suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
+					    origin, index);
+			goto fail;
+		}
+
+		/* Verify that dir index 'upper' xattr points to upper dir */
+		err = ovl_verify_upper(index, upper, false);
+		if (err) {
+			if (err == -ESTALE) {
+				pr_warn_ratelimited("overlayfs: suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
+						    upper, origin, index);
+			}
+			goto fail;
+		}
+	} else if (upper && d_inode(upper) != inode) {
+		goto out_dput;
+	}
+out:
+	kfree(name.name);
+	return index;
+
+out_dput:
+	dput(index);
+	index = NULL;
+	goto out;
+
+fail:
+	dput(index);
+	index = ERR_PTR(-EIO);
+	goto out;
+}
+
+/*
+ * Returns next layer in stack starting from top.
+ * Returns -1 if this is the last layer.
+ */
+int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	BUG_ON(idx < 0);
+	if (idx == 0) {
+		ovl_path_upper(dentry, path);
+		if (path->dentry)
+			return oe->numlower ? 1 : -1;
+		idx++;
+	}
+	BUG_ON(idx > oe->numlower);
+	path->dentry = oe->lowerstack[idx - 1].dentry;
+	path->mnt = oe->lowerstack[idx - 1].layer->mnt;
+
+	return (idx < oe->numlower) ? idx + 1 : -1;
+}
+
+/* Fix missing 'origin' xattr */
+static int ovl_fix_origin(struct dentry *dentry, struct dentry *lower,
+			  struct dentry *upper)
+{
+	int err;
+
+	if (ovl_check_origin_xattr(upper))
+		return 0;
+
+	err = ovl_want_write(dentry);
+	if (err)
+		return err;
+
+	err = ovl_set_origin(dentry, lower, upper);
+	if (!err)
+		err = ovl_set_impure(dentry->d_parent, upper->d_parent);
+
+	ovl_drop_write(dentry);
+	return err;
+}
+
+struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
+			  unsigned int flags)
+{
+	struct ovl_entry *oe;
+	const struct cred *old_cred;
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+	struct ovl_entry *poe = dentry->d_parent->d_fsdata;
+	struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
+	struct ovl_path *stack = NULL, *origin_path = NULL;
+	struct dentry *upperdir, *upperdentry = NULL;
+	struct dentry *origin = NULL;
+	struct dentry *index = NULL;
+	unsigned int ctr = 0;
+	struct inode *inode = NULL;
+	bool upperopaque = false;
+	char *upperredirect = NULL;
+	struct dentry *this;
+	unsigned int i;
+	int err;
+	bool metacopy = false;
+	struct ovl_lookup_data d = {
+		.sb = dentry->d_sb,
+		.name = dentry->d_name,
+		.is_dir = false,
+		.opaque = false,
+		.stop = false,
+		.last = ofs->config.redirect_follow ? false : !poe->numlower,
+		.redirect = NULL,
+		.metacopy = false,
+	};
+
+	if (dentry->d_name.len > ofs->namelen)
+		return ERR_PTR(-ENAMETOOLONG);
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	upperdir = ovl_dentry_upper(dentry->d_parent);
+	if (upperdir) {
+		err = ovl_lookup_layer(upperdir, &d, &upperdentry);
+		if (err)
+			goto out;
+
+		if (upperdentry && unlikely(ovl_dentry_remote(upperdentry))) {
+			dput(upperdentry);
+			err = -EREMOTE;
+			goto out;
+		}
+		if (upperdentry && !d.is_dir) {
+			unsigned int origin_ctr = 0;
+
+			/*
+			 * Lookup copy up origin by decoding origin file handle.
+			 * We may get a disconnected dentry, which is fine,
+			 * because we only need to hold the origin inode in
+			 * cache and use its inode number.  We may even get a
+			 * connected dentry, that is not under any of the lower
+			 * layers root.  That is also fine for using it's inode
+			 * number - it's the same as if we held a reference
+			 * to a dentry in lower layer that was moved under us.
+			 */
+			err = ovl_check_origin(ofs, upperdentry, &origin_path,
+					       &origin_ctr);
+			if (err)
+				goto out_put_upper;
+
+			if (d.metacopy)
+				metacopy = true;
+		}
+
+		if (d.redirect) {
+			err = -ENOMEM;
+			upperredirect = kstrdup(d.redirect, GFP_KERNEL);
+			if (!upperredirect)
+				goto out_put_upper;
+			if (d.redirect[0] == '/')
+				poe = roe;
+		}
+		upperopaque = d.opaque;
+	}
+
+	if (!d.stop && poe->numlower) {
+		err = -ENOMEM;
+		stack = kcalloc(ofs->numlower, sizeof(struct ovl_path),
+				GFP_KERNEL);
+		if (!stack)
+			goto out_put_upper;
+	}
+
+	for (i = 0; !d.stop && i < poe->numlower; i++) {
+		struct ovl_path lower = poe->lowerstack[i];
+
+		if (!ofs->config.redirect_follow)
+			d.last = i == poe->numlower - 1;
+		else
+			d.last = lower.layer->idx == roe->numlower;
+
+		err = ovl_lookup_layer(lower.dentry, &d, &this);
+		if (err)
+			goto out_put;
+
+		if (!this)
+			continue;
+
+		/*
+		 * If no origin fh is stored in upper of a merge dir, store fh
+		 * of lower dir and set upper parent "impure".
+		 */
+		if (upperdentry && !ctr && !ofs->noxattr && d.is_dir) {
+			err = ovl_fix_origin(dentry, this, upperdentry);
+			if (err) {
+				dput(this);
+				goto out_put;
+			}
+		}
+
+		/*
+		 * When "verify_lower" feature is enabled, do not merge with a
+		 * lower dir that does not match a stored origin xattr. In any
+		 * case, only verified origin is used for index lookup.
+		 *
+		 * For non-dir dentry, if index=on, then ensure origin
+		 * matches the dentry found using path based lookup,
+		 * otherwise error out.
+		 */
+		if (upperdentry && !ctr &&
+		    ((d.is_dir && ovl_verify_lower(dentry->d_sb)) ||
+		     (!d.is_dir && ofs->config.index && origin_path))) {
+			err = ovl_verify_origin(upperdentry, this, false);
+			if (err) {
+				dput(this);
+				if (d.is_dir)
+					break;
+				goto out_put;
+			}
+			origin = this;
+		}
+
+		if (d.metacopy)
+			metacopy = true;
+		/*
+		 * Do not store intermediate metacopy dentries in chain,
+		 * except top most lower metacopy dentry
+		 */
+		if (d.metacopy && ctr) {
+			dput(this);
+			continue;
+		}
+
+		stack[ctr].dentry = this;
+		stack[ctr].layer = lower.layer;
+		ctr++;
+
+		/*
+		 * Following redirects can have security consequences: it's like
+		 * a symlink into the lower layer without the permission checks.
+		 * This is only a problem if the upper layer is untrusted (e.g
+		 * comes from an USB drive).  This can allow a non-readable file
+		 * or directory to become readable.
+		 *
+		 * Only following redirects when redirects are enabled disables
+		 * this attack vector when not necessary.
+		 */
+		err = -EPERM;
+		if (d.redirect && !ofs->config.redirect_follow) {
+			pr_warn_ratelimited("overlayfs: refusing to follow redirect for (%pd2)\n",
+					    dentry);
+			goto out_put;
+		}
+
+		if (d.stop)
+			break;
+
+		if (d.redirect && d.redirect[0] == '/' && poe != roe) {
+			poe = roe;
+			/* Find the current layer on the root dentry */
+			i = lower.layer->idx - 1;
+		}
+	}
+
+	if (metacopy) {
+		/*
+		 * Found a metacopy dentry but did not find corresponding
+		 * data dentry
+		 */
+		if (d.metacopy) {
+			err = -EIO;
+			goto out_put;
+		}
+
+		err = -EPERM;
+		if (!ofs->config.metacopy) {
+			pr_warn_ratelimited("overlay: refusing to follow metacopy origin for (%pd2)\n",
+					    dentry);
+			goto out_put;
+		}
+	} else if (!d.is_dir && upperdentry && !ctr && origin_path) {
+		if (WARN_ON(stack != NULL)) {
+			err = -EIO;
+			goto out_put;
+		}
+		stack = origin_path;
+		ctr = 1;
+		origin_path = NULL;
+	}
+
+	/*
+	 * Lookup index by lower inode and verify it matches upper inode.
+	 * We only trust dir index if we verified that lower dir matches
+	 * origin, otherwise dir index entries may be inconsistent and we
+	 * ignore them.
+	 *
+	 * For non-dir upper metacopy dentry, we already set "origin" if we
+	 * verified that lower matched upper origin. If upper origin was
+	 * not present (because lower layer did not support fh encode/decode),
+	 * or indexing is not enabled, do not set "origin" and skip looking up
+	 * index. This case should be handled in same way as a non-dir upper
+	 * without ORIGIN is handled.
+	 *
+	 * Always lookup index of non-dir non-metacopy and non-upper.
+	 */
+	if (ctr && (!upperdentry || (!d.is_dir && !metacopy)))
+		origin = stack[0].dentry;
+
+	if (origin && ovl_indexdir(dentry->d_sb) &&
+	    (!d.is_dir || ovl_index_all(dentry->d_sb))) {
+		index = ovl_lookup_index(ofs, upperdentry, origin, true);
+		if (IS_ERR(index)) {
+			err = PTR_ERR(index);
+			index = NULL;
+			goto out_put;
+		}
+	}
+
+	oe = ovl_alloc_entry(ctr);
+	err = -ENOMEM;
+	if (!oe)
+		goto out_put;
+
+	memcpy(oe->lowerstack, stack, sizeof(struct ovl_path) * ctr);
+	dentry->d_fsdata = oe;
+
+	if (upperopaque)
+		ovl_dentry_set_opaque(dentry);
+
+	if (upperdentry)
+		ovl_dentry_set_upper_alias(dentry);
+	else if (index) {
+		upperdentry = dget(index);
+		upperredirect = ovl_get_redirect_xattr(upperdentry, 0);
+		if (IS_ERR(upperredirect)) {
+			err = PTR_ERR(upperredirect);
+			upperredirect = NULL;
+			goto out_free_oe;
+		}
+	}
+
+	if (upperdentry || ctr) {
+		struct ovl_inode_params oip = {
+			.upperdentry = upperdentry,
+			.lowerpath = stack,
+			.index = index,
+			.numlower = ctr,
+			.redirect = upperredirect,
+			.lowerdata = (ctr > 1 && !d.is_dir) ?
+				      stack[ctr - 1].dentry : NULL,
+		};
+
+		inode = ovl_get_inode(dentry->d_sb, &oip);
+		err = PTR_ERR(inode);
+		if (IS_ERR(inode))
+			goto out_free_oe;
+	}
+
+	revert_creds(old_cred);
+	if (origin_path) {
+		dput(origin_path->dentry);
+		kfree(origin_path);
+	}
+	dput(index);
+	kfree(stack);
+	kfree(d.redirect);
+	return d_splice_alias(inode, dentry);
+
+out_free_oe:
+	dentry->d_fsdata = NULL;
+	kfree(oe);
+out_put:
+	dput(index);
+	for (i = 0; i < ctr; i++)
+		dput(stack[i].dentry);
+	kfree(stack);
+out_put_upper:
+	if (origin_path) {
+		dput(origin_path->dentry);
+		kfree(origin_path);
+	}
+	dput(upperdentry);
+	kfree(upperredirect);
+out:
+	kfree(d.redirect);
+	revert_creds(old_cred);
+	return ERR_PTR(err);
+}
+
+bool ovl_lower_positive(struct dentry *dentry)
+{
+	struct ovl_entry *poe = dentry->d_parent->d_fsdata;
+	const struct qstr *name = &dentry->d_name;
+	const struct cred *old_cred;
+	unsigned int i;
+	bool positive = false;
+	bool done = false;
+
+	/*
+	 * If dentry is negative, then lower is positive iff this is a
+	 * whiteout.
+	 */
+	if (!dentry->d_inode)
+		return ovl_dentry_is_opaque(dentry);
+
+	/* Negative upper -> positive lower */
+	if (!ovl_dentry_upper(dentry))
+		return true;
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	/* Positive upper -> have to look up lower to see whether it exists */
+	for (i = 0; !done && !positive && i < poe->numlower; i++) {
+		struct dentry *this;
+		struct dentry *lowerdir = poe->lowerstack[i].dentry;
+
+		this = lookup_one_len_unlocked(name->name, lowerdir,
+					       name->len);
+		if (IS_ERR(this)) {
+			switch (PTR_ERR(this)) {
+			case -ENOENT:
+			case -ENAMETOOLONG:
+				break;
+
+			default:
+				/*
+				 * Assume something is there, we just couldn't
+				 * access it.
+				 */
+				positive = true;
+				break;
+			}
+		} else {
+			if (this->d_inode) {
+				positive = !ovl_is_whiteout(this);
+				done = true;
+			}
+			dput(this);
+		}
+	}
+	revert_creds(old_cred);
+
+	return positive;
+}
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
new file mode 100644
index 000000000..8dcede7df
--- /dev/null
+++ b/fs/overlayfs/overlayfs.h
@@ -0,0 +1,426 @@
+/*
+ *
+ * Copyright (C) 2011 Novell Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/uuid.h>
+#include <linux/fs.h>
+#include "ovl_entry.h"
+
+enum ovl_path_type {
+	__OVL_PATH_UPPER	= (1 << 0),
+	__OVL_PATH_MERGE	= (1 << 1),
+	__OVL_PATH_ORIGIN	= (1 << 2),
+};
+
+#define OVL_TYPE_UPPER(type)	((type) & __OVL_PATH_UPPER)
+#define OVL_TYPE_MERGE(type)	((type) & __OVL_PATH_MERGE)
+#define OVL_TYPE_ORIGIN(type)	((type) & __OVL_PATH_ORIGIN)
+
+#define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay."
+#define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque"
+#define OVL_XATTR_REDIRECT OVL_XATTR_PREFIX "redirect"
+#define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin"
+#define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure"
+#define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink"
+#define OVL_XATTR_UPPER OVL_XATTR_PREFIX "upper"
+#define OVL_XATTR_METACOPY OVL_XATTR_PREFIX "metacopy"
+
+enum ovl_inode_flag {
+	/* Pure upper dir that may contain non pure upper entries */
+	OVL_IMPURE,
+	/* Non-merge dir that may contain whiteout entries */
+	OVL_WHITEOUTS,
+	OVL_INDEX,
+	OVL_UPPERDATA,
+	/* Inode number will remain constant over copy up. */
+	OVL_CONST_INO,
+};
+
+enum ovl_entry_flag {
+	OVL_E_UPPER_ALIAS,
+	OVL_E_OPAQUE,
+	OVL_E_CONNECTED,
+};
+
+/*
+ * The tuple (fh,uuid) is a universal unique identifier for a copy up origin,
+ * where:
+ * origin.fh	- exported file handle of the lower file
+ * origin.uuid	- uuid of the lower filesystem
+ */
+#define OVL_FH_VERSION	0
+#define OVL_FH_MAGIC	0xfb
+
+/* CPU byte order required for fid decoding:  */
+#define OVL_FH_FLAG_BIG_ENDIAN	(1 << 0)
+#define OVL_FH_FLAG_ANY_ENDIAN	(1 << 1)
+/* Is the real inode encoded in fid an upper inode? */
+#define OVL_FH_FLAG_PATH_UPPER	(1 << 2)
+
+#define OVL_FH_FLAG_ALL (OVL_FH_FLAG_BIG_ENDIAN | OVL_FH_FLAG_ANY_ENDIAN | \
+			 OVL_FH_FLAG_PATH_UPPER)
+
+#if defined(__LITTLE_ENDIAN)
+#define OVL_FH_FLAG_CPU_ENDIAN 0
+#elif defined(__BIG_ENDIAN)
+#define OVL_FH_FLAG_CPU_ENDIAN OVL_FH_FLAG_BIG_ENDIAN
+#else
+#error Endianness not defined
+#endif
+
+/* The type returned by overlay exportfs ops when encoding an ovl_fh handle */
+#define OVL_FILEID	0xfb
+
+/* On-disk and in-memeory format for redirect by file handle */
+struct ovl_fh {
+	u8 version;	/* 0 */
+	u8 magic;	/* 0xfb */
+	u8 len;		/* size of this header + size of fid */
+	u8 flags;	/* OVL_FH_FLAG_* */
+	u8 type;	/* fid_type of fid */
+	uuid_t uuid;	/* uuid of filesystem */
+	u8 fid[0];	/* file identifier */
+} __packed;
+
+static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	int err = vfs_rmdir(dir, dentry);
+
+	pr_debug("rmdir(%pd2) = %i\n", dentry, err);
+	return err;
+}
+
+static inline int ovl_do_unlink(struct inode *dir, struct dentry *dentry)
+{
+	int err = vfs_unlink(dir, dentry, NULL);
+
+	pr_debug("unlink(%pd2) = %i\n", dentry, err);
+	return err;
+}
+
+static inline int ovl_do_link(struct dentry *old_dentry, struct inode *dir,
+			      struct dentry *new_dentry)
+{
+	int err = vfs_link(old_dentry, dir, new_dentry, NULL);
+
+	pr_debug("link(%pd2, %pd2) = %i\n", old_dentry, new_dentry, err);
+	return err;
+}
+
+static inline int ovl_do_create(struct inode *dir, struct dentry *dentry,
+				umode_t mode)
+{
+	int err = vfs_create(dir, dentry, mode, true);
+
+	pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err);
+	return err;
+}
+
+static inline int ovl_do_mkdir(struct inode *dir, struct dentry *dentry,
+			       umode_t mode)
+{
+	int err = vfs_mkdir(dir, dentry, mode);
+	pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err);
+	return err;
+}
+
+static inline int ovl_do_mknod(struct inode *dir, struct dentry *dentry,
+			       umode_t mode, dev_t dev)
+{
+	int err = vfs_mknod(dir, dentry, mode, dev);
+
+	pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n", dentry, mode, dev, err);
+	return err;
+}
+
+static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry,
+				 const char *oldname)
+{
+	int err = vfs_symlink(dir, dentry, oldname);
+
+	pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err);
+	return err;
+}
+
+static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
+				  const void *value, size_t size, int flags)
+{
+	int err = vfs_setxattr(dentry, name, value, size, flags);
+	pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) = %i\n",
+		 dentry, name, min((int)size, 48), value, size, flags, err);
+	return err;
+}
+
+static inline int ovl_do_removexattr(struct dentry *dentry, const char *name)
+{
+	int err = vfs_removexattr(dentry, name);
+	pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, err);
+	return err;
+}
+
+static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry,
+				struct inode *newdir, struct dentry *newdentry,
+				unsigned int flags)
+{
+	int err;
+
+	pr_debug("rename(%pd2, %pd2, 0x%x)\n", olddentry, newdentry, flags);
+	err = vfs_rename(olddir, olddentry, newdir, newdentry, NULL, flags);
+	if (err) {
+		pr_debug("...rename(%pd2, %pd2, ...) = %i\n",
+			 olddentry, newdentry, err);
+	}
+	return err;
+}
+
+static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
+{
+	int err = vfs_whiteout(dir, dentry);
+	pr_debug("whiteout(%pd2) = %i\n", dentry, err);
+	return err;
+}
+
+static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode)
+{
+	struct dentry *ret = vfs_tmpfile(dentry, mode, 0);
+	int err = PTR_ERR_OR_ZERO(ret);
+
+	pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err);
+	return ret;
+}
+
+static inline bool ovl_open_flags_need_copy_up(int flags)
+{
+	if (!flags)
+		return false;
+
+	return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC));
+}
+
+/* util.c */
+int ovl_want_write(struct dentry *dentry);
+void ovl_drop_write(struct dentry *dentry);
+struct dentry *ovl_workdir(struct dentry *dentry);
+const struct cred *ovl_override_creds(struct super_block *sb);
+struct super_block *ovl_same_sb(struct super_block *sb);
+int ovl_can_decode_fh(struct super_block *sb);
+struct dentry *ovl_indexdir(struct super_block *sb);
+bool ovl_index_all(struct super_block *sb);
+bool ovl_verify_lower(struct super_block *sb);
+struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
+bool ovl_dentry_remote(struct dentry *dentry);
+bool ovl_dentry_weird(struct dentry *dentry);
+enum ovl_path_type ovl_path_type(struct dentry *dentry);
+void ovl_path_upper(struct dentry *dentry, struct path *path);
+void ovl_path_lower(struct dentry *dentry, struct path *path);
+void ovl_path_lowerdata(struct dentry *dentry, struct path *path);
+enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
+struct dentry *ovl_dentry_upper(struct dentry *dentry);
+struct dentry *ovl_dentry_lower(struct dentry *dentry);
+struct dentry *ovl_dentry_lowerdata(struct dentry *dentry);
+struct ovl_layer *ovl_layer_lower(struct dentry *dentry);
+struct dentry *ovl_dentry_real(struct dentry *dentry);
+struct dentry *ovl_i_dentry_upper(struct inode *inode);
+struct inode *ovl_inode_upper(struct inode *inode);
+struct inode *ovl_inode_lower(struct inode *inode);
+struct inode *ovl_inode_lowerdata(struct inode *inode);
+struct inode *ovl_inode_real(struct inode *inode);
+struct inode *ovl_inode_realdata(struct inode *inode);
+struct ovl_dir_cache *ovl_dir_cache(struct inode *inode);
+void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache);
+void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry);
+void ovl_dentry_clear_flag(unsigned long flag, struct dentry *dentry);
+bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry);
+bool ovl_dentry_is_opaque(struct dentry *dentry);
+bool ovl_dentry_is_whiteout(struct dentry *dentry);
+void ovl_dentry_set_opaque(struct dentry *dentry);
+bool ovl_dentry_has_upper_alias(struct dentry *dentry);
+void ovl_dentry_set_upper_alias(struct dentry *dentry);
+bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags);
+bool ovl_dentry_needs_data_copy_up_locked(struct dentry *dentry, int flags);
+bool ovl_has_upperdata(struct inode *inode);
+void ovl_set_upperdata(struct inode *inode);
+bool ovl_redirect_dir(struct super_block *sb);
+const char *ovl_dentry_get_redirect(struct dentry *dentry);
+void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect);
+void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
+		    struct dentry *lowerdentry, struct dentry *lowerdata);
+void ovl_inode_update(struct inode *inode, struct dentry *upperdentry);
+void ovl_dir_modified(struct dentry *dentry, bool impurity);
+u64 ovl_dentry_version_get(struct dentry *dentry);
+bool ovl_is_whiteout(struct dentry *dentry);
+struct file *ovl_path_open(struct path *path, int flags);
+int ovl_copy_up_start(struct dentry *dentry, int flags);
+void ovl_copy_up_end(struct dentry *dentry);
+bool ovl_already_copied_up(struct dentry *dentry, int flags);
+bool ovl_check_origin_xattr(struct dentry *dentry);
+bool ovl_check_dir_xattr(struct dentry *dentry, const char *name);
+int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
+		       const char *name, const void *value, size_t size,
+		       int xerr);
+int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry);
+void ovl_set_flag(unsigned long flag, struct inode *inode);
+void ovl_clear_flag(unsigned long flag, struct inode *inode);
+bool ovl_test_flag(unsigned long flag, struct inode *inode);
+bool ovl_inuse_trylock(struct dentry *dentry);
+void ovl_inuse_unlock(struct dentry *dentry);
+bool ovl_is_inuse(struct dentry *dentry);
+bool ovl_need_index(struct dentry *dentry);
+int ovl_nlink_start(struct dentry *dentry, bool *locked);
+void ovl_nlink_end(struct dentry *dentry, bool locked);
+int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir);
+int ovl_check_metacopy_xattr(struct dentry *dentry);
+bool ovl_is_metacopy_dentry(struct dentry *dentry);
+char *ovl_get_redirect_xattr(struct dentry *dentry, int padding);
+ssize_t ovl_getxattr(struct dentry *dentry, char *name, char **value,
+		     size_t padding);
+
+static inline bool ovl_is_impuredir(struct dentry *dentry)
+{
+	return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE);
+}
+
+static inline unsigned int ovl_xino_bits(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	return ofs->xino_bits;
+}
+
+
+/* namei.c */
+int ovl_check_fh_len(struct ovl_fh *fh, int fh_len);
+struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
+				  bool connected);
+int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
+			struct dentry *upperdentry, struct ovl_path **stackp);
+int ovl_verify_set_fh(struct dentry *dentry, const char *name,
+		      struct dentry *real, bool is_upper, bool set);
+struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index);
+int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index);
+int ovl_get_index_name(struct dentry *origin, struct qstr *name);
+struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh);
+struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
+				struct dentry *origin, bool verify);
+int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
+struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
+			  unsigned int flags);
+bool ovl_lower_positive(struct dentry *dentry);
+
+static inline int ovl_verify_origin(struct dentry *upper,
+				    struct dentry *origin, bool set)
+{
+	return ovl_verify_set_fh(upper, OVL_XATTR_ORIGIN, origin, false, set);
+}
+
+static inline int ovl_verify_upper(struct dentry *index,
+				    struct dentry *upper, bool set)
+{
+	return ovl_verify_set_fh(index, OVL_XATTR_UPPER, upper, true, set);
+}
+
+/* readdir.c */
+extern const struct file_operations ovl_dir_operations;
+int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
+void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list);
+void ovl_cache_free(struct list_head *list);
+void ovl_dir_cache_free(struct inode *inode);
+int ovl_check_d_type_supported(struct path *realpath);
+void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
+			 struct dentry *dentry, int level);
+int ovl_indexdir_cleanup(struct ovl_fs *ofs);
+
+/* inode.c */
+int ovl_set_nlink_upper(struct dentry *dentry);
+int ovl_set_nlink_lower(struct dentry *dentry);
+unsigned int ovl_get_nlink(struct dentry *lowerdentry,
+			   struct dentry *upperdentry,
+			   unsigned int fallback);
+int ovl_setattr(struct dentry *dentry, struct iattr *attr);
+int ovl_getattr(const struct path *path, struct kstat *stat,
+		u32 request_mask, unsigned int flags);
+int ovl_permission(struct inode *inode, int mask);
+int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
+		  const void *value, size_t size, int flags);
+int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
+		  void *value, size_t size);
+ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
+struct posix_acl *ovl_get_acl(struct inode *inode, int type);
+int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags);
+bool ovl_is_private_xattr(const char *name);
+
+struct ovl_inode_params {
+	struct inode *newinode;
+	struct dentry *upperdentry;
+	struct ovl_path *lowerpath;
+	struct dentry *index;
+	unsigned int numlower;
+	char *redirect;
+	struct dentry *lowerdata;
+};
+struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev);
+struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
+			       bool is_upper);
+bool ovl_lookup_trap_inode(struct super_block *sb, struct dentry *dir);
+struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir);
+struct inode *ovl_get_inode(struct super_block *sb,
+			    struct ovl_inode_params *oip);
+static inline void ovl_copyattr(struct inode *from, struct inode *to)
+{
+	to->i_uid = from->i_uid;
+	to->i_gid = from->i_gid;
+	to->i_mode = from->i_mode;
+	to->i_atime = from->i_atime;
+	to->i_mtime = from->i_mtime;
+	to->i_ctime = from->i_ctime;
+	i_size_write(to, i_size_read(from));
+}
+
+static inline void ovl_copyflags(struct inode *from, struct inode *to)
+{
+	unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME;
+
+	inode_set_flags(to, from->i_flags & mask, mask);
+}
+
+/* dir.c */
+extern const struct inode_operations ovl_dir_inode_operations;
+int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
+			     struct dentry *dentry);
+struct ovl_cattr {
+	dev_t rdev;
+	umode_t mode;
+	const char *link;
+	struct dentry *hardlink;
+};
+
+#define OVL_CATTR(m) (&(struct ovl_cattr) { .mode = (m) })
+
+int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry, umode_t mode);
+struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry,
+			       struct ovl_cattr *attr);
+int ovl_cleanup(struct inode *dir, struct dentry *dentry);
+struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr);
+
+/* file.c */
+extern const struct file_operations ovl_file_operations;
+
+/* copy_up.c */
+int ovl_copy_up(struct dentry *dentry);
+int ovl_copy_up_with_data(struct dentry *dentry);
+int ovl_copy_up_flags(struct dentry *dentry, int flags);
+int ovl_maybe_copy_up(struct dentry *dentry, int flags);
+int ovl_copy_xattr(struct dentry *old, struct dentry *new);
+int ovl_set_attr(struct dentry *upper, struct kstat *stat);
+struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper);
+int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
+		   struct dentry *upper);
+
+/* export.c */
+extern const struct export_operations ovl_export_operations;
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
new file mode 100644
index 000000000..1a1adc697
--- /dev/null
+++ b/fs/overlayfs/ovl_entry.h
@@ -0,0 +1,122 @@
+/*
+ *
+ * Copyright (C) 2011 Novell Inc.
+ * Copyright (C) 2016 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+struct ovl_config {
+	char *lowerdir;
+	char *upperdir;
+	char *workdir;
+	bool default_permissions;
+	bool redirect_dir;
+	bool redirect_follow;
+	const char *redirect_mode;
+	bool index;
+	bool nfs_export;
+	int xino;
+	bool metacopy;
+};
+
+struct ovl_sb {
+	struct super_block *sb;
+	dev_t pseudo_dev;
+};
+
+struct ovl_layer {
+	struct vfsmount *mnt;
+	/* Trap in ovl inode cache */
+	struct inode *trap;
+	struct ovl_sb *fs;
+	/* Index of this layer in fs root (upper idx == 0) */
+	int idx;
+	/* One fsid per unique underlying sb (upper fsid == 0) */
+	int fsid;
+};
+
+struct ovl_path {
+	struct ovl_layer *layer;
+	struct dentry *dentry;
+};
+
+/* private information held for overlayfs's superblock */
+struct ovl_fs {
+	struct vfsmount *upper_mnt;
+	unsigned int numlower;
+	/* Number of unique lower sb that differ from upper sb */
+	unsigned int numlowerfs;
+	struct ovl_layer *lower_layers;
+	struct ovl_sb *lower_fs;
+	/* workbasedir is the path at workdir= mount option */
+	struct dentry *workbasedir;
+	/* workdir is the 'work' directory under workbasedir */
+	struct dentry *workdir;
+	/* index directory listing overlay inodes by origin file handle */
+	struct dentry *indexdir;
+	long namelen;
+	/* pathnames of lower and upper dirs, for show_options */
+	struct ovl_config config;
+	/* creds of process who forced instantiation of super block */
+	const struct cred *creator_cred;
+	bool tmpfile;
+	bool noxattr;
+	/* Did we take the inuse lock? */
+	bool upperdir_locked;
+	bool workdir_locked;
+	/* Traps in ovl inode cache */
+	struct inode *upperdir_trap;
+	struct inode *workbasedir_trap;
+	struct inode *workdir_trap;
+	struct inode *indexdir_trap;
+	/* Inode numbers in all layers do not use the high xino_bits */
+	unsigned int xino_bits;
+};
+
+/* private information held for every overlayfs dentry */
+struct ovl_entry {
+	union {
+		struct {
+			unsigned long flags;
+		};
+		struct rcu_head rcu;
+	};
+	unsigned numlower;
+	struct ovl_path lowerstack[];
+};
+
+struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
+
+static inline struct ovl_entry *OVL_E(struct dentry *dentry)
+{
+	return (struct ovl_entry *) dentry->d_fsdata;
+}
+
+struct ovl_inode {
+	union {
+		struct ovl_dir_cache *cache;	/* directory */
+		struct inode *lowerdata;	/* regular file */
+	};
+	const char *redirect;
+	u64 version;
+	unsigned long flags;
+	struct inode vfs_inode;
+	struct dentry *__upperdentry;
+	struct inode *lower;
+
+	/* synchronize copy up and more */
+	struct mutex lock;
+};
+
+static inline struct ovl_inode *OVL_I(struct inode *inode)
+{
+	return container_of(inode, struct ovl_inode, vfs_inode);
+}
+
+static inline struct dentry *ovl_upperdentry_dereference(struct ovl_inode *oi)
+{
+	return READ_ONCE(oi->__upperdentry);
+}
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
new file mode 100644
index 000000000..11b7941c5
--- /dev/null
+++ b/fs/overlayfs/readdir.c
@@ -0,0 +1,1161 @@
+/*
+ *
+ * Copyright (C) 2011 Novell Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/namei.h>
+#include <linux/file.h>
+#include <linux/xattr.h>
+#include <linux/rbtree.h>
+#include <linux/security.h>
+#include <linux/cred.h>
+#include <linux/ratelimit.h>
+#include "overlayfs.h"
+
+struct ovl_cache_entry {
+	unsigned int len;
+	unsigned int type;
+	u64 real_ino;
+	u64 ino;
+	struct list_head l_node;
+	struct rb_node node;
+	struct ovl_cache_entry *next_maybe_whiteout;
+	bool is_upper;
+	bool is_whiteout;
+	char name[];
+};
+
+struct ovl_dir_cache {
+	long refcount;
+	u64 version;
+	struct list_head entries;
+	struct rb_root root;
+};
+
+struct ovl_readdir_data {
+	struct dir_context ctx;
+	struct dentry *dentry;
+	bool is_lowest;
+	struct rb_root *root;
+	struct list_head *list;
+	struct list_head middle;
+	struct ovl_cache_entry *first_maybe_whiteout;
+	int count;
+	int err;
+	bool is_upper;
+	bool d_type_supported;
+};
+
+struct ovl_dir_file {
+	bool is_real;
+	bool is_upper;
+	struct ovl_dir_cache *cache;
+	struct list_head *cursor;
+	struct file *realfile;
+	struct file *upperfile;
+};
+
+static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
+{
+	return rb_entry(n, struct ovl_cache_entry, node);
+}
+
+static bool ovl_cache_entry_find_link(const char *name, int len,
+				      struct rb_node ***link,
+				      struct rb_node **parent)
+{
+	bool found = false;
+	struct rb_node **newp = *link;
+
+	while (!found && *newp) {
+		int cmp;
+		struct ovl_cache_entry *tmp;
+
+		*parent = *newp;
+		tmp = ovl_cache_entry_from_node(*newp);
+		cmp = strncmp(name, tmp->name, len);
+		if (cmp > 0)
+			newp = &tmp->node.rb_right;
+		else if (cmp < 0 || len < tmp->len)
+			newp = &tmp->node.rb_left;
+		else
+			found = true;
+	}
+	*link = newp;
+
+	return found;
+}
+
+static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
+						    const char *name, int len)
+{
+	struct rb_node *node = root->rb_node;
+	int cmp;
+
+	while (node) {
+		struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
+
+		cmp = strncmp(name, p->name, len);
+		if (cmp > 0)
+			node = p->node.rb_right;
+		else if (cmp < 0 || len < p->len)
+			node = p->node.rb_left;
+		else
+			return p;
+	}
+
+	return NULL;
+}
+
+static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
+			   struct ovl_cache_entry *p)
+{
+	/* Don't care if not doing ovl_iter() */
+	if (!rdd->dentry)
+		return false;
+
+	/* Always recalc d_ino when remapping lower inode numbers */
+	if (ovl_xino_bits(rdd->dentry->d_sb))
+		return true;
+
+	/* Always recalc d_ino for parent */
+	if (strcmp(p->name, "..") == 0)
+		return true;
+
+	/* If this is lower, then native d_ino will do */
+	if (!rdd->is_upper)
+		return false;
+
+	/*
+	 * Recalc d_ino for '.' and for all entries if dir is impure (contains
+	 * copied up entries)
+	 */
+	if ((p->name[0] == '.' && p->len == 1) ||
+	    ovl_test_flag(OVL_IMPURE, d_inode(rdd->dentry)))
+		return true;
+
+	return false;
+}
+
+static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
+						   const char *name, int len,
+						   u64 ino, unsigned int d_type)
+{
+	struct ovl_cache_entry *p;
+	size_t size = offsetof(struct ovl_cache_entry, name[len + 1]);
+
+	p = kmalloc(size, GFP_KERNEL);
+	if (!p)
+		return NULL;
+
+	memcpy(p->name, name, len);
+	p->name[len] = '\0';
+	p->len = len;
+	p->type = d_type;
+	p->real_ino = ino;
+	p->ino = ino;
+	/* Defer setting d_ino for upper entry to ovl_iterate() */
+	if (ovl_calc_d_ino(rdd, p))
+		p->ino = 0;
+	p->is_upper = rdd->is_upper;
+	p->is_whiteout = false;
+
+	if (d_type == DT_CHR) {
+		p->next_maybe_whiteout = rdd->first_maybe_whiteout;
+		rdd->first_maybe_whiteout = p;
+	}
+	return p;
+}
+
+static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
+				  const char *name, int len, u64 ino,
+				  unsigned int d_type)
+{
+	struct rb_node **newp = &rdd->root->rb_node;
+	struct rb_node *parent = NULL;
+	struct ovl_cache_entry *p;
+
+	if (ovl_cache_entry_find_link(name, len, &newp, &parent))
+		return 0;
+
+	p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
+	if (p == NULL) {
+		rdd->err = -ENOMEM;
+		return -ENOMEM;
+	}
+
+	list_add_tail(&p->l_node, rdd->list);
+	rb_link_node(&p->node, parent, newp);
+	rb_insert_color(&p->node, rdd->root);
+
+	return 0;
+}
+
+static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
+			   const char *name, int namelen,
+			   loff_t offset, u64 ino, unsigned int d_type)
+{
+	struct ovl_cache_entry *p;
+
+	p = ovl_cache_entry_find(rdd->root, name, namelen);
+	if (p) {
+		list_move_tail(&p->l_node, &rdd->middle);
+	} else {
+		p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
+		if (p == NULL)
+			rdd->err = -ENOMEM;
+		else
+			list_add_tail(&p->l_node, &rdd->middle);
+	}
+
+	return rdd->err;
+}
+
+void ovl_cache_free(struct list_head *list)
+{
+	struct ovl_cache_entry *p;
+	struct ovl_cache_entry *n;
+
+	list_for_each_entry_safe(p, n, list, l_node)
+		kfree(p);
+
+	INIT_LIST_HEAD(list);
+}
+
+void ovl_dir_cache_free(struct inode *inode)
+{
+	struct ovl_dir_cache *cache = ovl_dir_cache(inode);
+
+	if (cache) {
+		ovl_cache_free(&cache->entries);
+		kfree(cache);
+	}
+}
+
+static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
+{
+	struct ovl_dir_cache *cache = od->cache;
+
+	WARN_ON(cache->refcount <= 0);
+	cache->refcount--;
+	if (!cache->refcount) {
+		if (ovl_dir_cache(d_inode(dentry)) == cache)
+			ovl_set_dir_cache(d_inode(dentry), NULL);
+
+		ovl_cache_free(&cache->entries);
+		kfree(cache);
+	}
+}
+
+static int ovl_fill_merge(struct dir_context *ctx, const char *name,
+			  int namelen, loff_t offset, u64 ino,
+			  unsigned int d_type)
+{
+	struct ovl_readdir_data *rdd =
+		container_of(ctx, struct ovl_readdir_data, ctx);
+
+	rdd->count++;
+	if (!rdd->is_lowest)
+		return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
+	else
+		return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
+}
+
+static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
+{
+	int err;
+	struct ovl_cache_entry *p;
+	struct dentry *dentry;
+	const struct cred *old_cred;
+
+	old_cred = ovl_override_creds(rdd->dentry->d_sb);
+
+	err = down_write_killable(&dir->d_inode->i_rwsem);
+	if (!err) {
+		while (rdd->first_maybe_whiteout) {
+			p = rdd->first_maybe_whiteout;
+			rdd->first_maybe_whiteout = p->next_maybe_whiteout;
+			dentry = lookup_one_len(p->name, dir, p->len);
+			if (!IS_ERR(dentry)) {
+				p->is_whiteout = ovl_is_whiteout(dentry);
+				dput(dentry);
+			}
+		}
+		inode_unlock(dir->d_inode);
+	}
+	revert_creds(old_cred);
+
+	return err;
+}
+
+static inline int ovl_dir_read(struct path *realpath,
+			       struct ovl_readdir_data *rdd)
+{
+	struct file *realfile;
+	int err;
+
+	realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
+	if (IS_ERR(realfile))
+		return PTR_ERR(realfile);
+
+	rdd->first_maybe_whiteout = NULL;
+	rdd->ctx.pos = 0;
+	do {
+		rdd->count = 0;
+		rdd->err = 0;
+		err = iterate_dir(realfile, &rdd->ctx);
+		if (err >= 0)
+			err = rdd->err;
+	} while (!err && rdd->count);
+
+	if (!err && rdd->first_maybe_whiteout && rdd->dentry)
+		err = ovl_check_whiteouts(realpath->dentry, rdd);
+
+	fput(realfile);
+
+	return err;
+}
+
+/*
+ * Can we iterate real dir directly?
+ *
+ * Non-merge dir may contain whiteouts from a time it was a merge upper, before
+ * lower dir was removed under it and possibly before it was rotated from upper
+ * to lower layer.
+ */
+static bool ovl_dir_is_real(struct dentry *dir)
+{
+	return !ovl_test_flag(OVL_WHITEOUTS, d_inode(dir));
+}
+
+static void ovl_dir_reset(struct file *file)
+{
+	struct ovl_dir_file *od = file->private_data;
+	struct ovl_dir_cache *cache = od->cache;
+	struct dentry *dentry = file->f_path.dentry;
+	bool is_real;
+
+	if (cache && ovl_dentry_version_get(dentry) != cache->version) {
+		ovl_cache_put(od, dentry);
+		od->cache = NULL;
+		od->cursor = NULL;
+	}
+	is_real = ovl_dir_is_real(dentry);
+	if (od->is_real != is_real) {
+		/* is_real can only become false when dir is copied up */
+		if (WARN_ON(is_real))
+			return;
+		od->is_real = false;
+	}
+}
+
+static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
+	struct rb_root *root)
+{
+	int err;
+	struct path realpath;
+	struct ovl_readdir_data rdd = {
+		.ctx.actor = ovl_fill_merge,
+		.dentry = dentry,
+		.list = list,
+		.root = root,
+		.is_lowest = false,
+	};
+	int idx, next;
+
+	for (idx = 0; idx != -1; idx = next) {
+		next = ovl_path_next(idx, dentry, &realpath);
+		rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
+
+		if (next != -1) {
+			err = ovl_dir_read(&realpath, &rdd);
+			if (err)
+				break;
+		} else {
+			/*
+			 * Insert lowest layer entries before upper ones, this
+			 * allows offsets to be reasonably constant
+			 */
+			list_add(&rdd.middle, rdd.list);
+			rdd.is_lowest = true;
+			err = ovl_dir_read(&realpath, &rdd);
+			list_del(&rdd.middle);
+		}
+	}
+	return err;
+}
+
+static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
+{
+	struct list_head *p;
+	loff_t off = 0;
+
+	list_for_each(p, &od->cache->entries) {
+		if (off >= pos)
+			break;
+		off++;
+	}
+	/* Cursor is safe since the cache is stable */
+	od->cursor = p;
+}
+
+static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
+{
+	int res;
+	struct ovl_dir_cache *cache;
+
+	cache = ovl_dir_cache(d_inode(dentry));
+	if (cache && ovl_dentry_version_get(dentry) == cache->version) {
+		WARN_ON(!cache->refcount);
+		cache->refcount++;
+		return cache;
+	}
+	ovl_set_dir_cache(d_inode(dentry), NULL);
+
+	cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
+	if (!cache)
+		return ERR_PTR(-ENOMEM);
+
+	cache->refcount = 1;
+	INIT_LIST_HEAD(&cache->entries);
+	cache->root = RB_ROOT;
+
+	res = ovl_dir_read_merged(dentry, &cache->entries, &cache->root);
+	if (res) {
+		ovl_cache_free(&cache->entries);
+		kfree(cache);
+		return ERR_PTR(res);
+	}
+
+	cache->version = ovl_dentry_version_get(dentry);
+	ovl_set_dir_cache(d_inode(dentry), cache);
+
+	return cache;
+}
+
+/* Map inode number to lower fs unique range */
+static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
+			       const char *name, int namelen)
+{
+	if (ino >> (64 - xinobits)) {
+		pr_warn_ratelimited("overlayfs: d_ino too big (%.*s, ino=%llu, xinobits=%d)\n",
+				    namelen, name, ino, xinobits);
+		return ino;
+	}
+
+	return ino | ((u64)fsid) << (64 - xinobits);
+}
+
+/*
+ * Set d_ino for upper entries. Non-upper entries should always report
+ * the uppermost real inode ino and should not call this function.
+ *
+ * When not all layer are on same fs, report real ino also for upper.
+ *
+ * When all layers are on the same fs, and upper has a reference to
+ * copy up origin, call vfs_getattr() on the overlay entry to make
+ * sure that d_ino will be consistent with st_ino from stat(2).
+ */
+static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p)
+
+{
+	struct dentry *dir = path->dentry;
+	struct dentry *this = NULL;
+	enum ovl_path_type type;
+	u64 ino = p->real_ino;
+	int xinobits = ovl_xino_bits(dir->d_sb);
+	int err = 0;
+
+	if (!ovl_same_sb(dir->d_sb) && !xinobits)
+		goto out;
+
+	if (p->name[0] == '.') {
+		if (p->len == 1) {
+			this = dget(dir);
+			goto get;
+		}
+		if (p->len == 2 && p->name[1] == '.') {
+			/* we shall not be moved */
+			this = dget(dir->d_parent);
+			goto get;
+		}
+	}
+	this = lookup_one_len(p->name, dir, p->len);
+	if (IS_ERR_OR_NULL(this) || !this->d_inode) {
+		if (IS_ERR(this)) {
+			err = PTR_ERR(this);
+			this = NULL;
+			goto fail;
+		}
+		goto out;
+	}
+
+get:
+	type = ovl_path_type(this);
+	if (OVL_TYPE_ORIGIN(type)) {
+		struct kstat stat;
+		struct path statpath = *path;
+
+		statpath.dentry = this;
+		err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
+		if (err)
+			goto fail;
+
+		/*
+		 * Directory inode is always on overlay st_dev.
+		 * Non-dir with ovl_same_dev() could be on pseudo st_dev in case
+		 * of xino bits overflow.
+		 */
+		WARN_ON_ONCE(S_ISDIR(stat.mode) &&
+			     dir->d_sb->s_dev != stat.dev);
+		ino = stat.ino;
+	} else if (xinobits && !OVL_TYPE_UPPER(type)) {
+		ino = ovl_remap_lower_ino(ino, xinobits,
+					  ovl_layer_lower(this)->fsid,
+					  p->name, p->len);
+	}
+
+out:
+	p->ino = ino;
+	dput(this);
+	return err;
+
+fail:
+	pr_warn_ratelimited("overlayfs: failed to look up (%s) for ino (%i)\n",
+			    p->name, err);
+	goto out;
+}
+
+static int ovl_fill_plain(struct dir_context *ctx, const char *name,
+			  int namelen, loff_t offset, u64 ino,
+			  unsigned int d_type)
+{
+	struct ovl_cache_entry *p;
+	struct ovl_readdir_data *rdd =
+		container_of(ctx, struct ovl_readdir_data, ctx);
+
+	rdd->count++;
+	p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
+	if (p == NULL) {
+		rdd->err = -ENOMEM;
+		return -ENOMEM;
+	}
+	list_add_tail(&p->l_node, rdd->list);
+
+	return 0;
+}
+
+static int ovl_dir_read_impure(struct path *path,  struct list_head *list,
+			       struct rb_root *root)
+{
+	int err;
+	struct path realpath;
+	struct ovl_cache_entry *p, *n;
+	struct ovl_readdir_data rdd = {
+		.ctx.actor = ovl_fill_plain,
+		.list = list,
+		.root = root,
+	};
+
+	INIT_LIST_HEAD(list);
+	*root = RB_ROOT;
+	ovl_path_upper(path->dentry, &realpath);
+
+	err = ovl_dir_read(&realpath, &rdd);
+	if (err)
+		return err;
+
+	list_for_each_entry_safe(p, n, list, l_node) {
+		if (strcmp(p->name, ".") != 0 &&
+		    strcmp(p->name, "..") != 0) {
+			err = ovl_cache_update_ino(path, p);
+			if (err)
+				return err;
+		}
+		if (p->ino == p->real_ino) {
+			list_del(&p->l_node);
+			kfree(p);
+		} else {
+			struct rb_node **newp = &root->rb_node;
+			struct rb_node *parent = NULL;
+
+			if (WARN_ON(ovl_cache_entry_find_link(p->name, p->len,
+							      &newp, &parent)))
+				return -EIO;
+
+			rb_link_node(&p->node, parent, newp);
+			rb_insert_color(&p->node, root);
+		}
+	}
+	return 0;
+}
+
+static struct ovl_dir_cache *ovl_cache_get_impure(struct path *path)
+{
+	int res;
+	struct dentry *dentry = path->dentry;
+	struct ovl_dir_cache *cache;
+
+	cache = ovl_dir_cache(d_inode(dentry));
+	if (cache && ovl_dentry_version_get(dentry) == cache->version)
+		return cache;
+
+	/* Impure cache is not refcounted, free it here */
+	ovl_dir_cache_free(d_inode(dentry));
+	ovl_set_dir_cache(d_inode(dentry), NULL);
+
+	cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
+	if (!cache)
+		return ERR_PTR(-ENOMEM);
+
+	res = ovl_dir_read_impure(path, &cache->entries, &cache->root);
+	if (res) {
+		ovl_cache_free(&cache->entries);
+		kfree(cache);
+		return ERR_PTR(res);
+	}
+	if (list_empty(&cache->entries)) {
+		/*
+		 * A good opportunity to get rid of an unneeded "impure" flag.
+		 * Removing the "impure" xattr is best effort.
+		 */
+		if (!ovl_want_write(dentry)) {
+			ovl_do_removexattr(ovl_dentry_upper(dentry),
+					   OVL_XATTR_IMPURE);
+			ovl_drop_write(dentry);
+		}
+		ovl_clear_flag(OVL_IMPURE, d_inode(dentry));
+		kfree(cache);
+		return NULL;
+	}
+
+	cache->version = ovl_dentry_version_get(dentry);
+	ovl_set_dir_cache(d_inode(dentry), cache);
+
+	return cache;
+}
+
+struct ovl_readdir_translate {
+	struct dir_context *orig_ctx;
+	struct ovl_dir_cache *cache;
+	struct dir_context ctx;
+	u64 parent_ino;
+	int fsid;
+	int xinobits;
+};
+
+static int ovl_fill_real(struct dir_context *ctx, const char *name,
+			   int namelen, loff_t offset, u64 ino,
+			   unsigned int d_type)
+{
+	struct ovl_readdir_translate *rdt =
+		container_of(ctx, struct ovl_readdir_translate, ctx);
+	struct dir_context *orig_ctx = rdt->orig_ctx;
+
+	if (rdt->parent_ino && strcmp(name, "..") == 0) {
+		ino = rdt->parent_ino;
+	} else if (rdt->cache) {
+		struct ovl_cache_entry *p;
+
+		p = ovl_cache_entry_find(&rdt->cache->root, name, namelen);
+		if (p)
+			ino = p->ino;
+	} else if (rdt->xinobits) {
+		ino = ovl_remap_lower_ino(ino, rdt->xinobits, rdt->fsid,
+					  name, namelen);
+	}
+
+	return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
+}
+
+static bool ovl_is_impure_dir(struct file *file)
+{
+	struct ovl_dir_file *od = file->private_data;
+	struct inode *dir = d_inode(file->f_path.dentry);
+
+	/*
+	 * Only upper dir can be impure, but if we are in the middle of
+	 * iterating a lower real dir, dir could be copied up and marked
+	 * impure. We only want the impure cache if we started iterating
+	 * a real upper dir to begin with.
+	 */
+	return od->is_upper && ovl_test_flag(OVL_IMPURE, dir);
+
+}
+
+static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
+{
+	int err;
+	struct ovl_dir_file *od = file->private_data;
+	struct dentry *dir = file->f_path.dentry;
+	struct ovl_layer *lower_layer = ovl_layer_lower(dir);
+	struct ovl_readdir_translate rdt = {
+		.ctx.actor = ovl_fill_real,
+		.orig_ctx = ctx,
+		.xinobits = ovl_xino_bits(dir->d_sb),
+	};
+
+	if (rdt.xinobits && lower_layer)
+		rdt.fsid = lower_layer->fsid;
+
+	if (OVL_TYPE_MERGE(ovl_path_type(dir->d_parent))) {
+		struct kstat stat;
+		struct path statpath = file->f_path;
+
+		statpath.dentry = dir->d_parent;
+		err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
+		if (err)
+			return err;
+
+		WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev);
+		rdt.parent_ino = stat.ino;
+	}
+
+	if (ovl_is_impure_dir(file)) {
+		rdt.cache = ovl_cache_get_impure(&file->f_path);
+		if (IS_ERR(rdt.cache))
+			return PTR_ERR(rdt.cache);
+	}
+
+	err = iterate_dir(od->realfile, &rdt.ctx);
+	ctx->pos = rdt.ctx.pos;
+
+	return err;
+}
+
+
+static int ovl_iterate(struct file *file, struct dir_context *ctx)
+{
+	struct ovl_dir_file *od = file->private_data;
+	struct dentry *dentry = file->f_path.dentry;
+	struct ovl_cache_entry *p;
+	int err;
+
+	if (!ctx->pos)
+		ovl_dir_reset(file);
+
+	if (od->is_real) {
+		/*
+		 * If parent is merge, then need to adjust d_ino for '..', if
+		 * dir is impure then need to adjust d_ino for copied up
+		 * entries.
+		 */
+		if (ovl_xino_bits(dentry->d_sb) ||
+		    (ovl_same_sb(dentry->d_sb) &&
+		     (ovl_is_impure_dir(file) ||
+		      OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) {
+			return ovl_iterate_real(file, ctx);
+		}
+		return iterate_dir(od->realfile, ctx);
+	}
+
+	if (!od->cache) {
+		struct ovl_dir_cache *cache;
+
+		cache = ovl_cache_get(dentry);
+		if (IS_ERR(cache))
+			return PTR_ERR(cache);
+
+		od->cache = cache;
+		ovl_seek_cursor(od, ctx->pos);
+	}
+
+	while (od->cursor != &od->cache->entries) {
+		p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
+		if (!p->is_whiteout) {
+			if (!p->ino) {
+				err = ovl_cache_update_ino(&file->f_path, p);
+				if (err)
+					return err;
+			}
+			if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
+				break;
+		}
+		od->cursor = p->l_node.next;
+		ctx->pos++;
+	}
+	return 0;
+}
+
+static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
+{
+	loff_t res;
+	struct ovl_dir_file *od = file->private_data;
+
+	inode_lock(file_inode(file));
+	if (!file->f_pos)
+		ovl_dir_reset(file);
+
+	if (od->is_real) {
+		res = vfs_llseek(od->realfile, offset, origin);
+		file->f_pos = od->realfile->f_pos;
+	} else {
+		res = -EINVAL;
+
+		switch (origin) {
+		case SEEK_CUR:
+			offset += file->f_pos;
+			break;
+		case SEEK_SET:
+			break;
+		default:
+			goto out_unlock;
+		}
+		if (offset < 0)
+			goto out_unlock;
+
+		if (offset != file->f_pos) {
+			file->f_pos = offset;
+			if (od->cache)
+				ovl_seek_cursor(od, offset);
+		}
+		res = offset;
+	}
+out_unlock:
+	inode_unlock(file_inode(file));
+
+	return res;
+}
+
+static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
+			 int datasync)
+{
+	struct ovl_dir_file *od = file->private_data;
+	struct dentry *dentry = file->f_path.dentry;
+	struct file *realfile = od->realfile;
+
+	/* Nothing to sync for lower */
+	if (!OVL_TYPE_UPPER(ovl_path_type(dentry)))
+		return 0;
+
+	/*
+	 * Need to check if we started out being a lower dir, but got copied up
+	 */
+	if (!od->is_upper) {
+		struct inode *inode = file_inode(file);
+
+		realfile = READ_ONCE(od->upperfile);
+		if (!realfile) {
+			struct path upperpath;
+
+			ovl_path_upper(dentry, &upperpath);
+			realfile = ovl_path_open(&upperpath, O_RDONLY);
+
+			inode_lock(inode);
+			if (!od->upperfile) {
+				if (IS_ERR(realfile)) {
+					inode_unlock(inode);
+					return PTR_ERR(realfile);
+				}
+				smp_store_release(&od->upperfile, realfile);
+			} else {
+				/* somebody has beaten us to it */
+				if (!IS_ERR(realfile))
+					fput(realfile);
+				realfile = od->upperfile;
+			}
+			inode_unlock(inode);
+		}
+	}
+
+	return vfs_fsync_range(realfile, start, end, datasync);
+}
+
+static int ovl_dir_release(struct inode *inode, struct file *file)
+{
+	struct ovl_dir_file *od = file->private_data;
+
+	if (od->cache) {
+		inode_lock(inode);
+		ovl_cache_put(od, file->f_path.dentry);
+		inode_unlock(inode);
+	}
+	fput(od->realfile);
+	if (od->upperfile)
+		fput(od->upperfile);
+	kfree(od);
+
+	return 0;
+}
+
+static int ovl_dir_open(struct inode *inode, struct file *file)
+{
+	struct path realpath;
+	struct file *realfile;
+	struct ovl_dir_file *od;
+	enum ovl_path_type type;
+
+	od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
+	if (!od)
+		return -ENOMEM;
+
+	type = ovl_path_real(file->f_path.dentry, &realpath);
+	realfile = ovl_path_open(&realpath, file->f_flags);
+	if (IS_ERR(realfile)) {
+		kfree(od);
+		return PTR_ERR(realfile);
+	}
+	od->realfile = realfile;
+	od->is_real = ovl_dir_is_real(file->f_path.dentry);
+	od->is_upper = OVL_TYPE_UPPER(type);
+	file->private_data = od;
+
+	return 0;
+}
+
+const struct file_operations ovl_dir_operations = {
+	.read		= generic_read_dir,
+	.open		= ovl_dir_open,
+	.iterate	= ovl_iterate,
+	.llseek		= ovl_dir_llseek,
+	.fsync		= ovl_dir_fsync,
+	.release	= ovl_dir_release,
+};
+
+int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
+{
+	int err;
+	struct ovl_cache_entry *p, *n;
+	struct rb_root root = RB_ROOT;
+	const struct cred *old_cred;
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	err = ovl_dir_read_merged(dentry, list, &root);
+	revert_creds(old_cred);
+	if (err)
+		return err;
+
+	err = 0;
+
+	list_for_each_entry_safe(p, n, list, l_node) {
+		/*
+		 * Select whiteouts in upperdir, they should
+		 * be cleared when deleting this directory.
+		 */
+		if (p->is_whiteout) {
+			if (p->is_upper)
+				continue;
+			goto del_entry;
+		}
+
+		if (p->name[0] == '.') {
+			if (p->len == 1)
+				goto del_entry;
+			if (p->len == 2 && p->name[1] == '.')
+				goto del_entry;
+		}
+		err = -ENOTEMPTY;
+		break;
+
+del_entry:
+		list_del(&p->l_node);
+		kfree(p);
+	}
+
+	return err;
+}
+
+void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
+{
+	struct ovl_cache_entry *p;
+
+	inode_lock_nested(upper->d_inode, I_MUTEX_CHILD);
+	list_for_each_entry(p, list, l_node) {
+		struct dentry *dentry;
+
+		if (WARN_ON(!p->is_whiteout || !p->is_upper))
+			continue;
+
+		dentry = lookup_one_len(p->name, upper, p->len);
+		if (IS_ERR(dentry)) {
+			pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n",
+			       upper->d_name.name, p->len, p->name,
+			       (int) PTR_ERR(dentry));
+			continue;
+		}
+		if (dentry->d_inode)
+			ovl_cleanup(upper->d_inode, dentry);
+		dput(dentry);
+	}
+	inode_unlock(upper->d_inode);
+}
+
+static int ovl_check_d_type(struct dir_context *ctx, const char *name,
+			  int namelen, loff_t offset, u64 ino,
+			  unsigned int d_type)
+{
+	struct ovl_readdir_data *rdd =
+		container_of(ctx, struct ovl_readdir_data, ctx);
+
+	/* Even if d_type is not supported, DT_DIR is returned for . and .. */
+	if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen))
+		return 0;
+
+	if (d_type != DT_UNKNOWN)
+		rdd->d_type_supported = true;
+
+	return 0;
+}
+
+/*
+ * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values
+ * if error is encountered.
+ */
+int ovl_check_d_type_supported(struct path *realpath)
+{
+	int err;
+	struct ovl_readdir_data rdd = {
+		.ctx.actor = ovl_check_d_type,
+		.d_type_supported = false,
+	};
+
+	err = ovl_dir_read(realpath, &rdd);
+	if (err)
+		return err;
+
+	return rdd.d_type_supported;
+}
+
+static void ovl_workdir_cleanup_recurse(struct path *path, int level)
+{
+	int err;
+	struct inode *dir = path->dentry->d_inode;
+	LIST_HEAD(list);
+	struct rb_root root = RB_ROOT;
+	struct ovl_cache_entry *p;
+	struct ovl_readdir_data rdd = {
+		.ctx.actor = ovl_fill_merge,
+		.dentry = NULL,
+		.list = &list,
+		.root = &root,
+		.is_lowest = false,
+	};
+
+	err = ovl_dir_read(path, &rdd);
+	if (err)
+		goto out;
+
+	inode_lock_nested(dir, I_MUTEX_PARENT);
+	list_for_each_entry(p, &list, l_node) {
+		struct dentry *dentry;
+
+		if (p->name[0] == '.') {
+			if (p->len == 1)
+				continue;
+			if (p->len == 2 && p->name[1] == '.')
+				continue;
+		}
+		dentry = lookup_one_len(p->name, path->dentry, p->len);
+		if (IS_ERR(dentry))
+			continue;
+		if (dentry->d_inode)
+			ovl_workdir_cleanup(dir, path->mnt, dentry, level);
+		dput(dentry);
+	}
+	inode_unlock(dir);
+out:
+	ovl_cache_free(&list);
+}
+
+void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
+			 struct dentry *dentry, int level)
+{
+	int err;
+
+	if (!d_is_dir(dentry) || level > 1) {
+		ovl_cleanup(dir, dentry);
+		return;
+	}
+
+	err = ovl_do_rmdir(dir, dentry);
+	if (err) {
+		struct path path = { .mnt = mnt, .dentry = dentry };
+
+		inode_unlock(dir);
+		ovl_workdir_cleanup_recurse(&path, level + 1);
+		inode_lock_nested(dir, I_MUTEX_PARENT);
+		ovl_cleanup(dir, dentry);
+	}
+}
+
+int ovl_indexdir_cleanup(struct ovl_fs *ofs)
+{
+	int err;
+	struct dentry *indexdir = ofs->indexdir;
+	struct dentry *index = NULL;
+	struct inode *dir = indexdir->d_inode;
+	struct path path = { .mnt = ofs->upper_mnt, .dentry = indexdir };
+	LIST_HEAD(list);
+	struct rb_root root = RB_ROOT;
+	struct ovl_cache_entry *p;
+	struct ovl_readdir_data rdd = {
+		.ctx.actor = ovl_fill_merge,
+		.dentry = NULL,
+		.list = &list,
+		.root = &root,
+		.is_lowest = false,
+	};
+
+	err = ovl_dir_read(&path, &rdd);
+	if (err)
+		goto out;
+
+	inode_lock_nested(dir, I_MUTEX_PARENT);
+	list_for_each_entry(p, &list, l_node) {
+		if (p->name[0] == '.') {
+			if (p->len == 1)
+				continue;
+			if (p->len == 2 && p->name[1] == '.')
+				continue;
+		}
+		index = lookup_one_len(p->name, indexdir, p->len);
+		if (IS_ERR(index)) {
+			err = PTR_ERR(index);
+			index = NULL;
+			break;
+		}
+		err = ovl_verify_index(ofs, index);
+		if (!err) {
+			goto next;
+		} else if (err == -ESTALE) {
+			/* Cleanup stale index entries */
+			err = ovl_cleanup(dir, index);
+		} else if (err != -ENOENT) {
+			/*
+			 * Abort mount to avoid corrupting the index if
+			 * an incompatible index entry was found or on out
+			 * of memory.
+			 */
+			break;
+		} else if (ofs->config.nfs_export) {
+			/*
+			 * Whiteout orphan index to block future open by
+			 * handle after overlay nlink dropped to zero.
+			 */
+			err = ovl_cleanup_and_whiteout(indexdir, dir, index);
+		} else {
+			/* Cleanup orphan index entries */
+			err = ovl_cleanup(dir, index);
+		}
+
+		if (err)
+			break;
+
+next:
+		dput(index);
+		index = NULL;
+	}
+	dput(index);
+	inode_unlock(dir);
+out:
+	ovl_cache_free(&list);
+	if (err)
+		pr_err("overlayfs: failed index dir cleanup (%i)\n", err);
+	return err;
+}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
new file mode 100644
index 000000000..1a7a1e298
--- /dev/null
+++ b/fs/overlayfs/super.c
@@ -0,0 +1,1757 @@
+/*
+ *
+ * Copyright (C) 2011 Novell Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <uapi/linux/magic.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/xattr.h>
+#include <linux/mount.h>
+#include <linux/parser.h>
+#include <linux/module.h>
+#include <linux/statfs.h>
+#include <linux/seq_file.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/exportfs.h>
+#include "overlayfs.h"
+
+MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
+MODULE_DESCRIPTION("Overlay filesystem");
+MODULE_LICENSE("GPL");
+
+
+struct ovl_dir_cache;
+
+#define OVL_MAX_STACK 500
+
+static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
+module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
+MODULE_PARM_DESC(ovl_redirect_dir_def,
+		 "Default to on or off for the redirect_dir feature");
+
+static bool ovl_redirect_always_follow =
+	IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
+module_param_named(redirect_always_follow, ovl_redirect_always_follow,
+		   bool, 0644);
+MODULE_PARM_DESC(ovl_redirect_always_follow,
+		 "Follow redirects even if redirect_dir feature is turned off");
+
+static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
+module_param_named(index, ovl_index_def, bool, 0644);
+MODULE_PARM_DESC(ovl_index_def,
+		 "Default to on or off for the inodes index feature");
+
+static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT);
+module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
+MODULE_PARM_DESC(ovl_nfs_export_def,
+		 "Default to on or off for the NFS export feature");
+
+static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO);
+module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644);
+MODULE_PARM_DESC(ovl_xino_auto_def,
+		 "Auto enable xino feature");
+
+static void ovl_entry_stack_free(struct ovl_entry *oe)
+{
+	unsigned int i;
+
+	for (i = 0; i < oe->numlower; i++)
+		dput(oe->lowerstack[i].dentry);
+}
+
+static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY);
+module_param_named(metacopy, ovl_metacopy_def, bool, 0644);
+MODULE_PARM_DESC(ovl_metacopy_def,
+		 "Default to on or off for the metadata only copy up feature");
+
+static void ovl_dentry_release(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	if (oe) {
+		ovl_entry_stack_free(oe);
+		kfree_rcu(oe, rcu);
+	}
+}
+
+static struct dentry *ovl_d_real(struct dentry *dentry,
+				 const struct inode *inode)
+{
+	struct dentry *real = NULL, *lower;
+
+	/* It's an overlay file */
+	if (inode && d_inode(dentry) == inode)
+		return dentry;
+
+	if (!d_is_reg(dentry)) {
+		if (!inode || inode == d_inode(dentry))
+			return dentry;
+		goto bug;
+	}
+
+	real = ovl_dentry_upper(dentry);
+	if (real && (inode == d_inode(real)))
+		return real;
+
+	if (real && !inode && ovl_has_upperdata(d_inode(dentry)))
+		return real;
+
+	lower = ovl_dentry_lowerdata(dentry);
+	if (!lower)
+		goto bug;
+	real = lower;
+
+	/* Handle recursion */
+	real = d_real(real, inode);
+
+	if (!inode || inode == d_inode(real))
+		return real;
+bug:
+	WARN(1, "%s(%pd4, %s:%lu): real dentry (%p/%lu) not found\n",
+	     __func__, dentry, inode ? inode->i_sb->s_id : "NULL",
+	     inode ? inode->i_ino : 0, real,
+	     real && d_inode(real) ? d_inode(real)->i_ino : 0);
+	return dentry;
+}
+
+static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+	unsigned int i;
+	int ret = 1;
+
+	for (i = 0; i < oe->numlower; i++) {
+		struct dentry *d = oe->lowerstack[i].dentry;
+
+		if (d->d_flags & DCACHE_OP_REVALIDATE) {
+			ret = d->d_op->d_revalidate(d, flags);
+			if (ret < 0)
+				return ret;
+			if (!ret) {
+				if (!(flags & LOOKUP_RCU))
+					d_invalidate(d);
+				return -ESTALE;
+			}
+		}
+	}
+	return 1;
+}
+
+static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+	unsigned int i;
+	int ret = 1;
+
+	for (i = 0; i < oe->numlower; i++) {
+		struct dentry *d = oe->lowerstack[i].dentry;
+
+		if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) {
+			ret = d->d_op->d_weak_revalidate(d, flags);
+			if (ret <= 0)
+				break;
+		}
+	}
+	return ret;
+}
+
+static const struct dentry_operations ovl_dentry_operations = {
+	.d_release = ovl_dentry_release,
+	.d_real = ovl_d_real,
+};
+
+static const struct dentry_operations ovl_reval_dentry_operations = {
+	.d_release = ovl_dentry_release,
+	.d_real = ovl_d_real,
+	.d_revalidate = ovl_dentry_revalidate,
+	.d_weak_revalidate = ovl_dentry_weak_revalidate,
+};
+
+static struct kmem_cache *ovl_inode_cachep;
+
+static struct inode *ovl_alloc_inode(struct super_block *sb)
+{
+	struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);
+
+	if (!oi)
+		return NULL;
+
+	oi->cache = NULL;
+	oi->redirect = NULL;
+	oi->version = 0;
+	oi->flags = 0;
+	oi->__upperdentry = NULL;
+	oi->lower = NULL;
+	oi->lowerdata = NULL;
+	mutex_init(&oi->lock);
+
+	return &oi->vfs_inode;
+}
+
+static void ovl_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+
+	kmem_cache_free(ovl_inode_cachep, OVL_I(inode));
+}
+
+static void ovl_destroy_inode(struct inode *inode)
+{
+	struct ovl_inode *oi = OVL_I(inode);
+
+	dput(oi->__upperdentry);
+	iput(oi->lower);
+	if (S_ISDIR(inode->i_mode))
+		ovl_dir_cache_free(inode);
+	else
+		iput(oi->lowerdata);
+	kfree(oi->redirect);
+	mutex_destroy(&oi->lock);
+
+	call_rcu(&inode->i_rcu, ovl_i_callback);
+}
+
+static void ovl_free_fs(struct ovl_fs *ofs)
+{
+	unsigned i;
+
+	iput(ofs->workbasedir_trap);
+	iput(ofs->indexdir_trap);
+	iput(ofs->workdir_trap);
+	iput(ofs->upperdir_trap);
+	dput(ofs->indexdir);
+	dput(ofs->workdir);
+	if (ofs->workdir_locked)
+		ovl_inuse_unlock(ofs->workbasedir);
+	dput(ofs->workbasedir);
+	if (ofs->upperdir_locked)
+		ovl_inuse_unlock(ofs->upper_mnt->mnt_root);
+	mntput(ofs->upper_mnt);
+	for (i = 0; i < ofs->numlower; i++) {
+		iput(ofs->lower_layers[i].trap);
+		mntput(ofs->lower_layers[i].mnt);
+	}
+	for (i = 0; i < ofs->numlowerfs; i++)
+		free_anon_bdev(ofs->lower_fs[i].pseudo_dev);
+	kfree(ofs->lower_layers);
+	kfree(ofs->lower_fs);
+
+	kfree(ofs->config.lowerdir);
+	kfree(ofs->config.upperdir);
+	kfree(ofs->config.workdir);
+	kfree(ofs->config.redirect_mode);
+	if (ofs->creator_cred)
+		put_cred(ofs->creator_cred);
+	kfree(ofs);
+}
+
+static void ovl_put_super(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	ovl_free_fs(ofs);
+}
+
+/* Sync real dirty inodes in upper filesystem (if it exists) */
+static int ovl_sync_fs(struct super_block *sb, int wait)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+	struct super_block *upper_sb;
+	int ret;
+
+	if (!ofs->upper_mnt)
+		return 0;
+
+	/*
+	 * If this is a sync(2) call or an emergency sync, all the super blocks
+	 * will be iterated, including upper_sb, so no need to do anything.
+	 *
+	 * If this is a syncfs(2) call, then we do need to call
+	 * sync_filesystem() on upper_sb, but enough if we do it when being
+	 * called with wait == 1.
+	 */
+	if (!wait)
+		return 0;
+
+	upper_sb = ofs->upper_mnt->mnt_sb;
+
+	down_read(&upper_sb->s_umount);
+	ret = sync_filesystem(upper_sb);
+	up_read(&upper_sb->s_umount);
+
+	return ret;
+}
+
+/**
+ * ovl_statfs
+ * @sb: The overlayfs super block
+ * @buf: The struct kstatfs to fill in with stats
+ *
+ * Get the filesystem statistics.  As writes always target the upper layer
+ * filesystem pass the statfs to the upper filesystem (if it exists)
+ */
+static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+	struct dentry *root_dentry = dentry->d_sb->s_root;
+	struct path path;
+	int err;
+
+	ovl_path_real(root_dentry, &path);
+
+	err = vfs_statfs(&path, buf);
+	if (!err) {
+		buf->f_namelen = ofs->namelen;
+		buf->f_type = OVERLAYFS_SUPER_MAGIC;
+	}
+
+	return err;
+}
+
+/* Will this overlay be forced to mount/remount ro? */
+static bool ovl_force_readonly(struct ovl_fs *ofs)
+{
+	return (!ofs->upper_mnt || !ofs->workdir);
+}
+
+static const char *ovl_redirect_mode_def(void)
+{
+	return ovl_redirect_dir_def ? "on" : "off";
+}
+
+enum {
+	OVL_XINO_OFF,
+	OVL_XINO_AUTO,
+	OVL_XINO_ON,
+};
+
+static const char * const ovl_xino_str[] = {
+	"off",
+	"auto",
+	"on",
+};
+
+static inline int ovl_xino_def(void)
+{
+	return ovl_xino_auto_def ? OVL_XINO_AUTO : OVL_XINO_OFF;
+}
+
+/**
+ * ovl_show_options
+ *
+ * Prints the mount options for a given superblock.
+ * Returns zero; does not fail.
+ */
+static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
+{
+	struct super_block *sb = dentry->d_sb;
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	seq_show_option(m, "lowerdir", ofs->config.lowerdir);
+	if (ofs->config.upperdir) {
+		seq_show_option(m, "upperdir", ofs->config.upperdir);
+		seq_show_option(m, "workdir", ofs->config.workdir);
+	}
+	if (ofs->config.default_permissions)
+		seq_puts(m, ",default_permissions");
+	if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0)
+		seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
+	if (ofs->config.index != ovl_index_def)
+		seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
+	if (ofs->config.nfs_export != ovl_nfs_export_def)
+		seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
+						"on" : "off");
+	if (ofs->config.xino != ovl_xino_def())
+		seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]);
+	if (ofs->config.metacopy != ovl_metacopy_def)
+		seq_printf(m, ",metacopy=%s",
+			   ofs->config.metacopy ? "on" : "off");
+	return 0;
+}
+
+static int ovl_remount(struct super_block *sb, int *flags, char *data)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs))
+		return -EROFS;
+
+	return 0;
+}
+
+static const struct super_operations ovl_super_operations = {
+	.alloc_inode	= ovl_alloc_inode,
+	.destroy_inode	= ovl_destroy_inode,
+	.drop_inode	= generic_delete_inode,
+	.put_super	= ovl_put_super,
+	.sync_fs	= ovl_sync_fs,
+	.statfs		= ovl_statfs,
+	.show_options	= ovl_show_options,
+	.remount_fs	= ovl_remount,
+};
+
+enum {
+	OPT_LOWERDIR,
+	OPT_UPPERDIR,
+	OPT_WORKDIR,
+	OPT_DEFAULT_PERMISSIONS,
+	OPT_REDIRECT_DIR,
+	OPT_INDEX_ON,
+	OPT_INDEX_OFF,
+	OPT_NFS_EXPORT_ON,
+	OPT_NFS_EXPORT_OFF,
+	OPT_XINO_ON,
+	OPT_XINO_OFF,
+	OPT_XINO_AUTO,
+	OPT_METACOPY_ON,
+	OPT_METACOPY_OFF,
+	OPT_ERR,
+};
+
+static const match_table_t ovl_tokens = {
+	{OPT_LOWERDIR,			"lowerdir=%s"},
+	{OPT_UPPERDIR,			"upperdir=%s"},
+	{OPT_WORKDIR,			"workdir=%s"},
+	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
+	{OPT_REDIRECT_DIR,		"redirect_dir=%s"},
+	{OPT_INDEX_ON,			"index=on"},
+	{OPT_INDEX_OFF,			"index=off"},
+	{OPT_NFS_EXPORT_ON,		"nfs_export=on"},
+	{OPT_NFS_EXPORT_OFF,		"nfs_export=off"},
+	{OPT_XINO_ON,			"xino=on"},
+	{OPT_XINO_OFF,			"xino=off"},
+	{OPT_XINO_AUTO,			"xino=auto"},
+	{OPT_METACOPY_ON,		"metacopy=on"},
+	{OPT_METACOPY_OFF,		"metacopy=off"},
+	{OPT_ERR,			NULL}
+};
+
+static char *ovl_next_opt(char **s)
+{
+	char *sbegin = *s;
+	char *p;
+
+	if (sbegin == NULL)
+		return NULL;
+
+	for (p = sbegin; *p; p++) {
+		if (*p == '\\') {
+			p++;
+			if (!*p)
+				break;
+		} else if (*p == ',') {
+			*p = '\0';
+			*s = p + 1;
+			return sbegin;
+		}
+	}
+	*s = NULL;
+	return sbegin;
+}
+
+static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
+{
+	if (strcmp(mode, "on") == 0) {
+		config->redirect_dir = true;
+		/*
+		 * Does not make sense to have redirect creation without
+		 * redirect following.
+		 */
+		config->redirect_follow = true;
+	} else if (strcmp(mode, "follow") == 0) {
+		config->redirect_follow = true;
+	} else if (strcmp(mode, "off") == 0) {
+		if (ovl_redirect_always_follow)
+			config->redirect_follow = true;
+	} else if (strcmp(mode, "nofollow") != 0) {
+		pr_err("overlayfs: bad mount option \"redirect_dir=%s\"\n",
+		       mode);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ovl_parse_opt(char *opt, struct ovl_config *config)
+{
+	char *p;
+	int err;
+	bool metacopy_opt = false, redirect_opt = false;
+
+	config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
+	if (!config->redirect_mode)
+		return -ENOMEM;
+
+	while ((p = ovl_next_opt(&opt)) != NULL) {
+		int token;
+		substring_t args[MAX_OPT_ARGS];
+
+		if (!*p)
+			continue;
+
+		token = match_token(p, ovl_tokens, args);
+		switch (token) {
+		case OPT_UPPERDIR:
+			kfree(config->upperdir);
+			config->upperdir = match_strdup(&args[0]);
+			if (!config->upperdir)
+				return -ENOMEM;
+			break;
+
+		case OPT_LOWERDIR:
+			kfree(config->lowerdir);
+			config->lowerdir = match_strdup(&args[0]);
+			if (!config->lowerdir)
+				return -ENOMEM;
+			break;
+
+		case OPT_WORKDIR:
+			kfree(config->workdir);
+			config->workdir = match_strdup(&args[0]);
+			if (!config->workdir)
+				return -ENOMEM;
+			break;
+
+		case OPT_DEFAULT_PERMISSIONS:
+			config->default_permissions = true;
+			break;
+
+		case OPT_REDIRECT_DIR:
+			kfree(config->redirect_mode);
+			config->redirect_mode = match_strdup(&args[0]);
+			if (!config->redirect_mode)
+				return -ENOMEM;
+			redirect_opt = true;
+			break;
+
+		case OPT_INDEX_ON:
+			config->index = true;
+			break;
+
+		case OPT_INDEX_OFF:
+			config->index = false;
+			break;
+
+		case OPT_NFS_EXPORT_ON:
+			config->nfs_export = true;
+			break;
+
+		case OPT_NFS_EXPORT_OFF:
+			config->nfs_export = false;
+			break;
+
+		case OPT_XINO_ON:
+			config->xino = OVL_XINO_ON;
+			break;
+
+		case OPT_XINO_OFF:
+			config->xino = OVL_XINO_OFF;
+			break;
+
+		case OPT_XINO_AUTO:
+			config->xino = OVL_XINO_AUTO;
+			break;
+
+		case OPT_METACOPY_ON:
+			config->metacopy = true;
+			metacopy_opt = true;
+			break;
+
+		case OPT_METACOPY_OFF:
+			config->metacopy = false;
+			break;
+
+		default:
+			pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
+			return -EINVAL;
+		}
+	}
+
+	/* Workdir is useless in non-upper mount */
+	if (!config->upperdir && config->workdir) {
+		pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
+			config->workdir);
+		kfree(config->workdir);
+		config->workdir = NULL;
+	}
+
+	err = ovl_parse_redirect_mode(config, config->redirect_mode);
+	if (err)
+		return err;
+
+	/*
+	 * This is to make the logic below simpler.  It doesn't make any other
+	 * difference, since config->redirect_dir is only used for upper.
+	 */
+	if (!config->upperdir && config->redirect_follow)
+		config->redirect_dir = true;
+
+	/* Resolve metacopy -> redirect_dir dependency */
+	if (config->metacopy && !config->redirect_dir) {
+		if (metacopy_opt && redirect_opt) {
+			pr_err("overlayfs: conflicting options: metacopy=on,redirect_dir=%s\n",
+			       config->redirect_mode);
+			return -EINVAL;
+		}
+		if (redirect_opt) {
+			/*
+			 * There was an explicit redirect_dir=... that resulted
+			 * in this conflict.
+			 */
+			pr_info("overlayfs: disabling metacopy due to redirect_dir=%s\n",
+				config->redirect_mode);
+			config->metacopy = false;
+		} else {
+			/* Automatically enable redirect otherwise. */
+			config->redirect_follow = config->redirect_dir = true;
+		}
+	}
+
+	return 0;
+}
+
+#define OVL_WORKDIR_NAME "work"
+#define OVL_INDEXDIR_NAME "index"
+
+static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
+					 const char *name, bool persist)
+{
+	struct inode *dir =  ofs->workbasedir->d_inode;
+	struct vfsmount *mnt = ofs->upper_mnt;
+	struct dentry *work;
+	int err;
+	bool retried = false;
+	bool locked = false;
+
+	inode_lock_nested(dir, I_MUTEX_PARENT);
+	locked = true;
+
+retry:
+	work = lookup_one_len(name, ofs->workbasedir, strlen(name));
+
+	if (!IS_ERR(work)) {
+		struct iattr attr = {
+			.ia_valid = ATTR_MODE,
+			.ia_mode = S_IFDIR | 0,
+		};
+
+		if (work->d_inode) {
+			err = -EEXIST;
+			if (retried)
+				goto out_dput;
+
+			if (persist)
+				goto out_unlock;
+
+			retried = true;
+			ovl_workdir_cleanup(dir, mnt, work, 0);
+			dput(work);
+			goto retry;
+		}
+
+		err = ovl_mkdir_real(dir, &work, attr.ia_mode);
+		if (err)
+			goto out_dput;
+
+		/* Weird filesystem returning with hashed negative (kernfs)? */
+		err = -EINVAL;
+		if (d_really_is_negative(work))
+			goto out_dput;
+
+		/*
+		 * Try to remove POSIX ACL xattrs from workdir.  We are good if:
+		 *
+		 * a) success (there was a POSIX ACL xattr and was removed)
+		 * b) -ENODATA (there was no POSIX ACL xattr)
+		 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported)
+		 *
+		 * There are various other error values that could effectively
+		 * mean that the xattr doesn't exist (e.g. -ERANGE is returned
+		 * if the xattr name is too long), but the set of filesystems
+		 * allowed as upper are limited to "normal" ones, where checking
+		 * for the above two errors is sufficient.
+		 */
+		err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT);
+		if (err && err != -ENODATA && err != -EOPNOTSUPP)
+			goto out_dput;
+
+		err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS);
+		if (err && err != -ENODATA && err != -EOPNOTSUPP)
+			goto out_dput;
+
+		/* Clear any inherited mode bits */
+		inode_lock(work->d_inode);
+		err = notify_change(work, &attr, NULL);
+		inode_unlock(work->d_inode);
+		if (err)
+			goto out_dput;
+	} else {
+		err = PTR_ERR(work);
+		goto out_err;
+	}
+out_unlock:
+	if (locked)
+		inode_unlock(dir);
+
+	return work;
+
+out_dput:
+	dput(work);
+out_err:
+	pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
+		ofs->config.workdir, name, -err);
+	work = NULL;
+	goto out_unlock;
+}
+
+static void ovl_unescape(char *s)
+{
+	char *d = s;
+
+	for (;; s++, d++) {
+		if (*s == '\\')
+			s++;
+		*d = *s;
+		if (!*s)
+			break;
+	}
+}
+
+static int ovl_mount_dir_noesc(const char *name, struct path *path)
+{
+	int err = -EINVAL;
+
+	if (!*name) {
+		pr_err("overlayfs: empty lowerdir\n");
+		goto out;
+	}
+	err = kern_path(name, LOOKUP_FOLLOW, path);
+	if (err) {
+		pr_err("overlayfs: failed to resolve '%s': %i\n", name, err);
+		goto out;
+	}
+	err = -EINVAL;
+	if (ovl_dentry_weird(path->dentry)) {
+		pr_err("overlayfs: filesystem on '%s' not supported\n", name);
+		goto out_put;
+	}
+	if (!d_is_dir(path->dentry)) {
+		pr_err("overlayfs: '%s' not a directory\n", name);
+		goto out_put;
+	}
+	return 0;
+
+out_put:
+	path_put_init(path);
+out:
+	return err;
+}
+
+static int ovl_mount_dir(const char *name, struct path *path)
+{
+	int err = -ENOMEM;
+	char *tmp = kstrdup(name, GFP_KERNEL);
+
+	if (tmp) {
+		ovl_unescape(tmp);
+		err = ovl_mount_dir_noesc(tmp, path);
+
+		if (!err)
+			if (ovl_dentry_remote(path->dentry)) {
+				pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n",
+				       tmp);
+				path_put_init(path);
+				err = -EINVAL;
+			}
+		kfree(tmp);
+	}
+	return err;
+}
+
+static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs,
+			     const char *name)
+{
+	struct kstatfs statfs;
+	int err = vfs_statfs(path, &statfs);
+
+	if (err)
+		pr_err("overlayfs: statfs failed on '%s'\n", name);
+	else
+		ofs->namelen = max(ofs->namelen, statfs.f_namelen);
+
+	return err;
+}
+
+static int ovl_lower_dir(const char *name, struct path *path,
+			 struct ovl_fs *ofs, int *stack_depth, bool *remote)
+{
+	int fh_type;
+	int err;
+
+	err = ovl_mount_dir_noesc(name, path);
+	if (err)
+		goto out;
+
+	err = ovl_check_namelen(path, ofs, name);
+	if (err)
+		goto out_put;
+
+	*stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth);
+
+	if (ovl_dentry_remote(path->dentry))
+		*remote = true;
+
+	/*
+	 * The inodes index feature and NFS export need to encode and decode
+	 * file handles, so they require that all layers support them.
+	 */
+	fh_type = ovl_can_decode_fh(path->dentry->d_sb);
+	if ((ofs->config.nfs_export ||
+	     (ofs->config.index && ofs->config.upperdir)) && !fh_type) {
+		ofs->config.index = false;
+		ofs->config.nfs_export = false;
+		pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
+			name);
+	}
+
+	/* Check if lower fs has 32bit inode numbers */
+	if (fh_type != FILEID_INO32_GEN)
+		ofs->xino_bits = 0;
+
+	return 0;
+
+out_put:
+	path_put_init(path);
+out:
+	return err;
+}
+
+/* Workdir should not be subdir of upperdir and vice versa */
+static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
+{
+	bool ok = false;
+
+	if (workdir != upperdir) {
+		ok = (lock_rename(workdir, upperdir) == NULL);
+		unlock_rename(workdir, upperdir);
+	}
+	return ok;
+}
+
+static unsigned int ovl_split_lowerdirs(char *str)
+{
+	unsigned int ctr = 1;
+	char *s, *d;
+
+	for (s = d = str;; s++, d++) {
+		if (*s == '\\') {
+			s++;
+		} else if (*s == ':') {
+			*d = '\0';
+			ctr++;
+			continue;
+		}
+		*d = *s;
+		if (!*s)
+			break;
+	}
+	return ctr;
+}
+
+static int __maybe_unused
+ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
+			struct dentry *dentry, struct inode *inode,
+			const char *name, void *buffer, size_t size)
+{
+	return ovl_xattr_get(dentry, inode, handler->name, buffer, size);
+}
+
+static int __maybe_unused
+ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
+			struct dentry *dentry, struct inode *inode,
+			const char *name, const void *value,
+			size_t size, int flags)
+{
+	struct dentry *workdir = ovl_workdir(dentry);
+	struct inode *realinode = ovl_inode_real(inode);
+	struct posix_acl *acl = NULL;
+	int err;
+
+	/* Check that everything is OK before copy-up */
+	if (value) {
+		acl = posix_acl_from_xattr(&init_user_ns, value, size);
+		if (IS_ERR(acl))
+			return PTR_ERR(acl);
+	}
+	err = -EOPNOTSUPP;
+	if (!IS_POSIXACL(d_inode(workdir)))
+		goto out_acl_release;
+	if (!realinode->i_op->set_acl)
+		goto out_acl_release;
+	if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) {
+		err = acl ? -EACCES : 0;
+		goto out_acl_release;
+	}
+	err = -EPERM;
+	if (!inode_owner_or_capable(inode))
+		goto out_acl_release;
+
+	posix_acl_release(acl);
+
+	/*
+	 * Check if sgid bit needs to be cleared (actual setacl operation will
+	 * be done with mounter's capabilities and so that won't do it for us).
+	 */
+	if (unlikely(inode->i_mode & S_ISGID) &&
+	    handler->flags == ACL_TYPE_ACCESS &&
+	    !in_group_p(inode->i_gid) &&
+	    !capable_wrt_inode_uidgid(inode, CAP_FSETID)) {
+		struct iattr iattr = { .ia_valid = ATTR_KILL_SGID };
+
+		err = ovl_setattr(dentry, &iattr);
+		if (err)
+			return err;
+	}
+
+	err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags);
+	if (!err)
+		ovl_copyattr(ovl_inode_real(inode), inode);
+
+	return err;
+
+out_acl_release:
+	posix_acl_release(acl);
+	return err;
+}
+
+static int ovl_own_xattr_get(const struct xattr_handler *handler,
+			     struct dentry *dentry, struct inode *inode,
+			     const char *name, void *buffer, size_t size)
+{
+	return -EOPNOTSUPP;
+}
+
+static int ovl_own_xattr_set(const struct xattr_handler *handler,
+			     struct dentry *dentry, struct inode *inode,
+			     const char *name, const void *value,
+			     size_t size, int flags)
+{
+	return -EOPNOTSUPP;
+}
+
+static int ovl_other_xattr_get(const struct xattr_handler *handler,
+			       struct dentry *dentry, struct inode *inode,
+			       const char *name, void *buffer, size_t size)
+{
+	return ovl_xattr_get(dentry, inode, name, buffer, size);
+}
+
+static int ovl_other_xattr_set(const struct xattr_handler *handler,
+			       struct dentry *dentry, struct inode *inode,
+			       const char *name, const void *value,
+			       size_t size, int flags)
+{
+	return ovl_xattr_set(dentry, inode, name, value, size, flags);
+}
+
+static const struct xattr_handler __maybe_unused
+ovl_posix_acl_access_xattr_handler = {
+	.name = XATTR_NAME_POSIX_ACL_ACCESS,
+	.flags = ACL_TYPE_ACCESS,
+	.get = ovl_posix_acl_xattr_get,
+	.set = ovl_posix_acl_xattr_set,
+};
+
+static const struct xattr_handler __maybe_unused
+ovl_posix_acl_default_xattr_handler = {
+	.name = XATTR_NAME_POSIX_ACL_DEFAULT,
+	.flags = ACL_TYPE_DEFAULT,
+	.get = ovl_posix_acl_xattr_get,
+	.set = ovl_posix_acl_xattr_set,
+};
+
+static const struct xattr_handler ovl_own_xattr_handler = {
+	.prefix	= OVL_XATTR_PREFIX,
+	.get = ovl_own_xattr_get,
+	.set = ovl_own_xattr_set,
+};
+
+static const struct xattr_handler ovl_other_xattr_handler = {
+	.prefix	= "", /* catch all */
+	.get = ovl_other_xattr_get,
+	.set = ovl_other_xattr_set,
+};
+
+static const struct xattr_handler *ovl_xattr_handlers[] = {
+#ifdef CONFIG_FS_POSIX_ACL
+	&ovl_posix_acl_access_xattr_handler,
+	&ovl_posix_acl_default_xattr_handler,
+#endif
+	&ovl_own_xattr_handler,
+	&ovl_other_xattr_handler,
+	NULL
+};
+
+static int ovl_setup_trap(struct super_block *sb, struct dentry *dir,
+			  struct inode **ptrap, const char *name)
+{
+	struct inode *trap;
+	int err;
+
+	trap = ovl_get_trap_inode(sb, dir);
+	err = PTR_ERR_OR_ZERO(trap);
+	if (err) {
+		if (err == -ELOOP)
+			pr_err("overlayfs: conflicting %s path\n", name);
+		return err;
+	}
+
+	*ptrap = trap;
+	return 0;
+}
+
+/*
+ * Determine how we treat concurrent use of upperdir/workdir based on the
+ * index feature. This is papering over mount leaks of container runtimes,
+ * for example, an old overlay mount is leaked and now its upperdir is
+ * attempted to be used as a lower layer in a new overlay mount.
+ */
+static int ovl_report_in_use(struct ovl_fs *ofs, const char *name)
+{
+	if (ofs->config.index) {
+		pr_err("overlayfs: %s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n",
+		       name);
+		return -EBUSY;
+	} else {
+		pr_warn("overlayfs: %s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n",
+			name);
+		return 0;
+	}
+}
+
+static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
+			 struct path *upperpath)
+{
+	struct vfsmount *upper_mnt;
+	int err;
+
+	err = ovl_mount_dir(ofs->config.upperdir, upperpath);
+	if (err)
+		goto out;
+
+	/* Upper fs should not be r/o */
+	if (sb_rdonly(upperpath->mnt->mnt_sb)) {
+		pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir);
+	if (err)
+		goto out;
+
+	err = ovl_setup_trap(sb, upperpath->dentry, &ofs->upperdir_trap,
+			     "upperdir");
+	if (err)
+		goto out;
+
+	upper_mnt = clone_private_mount(upperpath);
+	err = PTR_ERR(upper_mnt);
+	if (IS_ERR(upper_mnt)) {
+		pr_err("overlayfs: failed to clone upperpath\n");
+		goto out;
+	}
+
+	/* Don't inherit atime flags */
+	upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
+	ofs->upper_mnt = upper_mnt;
+
+	if (ovl_inuse_trylock(ofs->upper_mnt->mnt_root)) {
+		ofs->upperdir_locked = true;
+	} else {
+		err = ovl_report_in_use(ofs, "upperdir");
+		if (err)
+			goto out;
+	}
+
+	err = 0;
+out:
+	return err;
+}
+
+static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
+			    struct path *workpath)
+{
+	struct vfsmount *mnt = ofs->upper_mnt;
+	struct dentry *temp;
+	int fh_type;
+	int err;
+
+	err = mnt_want_write(mnt);
+	if (err)
+		return err;
+
+	ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
+	if (!ofs->workdir)
+		goto out;
+
+	err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir");
+	if (err)
+		goto out;
+
+	/*
+	 * Upper should support d_type, else whiteouts are visible.  Given
+	 * workdir and upper are on same fs, we can do iterate_dir() on
+	 * workdir. This check requires successful creation of workdir in
+	 * previous step.
+	 */
+	err = ovl_check_d_type_supported(workpath);
+	if (err < 0)
+		goto out;
+
+	/*
+	 * We allowed this configuration and don't want to break users over
+	 * kernel upgrade. So warn instead of erroring out.
+	 */
+	if (!err)
+		pr_warn("overlayfs: upper fs needs to support d_type.\n");
+
+	/* Check if upper/work fs supports O_TMPFILE */
+	temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0);
+	ofs->tmpfile = !IS_ERR(temp);
+	if (ofs->tmpfile)
+		dput(temp);
+	else
+		pr_warn("overlayfs: upper fs does not support tmpfile.\n");
+
+	/*
+	 * Check if upper/work fs supports trusted.overlay.* xattr
+	 */
+	err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0);
+	if (err) {
+		ofs->noxattr = true;
+		ofs->config.index = false;
+		ofs->config.metacopy = false;
+		pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off and metacopy=off.\n");
+		err = 0;
+	} else {
+		vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
+	}
+
+	/* Check if upper/work fs supports file handles */
+	fh_type = ovl_can_decode_fh(ofs->workdir->d_sb);
+	if (ofs->config.index && !fh_type) {
+		ofs->config.index = false;
+		pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
+	}
+
+	/* Check if upper fs has 32bit inode numbers */
+	if (fh_type != FILEID_INO32_GEN)
+		ofs->xino_bits = 0;
+
+	/* NFS export of r/w mount depends on index */
+	if (ofs->config.nfs_export && !ofs->config.index) {
+		pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n");
+		ofs->config.nfs_export = false;
+	}
+out:
+	mnt_drop_write(mnt);
+	return err;
+}
+
+static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs,
+			   struct path *upperpath)
+{
+	int err;
+	struct path workpath = { };
+
+	err = ovl_mount_dir(ofs->config.workdir, &workpath);
+	if (err)
+		goto out;
+
+	err = -EINVAL;
+	if (upperpath->mnt != workpath.mnt) {
+		pr_err("overlayfs: workdir and upperdir must reside under the same mount\n");
+		goto out;
+	}
+	if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) {
+		pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
+		goto out;
+	}
+
+	ofs->workbasedir = dget(workpath.dentry);
+
+	if (ovl_inuse_trylock(ofs->workbasedir)) {
+		ofs->workdir_locked = true;
+	} else {
+		err = ovl_report_in_use(ofs, "workdir");
+		if (err)
+			goto out;
+	}
+
+	err = ovl_setup_trap(sb, ofs->workbasedir, &ofs->workbasedir_trap,
+			     "workdir");
+	if (err)
+		goto out;
+
+	err = ovl_make_workdir(sb, ofs, &workpath);
+
+out:
+	path_put(&workpath);
+
+	return err;
+}
+
+static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
+			    struct ovl_entry *oe, struct path *upperpath)
+{
+	struct vfsmount *mnt = ofs->upper_mnt;
+	int err;
+
+	err = mnt_want_write(mnt);
+	if (err)
+		return err;
+
+	/* Verify lower root is upper root origin */
+	err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry,
+				true);
+	if (err) {
+		pr_err("overlayfs: failed to verify upper root origin\n");
+		goto out;
+	}
+
+	ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
+	if (ofs->indexdir) {
+		err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap,
+				     "indexdir");
+		if (err)
+			goto out;
+
+		/*
+		 * Verify upper root is exclusively associated with index dir.
+		 * Older kernels stored upper fh in "trusted.overlay.origin"
+		 * xattr. If that xattr exists, verify that it is a match to
+		 * upper dir file handle. In any case, verify or set xattr
+		 * "trusted.overlay.upper" to indicate that index may have
+		 * directory entries.
+		 */
+		if (ovl_check_origin_xattr(ofs->indexdir)) {
+			err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN,
+						upperpath->dentry, true, false);
+			if (err)
+				pr_err("overlayfs: failed to verify index dir 'origin' xattr\n");
+		}
+		err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true);
+		if (err)
+			pr_err("overlayfs: failed to verify index dir 'upper' xattr\n");
+
+		/* Cleanup bad/stale/orphan index entries */
+		if (!err)
+			err = ovl_indexdir_cleanup(ofs);
+	}
+	if (err || !ofs->indexdir)
+		pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
+
+out:
+	mnt_drop_write(mnt);
+	return err;
+}
+
+/* Get a unique fsid for the layer */
+static int ovl_get_fsid(struct ovl_fs *ofs, struct super_block *sb)
+{
+	unsigned int i;
+	dev_t dev;
+	int err;
+
+	/* fsid 0 is reserved for upper fs even with non upper overlay */
+	if (ofs->upper_mnt && ofs->upper_mnt->mnt_sb == sb)
+		return 0;
+
+	for (i = 0; i < ofs->numlowerfs; i++) {
+		if (ofs->lower_fs[i].sb == sb)
+			return i + 1;
+	}
+
+	err = get_anon_bdev(&dev);
+	if (err) {
+		pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n");
+		return err;
+	}
+
+	ofs->lower_fs[ofs->numlowerfs].sb = sb;
+	ofs->lower_fs[ofs->numlowerfs].pseudo_dev = dev;
+	ofs->numlowerfs++;
+
+	return ofs->numlowerfs;
+}
+
+static int ovl_get_lower_layers(struct super_block *sb, struct ovl_fs *ofs,
+				struct path *stack, unsigned int numlower)
+{
+	int err;
+	unsigned int i;
+
+	err = -ENOMEM;
+	ofs->lower_layers = kcalloc(numlower, sizeof(struct ovl_layer),
+				    GFP_KERNEL);
+	if (ofs->lower_layers == NULL)
+		goto out;
+
+	ofs->lower_fs = kcalloc(numlower, sizeof(struct ovl_sb),
+				GFP_KERNEL);
+	if (ofs->lower_fs == NULL)
+		goto out;
+
+	for (i = 0; i < numlower; i++) {
+		struct vfsmount *mnt;
+		struct inode *trap;
+		int fsid;
+
+		err = fsid = ovl_get_fsid(ofs, stack[i].mnt->mnt_sb);
+		if (err < 0)
+			goto out;
+
+		/*
+		 * Check if lower root conflicts with this overlay layers before
+		 * checking if it is in-use as upperdir/workdir of "another"
+		 * mount, because we do not bother to check in ovl_is_inuse() if
+		 * the upperdir/workdir is in fact in-use by our
+		 * upperdir/workdir.
+		 */
+		err = ovl_setup_trap(sb, stack[i].dentry, &trap, "lowerdir");
+		if (err)
+			goto out;
+
+		if (ovl_is_inuse(stack[i].dentry)) {
+			err = ovl_report_in_use(ofs, "lowerdir");
+			if (err) {
+				iput(trap);
+				goto out;
+			}
+		}
+
+		mnt = clone_private_mount(&stack[i]);
+		err = PTR_ERR(mnt);
+		if (IS_ERR(mnt)) {
+			pr_err("overlayfs: failed to clone lowerpath\n");
+			iput(trap);
+			goto out;
+		}
+
+		/*
+		 * Make lower layers R/O.  That way fchmod/fchown on lower file
+		 * will fail instead of modifying lower fs.
+		 */
+		mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;
+
+		ofs->lower_layers[ofs->numlower].trap = trap;
+		ofs->lower_layers[ofs->numlower].mnt = mnt;
+		ofs->lower_layers[ofs->numlower].idx = i + 1;
+		ofs->lower_layers[ofs->numlower].fsid = fsid;
+		if (fsid) {
+			ofs->lower_layers[ofs->numlower].fs =
+				&ofs->lower_fs[fsid - 1];
+		}
+		ofs->numlower++;
+	}
+
+	/*
+	 * When all layers on same fs, overlay can use real inode numbers.
+	 * With mount option "xino=on", mounter declares that there are enough
+	 * free high bits in underlying fs to hold the unique fsid.
+	 * If overlayfs does encounter underlying inodes using the high xino
+	 * bits reserved for fsid, it emits a warning and uses the original
+	 * inode number.
+	 */
+	if (!ofs->numlowerfs || (ofs->numlowerfs == 1 && !ofs->upper_mnt)) {
+		ofs->xino_bits = 0;
+		ofs->config.xino = OVL_XINO_OFF;
+	} else if (ofs->config.xino == OVL_XINO_ON && !ofs->xino_bits) {
+		/*
+		 * This is a roundup of number of bits needed for numlowerfs+1
+		 * (i.e. ilog2(numlowerfs+1 - 1) + 1). fsid 0 is reserved for
+		 * upper fs even with non upper overlay.
+		 */
+		BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 31);
+		ofs->xino_bits = ilog2(ofs->numlowerfs) + 1;
+	}
+
+	if (ofs->xino_bits) {
+		pr_info("overlayfs: \"xino\" feature enabled using %d upper inode bits.\n",
+			ofs->xino_bits);
+	}
+
+	err = 0;
+out:
+	return err;
+}
+
+static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
+					    struct ovl_fs *ofs)
+{
+	int err;
+	char *lowertmp, *lower;
+	struct path *stack = NULL;
+	unsigned int stacklen, numlower = 0, i;
+	bool remote = false;
+	struct ovl_entry *oe;
+
+	err = -ENOMEM;
+	lowertmp = kstrdup(ofs->config.lowerdir, GFP_KERNEL);
+	if (!lowertmp)
+		goto out_err;
+
+	err = -EINVAL;
+	stacklen = ovl_split_lowerdirs(lowertmp);
+	if (stacklen > OVL_MAX_STACK) {
+		pr_err("overlayfs: too many lower directories, limit is %d\n",
+		       OVL_MAX_STACK);
+		goto out_err;
+	} else if (!ofs->config.upperdir && stacklen == 1) {
+		pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
+		goto out_err;
+	} else if (!ofs->config.upperdir && ofs->config.nfs_export &&
+		   ofs->config.redirect_follow) {
+		pr_warn("overlayfs: NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
+		ofs->config.nfs_export = false;
+	}
+
+	err = -ENOMEM;
+	stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
+	if (!stack)
+		goto out_err;
+
+	err = -EINVAL;
+	lower = lowertmp;
+	for (numlower = 0; numlower < stacklen; numlower++) {
+		err = ovl_lower_dir(lower, &stack[numlower], ofs,
+				    &sb->s_stack_depth, &remote);
+		if (err)
+			goto out_err;
+
+		lower = strchr(lower, '\0') + 1;
+	}
+
+	err = -EINVAL;
+	sb->s_stack_depth++;
+	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
+		pr_err("overlayfs: maximum fs stacking depth exceeded\n");
+		goto out_err;
+	}
+
+	err = ovl_get_lower_layers(sb, ofs, stack, numlower);
+	if (err)
+		goto out_err;
+
+	err = -ENOMEM;
+	oe = ovl_alloc_entry(numlower);
+	if (!oe)
+		goto out_err;
+
+	for (i = 0; i < numlower; i++) {
+		oe->lowerstack[i].dentry = dget(stack[i].dentry);
+		oe->lowerstack[i].layer = &ofs->lower_layers[i];
+	}
+
+	if (remote)
+		sb->s_d_op = &ovl_reval_dentry_operations;
+	else
+		sb->s_d_op = &ovl_dentry_operations;
+
+out:
+	for (i = 0; i < numlower; i++)
+		path_put(&stack[i]);
+	kfree(stack);
+	kfree(lowertmp);
+
+	return oe;
+
+out_err:
+	oe = ERR_PTR(err);
+	goto out;
+}
+
+/*
+ * Check if this layer root is a descendant of:
+ * - another layer of this overlayfs instance
+ * - upper/work dir of any overlayfs instance
+ */
+static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs,
+			   struct dentry *dentry, const char *name,
+			   bool is_lower)
+{
+	struct dentry *next = dentry, *parent;
+	int err = 0;
+
+	if (!dentry)
+		return 0;
+
+	parent = dget_parent(next);
+
+	/* Walk back ancestors to root (inclusive) looking for traps */
+	while (!err && parent != next) {
+		if (is_lower && ovl_lookup_trap_inode(sb, parent)) {
+			err = -ELOOP;
+			pr_err("overlayfs: overlapping %s path\n", name);
+		} else if (ovl_is_inuse(parent)) {
+			err = ovl_report_in_use(ofs, name);
+		}
+		next = parent;
+		parent = dget_parent(next);
+		dput(next);
+	}
+
+	dput(parent);
+
+	return err;
+}
+
+/*
+ * Check if any of the layers or work dirs overlap.
+ */
+static int ovl_check_overlapping_layers(struct super_block *sb,
+					struct ovl_fs *ofs)
+{
+	int i, err;
+
+	if (ofs->upper_mnt) {
+		err = ovl_check_layer(sb, ofs, ofs->upper_mnt->mnt_root,
+				      "upperdir", false);
+		if (err)
+			return err;
+
+		/*
+		 * Checking workbasedir avoids hitting ovl_is_inuse(parent) of
+		 * this instance and covers overlapping work and index dirs,
+		 * unless work or index dir have been moved since created inside
+		 * workbasedir.  In that case, we already have their traps in
+		 * inode cache and we will catch that case on lookup.
+		 */
+		err = ovl_check_layer(sb, ofs, ofs->workbasedir, "workdir",
+				      false);
+		if (err)
+			return err;
+	}
+
+	for (i = 0; i < ofs->numlower; i++) {
+		err = ovl_check_layer(sb, ofs,
+				      ofs->lower_layers[i].mnt->mnt_root,
+				      "lowerdir", true);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int ovl_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct path upperpath = { };
+	struct dentry *root_dentry;
+	struct ovl_entry *oe;
+	struct ovl_fs *ofs;
+	struct cred *cred;
+	int err;
+
+	err = -ENOMEM;
+	ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
+	if (!ofs)
+		goto out;
+
+	ofs->creator_cred = cred = prepare_creds();
+	if (!cred)
+		goto out_err;
+
+	ofs->config.index = ovl_index_def;
+	ofs->config.nfs_export = ovl_nfs_export_def;
+	ofs->config.xino = ovl_xino_def();
+	ofs->config.metacopy = ovl_metacopy_def;
+	err = ovl_parse_opt((char *) data, &ofs->config);
+	if (err)
+		goto out_err;
+
+	err = -EINVAL;
+	if (!ofs->config.lowerdir) {
+		if (!silent)
+			pr_err("overlayfs: missing 'lowerdir'\n");
+		goto out_err;
+	}
+
+	sb->s_stack_depth = 0;
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	/* Assume underlaying fs uses 32bit inodes unless proven otherwise */
+	if (ofs->config.xino != OVL_XINO_OFF)
+		ofs->xino_bits = BITS_PER_LONG - 32;
+
+	/* alloc/destroy_inode needed for setting up traps in inode cache */
+	sb->s_op = &ovl_super_operations;
+
+	if (ofs->config.upperdir) {
+		if (!ofs->config.workdir) {
+			pr_err("overlayfs: missing 'workdir'\n");
+			goto out_err;
+		}
+
+		err = ovl_get_upper(sb, ofs, &upperpath);
+		if (err)
+			goto out_err;
+
+		err = ovl_get_workdir(sb, ofs, &upperpath);
+		if (err)
+			goto out_err;
+
+		if (!ofs->workdir)
+			sb->s_flags |= SB_RDONLY;
+
+		sb->s_stack_depth = ofs->upper_mnt->mnt_sb->s_stack_depth;
+		sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran;
+
+	}
+	oe = ovl_get_lowerstack(sb, ofs);
+	err = PTR_ERR(oe);
+	if (IS_ERR(oe))
+		goto out_err;
+
+	/* If the upper fs is nonexistent, we mark overlayfs r/o too */
+	if (!ofs->upper_mnt)
+		sb->s_flags |= SB_RDONLY;
+
+	if (!(ovl_force_readonly(ofs)) && ofs->config.index) {
+		err = ovl_get_indexdir(sb, ofs, oe, &upperpath);
+		if (err)
+			goto out_free_oe;
+
+		/* Force r/o mount with no index dir */
+		if (!ofs->indexdir) {
+			dput(ofs->workdir);
+			ofs->workdir = NULL;
+			sb->s_flags |= SB_RDONLY;
+		}
+
+	}
+
+	err = ovl_check_overlapping_layers(sb, ofs);
+	if (err)
+		goto out_free_oe;
+
+	/* Show index=off in /proc/mounts for forced r/o mount */
+	if (!ofs->indexdir) {
+		ofs->config.index = false;
+		if (ofs->upper_mnt && ofs->config.nfs_export) {
+			pr_warn("overlayfs: NFS export requires an index dir, falling back to nfs_export=off.\n");
+			ofs->config.nfs_export = false;
+		}
+	}
+
+	if (ofs->config.metacopy && ofs->config.nfs_export) {
+		pr_warn("overlayfs: NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n");
+		ofs->config.nfs_export = false;
+	}
+
+	if (ofs->config.nfs_export)
+		sb->s_export_op = &ovl_export_operations;
+
+	/* Never override disk quota limits or use reserved space */
+	cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
+
+	sb->s_magic = OVERLAYFS_SUPER_MAGIC;
+	sb->s_xattr = ovl_xattr_handlers;
+	sb->s_fs_info = ofs;
+	sb->s_flags |= SB_POSIXACL;
+
+	err = -ENOMEM;
+	root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
+	if (!root_dentry)
+		goto out_free_oe;
+
+	root_dentry->d_fsdata = oe;
+
+	mntput(upperpath.mnt);
+	if (upperpath.dentry) {
+		ovl_dentry_set_upper_alias(root_dentry);
+		if (ovl_is_impuredir(upperpath.dentry))
+			ovl_set_flag(OVL_IMPURE, d_inode(root_dentry));
+	}
+
+	/* Root is always merge -> can have whiteouts */
+	ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
+	ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry);
+	ovl_set_upperdata(d_inode(root_dentry));
+	ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
+		       ovl_dentry_lower(root_dentry), NULL);
+
+	sb->s_root = root_dentry;
+
+	return 0;
+
+out_free_oe:
+	ovl_entry_stack_free(oe);
+	kfree(oe);
+out_err:
+	path_put(&upperpath);
+	ovl_free_fs(ofs);
+out:
+	return err;
+}
+
+static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
+				const char *dev_name, void *raw_data)
+{
+	return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
+}
+
+static struct file_system_type ovl_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "overlay",
+	.mount		= ovl_mount,
+	.kill_sb	= kill_anon_super,
+};
+MODULE_ALIAS_FS("overlay");
+
+static void ovl_inode_init_once(void *foo)
+{
+	struct ovl_inode *oi = foo;
+
+	inode_init_once(&oi->vfs_inode);
+}
+
+static int __init ovl_init(void)
+{
+	int err;
+
+	ovl_inode_cachep = kmem_cache_create("ovl_inode",
+					     sizeof(struct ovl_inode), 0,
+					     (SLAB_RECLAIM_ACCOUNT|
+					      SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+					     ovl_inode_init_once);
+	if (ovl_inode_cachep == NULL)
+		return -ENOMEM;
+
+	err = register_filesystem(&ovl_fs_type);
+	if (err)
+		kmem_cache_destroy(ovl_inode_cachep);
+
+	return err;
+}
+
+static void __exit ovl_exit(void)
+{
+	unregister_filesystem(&ovl_fs_type);
+
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
+	kmem_cache_destroy(ovl_inode_cachep);
+
+}
+
+module_init(ovl_init);
+module_exit(ovl_exit);
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
new file mode 100644
index 000000000..db8bdb29b
--- /dev/null
+++ b/fs/overlayfs/util.c
@@ -0,0 +1,945 @@
+/*
+ * Copyright (C) 2011 Novell Inc.
+ * Copyright (C) 2016 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/slab.h>
+#include <linux/cred.h>
+#include <linux/xattr.h>
+#include <linux/exportfs.h>
+#include <linux/uuid.h>
+#include <linux/namei.h>
+#include <linux/ratelimit.h>
+#include "overlayfs.h"
+
+int ovl_want_write(struct dentry *dentry)
+{
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+	return mnt_want_write(ofs->upper_mnt);
+}
+
+void ovl_drop_write(struct dentry *dentry)
+{
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+	mnt_drop_write(ofs->upper_mnt);
+}
+
+struct dentry *ovl_workdir(struct dentry *dentry)
+{
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+	return ofs->workdir;
+}
+
+const struct cred *ovl_override_creds(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	return override_creds(ofs->creator_cred);
+}
+
+struct super_block *ovl_same_sb(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	if (!ofs->numlowerfs)
+		return ofs->upper_mnt->mnt_sb;
+	else if (ofs->numlowerfs == 1 && !ofs->upper_mnt)
+		return ofs->lower_fs[0].sb;
+	else
+		return NULL;
+}
+
+/*
+ * Check if underlying fs supports file handles and try to determine encoding
+ * type, in order to deduce maximum inode number used by fs.
+ *
+ * Return 0 if file handles are not supported.
+ * Return 1 (FILEID_INO32_GEN) if fs uses the default 32bit inode encoding.
+ * Return -1 if fs uses a non default encoding with unknown inode size.
+ */
+int ovl_can_decode_fh(struct super_block *sb)
+{
+	if (!sb->s_export_op || !sb->s_export_op->fh_to_dentry ||
+	    uuid_is_null(&sb->s_uuid))
+		return 0;
+
+	return sb->s_export_op->encode_fh ? -1 : FILEID_INO32_GEN;
+}
+
+struct dentry *ovl_indexdir(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	return ofs->indexdir;
+}
+
+/* Index all files on copy up. For now only enabled for NFS export */
+bool ovl_index_all(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	return ofs->config.nfs_export && ofs->config.index;
+}
+
+/* Verify lower origin on lookup. For now only enabled for NFS export */
+bool ovl_verify_lower(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	return ofs->config.nfs_export && ofs->config.index;
+}
+
+struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
+{
+	size_t size = offsetof(struct ovl_entry, lowerstack[numlower]);
+	struct ovl_entry *oe = kzalloc(size, GFP_KERNEL);
+
+	if (oe)
+		oe->numlower = numlower;
+
+	return oe;
+}
+
+bool ovl_dentry_remote(struct dentry *dentry)
+{
+	return dentry->d_flags &
+		(DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE |
+		 DCACHE_OP_REAL);
+}
+
+bool ovl_dentry_weird(struct dentry *dentry)
+{
+	return dentry->d_flags & (DCACHE_NEED_AUTOMOUNT |
+				  DCACHE_MANAGE_TRANSIT |
+				  DCACHE_OP_HASH |
+				  DCACHE_OP_COMPARE);
+}
+
+enum ovl_path_type ovl_path_type(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+	enum ovl_path_type type = 0;
+
+	if (ovl_dentry_upper(dentry)) {
+		type = __OVL_PATH_UPPER;
+
+		/*
+		 * Non-dir dentry can hold lower dentry of its copy up origin.
+		 */
+		if (oe->numlower) {
+			if (ovl_test_flag(OVL_CONST_INO, d_inode(dentry)))
+				type |= __OVL_PATH_ORIGIN;
+			if (d_is_dir(dentry) ||
+			    !ovl_has_upperdata(d_inode(dentry)))
+				type |= __OVL_PATH_MERGE;
+		}
+	} else {
+		if (oe->numlower > 1)
+			type |= __OVL_PATH_MERGE;
+	}
+	return type;
+}
+
+void ovl_path_upper(struct dentry *dentry, struct path *path)
+{
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+
+	path->mnt = ofs->upper_mnt;
+	path->dentry = ovl_dentry_upper(dentry);
+}
+
+void ovl_path_lower(struct dentry *dentry, struct path *path)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	if (oe->numlower) {
+		path->mnt = oe->lowerstack[0].layer->mnt;
+		path->dentry = oe->lowerstack[0].dentry;
+	} else {
+		*path = (struct path) { };
+	}
+}
+
+void ovl_path_lowerdata(struct dentry *dentry, struct path *path)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	if (oe->numlower) {
+		path->mnt = oe->lowerstack[oe->numlower - 1].layer->mnt;
+		path->dentry = oe->lowerstack[oe->numlower - 1].dentry;
+	} else {
+		*path = (struct path) { };
+	}
+}
+
+enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
+{
+	enum ovl_path_type type = ovl_path_type(dentry);
+
+	if (!OVL_TYPE_UPPER(type))
+		ovl_path_lower(dentry, path);
+	else
+		ovl_path_upper(dentry, path);
+
+	return type;
+}
+
+struct dentry *ovl_dentry_upper(struct dentry *dentry)
+{
+	return ovl_upperdentry_dereference(OVL_I(d_inode(dentry)));
+}
+
+struct dentry *ovl_dentry_lower(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	return oe->numlower ? oe->lowerstack[0].dentry : NULL;
+}
+
+struct ovl_layer *ovl_layer_lower(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	return oe->numlower ? oe->lowerstack[0].layer : NULL;
+}
+
+/*
+ * ovl_dentry_lower() could return either a data dentry or metacopy dentry
+ * dependig on what is stored in lowerstack[0]. At times we need to find
+ * lower dentry which has data (and not metacopy dentry). This helper
+ * returns the lower data dentry.
+ */
+struct dentry *ovl_dentry_lowerdata(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	return oe->numlower ? oe->lowerstack[oe->numlower - 1].dentry : NULL;
+}
+
+struct dentry *ovl_dentry_real(struct dentry *dentry)
+{
+	return ovl_dentry_upper(dentry) ?: ovl_dentry_lower(dentry);
+}
+
+struct dentry *ovl_i_dentry_upper(struct inode *inode)
+{
+	return ovl_upperdentry_dereference(OVL_I(inode));
+}
+
+struct inode *ovl_inode_upper(struct inode *inode)
+{
+	struct dentry *upperdentry = ovl_i_dentry_upper(inode);
+
+	return upperdentry ? d_inode(upperdentry) : NULL;
+}
+
+struct inode *ovl_inode_lower(struct inode *inode)
+{
+	return OVL_I(inode)->lower;
+}
+
+struct inode *ovl_inode_real(struct inode *inode)
+{
+	return ovl_inode_upper(inode) ?: ovl_inode_lower(inode);
+}
+
+/* Return inode which contains lower data. Do not return metacopy */
+struct inode *ovl_inode_lowerdata(struct inode *inode)
+{
+	if (WARN_ON(!S_ISREG(inode->i_mode)))
+		return NULL;
+
+	return OVL_I(inode)->lowerdata ?: ovl_inode_lower(inode);
+}
+
+/* Return real inode which contains data. Does not return metacopy inode */
+struct inode *ovl_inode_realdata(struct inode *inode)
+{
+	struct inode *upperinode;
+
+	upperinode = ovl_inode_upper(inode);
+	if (upperinode && ovl_has_upperdata(inode))
+		return upperinode;
+
+	return ovl_inode_lowerdata(inode);
+}
+
+struct ovl_dir_cache *ovl_dir_cache(struct inode *inode)
+{
+	return OVL_I(inode)->cache;
+}
+
+void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache)
+{
+	OVL_I(inode)->cache = cache;
+}
+
+void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry)
+{
+	set_bit(flag, &OVL_E(dentry)->flags);
+}
+
+void ovl_dentry_clear_flag(unsigned long flag, struct dentry *dentry)
+{
+	clear_bit(flag, &OVL_E(dentry)->flags);
+}
+
+bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry)
+{
+	return test_bit(flag, &OVL_E(dentry)->flags);
+}
+
+bool ovl_dentry_is_opaque(struct dentry *dentry)
+{
+	return ovl_dentry_test_flag(OVL_E_OPAQUE, dentry);
+}
+
+bool ovl_dentry_is_whiteout(struct dentry *dentry)
+{
+	return !dentry->d_inode && ovl_dentry_is_opaque(dentry);
+}
+
+void ovl_dentry_set_opaque(struct dentry *dentry)
+{
+	ovl_dentry_set_flag(OVL_E_OPAQUE, dentry);
+}
+
+/*
+ * For hard links and decoded file handles, it's possible for ovl_dentry_upper()
+ * to return positive, while there's no actual upper alias for the inode.
+ * Copy up code needs to know about the existence of the upper alias, so it
+ * can't use ovl_dentry_upper().
+ */
+bool ovl_dentry_has_upper_alias(struct dentry *dentry)
+{
+	return ovl_dentry_test_flag(OVL_E_UPPER_ALIAS, dentry);
+}
+
+void ovl_dentry_set_upper_alias(struct dentry *dentry)
+{
+	ovl_dentry_set_flag(OVL_E_UPPER_ALIAS, dentry);
+}
+
+static bool ovl_should_check_upperdata(struct inode *inode)
+{
+	if (!S_ISREG(inode->i_mode))
+		return false;
+
+	if (!ovl_inode_lower(inode))
+		return false;
+
+	return true;
+}
+
+bool ovl_has_upperdata(struct inode *inode)
+{
+	if (!ovl_should_check_upperdata(inode))
+		return true;
+
+	if (!ovl_test_flag(OVL_UPPERDATA, inode))
+		return false;
+	/*
+	 * Pairs with smp_wmb() in ovl_set_upperdata(). Main user of
+	 * ovl_has_upperdata() is ovl_copy_up_meta_inode_data(). Make sure
+	 * if setting of OVL_UPPERDATA is visible, then effects of writes
+	 * before that are visible too.
+	 */
+	smp_rmb();
+	return true;
+}
+
+void ovl_set_upperdata(struct inode *inode)
+{
+	/*
+	 * Pairs with smp_rmb() in ovl_has_upperdata(). Make sure
+	 * if OVL_UPPERDATA flag is visible, then effects of write operations
+	 * before it are visible as well.
+	 */
+	smp_wmb();
+	ovl_set_flag(OVL_UPPERDATA, inode);
+}
+
+/* Caller should hold ovl_inode->lock */
+bool ovl_dentry_needs_data_copy_up_locked(struct dentry *dentry, int flags)
+{
+	if (!ovl_open_flags_need_copy_up(flags))
+		return false;
+
+	return !ovl_test_flag(OVL_UPPERDATA, d_inode(dentry));
+}
+
+bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags)
+{
+	if (!ovl_open_flags_need_copy_up(flags))
+		return false;
+
+	return !ovl_has_upperdata(d_inode(dentry));
+}
+
+bool ovl_redirect_dir(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	return ofs->config.redirect_dir && !ofs->noxattr;
+}
+
+const char *ovl_dentry_get_redirect(struct dentry *dentry)
+{
+	return OVL_I(d_inode(dentry))->redirect;
+}
+
+void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect)
+{
+	struct ovl_inode *oi = OVL_I(d_inode(dentry));
+
+	kfree(oi->redirect);
+	oi->redirect = redirect;
+}
+
+void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
+		    struct dentry *lowerdentry, struct dentry *lowerdata)
+{
+	struct inode *realinode = d_inode(upperdentry ?: lowerdentry);
+
+	if (upperdentry)
+		OVL_I(inode)->__upperdentry = upperdentry;
+	if (lowerdentry)
+		OVL_I(inode)->lower = igrab(d_inode(lowerdentry));
+	if (lowerdata)
+		OVL_I(inode)->lowerdata = igrab(d_inode(lowerdata));
+
+	ovl_copyattr(realinode, inode);
+	ovl_copyflags(realinode, inode);
+	if (!inode->i_ino)
+		inode->i_ino = realinode->i_ino;
+}
+
+void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
+{
+	struct inode *upperinode = d_inode(upperdentry);
+
+	WARN_ON(OVL_I(inode)->__upperdentry);
+
+	/*
+	 * Make sure upperdentry is consistent before making it visible
+	 */
+	smp_wmb();
+	OVL_I(inode)->__upperdentry = upperdentry;
+	if (inode_unhashed(inode)) {
+		if (!inode->i_ino)
+			inode->i_ino = upperinode->i_ino;
+		inode->i_private = upperinode;
+		__insert_inode_hash(inode, (unsigned long) upperinode);
+	}
+}
+
+static void ovl_dentry_version_inc(struct dentry *dentry, bool impurity)
+{
+	struct inode *inode = d_inode(dentry);
+
+	WARN_ON(!inode_is_locked(inode));
+	/*
+	 * Version is used by readdir code to keep cache consistent.  For merge
+	 * dirs all changes need to be noted.  For non-merge dirs, cache only
+	 * contains impure (ones which have been copied up and have origins)
+	 * entries, so only need to note changes to impure entries.
+	 */
+	if (OVL_TYPE_MERGE(ovl_path_type(dentry)) || impurity)
+		OVL_I(inode)->version++;
+}
+
+void ovl_dir_modified(struct dentry *dentry, bool impurity)
+{
+	/* Copy mtime/ctime */
+	ovl_copyattr(d_inode(ovl_dentry_upper(dentry)), d_inode(dentry));
+
+	ovl_dentry_version_inc(dentry, impurity);
+}
+
+u64 ovl_dentry_version_get(struct dentry *dentry)
+{
+	struct inode *inode = d_inode(dentry);
+
+	WARN_ON(!inode_is_locked(inode));
+	return OVL_I(inode)->version;
+}
+
+bool ovl_is_whiteout(struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+
+	return inode && IS_WHITEOUT(inode);
+}
+
+struct file *ovl_path_open(struct path *path, int flags)
+{
+	return dentry_open(path, flags | O_NOATIME, current_cred());
+}
+
+/* Caller should hold ovl_inode->lock */
+static bool ovl_already_copied_up_locked(struct dentry *dentry, int flags)
+{
+	bool disconnected = dentry->d_flags & DCACHE_DISCONNECTED;
+
+	if (ovl_dentry_upper(dentry) &&
+	    (ovl_dentry_has_upper_alias(dentry) || disconnected) &&
+	    !ovl_dentry_needs_data_copy_up_locked(dentry, flags))
+		return true;
+
+	return false;
+}
+
+bool ovl_already_copied_up(struct dentry *dentry, int flags)
+{
+	bool disconnected = dentry->d_flags & DCACHE_DISCONNECTED;
+
+	/*
+	 * Check if copy-up has happened as well as for upper alias (in
+	 * case of hard links) is there.
+	 *
+	 * Both checks are lockless:
+	 *  - false negatives: will recheck under oi->lock
+	 *  - false positives:
+	 *    + ovl_dentry_upper() uses memory barriers to ensure the
+	 *      upper dentry is up-to-date
+	 *    + ovl_dentry_has_upper_alias() relies on locking of
+	 *      upper parent i_rwsem to prevent reordering copy-up
+	 *      with rename.
+	 */
+	if (ovl_dentry_upper(dentry) &&
+	    (ovl_dentry_has_upper_alias(dentry) || disconnected) &&
+	    !ovl_dentry_needs_data_copy_up(dentry, flags))
+		return true;
+
+	return false;
+}
+
+int ovl_copy_up_start(struct dentry *dentry, int flags)
+{
+	struct ovl_inode *oi = OVL_I(d_inode(dentry));
+	int err;
+
+	err = mutex_lock_interruptible(&oi->lock);
+	if (!err && ovl_already_copied_up_locked(dentry, flags)) {
+		err = 1; /* Already copied up */
+		mutex_unlock(&oi->lock);
+	}
+
+	return err;
+}
+
+void ovl_copy_up_end(struct dentry *dentry)
+{
+	mutex_unlock(&OVL_I(d_inode(dentry))->lock);
+}
+
+bool ovl_check_origin_xattr(struct dentry *dentry)
+{
+	int res;
+
+	res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0);
+
+	/* Zero size value means "copied up but origin unknown" */
+	if (res >= 0)
+		return true;
+
+	return false;
+}
+
+bool ovl_check_dir_xattr(struct dentry *dentry, const char *name)
+{
+	int res;
+	char val;
+
+	if (!d_is_dir(dentry))
+		return false;
+
+	res = vfs_getxattr(dentry, name, &val, 1);
+	if (res == 1 && val == 'y')
+		return true;
+
+	return false;
+}
+
+int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
+		       const char *name, const void *value, size_t size,
+		       int xerr)
+{
+	int err;
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+
+	if (ofs->noxattr)
+		return xerr;
+
+	err = ovl_do_setxattr(upperdentry, name, value, size, 0);
+
+	if (err == -EOPNOTSUPP) {
+		pr_warn("overlayfs: cannot set %s xattr on upper\n", name);
+		ofs->noxattr = true;
+		return xerr;
+	}
+
+	return err;
+}
+
+int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry)
+{
+	int err;
+
+	if (ovl_test_flag(OVL_IMPURE, d_inode(dentry)))
+		return 0;
+
+	/*
+	 * Do not fail when upper doesn't support xattrs.
+	 * Upper inodes won't have origin nor redirect xattr anyway.
+	 */
+	err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE,
+				 "y", 1, 0);
+	if (!err)
+		ovl_set_flag(OVL_IMPURE, d_inode(dentry));
+
+	return err;
+}
+
+void ovl_set_flag(unsigned long flag, struct inode *inode)
+{
+	set_bit(flag, &OVL_I(inode)->flags);
+}
+
+void ovl_clear_flag(unsigned long flag, struct inode *inode)
+{
+	clear_bit(flag, &OVL_I(inode)->flags);
+}
+
+bool ovl_test_flag(unsigned long flag, struct inode *inode)
+{
+	return test_bit(flag, &OVL_I(inode)->flags);
+}
+
+/**
+ * Caller must hold a reference to inode to prevent it from being freed while
+ * it is marked inuse.
+ */
+bool ovl_inuse_trylock(struct dentry *dentry)
+{
+	struct inode *inode = d_inode(dentry);
+	bool locked = false;
+
+	spin_lock(&inode->i_lock);
+	if (!(inode->i_state & I_OVL_INUSE)) {
+		inode->i_state |= I_OVL_INUSE;
+		locked = true;
+	}
+	spin_unlock(&inode->i_lock);
+
+	return locked;
+}
+
+void ovl_inuse_unlock(struct dentry *dentry)
+{
+	if (dentry) {
+		struct inode *inode = d_inode(dentry);
+
+		spin_lock(&inode->i_lock);
+		WARN_ON(!(inode->i_state & I_OVL_INUSE));
+		inode->i_state &= ~I_OVL_INUSE;
+		spin_unlock(&inode->i_lock);
+	}
+}
+
+bool ovl_is_inuse(struct dentry *dentry)
+{
+	struct inode *inode = d_inode(dentry);
+	bool inuse;
+
+	spin_lock(&inode->i_lock);
+	inuse = (inode->i_state & I_OVL_INUSE);
+	spin_unlock(&inode->i_lock);
+
+	return inuse;
+}
+
+/*
+ * Does this overlay dentry need to be indexed on copy up?
+ */
+bool ovl_need_index(struct dentry *dentry)
+{
+	struct dentry *lower = ovl_dentry_lower(dentry);
+
+	if (!lower || !ovl_indexdir(dentry->d_sb))
+		return false;
+
+	/* Index all files for NFS export and consistency verification */
+	if (ovl_index_all(dentry->d_sb))
+		return true;
+
+	/* Index only lower hardlinks on copy up */
+	if (!d_is_dir(lower) && d_inode(lower)->i_nlink > 1)
+		return true;
+
+	return false;
+}
+
+/* Caller must hold OVL_I(inode)->lock */
+static void ovl_cleanup_index(struct dentry *dentry)
+{
+	struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
+	struct inode *dir = indexdir->d_inode;
+	struct dentry *lowerdentry = ovl_dentry_lower(dentry);
+	struct dentry *upperdentry = ovl_dentry_upper(dentry);
+	struct dentry *index = NULL;
+	struct inode *inode;
+	struct qstr name = { };
+	int err;
+
+	err = ovl_get_index_name(lowerdentry, &name);
+	if (err)
+		goto fail;
+
+	inode = d_inode(upperdentry);
+	if (!S_ISDIR(inode->i_mode) && inode->i_nlink != 1) {
+		pr_warn_ratelimited("overlayfs: cleanup linked index (%pd2, ino=%lu, nlink=%u)\n",
+				    upperdentry, inode->i_ino, inode->i_nlink);
+		/*
+		 * We either have a bug with persistent union nlink or a lower
+		 * hardlink was added while overlay is mounted. Adding a lower
+		 * hardlink and then unlinking all overlay hardlinks would drop
+		 * overlay nlink to zero before all upper inodes are unlinked.
+		 * As a safety measure, when that situation is detected, set
+		 * the overlay nlink to the index inode nlink minus one for the
+		 * index entry itself.
+		 */
+		set_nlink(d_inode(dentry), inode->i_nlink - 1);
+		ovl_set_nlink_upper(dentry);
+		goto out;
+	}
+
+	inode_lock_nested(dir, I_MUTEX_PARENT);
+	index = lookup_one_len(name.name, indexdir, name.len);
+	err = PTR_ERR(index);
+	if (IS_ERR(index)) {
+		index = NULL;
+	} else if (ovl_index_all(dentry->d_sb)) {
+		/* Whiteout orphan index to block future open by handle */
+		err = ovl_cleanup_and_whiteout(indexdir, dir, index);
+	} else {
+		/* Cleanup orphan index entries */
+		err = ovl_cleanup(dir, index);
+	}
+
+	inode_unlock(dir);
+	if (err)
+		goto fail;
+
+out:
+	kfree(name.name);
+	dput(index);
+	return;
+
+fail:
+	pr_err("overlayfs: cleanup index of '%pd2' failed (%i)\n", dentry, err);
+	goto out;
+}
+
+/*
+ * Operations that change overlay inode and upper inode nlink need to be
+ * synchronized with copy up for persistent nlink accounting.
+ */
+int ovl_nlink_start(struct dentry *dentry, bool *locked)
+{
+	struct ovl_inode *oi = OVL_I(d_inode(dentry));
+	const struct cred *old_cred;
+	int err;
+
+	if (!d_inode(dentry))
+		return 0;
+
+	/*
+	 * With inodes index is enabled, we store the union overlay nlink
+	 * in an xattr on the index inode. When whiting out an indexed lower,
+	 * we need to decrement the overlay persistent nlink, but before the
+	 * first copy up, we have no upper index inode to store the xattr.
+	 *
+	 * As a workaround, before whiteout/rename over an indexed lower,
+	 * copy up to create the upper index. Creating the upper index will
+	 * initialize the overlay nlink, so it could be dropped if unlink
+	 * or rename succeeds.
+	 *
+	 * TODO: implement metadata only index copy up when called with
+	 *       ovl_copy_up_flags(dentry, O_PATH).
+	 */
+	if (ovl_need_index(dentry) && !ovl_dentry_has_upper_alias(dentry)) {
+		err = ovl_copy_up(dentry);
+		if (err)
+			return err;
+	}
+
+	err = mutex_lock_interruptible(&oi->lock);
+	if (err)
+		return err;
+
+	if (d_is_dir(dentry) || !ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+		goto out;
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	/*
+	 * The overlay inode nlink should be incremented/decremented IFF the
+	 * upper operation succeeds, along with nlink change of upper inode.
+	 * Therefore, before link/unlink/rename, we store the union nlink
+	 * value relative to the upper inode nlink in an upper inode xattr.
+	 */
+	err = ovl_set_nlink_upper(dentry);
+	revert_creds(old_cred);
+
+out:
+	if (err)
+		mutex_unlock(&oi->lock);
+	else
+		*locked = true;
+
+	return err;
+}
+
+void ovl_nlink_end(struct dentry *dentry, bool locked)
+{
+	if (locked) {
+		if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) &&
+		    d_inode(dentry)->i_nlink == 0) {
+			const struct cred *old_cred;
+
+			old_cred = ovl_override_creds(dentry->d_sb);
+			ovl_cleanup_index(dentry);
+			revert_creds(old_cred);
+		}
+
+		mutex_unlock(&OVL_I(d_inode(dentry))->lock);
+	}
+}
+
+int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir)
+{
+	/* Workdir should not be the same as upperdir */
+	if (workdir == upperdir)
+		goto err;
+
+	/* Workdir should not be subdir of upperdir and vice versa */
+	if (lock_rename(workdir, upperdir) != NULL)
+		goto err_unlock;
+
+	return 0;
+
+err_unlock:
+	unlock_rename(workdir, upperdir);
+err:
+	pr_err("overlayfs: failed to lock workdir+upperdir\n");
+	return -EIO;
+}
+
+/* err < 0, 0 if no metacopy xattr, 1 if metacopy xattr found */
+int ovl_check_metacopy_xattr(struct dentry *dentry)
+{
+	int res;
+
+	/* Only regular files can have metacopy xattr */
+	if (!S_ISREG(d_inode(dentry)->i_mode))
+		return 0;
+
+	res = vfs_getxattr(dentry, OVL_XATTR_METACOPY, NULL, 0);
+	if (res < 0) {
+		if (res == -ENODATA || res == -EOPNOTSUPP)
+			return 0;
+		goto out;
+	}
+
+	return 1;
+out:
+	pr_warn_ratelimited("overlayfs: failed to get metacopy (%i)\n", res);
+	return res;
+}
+
+bool ovl_is_metacopy_dentry(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	if (!d_is_reg(dentry))
+		return false;
+
+	if (ovl_dentry_upper(dentry)) {
+		if (!ovl_has_upperdata(d_inode(dentry)))
+			return true;
+		return false;
+	}
+
+	return (oe->numlower > 1);
+}
+
+ssize_t ovl_getxattr(struct dentry *dentry, char *name, char **value,
+		     size_t padding)
+{
+	ssize_t res;
+	char *buf = NULL;
+
+	res = vfs_getxattr(dentry, name, NULL, 0);
+	if (res < 0) {
+		if (res == -ENODATA || res == -EOPNOTSUPP)
+			return -ENODATA;
+		goto fail;
+	}
+
+	if (res != 0) {
+		buf = kzalloc(res + padding, GFP_KERNEL);
+		if (!buf)
+			return -ENOMEM;
+
+		res = vfs_getxattr(dentry, name, buf, res);
+		if (res < 0)
+			goto fail;
+	}
+	*value = buf;
+
+	return res;
+
+fail:
+	pr_warn_ratelimited("overlayfs: failed to get xattr %s: err=%zi)\n",
+			    name, res);
+	kfree(buf);
+	return res;
+}
+
+char *ovl_get_redirect_xattr(struct dentry *dentry, int padding)
+{
+	int res;
+	char *s, *next, *buf = NULL;
+
+	res = ovl_getxattr(dentry, OVL_XATTR_REDIRECT, &buf, padding + 1);
+	if (res == -ENODATA)
+		return NULL;
+	if (res < 0)
+		return ERR_PTR(res);
+	if (res == 0)
+		goto invalid;
+
+	if (buf[0] == '/') {
+		for (s = buf; *s++ == '/'; s = next) {
+			next = strchrnul(s, '/');
+			if (s == next)
+				goto invalid;
+		}
+	} else {
+		if (strchr(buf, '/') != NULL)
+			goto invalid;
+	}
+
+	return buf;
+invalid:
+	pr_warn_ratelimited("overlayfs: invalid redirect (%s)\n", buf);
+	res = -EINVAL;
+	kfree(buf);
+	return ERR_PTR(res);
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-06 01:02:30 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-06 01:02:30 +0000
commit	76cb841cb886eef6b3bee341a2266c76578724ad (patch)
tree	f5892e5ba6cc11949952a6ce4ecbe6d516d6ce58 /fs/overlayfs
parent	Initial commit. (diff)
download	linux-76cb841cb886eef6b3bee341a2266c76578724ad.tar.xz linux-76cb841cb886eef6b3bee341a2266c76578724ad.zip