From 2c3c1048746a4622d8c89a29670120dc8fab93c4 Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Sun, 7 Apr 2024 20:49:45 +0200
Subject: Adding upstream version 6.1.76.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 fs/overlayfs/Kconfig     |  126 +++
 fs/overlayfs/Makefile    |    9 +
 fs/overlayfs/copy_up.c   | 1108 +++++++++++++++++++++++
 fs/overlayfs/dir.c       | 1331 +++++++++++++++++++++++++++
 fs/overlayfs/export.c    |  876 ++++++++++++++++++
 fs/overlayfs/file.c      |  716 +++++++++++++++
 fs/overlayfs/inode.c     | 1285 ++++++++++++++++++++++++++
 fs/overlayfs/namei.c     | 1203 +++++++++++++++++++++++++
 fs/overlayfs/overlayfs.h |  698 ++++++++++++++
 fs/overlayfs/ovl_entry.h |  160 ++++
 fs/overlayfs/readdir.c   | 1235 +++++++++++++++++++++++++
 fs/overlayfs/super.c     | 2244 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/overlayfs/util.c      | 1136 +++++++++++++++++++++++
 13 files changed, 12127 insertions(+)
 create mode 100644 fs/overlayfs/Kconfig
 create mode 100644 fs/overlayfs/Makefile
 create mode 100644 fs/overlayfs/copy_up.c
 create mode 100644 fs/overlayfs/dir.c
 create mode 100644 fs/overlayfs/export.c
 create mode 100644 fs/overlayfs/file.c
 create mode 100644 fs/overlayfs/inode.c
 create mode 100644 fs/overlayfs/namei.c
 create mode 100644 fs/overlayfs/overlayfs.h
 create mode 100644 fs/overlayfs/ovl_entry.h
 create mode 100644 fs/overlayfs/readdir.c
 create mode 100644 fs/overlayfs/super.c
 create mode 100644 fs/overlayfs/util.c

(limited to 'fs/overlayfs')

diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
new file mode 100644
index 000000000..dd188c799
--- /dev/null
+++ b/fs/overlayfs/Kconfig
@@ -0,0 +1,126 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config OVERLAY_FS
+	tristate "Overlay filesystem support"
+	select EXPORTFS
+	help
+	  An overlay filesystem combines two filesystems - an 'upper' filesystem
+	  and a 'lower' filesystem.  When a name exists in both filesystems, the
+	  object in the 'upper' filesystem is visible while the object in the
+	  'lower' filesystem is either hidden or, in the case of directories,
+	  merged with the 'upper' object.
+
+	  For more information see Documentation/filesystems/overlayfs.rst
+
+config OVERLAY_FS_REDIRECT_DIR
+	bool "Overlayfs: turn on redirect directory feature by default"
+	depends on OVERLAY_FS
+	help
+	  If this config option is enabled then overlay filesystems will use
+	  redirects when renaming directories by default.  In this case it is
+	  still possible to turn off redirects globally with the
+	  "redirect_dir=off" module option or on a filesystem instance basis
+	  with the "redirect_dir=off" mount option.
+
+	  Note, that redirects are not backward compatible.  That is, mounting
+	  an overlay which has redirects on a kernel that doesn't support this
+	  feature will have unexpected results.
+
+	  If unsure, say N.
+
+config OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW
+	bool "Overlayfs: follow redirects even if redirects are turned off"
+	default y
+	depends on OVERLAY_FS
+	help
+	  Disable this to get a possibly more secure configuration, but that
+	  might not be backward compatible with previous kernels.
+
+	  If backward compatibility is not an issue, then it is safe and
+	  recommended to say N here.
+
+	  For more information, see Documentation/filesystems/overlayfs.rst
+
+	  If unsure, say Y.
+
+config OVERLAY_FS_INDEX
+	bool "Overlayfs: turn on inodes index feature by default"
+	depends on OVERLAY_FS
+	help
+	  If this config option is enabled then overlay filesystems will use
+	  the index directory to map lower inodes to upper inodes by default.
+	  In this case it is still possible to turn off index globally with the
+	  "index=off" module option or on a filesystem instance basis with the
+	  "index=off" mount option.
+
+	  The inodes index feature prevents breaking of lower hardlinks on copy
+	  up.
+
+	  Note, that the inodes index feature is not backward compatible.
+	  That is, mounting an overlay which has an inodes index on a kernel
+	  that doesn't support this feature will have unexpected results.
+
+	  If unsure, say N.
+
+config OVERLAY_FS_NFS_EXPORT
+	bool "Overlayfs: turn on NFS export feature by default"
+	depends on OVERLAY_FS
+	depends on OVERLAY_FS_INDEX
+	depends on !OVERLAY_FS_METACOPY
+	help
+	  If this config option is enabled then overlay filesystems will use
+	  the index directory to decode overlay NFS file handles by default.
+	  In this case, it is still possible to turn off NFS export support
+	  globally with the "nfs_export=off" module option or on a filesystem
+	  instance basis with the "nfs_export=off" mount option.
+
+	  The NFS export feature creates an index on copy up of every file and
+	  directory.  This full index is used to detect overlay filesystems
+	  inconsistencies on lookup, like redirect from multiple upper dirs to
+	  the same lower dir.  The full index may incur some overhead on mount
+	  time, especially when verifying that directory file handles are not
+	  stale.
+
+	  Note, that the NFS export feature is not backward compatible.
+	  That is, mounting an overlay which has a full index on a kernel
+	  that doesn't support this feature will have unexpected results.
+
+	  Most users should say N here and enable this feature on a case-by-
+	  case basis with the "nfs_export=on" mount option.
+
+	  Say N unless you fully understand the consequences.
+
+config OVERLAY_FS_XINO_AUTO
+	bool "Overlayfs: auto enable inode number mapping"
+	default n
+	depends on OVERLAY_FS
+	depends on 64BIT
+	help
+	  If this config option is enabled then overlay filesystems will use
+	  unused high bits in undelying filesystem inode numbers to map all
+	  inodes to a unified address space.  The mapped 64bit inode numbers
+	  might not be compatible with applications that expect 32bit inodes.
+
+	  If compatibility with applications that expect 32bit inodes is not an
+	  issue, then it is safe and recommended to say Y here.
+
+	  For more information, see Documentation/filesystems/overlayfs.rst
+
+	  If unsure, say N.
+
+config OVERLAY_FS_METACOPY
+	bool "Overlayfs: turn on metadata only copy up feature by default"
+	depends on OVERLAY_FS
+	select OVERLAY_FS_REDIRECT_DIR
+	help
+	  If this config option is enabled then overlay filesystems will
+	  copy up only metadata where appropriate and data copy up will
+	  happen when a file is opened for WRITE operation. It is still
+	  possible to turn off this feature globally with the "metacopy=off"
+	  module option or on a filesystem instance basis with the
+	  "metacopy=off" mount option.
+
+	  Note, that this feature is not backward compatible.  That is,
+	  mounting an overlay which has metacopy only inodes on a kernel
+	  that doesn't support this feature will have unexpected results.
+
+	  If unsure, say N.
diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile
new file mode 100644
index 000000000..9164c585e
--- /dev/null
+++ b/fs/overlayfs/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for the overlay filesystem.
+#
+
+obj-$(CONFIG_OVERLAY_FS) += overlay.o
+
+overlay-objs := super.o namei.o util.o inode.o file.o dir.o readdir.o \
+		copy_up.o export.o
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
new file mode 100644
index 000000000..86d4b6975
--- /dev/null
+++ b/fs/overlayfs/copy_up.c
@@ -0,0 +1,1108 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright (C) 2011 Novell Inc.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/fileattr.h>
+#include <linux/splice.h>
+#include <linux/xattr.h>
+#include <linux/security.h>
+#include <linux/uaccess.h>
+#include <linux/sched/signal.h>
+#include <linux/cred.h>
+#include <linux/namei.h>
+#include <linux/fdtable.h>
+#include <linux/ratelimit.h>
+#include <linux/exportfs.h>
+#include "overlayfs.h"
+
+#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
+
+static int ovl_ccup_set(const char *buf, const struct kernel_param *param)
+{
+	pr_warn("\"check_copy_up\" module option is obsolete\n");
+	return 0;
+}
+
+static int ovl_ccup_get(char *buf, const struct kernel_param *param)
+{
+	return sprintf(buf, "N\n");
+}
+
+module_param_call(check_copy_up, ovl_ccup_set, ovl_ccup_get, NULL, 0644);
+MODULE_PARM_DESC(check_copy_up, "Obsolete; does nothing");
+
+static bool ovl_must_copy_xattr(const char *name)
+{
+	return !strcmp(name, XATTR_POSIX_ACL_ACCESS) ||
+	       !strcmp(name, XATTR_POSIX_ACL_DEFAULT) ||
+	       !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN);
+}
+
+int ovl_copy_xattr(struct super_block *sb, const struct path *oldpath, struct dentry *new)
+{
+	struct dentry *old = oldpath->dentry;
+	ssize_t list_size, size, value_size = 0;
+	char *buf, *name, *value = NULL;
+	int error = 0;
+	size_t slen;
+
+	if (!(old->d_inode->i_opflags & IOP_XATTR) ||
+	    !(new->d_inode->i_opflags & IOP_XATTR))
+		return 0;
+
+	list_size = vfs_listxattr(old, NULL, 0);
+	if (list_size <= 0) {
+		if (list_size == -EOPNOTSUPP)
+			return 0;
+		return list_size;
+	}
+
+	buf = kvzalloc(list_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	list_size = vfs_listxattr(old, buf, list_size);
+	if (list_size <= 0) {
+		error = list_size;
+		goto out;
+	}
+
+	for (name = buf; list_size; name += slen) {
+		slen = strnlen(name, list_size) + 1;
+
+		/* underlying fs providing us with an broken xattr list? */
+		if (WARN_ON(slen > list_size)) {
+			error = -EIO;
+			break;
+		}
+		list_size -= slen;
+
+		if (ovl_is_private_xattr(sb, name))
+			continue;
+
+		error = security_inode_copy_up_xattr(name);
+		if (error < 0 && error != -EOPNOTSUPP)
+			break;
+		if (error == 1) {
+			error = 0;
+			continue; /* Discard */
+		}
+retry:
+		size = ovl_do_getxattr(oldpath, name, value, value_size);
+		if (size == -ERANGE)
+			size = ovl_do_getxattr(oldpath, name, NULL, 0);
+
+		if (size < 0) {
+			error = size;
+			break;
+		}
+
+		if (size > value_size) {
+			void *new;
+
+			new = kvmalloc(size, GFP_KERNEL);
+			if (!new) {
+				error = -ENOMEM;
+				break;
+			}
+			kvfree(value);
+			value = new;
+			value_size = size;
+			goto retry;
+		}
+
+		error = ovl_do_setxattr(OVL_FS(sb), new, name, value, size, 0);
+		if (error) {
+			if (error != -EOPNOTSUPP || ovl_must_copy_xattr(name))
+				break;
+
+			/* Ignore failure to copy unknown xattrs */
+			error = 0;
+		}
+	}
+	kvfree(value);
+out:
+	kvfree(buf);
+	return error;
+}
+
+static int ovl_copy_fileattr(struct inode *inode, const struct path *old,
+			     const struct path *new)
+{
+	struct fileattr oldfa = { .flags_valid = true };
+	struct fileattr newfa = { .flags_valid = true };
+	int err;
+
+	err = ovl_real_fileattr_get(old, &oldfa);
+	if (err) {
+		/* Ntfs-3g returns -EINVAL for "no fileattr support" */
+		if (err == -ENOTTY || err == -EINVAL)
+			return 0;
+		pr_warn("failed to retrieve lower fileattr (%pd2, err=%i)\n",
+			old->dentry, err);
+		return err;
+	}
+
+	/*
+	 * We cannot set immutable and append-only flags on upper inode,
+	 * because we would not be able to link upper inode to upper dir
+	 * not set overlay private xattr on upper inode.
+	 * Store these flags in overlay.protattr xattr instead.
+	 */
+	if (oldfa.flags & OVL_PROT_FS_FLAGS_MASK) {
+		err = ovl_set_protattr(inode, new->dentry, &oldfa);
+		if (err == -EPERM)
+			pr_warn_once("copying fileattr: no xattr on upper\n");
+		else if (err)
+			return err;
+	}
+
+	/* Don't bother copying flags if none are set */
+	if (!(oldfa.flags & OVL_COPY_FS_FLAGS_MASK))
+		return 0;
+
+	err = ovl_real_fileattr_get(new, &newfa);
+	if (err) {
+		/*
+		 * Returning an error if upper doesn't support fileattr will
+		 * result in a regression, so revert to the old behavior.
+		 */
+		if (err == -ENOTTY || err == -EINVAL) {
+			pr_warn_once("copying fileattr: no support on upper\n");
+			return 0;
+		}
+		pr_warn("failed to retrieve upper fileattr (%pd2, err=%i)\n",
+			new->dentry, err);
+		return err;
+	}
+
+	BUILD_BUG_ON(OVL_COPY_FS_FLAGS_MASK & ~FS_COMMON_FL);
+	newfa.flags &= ~OVL_COPY_FS_FLAGS_MASK;
+	newfa.flags |= (oldfa.flags & OVL_COPY_FS_FLAGS_MASK);
+
+	BUILD_BUG_ON(OVL_COPY_FSX_FLAGS_MASK & ~FS_XFLAG_COMMON);
+	newfa.fsx_xflags &= ~OVL_COPY_FSX_FLAGS_MASK;
+	newfa.fsx_xflags |= (oldfa.fsx_xflags & OVL_COPY_FSX_FLAGS_MASK);
+
+	return ovl_real_fileattr_set(new, &newfa);
+}
+
+static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
+			    struct file *new_file, loff_t len)
+{
+	struct path datapath;
+	struct file *old_file;
+	loff_t old_pos = 0;
+	loff_t new_pos = 0;
+	loff_t cloned;
+	loff_t data_pos = -1;
+	loff_t hole_len;
+	bool skip_hole = false;
+	int error = 0;
+
+	ovl_path_lowerdata(dentry, &datapath);
+	if (WARN_ON(datapath.dentry == NULL))
+		return -EIO;
+
+	old_file = ovl_path_open(&datapath, O_LARGEFILE | O_RDONLY);
+	if (IS_ERR(old_file))
+		return PTR_ERR(old_file);
+
+	/* Try to use clone_file_range to clone up within the same fs */
+	cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0);
+	if (cloned == len)
+		goto out_fput;
+	/* Couldn't clone, so now we try to copy the data */
+
+	/* Check if lower fs supports seek operation */
+	if (old_file->f_mode & FMODE_LSEEK)
+		skip_hole = true;
+
+	while (len) {
+		size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
+		long bytes;
+
+		if (len < this_len)
+			this_len = len;
+
+		if (signal_pending_state(TASK_KILLABLE, current)) {
+			error = -EINTR;
+			break;
+		}
+
+		/*
+		 * Fill zero for hole will cost unnecessary disk space
+		 * and meanwhile slow down the copy-up speed, so we do
+		 * an optimization for hole during copy-up, it relies
+		 * on SEEK_DATA implementation in lower fs so if lower
+		 * fs does not support it, copy-up will behave as before.
+		 *
+		 * Detail logic of hole detection as below:
+		 * When we detect next data position is larger than current
+		 * position we will skip that hole, otherwise we copy
+		 * data in the size of OVL_COPY_UP_CHUNK_SIZE. Actually,
+		 * it may not recognize all kind of holes and sometimes
+		 * only skips partial of hole area. However, it will be
+		 * enough for most of the use cases.
+		 */
+
+		if (skip_hole && data_pos < old_pos) {
+			data_pos = vfs_llseek(old_file, old_pos, SEEK_DATA);
+			if (data_pos > old_pos) {
+				hole_len = data_pos - old_pos;
+				len -= hole_len;
+				old_pos = new_pos = data_pos;
+				continue;
+			} else if (data_pos == -ENXIO) {
+				break;
+			} else if (data_pos < 0) {
+				skip_hole = false;
+			}
+		}
+
+		bytes = do_splice_direct(old_file, &old_pos,
+					 new_file, &new_pos,
+					 this_len, SPLICE_F_MOVE);
+		if (bytes <= 0) {
+			error = bytes;
+			break;
+		}
+		WARN_ON(old_pos != new_pos);
+
+		len -= bytes;
+	}
+	if (!error && ovl_should_sync(ofs))
+		error = vfs_fsync(new_file, 0);
+out_fput:
+	fput(old_file);
+	return error;
+}
+
+static int ovl_set_size(struct ovl_fs *ofs,
+			struct dentry *upperdentry, struct kstat *stat)
+{
+	struct iattr attr = {
+		.ia_valid = ATTR_SIZE,
+		.ia_size = stat->size,
+	};
+
+	return ovl_do_notify_change(ofs, upperdentry, &attr);
+}
+
+static int ovl_set_timestamps(struct ovl_fs *ofs, struct dentry *upperdentry,
+			      struct kstat *stat)
+{
+	struct iattr attr = {
+		.ia_valid =
+		     ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_CTIME,
+		.ia_atime = stat->atime,
+		.ia_mtime = stat->mtime,
+	};
+
+	return ovl_do_notify_change(ofs, upperdentry, &attr);
+}
+
+int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upperdentry,
+		 struct kstat *stat)
+{
+	int err = 0;
+
+	if (!S_ISLNK(stat->mode)) {
+		struct iattr attr = {
+			.ia_valid = ATTR_MODE,
+			.ia_mode = stat->mode,
+		};
+		err = ovl_do_notify_change(ofs, upperdentry, &attr);
+	}
+	if (!err) {
+		struct iattr attr = {
+			.ia_valid = ATTR_UID | ATTR_GID,
+			.ia_vfsuid = VFSUIDT_INIT(stat->uid),
+			.ia_vfsgid = VFSGIDT_INIT(stat->gid),
+		};
+		err = ovl_do_notify_change(ofs, upperdentry, &attr);
+	}
+	if (!err)
+		ovl_set_timestamps(ofs, upperdentry, stat);
+
+	return err;
+}
+
+struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real,
+				  bool is_upper)
+{
+	struct ovl_fh *fh;
+	int fh_type, dwords;
+	int buflen = MAX_HANDLE_SZ;
+	uuid_t *uuid = &real->d_sb->s_uuid;
+	int err;
+
+	/* Make sure the real fid stays 32bit aligned */
+	BUILD_BUG_ON(OVL_FH_FID_OFFSET % 4);
+	BUILD_BUG_ON(MAX_HANDLE_SZ + OVL_FH_FID_OFFSET > 255);
+
+	fh = kzalloc(buflen + OVL_FH_FID_OFFSET, GFP_KERNEL);
+	if (!fh)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * We encode a non-connectable file handle for non-dir, because we
+	 * only need to find the lower inode number and we don't want to pay
+	 * the price or reconnecting the dentry.
+	 */
+	dwords = buflen >> 2;
+	fh_type = exportfs_encode_fh(real, (void *)fh->fb.fid, &dwords, 0);
+	buflen = (dwords << 2);
+
+	err = -EIO;
+	if (WARN_ON(fh_type < 0) ||
+	    WARN_ON(buflen > MAX_HANDLE_SZ) ||
+	    WARN_ON(fh_type == FILEID_INVALID))
+		goto out_err;
+
+	fh->fb.version = OVL_FH_VERSION;
+	fh->fb.magic = OVL_FH_MAGIC;
+	fh->fb.type = fh_type;
+	fh->fb.flags = OVL_FH_FLAG_CPU_ENDIAN;
+	/*
+	 * When we will want to decode an overlay dentry from this handle
+	 * and all layers are on the same fs, if we get a disconncted real
+	 * dentry when we decode fid, the only way to tell if we should assign
+	 * it to upperdentry or to lowerstack is by checking this flag.
+	 */
+	if (is_upper)
+		fh->fb.flags |= OVL_FH_FLAG_PATH_UPPER;
+	fh->fb.len = sizeof(fh->fb) + buflen;
+	if (ofs->config.uuid)
+		fh->fb.uuid = *uuid;
+
+	return fh;
+
+out_err:
+	kfree(fh);
+	return ERR_PTR(err);
+}
+
+int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower,
+		   struct dentry *upper)
+{
+	const struct ovl_fh *fh = NULL;
+	int err;
+
+	/*
+	 * When lower layer doesn't support export operations store a 'null' fh,
+	 * so we can use the overlay.origin xattr to distignuish between a copy
+	 * up and a pure upper inode.
+	 */
+	if (ovl_can_decode_fh(lower->d_sb)) {
+		fh = ovl_encode_real_fh(ofs, lower, false);
+		if (IS_ERR(fh))
+			return PTR_ERR(fh);
+	}
+
+	/*
+	 * Do not fail when upper doesn't support xattrs.
+	 */
+	err = ovl_check_setxattr(ofs, upper, OVL_XATTR_ORIGIN, fh->buf,
+				 fh ? fh->fb.len : 0, 0);
+	kfree(fh);
+
+	/* Ignore -EPERM from setting "user.*" on symlink/special */
+	return err == -EPERM ? 0 : err;
+}
+
+/* Store file handle of @upper dir in @index dir entry */
+static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper,
+			    struct dentry *index)
+{
+	const struct ovl_fh *fh;
+	int err;
+
+	fh = ovl_encode_real_fh(ofs, upper, true);
+	if (IS_ERR(fh))
+		return PTR_ERR(fh);
+
+	err = ovl_setxattr(ofs, index, OVL_XATTR_UPPER, fh->buf, fh->fb.len);
+
+	kfree(fh);
+	return err;
+}
+
+/*
+ * Create and install index entry.
+ *
+ * Caller must hold i_mutex on indexdir.
+ */
+static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
+			    struct dentry *upper)
+{
+	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+	struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
+	struct inode *dir = d_inode(indexdir);
+	struct dentry *index = NULL;
+	struct dentry *temp = NULL;
+	struct qstr name = { };
+	int err;
+
+	/*
+	 * For now this is only used for creating index entry for directories,
+	 * because non-dir are copied up directly to index and then hardlinked
+	 * to upper dir.
+	 *
+	 * TODO: implement create index for non-dir, so we can call it when
+	 * encoding file handle for non-dir in case index does not exist.
+	 */
+	if (WARN_ON(!d_is_dir(dentry)))
+		return -EIO;
+
+	/* Directory not expected to be indexed before copy up */
+	if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry))))
+		return -EIO;
+
+	err = ovl_get_index_name(ofs, origin, &name);
+	if (err)
+		return err;
+
+	temp = ovl_create_temp(ofs, indexdir, OVL_CATTR(S_IFDIR | 0));
+	err = PTR_ERR(temp);
+	if (IS_ERR(temp))
+		goto free_name;
+
+	err = ovl_set_upper_fh(ofs, upper, temp);
+	if (err)
+		goto out;
+
+	index = ovl_lookup_upper(ofs, name.name, indexdir, name.len);
+	if (IS_ERR(index)) {
+		err = PTR_ERR(index);
+	} else {
+		err = ovl_do_rename(ofs, dir, temp, dir, index, 0);
+		dput(index);
+	}
+out:
+	if (err)
+		ovl_cleanup(ofs, dir, temp);
+	dput(temp);
+free_name:
+	kfree(name.name);
+	return err;
+}
+
+struct ovl_copy_up_ctx {
+	struct dentry *parent;
+	struct dentry *dentry;
+	struct path lowerpath;
+	struct kstat stat;
+	struct kstat pstat;
+	const char *link;
+	struct dentry *destdir;
+	struct qstr destname;
+	struct dentry *workdir;
+	bool origin;
+	bool indexed;
+	bool metacopy;
+};
+
+static int ovl_link_up(struct ovl_copy_up_ctx *c)
+{
+	int err;
+	struct dentry *upper;
+	struct dentry *upperdir = ovl_dentry_upper(c->parent);
+	struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
+	struct inode *udir = d_inode(upperdir);
+
+	/* Mark parent "impure" because it may now contain non-pure upper */
+	err = ovl_set_impure(c->parent, upperdir);
+	if (err)
+		return err;
+
+	err = ovl_set_nlink_lower(c->dentry);
+	if (err)
+		return err;
+
+	inode_lock_nested(udir, I_MUTEX_PARENT);
+	upper = ovl_lookup_upper(ofs, c->dentry->d_name.name, upperdir,
+				 c->dentry->d_name.len);
+	err = PTR_ERR(upper);
+	if (!IS_ERR(upper)) {
+		err = ovl_do_link(ofs, ovl_dentry_upper(c->dentry), udir, upper);
+		dput(upper);
+
+		if (!err) {
+			/* Restore timestamps on parent (best effort) */
+			ovl_set_timestamps(ofs, upperdir, &c->pstat);
+			ovl_dentry_set_upper_alias(c->dentry);
+			ovl_dentry_update_reval(c->dentry, upper);
+		}
+	}
+	inode_unlock(udir);
+	if (err)
+		return err;
+
+	err = ovl_set_nlink_upper(c->dentry);
+
+	return err;
+}
+
+static int ovl_copy_up_data(struct ovl_copy_up_ctx *c, const struct path *temp)
+{
+	struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
+	struct file *new_file;
+	int err;
+
+	if (!S_ISREG(c->stat.mode) || c->metacopy || !c->stat.size)
+		return 0;
+
+	new_file = ovl_path_open(temp, O_LARGEFILE | O_WRONLY);
+	if (IS_ERR(new_file))
+		return PTR_ERR(new_file);
+
+	err = ovl_copy_up_file(ofs, c->dentry, new_file, c->stat.size);
+	fput(new_file);
+
+	return err;
+}
+
+static int ovl_copy_up_metadata(struct ovl_copy_up_ctx *c, struct dentry *temp)
+{
+	struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
+	struct inode *inode = d_inode(c->dentry);
+	struct path upperpath = { .mnt = ovl_upper_mnt(ofs), .dentry = temp };
+	int err;
+
+	err = ovl_copy_xattr(c->dentry->d_sb, &c->lowerpath, temp);
+	if (err)
+		return err;
+
+	if (inode->i_flags & OVL_COPY_I_FLAGS_MASK &&
+	    (S_ISREG(c->stat.mode) || S_ISDIR(c->stat.mode))) {
+		/*
+		 * Copy the fileattr inode flags that are the source of already
+		 * copied i_flags
+		 */
+		err = ovl_copy_fileattr(inode, &c->lowerpath, &upperpath);
+		if (err)
+			return err;
+	}
+
+	/*
+	 * Store identifier of lower inode in upper inode xattr to
+	 * allow lookup of the copy up origin inode.
+	 *
+	 * Don't set origin when we are breaking the association with a lower
+	 * hard link.
+	 */
+	if (c->origin) {
+		err = ovl_set_origin(ofs, c->lowerpath.dentry, temp);
+		if (err)
+			return err;
+	}
+
+	if (c->metacopy) {
+		err = ovl_check_setxattr(ofs, temp, OVL_XATTR_METACOPY,
+					 NULL, 0, -EOPNOTSUPP);
+		if (err)
+			return err;
+	}
+
+	inode_lock(temp->d_inode);
+	if (S_ISREG(c->stat.mode))
+		err = ovl_set_size(ofs, temp, &c->stat);
+	if (!err)
+		err = ovl_set_attr(ofs, temp, &c->stat);
+	inode_unlock(temp->d_inode);
+
+	return err;
+}
+
+struct ovl_cu_creds {
+	const struct cred *old;
+	struct cred *new;
+};
+
+static int ovl_prep_cu_creds(struct dentry *dentry, struct ovl_cu_creds *cc)
+{
+	int err;
+
+	cc->old = cc->new = NULL;
+	err = security_inode_copy_up(dentry, &cc->new);
+	if (err < 0)
+		return err;
+
+	if (cc->new)
+		cc->old = override_creds(cc->new);
+
+	return 0;
+}
+
+static void ovl_revert_cu_creds(struct ovl_cu_creds *cc)
+{
+	if (cc->new) {
+		revert_creds(cc->old);
+		put_cred(cc->new);
+	}
+}
+
+/*
+ * Copyup using workdir to prepare temp file.  Used when copying up directories,
+ * special files or when upper fs doesn't support O_TMPFILE.
+ */
+static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
+{
+	struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
+	struct inode *inode;
+	struct inode *udir = d_inode(c->destdir), *wdir = d_inode(c->workdir);
+	struct path path = { .mnt = ovl_upper_mnt(ofs) };
+	struct dentry *temp, *upper;
+	struct ovl_cu_creds cc;
+	int err;
+	struct ovl_cattr cattr = {
+		/* Can't properly set mode on creation because of the umask */
+		.mode = c->stat.mode & S_IFMT,
+		.rdev = c->stat.rdev,
+		.link = c->link
+	};
+
+	/* workdir and destdir could be the same when copying up to indexdir */
+	err = -EIO;
+	if (lock_rename(c->workdir, c->destdir) != NULL)
+		goto unlock;
+
+	err = ovl_prep_cu_creds(c->dentry, &cc);
+	if (err)
+		goto unlock;
+
+	temp = ovl_create_temp(ofs, c->workdir, &cattr);
+	ovl_revert_cu_creds(&cc);
+
+	err = PTR_ERR(temp);
+	if (IS_ERR(temp))
+		goto unlock;
+
+	/*
+	 * Copy up data first and then xattrs. Writing data after
+	 * xattrs will remove security.capability xattr automatically.
+	 */
+	path.dentry = temp;
+	err = ovl_copy_up_data(c, &path);
+	if (err)
+		goto cleanup;
+
+	err = ovl_copy_up_metadata(c, temp);
+	if (err)
+		goto cleanup;
+
+	if (S_ISDIR(c->stat.mode) && c->indexed) {
+		err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp);
+		if (err)
+			goto cleanup;
+	}
+
+	upper = ovl_lookup_upper(ofs, c->destname.name, c->destdir,
+				 c->destname.len);
+	err = PTR_ERR(upper);
+	if (IS_ERR(upper))
+		goto cleanup;
+
+	err = ovl_do_rename(ofs, wdir, temp, udir, upper, 0);
+	dput(upper);
+	if (err)
+		goto cleanup;
+
+	if (!c->metacopy)
+		ovl_set_upperdata(d_inode(c->dentry));
+	inode = d_inode(c->dentry);
+	ovl_inode_update(inode, temp);
+	if (S_ISDIR(inode->i_mode))
+		ovl_set_flag(OVL_WHITEOUTS, inode);
+unlock:
+	unlock_rename(c->workdir, c->destdir);
+
+	return err;
+
+cleanup:
+	ovl_cleanup(ofs, wdir, temp);
+	dput(temp);
+	goto unlock;
+}
+
+/* Copyup using O_TMPFILE which does not require cross dir locking */
+static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
+{
+	struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
+	struct inode *udir = d_inode(c->destdir);
+	struct dentry *temp, *upper;
+	struct file *tmpfile;
+	struct ovl_cu_creds cc;
+	int err;
+
+	err = ovl_prep_cu_creds(c->dentry, &cc);
+	if (err)
+		return err;
+
+	tmpfile = ovl_do_tmpfile(ofs, c->workdir, c->stat.mode);
+	ovl_revert_cu_creds(&cc);
+
+	if (IS_ERR(tmpfile))
+		return PTR_ERR(tmpfile);
+
+	temp = tmpfile->f_path.dentry;
+	if (!c->metacopy && c->stat.size) {
+		err = ovl_copy_up_file(ofs, c->dentry, tmpfile, c->stat.size);
+		if (err)
+			goto out_fput;
+	}
+
+	err = ovl_copy_up_metadata(c, temp);
+	if (err)
+		goto out_fput;
+
+	inode_lock_nested(udir, I_MUTEX_PARENT);
+
+	upper = ovl_lookup_upper(ofs, c->destname.name, c->destdir,
+				 c->destname.len);
+	err = PTR_ERR(upper);
+	if (!IS_ERR(upper)) {
+		err = ovl_do_link(ofs, temp, udir, upper);
+		dput(upper);
+	}
+	inode_unlock(udir);
+
+	if (err)
+		goto out_fput;
+
+	if (!c->metacopy)
+		ovl_set_upperdata(d_inode(c->dentry));
+	ovl_inode_update(d_inode(c->dentry), dget(temp));
+
+out_fput:
+	fput(tmpfile);
+	return err;
+}
+
+/*
+ * Copy up a single dentry
+ *
+ * All renames start with copy up of source if necessary.  The actual
+ * rename will only proceed once the copy up was successful.  Copy up uses
+ * upper parent i_mutex for exclusion.  Since rename can change d_parent it
+ * is possible that the copy up will lock the old parent.  At that point
+ * the file will have already been copied up anyway.
+ */
+static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
+{
+	int err;
+	struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
+	bool to_index = false;
+
+	/*
+	 * Indexed non-dir is copied up directly to the index entry and then
+	 * hardlinked to upper dir. Indexed dir is copied up to indexdir,
+	 * then index entry is created and then copied up dir installed.
+	 * Copying dir up to indexdir instead of workdir simplifies locking.
+	 */
+	if (ovl_need_index(c->dentry)) {
+		c->indexed = true;
+		if (S_ISDIR(c->stat.mode))
+			c->workdir = ovl_indexdir(c->dentry->d_sb);
+		else
+			to_index = true;
+	}
+
+	if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index)
+		c->origin = true;
+
+	if (to_index) {
+		c->destdir = ovl_indexdir(c->dentry->d_sb);
+		err = ovl_get_index_name(ofs, c->lowerpath.dentry, &c->destname);
+		if (err)
+			return err;
+	} else if (WARN_ON(!c->parent)) {
+		/* Disconnected dentry must be copied up to index dir */
+		return -EIO;
+	} else {
+		/*
+		 * Mark parent "impure" because it may now contain non-pure
+		 * upper
+		 */
+		err = ovl_set_impure(c->parent, c->destdir);
+		if (err)
+			return err;
+	}
+
+	/* Should we copyup with O_TMPFILE or with workdir? */
+	if (S_ISREG(c->stat.mode) && ofs->tmpfile)
+		err = ovl_copy_up_tmpfile(c);
+	else
+		err = ovl_copy_up_workdir(c);
+	if (err)
+		goto out;
+
+	if (c->indexed)
+		ovl_set_flag(OVL_INDEX, d_inode(c->dentry));
+
+	if (to_index) {
+		/* Initialize nlink for copy up of disconnected dentry */
+		err = ovl_set_nlink_upper(c->dentry);
+	} else {
+		struct inode *udir = d_inode(c->destdir);
+
+		/* Restore timestamps on parent (best effort) */
+		inode_lock(udir);
+		ovl_set_timestamps(ofs, c->destdir, &c->pstat);
+		inode_unlock(udir);
+
+		ovl_dentry_set_upper_alias(c->dentry);
+		ovl_dentry_update_reval(c->dentry, ovl_dentry_upper(c->dentry));
+	}
+
+out:
+	if (to_index)
+		kfree(c->destname.name);
+	return err;
+}
+
+static bool ovl_need_meta_copy_up(struct dentry *dentry, umode_t mode,
+				  int flags)
+{
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+
+	if (!ofs->config.metacopy)
+		return false;
+
+	if (!S_ISREG(mode))
+		return false;
+
+	if (flags && ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC)))
+		return false;
+
+	return true;
+}
+
+static ssize_t ovl_getxattr_value(const struct path *path, char *name, char **value)
+{
+	ssize_t res;
+	char *buf;
+
+	res = ovl_do_getxattr(path, name, NULL, 0);
+	if (res == -ENODATA || res == -EOPNOTSUPP)
+		res = 0;
+
+	if (res > 0) {
+		buf = kzalloc(res, GFP_KERNEL);
+		if (!buf)
+			return -ENOMEM;
+
+		res = ovl_do_getxattr(path, name, buf, res);
+		if (res < 0)
+			kfree(buf);
+		else
+			*value = buf;
+	}
+	return res;
+}
+
+/* Copy up data of an inode which was copied up metadata only in the past. */
+static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
+{
+	struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
+	struct path upperpath;
+	int err;
+	char *capability = NULL;
+	ssize_t cap_size;
+
+	ovl_path_upper(c->dentry, &upperpath);
+	if (WARN_ON(upperpath.dentry == NULL))
+		return -EIO;
+
+	if (c->stat.size) {
+		err = cap_size = ovl_getxattr_value(&upperpath, XATTR_NAME_CAPS,
+						    &capability);
+		if (cap_size < 0)
+			goto out;
+	}
+
+	err = ovl_copy_up_data(c, &upperpath);
+	if (err)
+		goto out_free;
+
+	/*
+	 * Writing to upper file will clear security.capability xattr. We
+	 * don't want that to happen for normal copy-up operation.
+	 */
+	if (capability) {
+		err = ovl_do_setxattr(ofs, upperpath.dentry, XATTR_NAME_CAPS,
+				      capability, cap_size, 0);
+		if (err)
+			goto out_free;
+	}
+
+
+	err = ovl_removexattr(ofs, upperpath.dentry, OVL_XATTR_METACOPY);
+	if (err)
+		goto out_free;
+
+	ovl_set_upperdata(d_inode(c->dentry));
+out_free:
+	kfree(capability);
+out:
+	return err;
+}
+
+static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
+			   int flags)
+{
+	int err;
+	DEFINE_DELAYED_CALL(done);
+	struct path parentpath;
+	struct ovl_copy_up_ctx ctx = {
+		.parent = parent,
+		.dentry = dentry,
+		.workdir = ovl_workdir(dentry),
+	};
+
+	if (WARN_ON(!ctx.workdir))
+		return -EROFS;
+
+	ovl_path_lower(dentry, &ctx.lowerpath);
+	err = vfs_getattr(&ctx.lowerpath, &ctx.stat,
+			  STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
+	if (err)
+		return err;
+
+	if (!kuid_has_mapping(current_user_ns(), ctx.stat.uid) ||
+	    !kgid_has_mapping(current_user_ns(), ctx.stat.gid))
+		return -EOVERFLOW;
+
+	ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags);
+
+	if (parent) {
+		ovl_path_upper(parent, &parentpath);
+		ctx.destdir = parentpath.dentry;
+		ctx.destname = dentry->d_name;
+
+		err = vfs_getattr(&parentpath, &ctx.pstat,
+				  STATX_ATIME | STATX_MTIME,
+				  AT_STATX_SYNC_AS_STAT);
+		if (err)
+			return err;
+	}
+
+	/* maybe truncate regular file. this has no effect on dirs */
+	if (flags & O_TRUNC)
+		ctx.stat.size = 0;
+
+	if (S_ISLNK(ctx.stat.mode)) {
+		ctx.link = vfs_get_link(ctx.lowerpath.dentry, &done);
+		if (IS_ERR(ctx.link))
+			return PTR_ERR(ctx.link);
+	}
+
+	err = ovl_copy_up_start(dentry, flags);
+	/* err < 0: interrupted, err > 0: raced with another copy-up */
+	if (unlikely(err)) {
+		if (err > 0)
+			err = 0;
+	} else {
+		if (!ovl_dentry_upper(dentry))
+			err = ovl_do_copy_up(&ctx);
+		if (!err && parent && !ovl_dentry_has_upper_alias(dentry))
+			err = ovl_link_up(&ctx);
+		if (!err && ovl_dentry_needs_data_copy_up_locked(dentry, flags))
+			err = ovl_copy_up_meta_inode_data(&ctx);
+		ovl_copy_up_end(dentry);
+	}
+	do_delayed_call(&done);
+
+	return err;
+}
+
+static int ovl_copy_up_flags(struct dentry *dentry, int flags)
+{
+	int err = 0;
+	const struct cred *old_cred;
+	bool disconnected = (dentry->d_flags & DCACHE_DISCONNECTED);
+
+	/*
+	 * With NFS export, copy up can get called for a disconnected non-dir.
+	 * In this case, we will copy up lower inode to index dir without
+	 * linking it to upper dir.
+	 */
+	if (WARN_ON(disconnected && d_is_dir(dentry)))
+		return -EIO;
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	while (!err) {
+		struct dentry *next;
+		struct dentry *parent = NULL;
+
+		if (ovl_already_copied_up(dentry, flags))
+			break;
+
+		next = dget(dentry);
+		/* find the topmost dentry not yet copied up */
+		for (; !disconnected;) {
+			parent = dget_parent(next);
+
+			if (ovl_dentry_upper(parent))
+				break;
+
+			dput(next);
+			next = parent;
+		}
+
+		err = ovl_copy_up_one(parent, next, flags);
+
+		dput(parent);
+		dput(next);
+	}
+	revert_creds(old_cred);
+
+	return err;
+}
+
+static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
+{
+	/* Copy up of disconnected dentry does not set upper alias */
+	if (ovl_already_copied_up(dentry, flags))
+		return false;
+
+	if (special_file(d_inode(dentry)->i_mode))
+		return false;
+
+	if (!ovl_open_flags_need_copy_up(flags))
+		return false;
+
+	return true;
+}
+
+int ovl_maybe_copy_up(struct dentry *dentry, int flags)
+{
+	int err = 0;
+
+	if (ovl_open_need_copy_up(dentry, flags)) {
+		err = ovl_want_write(dentry);
+		if (!err) {
+			err = ovl_copy_up_flags(dentry, flags);
+			ovl_drop_write(dentry);
+		}
+	}
+
+	return err;
+}
+
+int ovl_copy_up_with_data(struct dentry *dentry)
+{
+	return ovl_copy_up_flags(dentry, O_WRONLY);
+}
+
+int ovl_copy_up(struct dentry *dentry)
+{
+	return ovl_copy_up_flags(dentry, 0);
+}
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
new file mode 100644
index 000000000..5339ff08b
--- /dev/null
+++ b/fs/overlayfs/dir.c
@@ -0,0 +1,1331 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright (C) 2011 Novell Inc.
+ */
+
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/xattr.h>
+#include <linux/security.h>
+#include <linux/cred.h>
+#include <linux/module.h>
+#include <linux/posix_acl.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/atomic.h>
+#include <linux/ratelimit.h>
+#include "overlayfs.h"
+
+static unsigned short ovl_redirect_max = 256;
+module_param_named(redirect_max, ovl_redirect_max, ushort, 0644);
+MODULE_PARM_DESC(redirect_max,
+		 "Maximum length of absolute redirect xattr value");
+
+static int ovl_set_redirect(struct dentry *dentry, bool samedir);
+
+int ovl_cleanup(struct ovl_fs *ofs, struct inode *wdir, struct dentry *wdentry)
+{
+	int err;
+
+	dget(wdentry);
+	if (d_is_dir(wdentry))
+		err = ovl_do_rmdir(ofs, wdir, wdentry);
+	else
+		err = ovl_do_unlink(ofs, wdir, wdentry);
+	dput(wdentry);
+
+	if (err) {
+		pr_err("cleanup of '%pd2' failed (%i)\n",
+		       wdentry, err);
+	}
+
+	return err;
+}
+
+struct dentry *ovl_lookup_temp(struct ovl_fs *ofs, struct dentry *workdir)
+{
+	struct dentry *temp;
+	char name[20];
+	static atomic_t temp_id = ATOMIC_INIT(0);
+
+	/* counter is allowed to wrap, since temp dentries are ephemeral */
+	snprintf(name, sizeof(name), "#%x", atomic_inc_return(&temp_id));
+
+	temp = ovl_lookup_upper(ofs, name, workdir, strlen(name));
+	if (!IS_ERR(temp) && temp->d_inode) {
+		pr_err("workdir/%s already exists\n", name);
+		dput(temp);
+		temp = ERR_PTR(-EIO);
+	}
+
+	return temp;
+}
+
+/* caller holds i_mutex on workdir */
+static struct dentry *ovl_whiteout(struct ovl_fs *ofs)
+{
+	int err;
+	struct dentry *whiteout;
+	struct dentry *workdir = ofs->workdir;
+	struct inode *wdir = workdir->d_inode;
+
+	if (!ofs->whiteout) {
+		whiteout = ovl_lookup_temp(ofs, workdir);
+		if (IS_ERR(whiteout))
+			goto out;
+
+		err = ovl_do_whiteout(ofs, wdir, whiteout);
+		if (err) {
+			dput(whiteout);
+			whiteout = ERR_PTR(err);
+			goto out;
+		}
+		ofs->whiteout = whiteout;
+	}
+
+	if (ofs->share_whiteout) {
+		whiteout = ovl_lookup_temp(ofs, workdir);
+		if (IS_ERR(whiteout))
+			goto out;
+
+		err = ovl_do_link(ofs, ofs->whiteout, wdir, whiteout);
+		if (!err)
+			goto out;
+
+		if (err != -EMLINK) {
+			pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%i)\n",
+				ofs->whiteout->d_inode->i_nlink, err);
+			ofs->share_whiteout = false;
+		}
+		dput(whiteout);
+	}
+	whiteout = ofs->whiteout;
+	ofs->whiteout = NULL;
+out:
+	return whiteout;
+}
+
+/* Caller must hold i_mutex on both workdir and dir */
+int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir,
+			     struct dentry *dentry)
+{
+	struct inode *wdir = ofs->workdir->d_inode;
+	struct dentry *whiteout;
+	int err;
+	int flags = 0;
+
+	whiteout = ovl_whiteout(ofs);
+	err = PTR_ERR(whiteout);
+	if (IS_ERR(whiteout))
+		return err;
+
+	if (d_is_dir(dentry))
+		flags = RENAME_EXCHANGE;
+
+	err = ovl_do_rename(ofs, wdir, whiteout, dir, dentry, flags);
+	if (err)
+		goto kill_whiteout;
+	if (flags)
+		ovl_cleanup(ofs, wdir, dentry);
+
+out:
+	dput(whiteout);
+	return err;
+
+kill_whiteout:
+	ovl_cleanup(ofs, wdir, whiteout);
+	goto out;
+}
+
+int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir,
+		   struct dentry **newdentry, umode_t mode)
+{
+	int err;
+	struct dentry *d, *dentry = *newdentry;
+
+	err = ovl_do_mkdir(ofs, dir, dentry, mode);
+	if (err)
+		return err;
+
+	if (likely(!d_unhashed(dentry)))
+		return 0;
+
+	/*
+	 * vfs_mkdir() may succeed and leave the dentry passed
+	 * to it unhashed and negative. If that happens, try to
+	 * lookup a new hashed and positive dentry.
+	 */
+	d = ovl_lookup_upper(ofs, dentry->d_name.name, dentry->d_parent,
+			     dentry->d_name.len);
+	if (IS_ERR(d)) {
+		pr_warn("failed lookup after mkdir (%pd2, err=%i).\n",
+			dentry, err);
+		return PTR_ERR(d);
+	}
+	dput(dentry);
+	*newdentry = d;
+
+	return 0;
+}
+
+struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir,
+			       struct dentry *newdentry, struct ovl_cattr *attr)
+{
+	int err;
+
+	if (IS_ERR(newdentry))
+		return newdentry;
+
+	err = -ESTALE;
+	if (newdentry->d_inode)
+		goto out;
+
+	if (attr->hardlink) {
+		err = ovl_do_link(ofs, attr->hardlink, dir, newdentry);
+	} else {
+		switch (attr->mode & S_IFMT) {
+		case S_IFREG:
+			err = ovl_do_create(ofs, dir, newdentry, attr->mode);
+			break;
+
+		case S_IFDIR:
+			/* mkdir is special... */
+			err =  ovl_mkdir_real(ofs, dir, &newdentry, attr->mode);
+			break;
+
+		case S_IFCHR:
+		case S_IFBLK:
+		case S_IFIFO:
+		case S_IFSOCK:
+			err = ovl_do_mknod(ofs, dir, newdentry, attr->mode,
+					   attr->rdev);
+			break;
+
+		case S_IFLNK:
+			err = ovl_do_symlink(ofs, dir, newdentry, attr->link);
+			break;
+
+		default:
+			err = -EPERM;
+		}
+	}
+	if (!err && WARN_ON(!newdentry->d_inode)) {
+		/*
+		 * Not quite sure if non-instantiated dentry is legal or not.
+		 * VFS doesn't seem to care so check and warn here.
+		 */
+		err = -EIO;
+	}
+out:
+	if (err) {
+		dput(newdentry);
+		return ERR_PTR(err);
+	}
+	return newdentry;
+}
+
+struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir,
+			       struct ovl_cattr *attr)
+{
+	return ovl_create_real(ofs, d_inode(workdir),
+			       ovl_lookup_temp(ofs, workdir), attr);
+}
+
+static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper,
+			       int xerr)
+{
+	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+	int err;
+
+	err = ovl_check_setxattr(ofs, upper, OVL_XATTR_OPAQUE, "y", 1, xerr);
+	if (!err)
+		ovl_dentry_set_opaque(dentry);
+
+	return err;
+}
+
+static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry)
+{
+	/*
+	 * Fail with -EIO when trying to create opaque dir and upper doesn't
+	 * support xattrs. ovl_rename() calls ovl_set_opaque_xerr(-EXDEV) to
+	 * return a specific error for noxattr case.
+	 */
+	return ovl_set_opaque_xerr(dentry, upperdentry, -EIO);
+}
+
+/*
+ * Common operations required to be done after creation of file on upper.
+ * If @hardlink is false, then @inode is a pre-allocated inode, we may or
+ * may not use to instantiate the new dentry.
+ */
+static int ovl_instantiate(struct dentry *dentry, struct inode *inode,
+			   struct dentry *newdentry, bool hardlink)
+{
+	struct ovl_inode_params oip = {
+		.upperdentry = newdentry,
+		.newinode = inode,
+	};
+
+	ovl_dir_modified(dentry->d_parent, false);
+	ovl_dentry_set_upper_alias(dentry);
+	ovl_dentry_init_reval(dentry, newdentry);
+
+	if (!hardlink) {
+		/*
+		 * ovl_obtain_alias() can be called after ovl_create_real()
+		 * and before we get here, so we may get an inode from cache
+		 * with the same real upperdentry that is not the inode we
+		 * pre-allocated.  In this case we will use the cached inode
+		 * to instantiate the new dentry.
+		 *
+		 * XXX: if we ever use ovl_obtain_alias() to decode directory
+		 * file handles, need to use ovl_get_inode_locked() and
+		 * d_instantiate_new() here to prevent from creating two
+		 * hashed directory inode aliases.
+		 */
+		inode = ovl_get_inode(dentry->d_sb, &oip);
+		if (IS_ERR(inode))
+			return PTR_ERR(inode);
+		if (inode == oip.newinode)
+			ovl_set_flag(OVL_UPPERDATA, inode);
+	} else {
+		WARN_ON(ovl_inode_real(inode) != d_inode(newdentry));
+		dput(newdentry);
+		inc_nlink(inode);
+	}
+
+	d_instantiate(dentry, inode);
+	if (inode != oip.newinode) {
+		pr_warn_ratelimited("newly created inode found in cache (%pd2)\n",
+				    dentry);
+	}
+
+	/* Force lookup of new upper hardlink to find its lower */
+	if (hardlink)
+		d_drop(dentry);
+
+	return 0;
+}
+
+static bool ovl_type_merge(struct dentry *dentry)
+{
+	return OVL_TYPE_MERGE(ovl_path_type(dentry));
+}
+
+static bool ovl_type_origin(struct dentry *dentry)
+{
+	return OVL_TYPE_ORIGIN(ovl_path_type(dentry));
+}
+
+static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
+			    struct ovl_cattr *attr)
+{
+	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
+	struct inode *udir = upperdir->d_inode;
+	struct dentry *newdentry;
+	int err;
+
+	if (!attr->hardlink && !IS_POSIXACL(udir))
+		attr->mode &= ~current_umask();
+
+	inode_lock_nested(udir, I_MUTEX_PARENT);
+	newdentry = ovl_create_real(ofs, udir,
+				    ovl_lookup_upper(ofs, dentry->d_name.name,
+						     upperdir, dentry->d_name.len),
+				    attr);
+	err = PTR_ERR(newdentry);
+	if (IS_ERR(newdentry))
+		goto out_unlock;
+
+	if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry) &&
+	    !ovl_allow_offline_changes(ofs)) {
+		/* Setting opaque here is just an optimization, allow to fail */
+		ovl_set_opaque(dentry, newdentry);
+	}
+
+	err = ovl_instantiate(dentry, inode, newdentry, !!attr->hardlink);
+	if (err)
+		goto out_cleanup;
+out_unlock:
+	inode_unlock(udir);
+	return err;
+
+out_cleanup:
+	ovl_cleanup(ofs, udir, newdentry);
+	dput(newdentry);
+	goto out_unlock;
+}
+
+static struct dentry *ovl_clear_empty(struct dentry *dentry,
+				      struct list_head *list)
+{
+	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+	struct dentry *workdir = ovl_workdir(dentry);
+	struct inode *wdir = workdir->d_inode;
+	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
+	struct inode *udir = upperdir->d_inode;
+	struct path upperpath;
+	struct dentry *upper;
+	struct dentry *opaquedir;
+	struct kstat stat;
+	int err;
+
+	if (WARN_ON(!workdir))
+		return ERR_PTR(-EROFS);
+
+	err = ovl_lock_rename_workdir(workdir, upperdir);
+	if (err)
+		goto out;
+
+	ovl_path_upper(dentry, &upperpath);
+	err = vfs_getattr(&upperpath, &stat,
+			  STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
+	if (err)
+		goto out_unlock;
+
+	err = -ESTALE;
+	if (!S_ISDIR(stat.mode))
+		goto out_unlock;
+	upper = upperpath.dentry;
+	if (upper->d_parent->d_inode != udir)
+		goto out_unlock;
+
+	opaquedir = ovl_create_temp(ofs, workdir, OVL_CATTR(stat.mode));
+	err = PTR_ERR(opaquedir);
+	if (IS_ERR(opaquedir))
+		goto out_unlock;
+
+	err = ovl_copy_xattr(dentry->d_sb, &upperpath, opaquedir);
+	if (err)
+		goto out_cleanup;
+
+	err = ovl_set_opaque(dentry, opaquedir);
+	if (err)
+		goto out_cleanup;
+
+	inode_lock(opaquedir->d_inode);
+	err = ovl_set_attr(ofs, opaquedir, &stat);
+	inode_unlock(opaquedir->d_inode);
+	if (err)
+		goto out_cleanup;
+
+	err = ovl_do_rename(ofs, wdir, opaquedir, udir, upper, RENAME_EXCHANGE);
+	if (err)
+		goto out_cleanup;
+
+	ovl_cleanup_whiteouts(ofs, upper, list);
+	ovl_cleanup(ofs, wdir, upper);
+	unlock_rename(workdir, upperdir);
+
+	/* dentry's upper doesn't match now, get rid of it */
+	d_drop(dentry);
+
+	return opaquedir;
+
+out_cleanup:
+	ovl_cleanup(ofs, wdir, opaquedir);
+	dput(opaquedir);
+out_unlock:
+	unlock_rename(workdir, upperdir);
+out:
+	return ERR_PTR(err);
+}
+
+static int ovl_set_upper_acl(struct ovl_fs *ofs, struct dentry *upperdentry,
+			     const char *name, const struct posix_acl *acl)
+{
+	void *buffer;
+	size_t size;
+	int err;
+
+	if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !acl)
+		return 0;
+
+	size = posix_acl_xattr_size(acl->a_count);
+	buffer = kmalloc(size, GFP_KERNEL);
+	if (!buffer)
+		return -ENOMEM;
+
+	err = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
+	if (err < 0)
+		goto out_free;
+
+	err = ovl_do_setxattr(ofs, upperdentry, name, buffer, size, XATTR_CREATE);
+out_free:
+	kfree(buffer);
+	return err;
+}
+
+static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
+				    struct ovl_cattr *cattr)
+{
+	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+	struct dentry *workdir = ovl_workdir(dentry);
+	struct inode *wdir = workdir->d_inode;
+	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
+	struct inode *udir = upperdir->d_inode;
+	struct dentry *upper;
+	struct dentry *newdentry;
+	int err;
+	struct posix_acl *acl, *default_acl;
+	bool hardlink = !!cattr->hardlink;
+
+	if (WARN_ON(!workdir))
+		return -EROFS;
+
+	if (!hardlink) {
+		err = posix_acl_create(dentry->d_parent->d_inode,
+				       &cattr->mode, &default_acl, &acl);
+		if (err)
+			return err;
+	}
+
+	err = ovl_lock_rename_workdir(workdir, upperdir);
+	if (err)
+		goto out;
+
+	upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir,
+				 dentry->d_name.len);
+	err = PTR_ERR(upper);
+	if (IS_ERR(upper))
+		goto out_unlock;
+
+	err = -ESTALE;
+	if (d_is_negative(upper) || !IS_WHITEOUT(d_inode(upper)))
+		goto out_dput;
+
+	newdentry = ovl_create_temp(ofs, workdir, cattr);
+	err = PTR_ERR(newdentry);
+	if (IS_ERR(newdentry))
+		goto out_dput;
+
+	/*
+	 * mode could have been mutilated due to umask (e.g. sgid directory)
+	 */
+	if (!hardlink &&
+	    !S_ISLNK(cattr->mode) &&
+	    newdentry->d_inode->i_mode != cattr->mode) {
+		struct iattr attr = {
+			.ia_valid = ATTR_MODE,
+			.ia_mode = cattr->mode,
+		};
+		inode_lock(newdentry->d_inode);
+		err = ovl_do_notify_change(ofs, newdentry, &attr);
+		inode_unlock(newdentry->d_inode);
+		if (err)
+			goto out_cleanup;
+	}
+	if (!hardlink) {
+		err = ovl_set_upper_acl(ofs, newdentry,
+					XATTR_NAME_POSIX_ACL_ACCESS, acl);
+		if (err)
+			goto out_cleanup;
+
+		err = ovl_set_upper_acl(ofs, newdentry,
+					XATTR_NAME_POSIX_ACL_DEFAULT, default_acl);
+		if (err)
+			goto out_cleanup;
+	}
+
+	if (!hardlink && S_ISDIR(cattr->mode)) {
+		err = ovl_set_opaque(dentry, newdentry);
+		if (err)
+			goto out_cleanup;
+
+		err = ovl_do_rename(ofs, wdir, newdentry, udir, upper,
+				    RENAME_EXCHANGE);
+		if (err)
+			goto out_cleanup;
+
+		ovl_cleanup(ofs, wdir, upper);
+	} else {
+		err = ovl_do_rename(ofs, wdir, newdentry, udir, upper, 0);
+		if (err)
+			goto out_cleanup;
+	}
+	err = ovl_instantiate(dentry, inode, newdentry, hardlink);
+	if (err) {
+		ovl_cleanup(ofs, udir, newdentry);
+		dput(newdentry);
+	}
+out_dput:
+	dput(upper);
+out_unlock:
+	unlock_rename(workdir, upperdir);
+out:
+	if (!hardlink) {
+		posix_acl_release(acl);
+		posix_acl_release(default_acl);
+	}
+	return err;
+
+out_cleanup:
+	ovl_cleanup(ofs, wdir, newdentry);
+	dput(newdentry);
+	goto out_dput;
+}
+
+static int ovl_create_or_link(struct dentry *dentry, struct inode *inode,
+			      struct ovl_cattr *attr, bool origin)
+{
+	int err;
+	const struct cred *old_cred;
+	struct cred *override_cred;
+	struct dentry *parent = dentry->d_parent;
+
+	err = ovl_copy_up(parent);
+	if (err)
+		return err;
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+
+	/*
+	 * When linking a file with copy up origin into a new parent, mark the
+	 * new parent dir "impure".
+	 */
+	if (origin) {
+		err = ovl_set_impure(parent, ovl_dentry_upper(parent));
+		if (err)
+			goto out_revert_creds;
+	}
+
+	if (!attr->hardlink) {
+		err = -ENOMEM;
+		override_cred = prepare_creds();
+		if (!override_cred)
+			goto out_revert_creds;
+		/*
+		 * In the creation cases(create, mkdir, mknod, symlink),
+		 * ovl should transfer current's fs{u,g}id to underlying
+		 * fs. Because underlying fs want to initialize its new
+		 * inode owner using current's fs{u,g}id. And in this
+		 * case, the @inode is a new inode that is initialized
+		 * in inode_init_owner() to current's fs{u,g}id. So use
+		 * the inode's i_{u,g}id to override the cred's fs{u,g}id.
+		 *
+		 * But in the other hardlink case, ovl_link() does not
+		 * create a new inode, so just use the ovl mounter's
+		 * fs{u,g}id.
+		 */
+		override_cred->fsuid = inode->i_uid;
+		override_cred->fsgid = inode->i_gid;
+		err = security_dentry_create_files_as(dentry,
+				attr->mode, &dentry->d_name, old_cred,
+				override_cred);
+		if (err) {
+			put_cred(override_cred);
+			goto out_revert_creds;
+		}
+		put_cred(override_creds(override_cred));
+		put_cred(override_cred);
+	}
+
+	if (!ovl_dentry_is_whiteout(dentry))
+		err = ovl_create_upper(dentry, inode, attr);
+	else
+		err = ovl_create_over_whiteout(dentry, inode, attr);
+
+out_revert_creds:
+	revert_creds(old_cred);
+	return err;
+}
+
+static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
+			     const char *link)
+{
+	int err;
+	struct inode *inode;
+	struct ovl_cattr attr = {
+		.rdev = rdev,
+		.link = link,
+	};
+
+	err = ovl_want_write(dentry);
+	if (err)
+		goto out;
+
+	/* Preallocate inode to be used by ovl_get_inode() */
+	err = -ENOMEM;
+	inode = ovl_new_inode(dentry->d_sb, mode, rdev);
+	if (!inode)
+		goto out_drop_write;
+
+	spin_lock(&inode->i_lock);
+	inode->i_state |= I_CREATING;
+	spin_unlock(&inode->i_lock);
+
+	inode_init_owner(&init_user_ns, inode, dentry->d_parent->d_inode, mode);
+	attr.mode = inode->i_mode;
+
+	err = ovl_create_or_link(dentry, inode, &attr, false);
+	/* Did we end up using the preallocated inode? */
+	if (inode != d_inode(dentry))
+		iput(inode);
+
+out_drop_write:
+	ovl_drop_write(dentry);
+out:
+	return err;
+}
+
+static int ovl_create(struct user_namespace *mnt_userns, struct inode *dir,
+		      struct dentry *dentry, umode_t mode, bool excl)
+{
+	return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
+}
+
+static int ovl_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+		     struct dentry *dentry, umode_t mode)
+{
+	return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL);
+}
+
+static int ovl_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+		     struct dentry *dentry, umode_t mode, dev_t rdev)
+{
+	/* Don't allow creation of "whiteout" on overlay */
+	if (S_ISCHR(mode) && rdev == WHITEOUT_DEV)
+		return -EPERM;
+
+	return ovl_create_object(dentry, mode, rdev, NULL);
+}
+
+static int ovl_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+		       struct dentry *dentry, const char *link)
+{
+	return ovl_create_object(dentry, S_IFLNK, 0, link);
+}
+
+static int ovl_set_link_redirect(struct dentry *dentry)
+{
+	const struct cred *old_cred;
+	int err;
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	err = ovl_set_redirect(dentry, false);
+	revert_creds(old_cred);
+
+	return err;
+}
+
+static int ovl_link(struct dentry *old, struct inode *newdir,
+		    struct dentry *new)
+{
+	int err;
+	struct inode *inode;
+
+	err = ovl_want_write(old);
+	if (err)
+		goto out;
+
+	err = ovl_copy_up(old);
+	if (err)
+		goto out_drop_write;
+
+	err = ovl_copy_up(new->d_parent);
+	if (err)
+		goto out_drop_write;
+
+	if (ovl_is_metacopy_dentry(old)) {
+		err = ovl_set_link_redirect(old);
+		if (err)
+			goto out_drop_write;
+	}
+
+	err = ovl_nlink_start(old);
+	if (err)
+		goto out_drop_write;
+
+	inode = d_inode(old);
+	ihold(inode);
+
+	err = ovl_create_or_link(new, inode,
+			&(struct ovl_cattr) {.hardlink = ovl_dentry_upper(old)},
+			ovl_type_origin(old));
+	if (err)
+		iput(inode);
+
+	ovl_nlink_end(old);
+out_drop_write:
+	ovl_drop_write(old);
+out:
+	return err;
+}
+
+static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper)
+{
+	return d_inode(ovl_dentry_upper(dentry)) == d_inode(upper);
+}
+
+static int ovl_remove_and_whiteout(struct dentry *dentry,
+				   struct list_head *list)
+{
+	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+	struct dentry *workdir = ovl_workdir(dentry);
+	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
+	struct dentry *upper;
+	struct dentry *opaquedir = NULL;
+	int err;
+
+	if (WARN_ON(!workdir))
+		return -EROFS;
+
+	if (!list_empty(list)) {
+		opaquedir = ovl_clear_empty(dentry, list);
+		err = PTR_ERR(opaquedir);
+		if (IS_ERR(opaquedir))
+			goto out;
+	}
+
+	err = ovl_lock_rename_workdir(workdir, upperdir);
+	if (err)
+		goto out_dput;
+
+	upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir,
+				 dentry->d_name.len);
+	err = PTR_ERR(upper);
+	if (IS_ERR(upper))
+		goto out_unlock;
+
+	err = -ESTALE;
+	if ((opaquedir && upper != opaquedir) ||
+	    (!opaquedir && ovl_dentry_upper(dentry) &&
+	     !ovl_matches_upper(dentry, upper))) {
+		goto out_dput_upper;
+	}
+
+	err = ovl_cleanup_and_whiteout(ofs, d_inode(upperdir), upper);
+	if (err)
+		goto out_d_drop;
+
+	ovl_dir_modified(dentry->d_parent, true);
+out_d_drop:
+	d_drop(dentry);
+out_dput_upper:
+	dput(upper);
+out_unlock:
+	unlock_rename(workdir, upperdir);
+out_dput:
+	dput(opaquedir);
+out:
+	return err;
+}
+
+static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
+			    struct list_head *list)
+{
+	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
+	struct inode *dir = upperdir->d_inode;
+	struct dentry *upper;
+	struct dentry *opaquedir = NULL;
+	int err;
+
+	if (!list_empty(list)) {
+		opaquedir = ovl_clear_empty(dentry, list);
+		err = PTR_ERR(opaquedir);
+		if (IS_ERR(opaquedir))
+			goto out;
+	}
+
+	inode_lock_nested(dir, I_MUTEX_PARENT);
+	upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir,
+				 dentry->d_name.len);
+	err = PTR_ERR(upper);
+	if (IS_ERR(upper))
+		goto out_unlock;
+
+	err = -ESTALE;
+	if ((opaquedir && upper != opaquedir) ||
+	    (!opaquedir && !ovl_matches_upper(dentry, upper)))
+		goto out_dput_upper;
+
+	if (is_dir)
+		err = ovl_do_rmdir(ofs, dir, upper);
+	else
+		err = ovl_do_unlink(ofs, dir, upper);
+	ovl_dir_modified(dentry->d_parent, ovl_type_origin(dentry));
+
+	/*
+	 * Keeping this dentry hashed would mean having to release
+	 * upperpath/lowerpath, which could only be done if we are the
+	 * sole user of this dentry.  Too tricky...  Just unhash for
+	 * now.
+	 */
+	if (!err)
+		d_drop(dentry);
+out_dput_upper:
+	dput(upper);
+out_unlock:
+	inode_unlock(dir);
+	dput(opaquedir);
+out:
+	return err;
+}
+
+static bool ovl_pure_upper(struct dentry *dentry)
+{
+	return !ovl_dentry_lower(dentry) &&
+	       !ovl_test_flag(OVL_WHITEOUTS, d_inode(dentry));
+}
+
+static void ovl_drop_nlink(struct dentry *dentry)
+{
+	struct inode *inode = d_inode(dentry);
+	struct dentry *alias;
+
+	/* Try to find another, hashed alias */
+	spin_lock(&inode->i_lock);
+	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
+		if (alias != dentry && !d_unhashed(alias))
+			break;
+	}
+	spin_unlock(&inode->i_lock);
+
+	/*
+	 * Changes to underlying layers may cause i_nlink to lose sync with
+	 * reality.  In this case prevent the link count from going to zero
+	 * prematurely.
+	 */
+	if (inode->i_nlink > !!alias)
+		drop_nlink(inode);
+}
+
+static int ovl_do_remove(struct dentry *dentry, bool is_dir)
+{
+	int err;
+	const struct cred *old_cred;
+	bool lower_positive = ovl_lower_positive(dentry);
+	LIST_HEAD(list);
+
+	/* No need to clean pure upper removed by vfs_rmdir() */
+	if (is_dir && (lower_positive || !ovl_pure_upper(dentry))) {
+		err = ovl_check_empty_dir(dentry, &list);
+		if (err)
+			goto out;
+	}
+
+	err = ovl_want_write(dentry);
+	if (err)
+		goto out;
+
+	err = ovl_copy_up(dentry->d_parent);
+	if (err)
+		goto out_drop_write;
+
+	err = ovl_nlink_start(dentry);
+	if (err)
+		goto out_drop_write;
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	if (!lower_positive)
+		err = ovl_remove_upper(dentry, is_dir, &list);
+	else
+		err = ovl_remove_and_whiteout(dentry, &list);
+	revert_creds(old_cred);
+	if (!err) {
+		if (is_dir)
+			clear_nlink(dentry->d_inode);
+		else
+			ovl_drop_nlink(dentry);
+	}
+	ovl_nlink_end(dentry);
+
+	/*
+	 * Copy ctime
+	 *
+	 * Note: we fail to update ctime if there was no copy-up, only a
+	 * whiteout
+	 */
+	if (ovl_dentry_upper(dentry))
+		ovl_copyattr(d_inode(dentry));
+
+out_drop_write:
+	ovl_drop_write(dentry);
+out:
+	ovl_cache_free(&list);
+	return err;
+}
+
+static int ovl_unlink(struct inode *dir, struct dentry *dentry)
+{
+	return ovl_do_remove(dentry, false);
+}
+
+static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	return ovl_do_remove(dentry, true);
+}
+
+static bool ovl_type_merge_or_lower(struct dentry *dentry)
+{
+	enum ovl_path_type type = ovl_path_type(dentry);
+
+	return OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type);
+}
+
+static bool ovl_can_move(struct dentry *dentry)
+{
+	return ovl_redirect_dir(dentry->d_sb) ||
+		!d_is_dir(dentry) || !ovl_type_merge_or_lower(dentry);
+}
+
+static char *ovl_get_redirect(struct dentry *dentry, bool abs_redirect)
+{
+	char *buf, *ret;
+	struct dentry *d, *tmp;
+	int buflen = ovl_redirect_max + 1;
+
+	if (!abs_redirect) {
+		ret = kstrndup(dentry->d_name.name, dentry->d_name.len,
+			       GFP_KERNEL);
+		goto out;
+	}
+
+	buf = ret = kmalloc(buflen, GFP_KERNEL);
+	if (!buf)
+		goto out;
+
+	buflen--;
+	buf[buflen] = '\0';
+	for (d = dget(dentry); !IS_ROOT(d);) {
+		const char *name;
+		int thislen;
+
+		spin_lock(&d->d_lock);
+		name = ovl_dentry_get_redirect(d);
+		if (name) {
+			thislen = strlen(name);
+		} else {
+			name = d->d_name.name;
+			thislen = d->d_name.len;
+		}
+
+		/* If path is too long, fall back to userspace move */
+		if (thislen + (name[0] != '/') > buflen) {
+			ret = ERR_PTR(-EXDEV);
+			spin_unlock(&d->d_lock);
+			goto out_put;
+		}
+
+		buflen -= thislen;
+		memcpy(&buf[buflen], name, thislen);
+		spin_unlock(&d->d_lock);
+		tmp = dget_parent(d);
+
+		dput(d);
+		d = tmp;
+
+		/* Absolute redirect: finished */
+		if (buf[buflen] == '/')
+			break;
+		buflen--;
+		buf[buflen] = '/';
+	}
+	ret = kstrdup(&buf[buflen], GFP_KERNEL);
+out_put:
+	dput(d);
+	kfree(buf);
+out:
+	return ret ? ret : ERR_PTR(-ENOMEM);
+}
+
+static bool ovl_need_absolute_redirect(struct dentry *dentry, bool samedir)
+{
+	struct dentry *lowerdentry;
+
+	if (!samedir)
+		return true;
+
+	if (d_is_dir(dentry))
+		return false;
+
+	/*
+	 * For non-dir hardlinked files, we need absolute redirects
+	 * in general as two upper hardlinks could be in different
+	 * dirs. We could put a relative redirect now and convert
+	 * it to absolute redirect later. But when nlink > 1 and
+	 * indexing is on, that means relative redirect needs to be
+	 * converted to absolute during copy up of another lower
+	 * hardllink as well.
+	 *
+	 * So without optimizing too much, just check if lower is
+	 * a hard link or not. If lower is hard link, put absolute
+	 * redirect.
+	 */
+	lowerdentry = ovl_dentry_lower(dentry);
+	return (d_inode(lowerdentry)->i_nlink > 1);
+}
+
+static int ovl_set_redirect(struct dentry *dentry, bool samedir)
+{
+	int err;
+	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+	const char *redirect = ovl_dentry_get_redirect(dentry);
+	bool absolute_redirect = ovl_need_absolute_redirect(dentry, samedir);
+
+	if (redirect && (!absolute_redirect || redirect[0] == '/'))
+		return 0;
+
+	redirect = ovl_get_redirect(dentry, absolute_redirect);
+	if (IS_ERR(redirect))
+		return PTR_ERR(redirect);
+
+	err = ovl_check_setxattr(ofs, ovl_dentry_upper(dentry),
+				 OVL_XATTR_REDIRECT,
+				 redirect, strlen(redirect), -EXDEV);
+	if (!err) {
+		spin_lock(&dentry->d_lock);
+		ovl_dentry_set_redirect(dentry, redirect);
+		spin_unlock(&dentry->d_lock);
+	} else {
+		kfree(redirect);
+		pr_warn_ratelimited("failed to set redirect (%i)\n",
+				    err);
+		/* Fall back to userspace copy-up */
+		err = -EXDEV;
+	}
+	return err;
+}
+
+static int ovl_rename(struct user_namespace *mnt_userns, struct inode *olddir,
+		      struct dentry *old, struct inode *newdir,
+		      struct dentry *new, unsigned int flags)
+{
+	int err;
+	struct dentry *old_upperdir;
+	struct dentry *new_upperdir;
+	struct dentry *olddentry;
+	struct dentry *newdentry;
+	struct dentry *trap;
+	bool old_opaque;
+	bool new_opaque;
+	bool cleanup_whiteout = false;
+	bool update_nlink = false;
+	bool overwrite = !(flags & RENAME_EXCHANGE);
+	bool is_dir = d_is_dir(old);
+	bool new_is_dir = d_is_dir(new);
+	bool samedir = olddir == newdir;
+	struct dentry *opaquedir = NULL;
+	const struct cred *old_cred = NULL;
+	struct ovl_fs *ofs = OVL_FS(old->d_sb);
+	LIST_HEAD(list);
+
+	err = -EINVAL;
+	if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
+		goto out;
+
+	flags &= ~RENAME_NOREPLACE;
+
+	/* Don't copy up directory trees */
+	err = -EXDEV;
+	if (!ovl_can_move(old))
+		goto out;
+	if (!overwrite && !ovl_can_move(new))
+		goto out;
+
+	if (overwrite && new_is_dir && !ovl_pure_upper(new)) {
+		err = ovl_check_empty_dir(new, &list);
+		if (err)
+			goto out;
+	}
+
+	if (overwrite) {
+		if (ovl_lower_positive(old)) {
+			if (!ovl_dentry_is_whiteout(new)) {
+				/* Whiteout source */
+				flags |= RENAME_WHITEOUT;
+			} else {
+				/* Switch whiteouts */
+				flags |= RENAME_EXCHANGE;
+			}
+		} else if (is_dir && ovl_dentry_is_whiteout(new)) {
+			flags |= RENAME_EXCHANGE;
+			cleanup_whiteout = true;
+		}
+	}
+
+	err = ovl_want_write(old);
+	if (err)
+		goto out;
+
+	err = ovl_copy_up(old);
+	if (err)
+		goto out_drop_write;
+
+	err = ovl_copy_up(new->d_parent);
+	if (err)
+		goto out_drop_write;
+	if (!overwrite) {
+		err = ovl_copy_up(new);
+		if (err)
+			goto out_drop_write;
+	} else if (d_inode(new)) {
+		err = ovl_nlink_start(new);
+		if (err)
+			goto out_drop_write;
+
+		update_nlink = true;
+	}
+
+	old_cred = ovl_override_creds(old->d_sb);
+
+	if (!list_empty(&list)) {
+		opaquedir = ovl_clear_empty(new, &list);
+		err = PTR_ERR(opaquedir);
+		if (IS_ERR(opaquedir)) {
+			opaquedir = NULL;
+			goto out_revert_creds;
+		}
+	}
+
+	old_upperdir = ovl_dentry_upper(old->d_parent);
+	new_upperdir = ovl_dentry_upper(new->d_parent);
+
+	if (!samedir) {
+		/*
+		 * When moving a merge dir or non-dir with copy up origin into
+		 * a new parent, we are marking the new parent dir "impure".
+		 * When ovl_iterate() iterates an "impure" upper dir, it will
+		 * lookup the origin inodes of the entries to fill d_ino.
+		 */
+		if (ovl_type_origin(old)) {
+			err = ovl_set_impure(new->d_parent, new_upperdir);
+			if (err)
+				goto out_revert_creds;
+		}
+		if (!overwrite && ovl_type_origin(new)) {
+			err = ovl_set_impure(old->d_parent, old_upperdir);
+			if (err)
+				goto out_revert_creds;
+		}
+	}
+
+	trap = lock_rename(new_upperdir, old_upperdir);
+
+	olddentry = ovl_lookup_upper(ofs, old->d_name.name, old_upperdir,
+				     old->d_name.len);
+	err = PTR_ERR(olddentry);
+	if (IS_ERR(olddentry))
+		goto out_unlock;
+
+	err = -ESTALE;
+	if (!ovl_matches_upper(old, olddentry))
+		goto out_dput_old;
+
+	newdentry = ovl_lookup_upper(ofs, new->d_name.name, new_upperdir,
+				     new->d_name.len);
+	err = PTR_ERR(newdentry);
+	if (IS_ERR(newdentry))
+		goto out_dput_old;
+
+	old_opaque = ovl_dentry_is_opaque(old);
+	new_opaque = ovl_dentry_is_opaque(new);
+
+	err = -ESTALE;
+	if (d_inode(new) && ovl_dentry_upper(new)) {
+		if (opaquedir) {
+			if (newdentry != opaquedir)
+				goto out_dput;
+		} else {
+			if (!ovl_matches_upper(new, newdentry))
+				goto out_dput;
+		}
+	} else {
+		if (!d_is_negative(newdentry)) {
+			if (!new_opaque || !ovl_is_whiteout(newdentry))
+				goto out_dput;
+		} else {
+			if (flags & RENAME_EXCHANGE)
+				goto out_dput;
+		}
+	}
+
+	if (olddentry == trap)
+		goto out_dput;
+	if (newdentry == trap)
+		goto out_dput;
+
+	if (olddentry->d_inode == newdentry->d_inode)
+		goto out_dput;
+
+	err = 0;
+	if (ovl_type_merge_or_lower(old))
+		err = ovl_set_redirect(old, samedir);
+	else if (is_dir && !old_opaque && ovl_type_merge(new->d_parent))
+		err = ovl_set_opaque_xerr(old, olddentry, -EXDEV);
+	if (err)
+		goto out_dput;
+
+	if (!overwrite && ovl_type_merge_or_lower(new))
+		err = ovl_set_redirect(new, samedir);
+	else if (!overwrite && new_is_dir && !new_opaque &&
+		 ovl_type_merge(old->d_parent))
+		err = ovl_set_opaque_xerr(new, newdentry, -EXDEV);
+	if (err)
+		goto out_dput;
+
+	err = ovl_do_rename(ofs, old_upperdir->d_inode, olddentry,
+			    new_upperdir->d_inode, newdentry, flags);
+	if (err)
+		goto out_dput;
+
+	if (cleanup_whiteout)
+		ovl_cleanup(ofs, old_upperdir->d_inode, newdentry);
+
+	if (overwrite && d_inode(new)) {
+		if (new_is_dir)
+			clear_nlink(d_inode(new));
+		else
+			ovl_drop_nlink(new);
+	}
+
+	ovl_dir_modified(old->d_parent, ovl_type_origin(old) ||
+			 (!overwrite && ovl_type_origin(new)));
+	ovl_dir_modified(new->d_parent, ovl_type_origin(old) ||
+			 (d_inode(new) && ovl_type_origin(new)));
+
+	/* copy ctime: */
+	ovl_copyattr(d_inode(old));
+	if (d_inode(new) && ovl_dentry_upper(new))
+		ovl_copyattr(d_inode(new));
+
+out_dput:
+	dput(newdentry);
+out_dput_old:
+	dput(olddentry);
+out_unlock:
+	unlock_rename(new_upperdir, old_upperdir);
+out_revert_creds:
+	revert_creds(old_cred);
+	if (update_nlink)
+		ovl_nlink_end(new);
+out_drop_write:
+	ovl_drop_write(old);
+out:
+	dput(opaquedir);
+	ovl_cache_free(&list);
+	return err;
+}
+
+const struct inode_operations ovl_dir_inode_operations = {
+	.lookup		= ovl_lookup,
+	.mkdir		= ovl_mkdir,
+	.symlink	= ovl_symlink,
+	.unlink		= ovl_unlink,
+	.rmdir		= ovl_rmdir,
+	.rename		= ovl_rename,
+	.link		= ovl_link,
+	.setattr	= ovl_setattr,
+	.create		= ovl_create,
+	.mknod		= ovl_mknod,
+	.permission	= ovl_permission,
+	.getattr	= ovl_getattr,
+	.listxattr	= ovl_listxattr,
+	.get_acl	= ovl_get_acl,
+	.update_time	= ovl_update_time,
+	.fileattr_get	= ovl_fileattr_get,
+	.fileattr_set	= ovl_fileattr_set,
+};
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
new file mode 100644
index 000000000..e55363d34
--- /dev/null
+++ b/fs/overlayfs/export.c
@@ -0,0 +1,876 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Overlayfs NFS export support.
+ *
+ * Amir Goldstein <amir73il@gmail.com>
+ *
+ * Copyright (C) 2017-2018 CTERA Networks. All Rights Reserved.
+ */
+
+#include <linux/fs.h>
+#include <linux/cred.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/xattr.h>
+#include <linux/exportfs.h>
+#include <linux/ratelimit.h>
+#include "overlayfs.h"
+
+static int ovl_encode_maybe_copy_up(struct dentry *dentry)
+{
+	int err;
+
+	if (ovl_dentry_upper(dentry))
+		return 0;
+
+	err = ovl_want_write(dentry);
+	if (!err) {
+		err = ovl_copy_up(dentry);
+		ovl_drop_write(dentry);
+	}
+
+	if (err) {
+		pr_warn_ratelimited("failed to copy up on encode (%pd2, err=%i)\n",
+				    dentry, err);
+	}
+
+	return err;
+}
+
+/*
+ * Before encoding a non-upper directory file handle from real layer N, we need
+ * to check if it will be possible to reconnect an overlay dentry from the real
+ * lower decoded dentry. This is done by following the overlay ancestry up to a
+ * "layer N connected" ancestor and verifying that all parents along the way are
+ * "layer N connectable". If an ancestor that is NOT "layer N connectable" is
+ * found, we need to copy up an ancestor, which is "layer N connectable", thus
+ * making that ancestor "layer N connected". For example:
+ *
+ * layer 1: /a
+ * layer 2: /a/b/c
+ *
+ * The overlay dentry /a is NOT "layer 2 connectable", because if dir /a is
+ * copied up and renamed, upper dir /a will be indexed by lower dir /a from
+ * layer 1. The dir /a from layer 2 will never be indexed, so the algorithm (*)
+ * in ovl_lookup_real_ancestor() will not be able to lookup a connected overlay
+ * dentry from the connected lower dentry /a/b/c.
+ *
+ * To avoid this problem on decode time, we need to copy up an ancestor of
+ * /a/b/c, which is "layer 2 connectable", on encode time. That ancestor is
+ * /a/b. After copy up (and index) of /a/b, it will become "layer 2 connected"
+ * and when the time comes to decode the file handle from lower dentry /a/b/c,
+ * ovl_lookup_real_ancestor() will find the indexed ancestor /a/b and decoding
+ * a connected overlay dentry will be accomplished.
+ *
+ * (*) the algorithm in ovl_lookup_real_ancestor() can be improved to lookup an
+ * entry /a in the lower layers above layer N and find the indexed dir /a from
+ * layer 1. If that improvement is made, then the check for "layer N connected"
+ * will need to verify there are no redirects in lower layers above N. In the
+ * example above, /a will be "layer 2 connectable". However, if layer 2 dir /a
+ * is a target of a layer 1 redirect, then /a will NOT be "layer 2 connectable":
+ *
+ * layer 1: /A (redirect = /a)
+ * layer 2: /a/b/c
+ */
+
+/* Return the lowest layer for encoding a connectable file handle */
+static int ovl_connectable_layer(struct dentry *dentry)
+{
+	struct ovl_entry *oe = OVL_E(dentry);
+
+	/* We can get overlay root from root of any layer */
+	if (dentry == dentry->d_sb->s_root)
+		return oe->numlower;
+
+	/*
+	 * If it's an unindexed merge dir, then it's not connectable with any
+	 * lower layer
+	 */
+	if (ovl_dentry_upper(dentry) &&
+	    !ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+		return 0;
+
+	/* We can get upper/overlay path from indexed/lower dentry */
+	return oe->lowerstack[0].layer->idx;
+}
+
+/*
+ * @dentry is "connected" if all ancestors up to root or a "connected" ancestor
+ * have the same uppermost lower layer as the origin's layer. We may need to
+ * copy up a "connectable" ancestor to make it "connected". A "connected" dentry
+ * cannot become non "connected", so cache positive result in dentry flags.
+ *
+ * Return the connected origin layer or < 0 on error.
+ */
+static int ovl_connect_layer(struct dentry *dentry)
+{
+	struct dentry *next, *parent = NULL;
+	int origin_layer;
+	int err = 0;
+
+	if (WARN_ON(dentry == dentry->d_sb->s_root) ||
+	    WARN_ON(!ovl_dentry_lower(dentry)))
+		return -EIO;
+
+	origin_layer = OVL_E(dentry)->lowerstack[0].layer->idx;
+	if (ovl_dentry_test_flag(OVL_E_CONNECTED, dentry))
+		return origin_layer;
+
+	/* Find the topmost origin layer connectable ancestor of @dentry */
+	next = dget(dentry);
+	for (;;) {
+		parent = dget_parent(next);
+		if (WARN_ON(parent == next)) {
+			err = -EIO;
+			break;
+		}
+
+		/*
+		 * If @parent is not origin layer connectable, then copy up
+		 * @next which is origin layer connectable and we are done.
+		 */
+		if (ovl_connectable_layer(parent) < origin_layer) {
+			err = ovl_encode_maybe_copy_up(next);
+			break;
+		}
+
+		/* If @parent is connected or indexed we are done */
+		if (ovl_dentry_test_flag(OVL_E_CONNECTED, parent) ||
+		    ovl_test_flag(OVL_INDEX, d_inode(parent)))
+			break;
+
+		dput(next);
+		next = parent;
+	}
+
+	dput(parent);
+	dput(next);
+
+	if (!err)
+		ovl_dentry_set_flag(OVL_E_CONNECTED, dentry);
+
+	return err ?: origin_layer;
+}
+
+/*
+ * We only need to encode origin if there is a chance that the same object was
+ * encoded pre copy up and then we need to stay consistent with the same
+ * encoding also after copy up. If non-pure upper is not indexed, then it was
+ * copied up before NFS export was enabled. In that case we don't need to worry
+ * about staying consistent with pre copy up encoding and we encode an upper
+ * file handle. Overlay root dentry is a private case of non-indexed upper.
+ *
+ * The following table summarizes the different file handle encodings used for
+ * different overlay object types:
+ *
+ *  Object type		| Encoding
+ * --------------------------------
+ *  Pure upper		| U
+ *  Non-indexed upper	| U
+ *  Indexed upper	| L (*)
+ *  Non-upper		| L (*)
+ *
+ * U = upper file handle
+ * L = lower file handle
+ *
+ * (*) Connecting an overlay dir from real lower dentry is not always
+ * possible when there are redirects in lower layers and non-indexed merge dirs.
+ * To mitigate those case, we may copy up the lower dir ancestor before encode
+ * a lower dir file handle.
+ *
+ * Return 0 for upper file handle, > 0 for lower file handle or < 0 on error.
+ */
+static int ovl_check_encode_origin(struct dentry *dentry)
+{
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+
+	/* Upper file handle for pure upper */
+	if (!ovl_dentry_lower(dentry))
+		return 0;
+
+	/*
+	 * Upper file handle for non-indexed upper.
+	 *
+	 * Root is never indexed, so if there's an upper layer, encode upper for
+	 * root.
+	 */
+	if (ovl_dentry_upper(dentry) &&
+	    !ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+		return 0;
+
+	/*
+	 * Decoding a merge dir, whose origin's ancestor is under a redirected
+	 * lower dir or under a non-indexed upper is not always possible.
+	 * ovl_connect_layer() will try to make origin's layer "connected" by
+	 * copying up a "connectable" ancestor.
+	 */
+	if (d_is_dir(dentry) && ovl_upper_mnt(ofs))
+		return ovl_connect_layer(dentry);
+
+	/* Lower file handle for indexed and non-upper dir/non-dir */
+	return 1;
+}
+
+static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct dentry *dentry,
+			     u32 *fid, int buflen)
+{
+	struct ovl_fh *fh = NULL;
+	int err, enc_lower;
+	int len;
+
+	/*
+	 * Check if we should encode a lower or upper file handle and maybe
+	 * copy up an ancestor to make lower file handle connectable.
+	 */
+	err = enc_lower = ovl_check_encode_origin(dentry);
+	if (enc_lower < 0)
+		goto fail;
+
+	/* Encode an upper or lower file handle */
+	fh = ovl_encode_real_fh(ofs, enc_lower ? ovl_dentry_lower(dentry) :
+				ovl_dentry_upper(dentry), !enc_lower);
+	if (IS_ERR(fh))
+		return PTR_ERR(fh);
+
+	len = OVL_FH_LEN(fh);
+	if (len <= buflen)
+		memcpy(fid, fh, len);
+	err = len;
+
+out:
+	kfree(fh);
+	return err;
+
+fail:
+	pr_warn_ratelimited("failed to encode file handle (%pd2, err=%i)\n",
+			    dentry, err);
+	goto out;
+}
+
+static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len,
+			 struct inode *parent)
+{
+	struct ovl_fs *ofs = OVL_FS(inode->i_sb);
+	struct dentry *dentry;
+	int bytes, buflen = *max_len << 2;
+
+	/* TODO: encode connectable file handles */
+	if (parent)
+		return FILEID_INVALID;
+
+	dentry = d_find_any_alias(inode);
+	if (!dentry)
+		return FILEID_INVALID;
+
+	bytes = ovl_dentry_to_fid(ofs, dentry, fid, buflen);
+	dput(dentry);
+	if (bytes <= 0)
+		return FILEID_INVALID;
+
+	*max_len = bytes >> 2;
+	if (bytes > buflen)
+		return FILEID_INVALID;
+
+	return OVL_FILEID_V1;
+}
+
+/*
+ * Find or instantiate an overlay dentry from real dentries and index.
+ */
+static struct dentry *ovl_obtain_alias(struct super_block *sb,
+				       struct dentry *upper_alias,
+				       struct ovl_path *lowerpath,
+				       struct dentry *index)
+{
+	struct dentry *lower = lowerpath ? lowerpath->dentry : NULL;
+	struct dentry *upper = upper_alias ?: index;
+	struct dentry *dentry;
+	struct inode *inode;
+	struct ovl_entry *oe;
+	struct ovl_inode_params oip = {
+		.lowerpath = lowerpath,
+		.index = index,
+		.numlower = !!lower
+	};
+
+	/* We get overlay directory dentries with ovl_lookup_real() */
+	if (d_is_dir(upper ?: lower))
+		return ERR_PTR(-EIO);
+
+	oip.upperdentry = dget(upper);
+	inode = ovl_get_inode(sb, &oip);
+	if (IS_ERR(inode)) {
+		dput(upper);
+		return ERR_CAST(inode);
+	}
+
+	if (upper)
+		ovl_set_flag(OVL_UPPERDATA, inode);
+
+	dentry = d_find_any_alias(inode);
+	if (dentry)
+		goto out_iput;
+
+	dentry = d_alloc_anon(inode->i_sb);
+	if (unlikely(!dentry))
+		goto nomem;
+	oe = ovl_alloc_entry(lower ? 1 : 0);
+	if (!oe)
+		goto nomem;
+
+	if (lower) {
+		oe->lowerstack->dentry = dget(lower);
+		oe->lowerstack->layer = lowerpath->layer;
+	}
+	dentry->d_fsdata = oe;
+	if (upper_alias)
+		ovl_dentry_set_upper_alias(dentry);
+
+	ovl_dentry_init_reval(dentry, upper);
+
+	return d_instantiate_anon(dentry, inode);
+
+nomem:
+	dput(dentry);
+	dentry = ERR_PTR(-ENOMEM);
+out_iput:
+	iput(inode);
+	return dentry;
+}
+
+/* Get the upper or lower dentry in stach whose on layer @idx */
+static struct dentry *ovl_dentry_real_at(struct dentry *dentry, int idx)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+	int i;
+
+	if (!idx)
+		return ovl_dentry_upper(dentry);
+
+	for (i = 0; i < oe->numlower; i++) {
+		if (oe->lowerstack[i].layer->idx == idx)
+			return oe->lowerstack[i].dentry;
+	}
+
+	return NULL;
+}
+
+/*
+ * Lookup a child overlay dentry to get a connected overlay dentry whose real
+ * dentry is @real. If @real is on upper layer, we lookup a child overlay
+ * dentry with the same name as the real dentry. Otherwise, we need to consult
+ * index for lookup.
+ */
+static struct dentry *ovl_lookup_real_one(struct dentry *connected,
+					  struct dentry *real,
+					  const struct ovl_layer *layer)
+{
+	struct inode *dir = d_inode(connected);
+	struct dentry *this, *parent = NULL;
+	struct name_snapshot name;
+	int err;
+
+	/*
+	 * Lookup child overlay dentry by real name. The dir mutex protects us
+	 * from racing with overlay rename. If the overlay dentry that is above
+	 * real has already been moved to a parent that is not under the
+	 * connected overlay dir, we return -ECHILD and restart the lookup of
+	 * connected real path from the top.
+	 */
+	inode_lock_nested(dir, I_MUTEX_PARENT);
+	err = -ECHILD;
+	parent = dget_parent(real);
+	if (ovl_dentry_real_at(connected, layer->idx) != parent)
+		goto fail;
+
+	/*
+	 * We also need to take a snapshot of real dentry name to protect us
+	 * from racing with underlying layer rename. In this case, we don't
+	 * care about returning ESTALE, only from dereferencing a free name
+	 * pointer because we hold no lock on the real dentry.
+	 */
+	take_dentry_name_snapshot(&name, real);
+	/*
+	 * No mnt_userns handling here: it's an internal lookup.  Could skip
+	 * permission checking altogether, but for now just use non-mnt_userns
+	 * transformed ids.
+	 */
+	this = lookup_one_len(name.name.name, connected, name.name.len);
+	release_dentry_name_snapshot(&name);
+	err = PTR_ERR(this);
+	if (IS_ERR(this)) {
+		goto fail;
+	} else if (!this || !this->d_inode) {
+		dput(this);
+		err = -ENOENT;
+		goto fail;
+	} else if (ovl_dentry_real_at(this, layer->idx) != real) {
+		dput(this);
+		err = -ESTALE;
+		goto fail;
+	}
+
+out:
+	dput(parent);
+	inode_unlock(dir);
+	return this;
+
+fail:
+	pr_warn_ratelimited("failed to lookup one by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
+			    real, layer->idx, connected, err);
+	this = ERR_PTR(err);
+	goto out;
+}
+
+static struct dentry *ovl_lookup_real(struct super_block *sb,
+				      struct dentry *real,
+				      const struct ovl_layer *layer);
+
+/*
+ * Lookup an indexed or hashed overlay dentry by real inode.
+ */
+static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
+					    struct dentry *real,
+					    const struct ovl_layer *layer)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+	struct dentry *index = NULL;
+	struct dentry *this = NULL;
+	struct inode *inode;
+
+	/*
+	 * Decoding upper dir from index is expensive, so first try to lookup
+	 * overlay dentry in inode/dcache.
+	 */
+	inode = ovl_lookup_inode(sb, real, !layer->idx);
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
+	if (inode) {
+		this = d_find_any_alias(inode);
+		iput(inode);
+	}
+
+	/*
+	 * For decoded lower dir file handle, lookup index by origin to check
+	 * if lower dir was copied up and and/or removed.
+	 */
+	if (!this && layer->idx && ofs->indexdir && !WARN_ON(!d_is_dir(real))) {
+		index = ovl_lookup_index(ofs, NULL, real, false);
+		if (IS_ERR(index))
+			return index;
+	}
+
+	/* Get connected upper overlay dir from index */
+	if (index) {
+		struct dentry *upper = ovl_index_upper(ofs, index);
+
+		dput(index);
+		if (IS_ERR_OR_NULL(upper))
+			return upper;
+
+		/*
+		 * ovl_lookup_real() in lower layer may call recursively once to
+		 * ovl_lookup_real() in upper layer. The first level call walks
+		 * back lower parents to the topmost indexed parent. The second
+		 * recursive call walks back from indexed upper to the topmost
+		 * connected/hashed upper parent (or up to root).
+		 */
+		this = ovl_lookup_real(sb, upper, &ofs->layers[0]);
+		dput(upper);
+	}
+
+	if (IS_ERR_OR_NULL(this))
+		return this;
+
+	if (ovl_dentry_real_at(this, layer->idx) != real) {
+		dput(this);
+		this = ERR_PTR(-EIO);
+	}
+
+	return this;
+}
+
+/*
+ * Lookup an indexed or hashed overlay dentry, whose real dentry is an
+ * ancestor of @real.
+ */
+static struct dentry *ovl_lookup_real_ancestor(struct super_block *sb,
+					       struct dentry *real,
+					       const struct ovl_layer *layer)
+{
+	struct dentry *next, *parent = NULL;
+	struct dentry *ancestor = ERR_PTR(-EIO);
+
+	if (real == layer->mnt->mnt_root)
+		return dget(sb->s_root);
+
+	/* Find the topmost indexed or hashed ancestor */
+	next = dget(real);
+	for (;;) {
+		parent = dget_parent(next);
+
+		/*
+		 * Lookup a matching overlay dentry in inode/dentry
+		 * cache or in index by real inode.
+		 */
+		ancestor = ovl_lookup_real_inode(sb, next, layer);
+		if (ancestor)
+			break;
+
+		if (parent == layer->mnt->mnt_root) {
+			ancestor = dget(sb->s_root);
+			break;
+		}
+
+		/*
+		 * If @real has been moved out of the layer root directory,
+		 * we will eventully hit the real fs root. This cannot happen
+		 * by legit overlay rename, so we return error in that case.
+		 */
+		if (parent == next) {
+			ancestor = ERR_PTR(-EXDEV);
+			break;
+		}
+
+		dput(next);
+		next = parent;
+	}
+
+	dput(parent);
+	dput(next);
+
+	return ancestor;
+}
+
+/*
+ * Lookup a connected overlay dentry whose real dentry is @real.
+ * If @real is on upper layer, we lookup a child overlay dentry with the same
+ * path the real dentry. Otherwise, we need to consult index for lookup.
+ */
+static struct dentry *ovl_lookup_real(struct super_block *sb,
+				      struct dentry *real,
+				      const struct ovl_layer *layer)
+{
+	struct dentry *connected;
+	int err = 0;
+
+	connected = ovl_lookup_real_ancestor(sb, real, layer);
+	if (IS_ERR(connected))
+		return connected;
+
+	while (!err) {
+		struct dentry *next, *this;
+		struct dentry *parent = NULL;
+		struct dentry *real_connected = ovl_dentry_real_at(connected,
+								   layer->idx);
+
+		if (real_connected == real)
+			break;
+
+		/* Find the topmost dentry not yet connected */
+		next = dget(real);
+		for (;;) {
+			parent = dget_parent(next);
+
+			if (parent == real_connected)
+				break;
+
+			/*
+			 * If real has been moved out of 'real_connected',
+			 * we will not find 'real_connected' and hit the layer
+			 * root. In that case, we need to restart connecting.
+			 * This game can go on forever in the worst case. We
+			 * may want to consider taking s_vfs_rename_mutex if
+			 * this happens more than once.
+			 */
+			if (parent == layer->mnt->mnt_root) {
+				dput(connected);
+				connected = dget(sb->s_root);
+				break;
+			}
+
+			/*
+			 * If real file has been moved out of the layer root
+			 * directory, we will eventully hit the real fs root.
+			 * This cannot happen by legit overlay rename, so we
+			 * return error in that case.
+			 */
+			if (parent == next) {
+				err = -EXDEV;
+				break;
+			}
+
+			dput(next);
+			next = parent;
+		}
+
+		if (!err) {
+			this = ovl_lookup_real_one(connected, next, layer);
+			if (IS_ERR(this))
+				err = PTR_ERR(this);
+
+			/*
+			 * Lookup of child in overlay can fail when racing with
+			 * overlay rename of child away from 'connected' parent.
+			 * In this case, we need to restart the lookup from the
+			 * top, because we cannot trust that 'real_connected' is
+			 * still an ancestor of 'real'. There is a good chance
+			 * that the renamed overlay ancestor is now in cache, so
+			 * ovl_lookup_real_ancestor() will find it and we can
+			 * continue to connect exactly from where lookup failed.
+			 */
+			if (err == -ECHILD) {
+				this = ovl_lookup_real_ancestor(sb, real,
+								layer);
+				err = PTR_ERR_OR_ZERO(this);
+			}
+			if (!err) {
+				dput(connected);
+				connected = this;
+			}
+		}
+
+		dput(parent);
+		dput(next);
+	}
+
+	if (err)
+		goto fail;
+
+	return connected;
+
+fail:
+	pr_warn_ratelimited("failed to lookup by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
+			    real, layer->idx, connected, err);
+	dput(connected);
+	return ERR_PTR(err);
+}
+
+/*
+ * Get an overlay dentry from upper/lower real dentries and index.
+ */
+static struct dentry *ovl_get_dentry(struct super_block *sb,
+				     struct dentry *upper,
+				     struct ovl_path *lowerpath,
+				     struct dentry *index)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+	const struct ovl_layer *layer = upper ? &ofs->layers[0] : lowerpath->layer;
+	struct dentry *real = upper ?: (index ?: lowerpath->dentry);
+
+	/*
+	 * Obtain a disconnected overlay dentry from a non-dir real dentry
+	 * and index.
+	 */
+	if (!d_is_dir(real))
+		return ovl_obtain_alias(sb, upper, lowerpath, index);
+
+	/* Removed empty directory? */
+	if ((real->d_flags & DCACHE_DISCONNECTED) || d_unhashed(real))
+		return ERR_PTR(-ENOENT);
+
+	/*
+	 * If real dentry is connected and hashed, get a connected overlay
+	 * dentry whose real dentry is @real.
+	 */
+	return ovl_lookup_real(sb, real, layer);
+}
+
+static struct dentry *ovl_upper_fh_to_d(struct super_block *sb,
+					struct ovl_fh *fh)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+	struct dentry *dentry;
+	struct dentry *upper;
+
+	if (!ovl_upper_mnt(ofs))
+		return ERR_PTR(-EACCES);
+
+	upper = ovl_decode_real_fh(ofs, fh, ovl_upper_mnt(ofs), true);
+	if (IS_ERR_OR_NULL(upper))
+		return upper;
+
+	dentry = ovl_get_dentry(sb, upper, NULL, NULL);
+	dput(upper);
+
+	return dentry;
+}
+
+static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
+					struct ovl_fh *fh)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+	struct ovl_path origin = { };
+	struct ovl_path *stack = &origin;
+	struct dentry *dentry = NULL;
+	struct dentry *index = NULL;
+	struct inode *inode;
+	int err;
+
+	/* First lookup overlay inode in inode cache by origin fh */
+	err = ovl_check_origin_fh(ofs, fh, false, NULL, &stack);
+	if (err)
+		return ERR_PTR(err);
+
+	if (!d_is_dir(origin.dentry) ||
+	    !(origin.dentry->d_flags & DCACHE_DISCONNECTED)) {
+		inode = ovl_lookup_inode(sb, origin.dentry, false);
+		err = PTR_ERR(inode);
+		if (IS_ERR(inode))
+			goto out_err;
+		if (inode) {
+			dentry = d_find_any_alias(inode);
+			iput(inode);
+			if (dentry)
+				goto out;
+		}
+	}
+
+	/* Then lookup indexed upper/whiteout by origin fh */
+	if (ofs->indexdir) {
+		index = ovl_get_index_fh(ofs, fh);
+		err = PTR_ERR(index);
+		if (IS_ERR(index)) {
+			index = NULL;
+			goto out_err;
+		}
+	}
+
+	/* Then try to get a connected upper dir by index */
+	if (index && d_is_dir(index)) {
+		struct dentry *upper = ovl_index_upper(ofs, index);
+
+		err = PTR_ERR(upper);
+		if (IS_ERR_OR_NULL(upper))
+			goto out_err;
+
+		dentry = ovl_get_dentry(sb, upper, NULL, NULL);
+		dput(upper);
+		goto out;
+	}
+
+	/* Find origin.dentry again with ovl_acceptable() layer check */
+	if (d_is_dir(origin.dentry)) {
+		dput(origin.dentry);
+		origin.dentry = NULL;
+		err = ovl_check_origin_fh(ofs, fh, true, NULL, &stack);
+		if (err)
+			goto out_err;
+	}
+	if (index) {
+		err = ovl_verify_origin(ofs, index, origin.dentry, false);
+		if (err)
+			goto out_err;
+	}
+
+	/* Get a connected non-upper dir or disconnected non-dir */
+	dentry = ovl_get_dentry(sb, NULL, &origin, index);
+
+out:
+	dput(origin.dentry);
+	dput(index);
+	return dentry;
+
+out_err:
+	dentry = ERR_PTR(err);
+	goto out;
+}
+
+static struct ovl_fh *ovl_fid_to_fh(struct fid *fid, int buflen, int fh_type)
+{
+	struct ovl_fh *fh;
+
+	/* If on-wire inner fid is aligned - nothing to do */
+	if (fh_type == OVL_FILEID_V1)
+		return (struct ovl_fh *)fid;
+
+	if (fh_type != OVL_FILEID_V0)
+		return ERR_PTR(-EINVAL);
+
+	if (buflen <= OVL_FH_WIRE_OFFSET)
+		return ERR_PTR(-EINVAL);
+
+	fh = kzalloc(buflen, GFP_KERNEL);
+	if (!fh)
+		return ERR_PTR(-ENOMEM);
+
+	/* Copy unaligned inner fh into aligned buffer */
+	memcpy(fh->buf, fid, buflen - OVL_FH_WIRE_OFFSET);
+	return fh;
+}
+
+static struct dentry *ovl_fh_to_dentry(struct super_block *sb, struct fid *fid,
+				       int fh_len, int fh_type)
+{
+	struct dentry *dentry = NULL;
+	struct ovl_fh *fh = NULL;
+	int len = fh_len << 2;
+	unsigned int flags = 0;
+	int err;
+
+	fh = ovl_fid_to_fh(fid, len, fh_type);
+	err = PTR_ERR(fh);
+	if (IS_ERR(fh))
+		goto out_err;
+
+	err = ovl_check_fh_len(fh, len);
+	if (err)
+		goto out_err;
+
+	flags = fh->fb.flags;
+	dentry = (flags & OVL_FH_FLAG_PATH_UPPER) ?
+		 ovl_upper_fh_to_d(sb, fh) :
+		 ovl_lower_fh_to_d(sb, fh);
+	err = PTR_ERR(dentry);
+	if (IS_ERR(dentry) && err != -ESTALE)
+		goto out_err;
+
+out:
+	/* We may have needed to re-align OVL_FILEID_V0 */
+	if (!IS_ERR_OR_NULL(fh) && fh != (void *)fid)
+		kfree(fh);
+
+	return dentry;
+
+out_err:
+	pr_warn_ratelimited("failed to decode file handle (len=%d, type=%d, flags=%x, err=%i)\n",
+			    fh_len, fh_type, flags, err);
+	dentry = ERR_PTR(err);
+	goto out;
+}
+
+static struct dentry *ovl_fh_to_parent(struct super_block *sb, struct fid *fid,
+				       int fh_len, int fh_type)
+{
+	pr_warn_ratelimited("connectable file handles not supported; use 'no_subtree_check' exportfs option.\n");
+	return ERR_PTR(-EACCES);
+}
+
+static int ovl_get_name(struct dentry *parent, char *name,
+			struct dentry *child)
+{
+	/*
+	 * ovl_fh_to_dentry() returns connected dir overlay dentries and
+	 * ovl_fh_to_parent() is not implemented, so we should not get here.
+	 */
+	WARN_ON_ONCE(1);
+	return -EIO;
+}
+
+static struct dentry *ovl_get_parent(struct dentry *dentry)
+{
+	/*
+	 * ovl_fh_to_dentry() returns connected dir overlay dentries, so we
+	 * should not get here.
+	 */
+	WARN_ON_ONCE(1);
+	return ERR_PTR(-EIO);
+}
+
+const struct export_operations ovl_export_operations = {
+	.encode_fh	= ovl_encode_fh,
+	.fh_to_dentry	= ovl_fh_to_dentry,
+	.fh_to_parent	= ovl_fh_to_parent,
+	.get_name	= ovl_get_name,
+	.get_parent	= ovl_get_parent,
+};
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
new file mode 100644
index 000000000..cc0c07716
--- /dev/null
+++ b/fs/overlayfs/file.c
@@ -0,0 +1,716 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2017 Red Hat, Inc.
+ */
+
+#include <linux/cred.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/xattr.h>
+#include <linux/uio.h>
+#include <linux/uaccess.h>
+#include <linux/splice.h>
+#include <linux/security.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include "overlayfs.h"
+
+struct ovl_aio_req {
+	struct kiocb iocb;
+	refcount_t ref;
+	struct kiocb *orig_iocb;
+};
+
+static struct kmem_cache *ovl_aio_request_cachep;
+
+static char ovl_whatisit(struct inode *inode, struct inode *realinode)
+{
+	if (realinode != ovl_inode_upper(inode))
+		return 'l';
+	if (ovl_has_upperdata(inode))
+		return 'u';
+	else
+		return 'm';
+}
+
+/* No atime modificaton nor notify on underlying */
+#define OVL_OPEN_FLAGS (O_NOATIME | FMODE_NONOTIFY)
+
+static struct file *ovl_open_realfile(const struct file *file,
+				      const struct path *realpath)
+{
+	struct inode *realinode = d_inode(realpath->dentry);
+	struct inode *inode = file_inode(file);
+	struct user_namespace *real_mnt_userns;
+	struct file *realfile;
+	const struct cred *old_cred;
+	int flags = file->f_flags | OVL_OPEN_FLAGS;
+	int acc_mode = ACC_MODE(flags);
+	int err;
+
+	if (flags & O_APPEND)
+		acc_mode |= MAY_APPEND;
+
+	old_cred = ovl_override_creds(inode->i_sb);
+	real_mnt_userns = mnt_user_ns(realpath->mnt);
+	err = inode_permission(real_mnt_userns, realinode, MAY_OPEN | acc_mode);
+	if (err) {
+		realfile = ERR_PTR(err);
+	} else {
+		if (!inode_owner_or_capable(real_mnt_userns, realinode))
+			flags &= ~O_NOATIME;
+
+		realfile = open_with_fake_path(&file->f_path, flags, realinode,
+					       current_cred());
+	}
+	revert_creds(old_cred);
+
+	pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
+		 file, file, ovl_whatisit(inode, realinode), file->f_flags,
+		 realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
+
+	return realfile;
+}
+
+#define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
+
+static int ovl_change_flags(struct file *file, unsigned int flags)
+{
+	struct inode *inode = file_inode(file);
+	int err;
+
+	flags &= OVL_SETFL_MASK;
+
+	if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
+		return -EPERM;
+
+	if ((flags & O_DIRECT) && !(file->f_mode & FMODE_CAN_ODIRECT))
+		return -EINVAL;
+
+	if (file->f_op->check_flags) {
+		err = file->f_op->check_flags(flags);
+		if (err)
+			return err;
+	}
+
+	spin_lock(&file->f_lock);
+	file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
+	file->f_iocb_flags = iocb_flags(file);
+	spin_unlock(&file->f_lock);
+
+	return 0;
+}
+
+static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
+			       bool allow_meta)
+{
+	struct dentry *dentry = file_dentry(file);
+	struct path realpath;
+
+	real->flags = 0;
+	real->file = file->private_data;
+
+	if (allow_meta)
+		ovl_path_real(dentry, &realpath);
+	else
+		ovl_path_realdata(dentry, &realpath);
+
+	/* Has it been copied up since we'd opened it? */
+	if (unlikely(file_inode(real->file) != d_inode(realpath.dentry))) {
+		real->flags = FDPUT_FPUT;
+		real->file = ovl_open_realfile(file, &realpath);
+
+		return PTR_ERR_OR_ZERO(real->file);
+	}
+
+	/* Did the flags change since open? */
+	if (unlikely((file->f_flags ^ real->file->f_flags) & ~OVL_OPEN_FLAGS))
+		return ovl_change_flags(real->file, file->f_flags);
+
+	return 0;
+}
+
+static int ovl_real_fdget(const struct file *file, struct fd *real)
+{
+	if (d_is_dir(file_dentry(file))) {
+		real->flags = 0;
+		real->file = ovl_dir_real_file(file, false);
+
+		return PTR_ERR_OR_ZERO(real->file);
+	}
+
+	return ovl_real_fdget_meta(file, real, false);
+}
+
+static int ovl_open(struct inode *inode, struct file *file)
+{
+	struct dentry *dentry = file_dentry(file);
+	struct file *realfile;
+	struct path realpath;
+	int err;
+
+	err = ovl_maybe_copy_up(dentry, file->f_flags);
+	if (err)
+		return err;
+
+	/* No longer need these flags, so don't pass them on to underlying fs */
+	file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
+
+	ovl_path_realdata(dentry, &realpath);
+	realfile = ovl_open_realfile(file, &realpath);
+	if (IS_ERR(realfile))
+		return PTR_ERR(realfile);
+
+	file->private_data = realfile;
+
+	return 0;
+}
+
+static int ovl_release(struct inode *inode, struct file *file)
+{
+	fput(file->private_data);
+
+	return 0;
+}
+
+static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
+{
+	struct inode *inode = file_inode(file);
+	struct fd real;
+	const struct cred *old_cred;
+	loff_t ret;
+
+	/*
+	 * The two special cases below do not need to involve real fs,
+	 * so we can optimizing concurrent callers.
+	 */
+	if (offset == 0) {
+		if (whence == SEEK_CUR)
+			return file->f_pos;
+
+		if (whence == SEEK_SET)
+			return vfs_setpos(file, 0, 0);
+	}
+
+	ret = ovl_real_fdget(file, &real);
+	if (ret)
+		return ret;
+
+	/*
+	 * Overlay file f_pos is the master copy that is preserved
+	 * through copy up and modified on read/write, but only real
+	 * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
+	 * limitations that are more strict than ->s_maxbytes for specific
+	 * files, so we use the real file to perform seeks.
+	 */
+	ovl_inode_lock(inode);
+	real.file->f_pos = file->f_pos;
+
+	old_cred = ovl_override_creds(inode->i_sb);
+	ret = vfs_llseek(real.file, offset, whence);
+	revert_creds(old_cred);
+
+	file->f_pos = real.file->f_pos;
+	ovl_inode_unlock(inode);
+
+	fdput(real);
+
+	return ret;
+}
+
+static void ovl_file_accessed(struct file *file)
+{
+	struct inode *inode, *upperinode;
+
+	if (file->f_flags & O_NOATIME)
+		return;
+
+	inode = file_inode(file);
+	upperinode = ovl_inode_upper(inode);
+
+	if (!upperinode)
+		return;
+
+	if ((!timespec64_equal(&inode->i_mtime, &upperinode->i_mtime) ||
+	     !timespec64_equal(&inode->i_ctime, &upperinode->i_ctime))) {
+		inode->i_mtime = upperinode->i_mtime;
+		inode->i_ctime = upperinode->i_ctime;
+	}
+
+	touch_atime(&file->f_path);
+}
+
+static rwf_t ovl_iocb_to_rwf(int ifl)
+{
+	rwf_t flags = 0;
+
+	if (ifl & IOCB_NOWAIT)
+		flags |= RWF_NOWAIT;
+	if (ifl & IOCB_HIPRI)
+		flags |= RWF_HIPRI;
+	if (ifl & IOCB_DSYNC)
+		flags |= RWF_DSYNC;
+	if (ifl & IOCB_SYNC)
+		flags |= RWF_SYNC;
+
+	return flags;
+}
+
+static inline void ovl_aio_put(struct ovl_aio_req *aio_req)
+{
+	if (refcount_dec_and_test(&aio_req->ref)) {
+		fput(aio_req->iocb.ki_filp);
+		kmem_cache_free(ovl_aio_request_cachep, aio_req);
+	}
+}
+
+static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
+{
+	struct kiocb *iocb = &aio_req->iocb;
+	struct kiocb *orig_iocb = aio_req->orig_iocb;
+
+	if (iocb->ki_flags & IOCB_WRITE) {
+		struct inode *inode = file_inode(orig_iocb->ki_filp);
+
+		/* Actually acquired in ovl_write_iter() */
+		__sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb,
+				      SB_FREEZE_WRITE);
+		file_end_write(iocb->ki_filp);
+		ovl_copyattr(inode);
+	}
+
+	orig_iocb->ki_pos = iocb->ki_pos;
+	ovl_aio_put(aio_req);
+}
+
+static void ovl_aio_rw_complete(struct kiocb *iocb, long res)
+{
+	struct ovl_aio_req *aio_req = container_of(iocb,
+						   struct ovl_aio_req, iocb);
+	struct kiocb *orig_iocb = aio_req->orig_iocb;
+
+	ovl_aio_cleanup_handler(aio_req);
+	orig_iocb->ki_complete(orig_iocb, res);
+}
+
+static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+	struct file *file = iocb->ki_filp;
+	struct fd real;
+	const struct cred *old_cred;
+	ssize_t ret;
+
+	if (!iov_iter_count(iter))
+		return 0;
+
+	ret = ovl_real_fdget(file, &real);
+	if (ret)
+		return ret;
+
+	ret = -EINVAL;
+	if (iocb->ki_flags & IOCB_DIRECT &&
+	    !(real.file->f_mode & FMODE_CAN_ODIRECT))
+		goto out_fdput;
+
+	old_cred = ovl_override_creds(file_inode(file)->i_sb);
+	if (is_sync_kiocb(iocb)) {
+		ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
+				    ovl_iocb_to_rwf(iocb->ki_flags));
+	} else {
+		struct ovl_aio_req *aio_req;
+
+		ret = -ENOMEM;
+		aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
+		if (!aio_req)
+			goto out;
+
+		real.flags = 0;
+		aio_req->orig_iocb = iocb;
+		kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
+		aio_req->iocb.ki_complete = ovl_aio_rw_complete;
+		refcount_set(&aio_req->ref, 2);
+		ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
+		ovl_aio_put(aio_req);
+		if (ret != -EIOCBQUEUED)
+			ovl_aio_cleanup_handler(aio_req);
+	}
+out:
+	revert_creds(old_cred);
+	ovl_file_accessed(file);
+out_fdput:
+	fdput(real);
+
+	return ret;
+}
+
+static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file_inode(file);
+	struct fd real;
+	const struct cred *old_cred;
+	ssize_t ret;
+	int ifl = iocb->ki_flags;
+
+	if (!iov_iter_count(iter))
+		return 0;
+
+	inode_lock(inode);
+	/* Update mode */
+	ovl_copyattr(inode);
+	ret = file_remove_privs(file);
+	if (ret)
+		goto out_unlock;
+
+	ret = ovl_real_fdget(file, &real);
+	if (ret)
+		goto out_unlock;
+
+	ret = -EINVAL;
+	if (iocb->ki_flags & IOCB_DIRECT &&
+	    !(real.file->f_mode & FMODE_CAN_ODIRECT))
+		goto out_fdput;
+
+	if (!ovl_should_sync(OVL_FS(inode->i_sb)))
+		ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
+
+	old_cred = ovl_override_creds(file_inode(file)->i_sb);
+	if (is_sync_kiocb(iocb)) {
+		file_start_write(real.file);
+		ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
+				     ovl_iocb_to_rwf(ifl));
+		file_end_write(real.file);
+		/* Update size */
+		ovl_copyattr(inode);
+	} else {
+		struct ovl_aio_req *aio_req;
+
+		ret = -ENOMEM;
+		aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
+		if (!aio_req)
+			goto out;
+
+		file_start_write(real.file);
+		/* Pacify lockdep, same trick as done in aio_write() */
+		__sb_writers_release(file_inode(real.file)->i_sb,
+				     SB_FREEZE_WRITE);
+		real.flags = 0;
+		aio_req->orig_iocb = iocb;
+		kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
+		aio_req->iocb.ki_flags = ifl;
+		aio_req->iocb.ki_complete = ovl_aio_rw_complete;
+		refcount_set(&aio_req->ref, 2);
+		ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
+		ovl_aio_put(aio_req);
+		if (ret != -EIOCBQUEUED)
+			ovl_aio_cleanup_handler(aio_req);
+	}
+out:
+	revert_creds(old_cred);
+out_fdput:
+	fdput(real);
+
+out_unlock:
+	inode_unlock(inode);
+
+	return ret;
+}
+
+/*
+ * Calling iter_file_splice_write() directly from overlay's f_op may deadlock
+ * due to lock order inversion between pipe->mutex in iter_file_splice_write()
+ * and file_start_write(real.file) in ovl_write_iter().
+ *
+ * So do everything ovl_write_iter() does and call iter_file_splice_write() on
+ * the real file.
+ */
+static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
+				loff_t *ppos, size_t len, unsigned int flags)
+{
+	struct fd real;
+	const struct cred *old_cred;
+	struct inode *inode = file_inode(out);
+	ssize_t ret;
+
+	inode_lock(inode);
+	/* Update mode */
+	ovl_copyattr(inode);
+	ret = file_remove_privs(out);
+	if (ret)
+		goto out_unlock;
+
+	ret = ovl_real_fdget(out, &real);
+	if (ret)
+		goto out_unlock;
+
+	old_cred = ovl_override_creds(inode->i_sb);
+	file_start_write(real.file);
+
+	ret = iter_file_splice_write(pipe, real.file, ppos, len, flags);
+
+	file_end_write(real.file);
+	/* Update size */
+	ovl_copyattr(inode);
+	revert_creds(old_cred);
+	fdput(real);
+
+out_unlock:
+	inode_unlock(inode);
+
+	return ret;
+}
+
+static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+	struct fd real;
+	const struct cred *old_cred;
+	int ret;
+
+	ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
+	if (ret <= 0)
+		return ret;
+
+	ret = ovl_real_fdget_meta(file, &real, !datasync);
+	if (ret)
+		return ret;
+
+	/* Don't sync lower file for fear of receiving EROFS error */
+	if (file_inode(real.file) == ovl_inode_upper(file_inode(file))) {
+		old_cred = ovl_override_creds(file_inode(file)->i_sb);
+		ret = vfs_fsync_range(real.file, start, end, datasync);
+		revert_creds(old_cred);
+	}
+
+	fdput(real);
+
+	return ret;
+}
+
+static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct file *realfile = file->private_data;
+	const struct cred *old_cred;
+	int ret;
+
+	if (!realfile->f_op->mmap)
+		return -ENODEV;
+
+	if (WARN_ON(file != vma->vm_file))
+		return -EIO;
+
+	vma_set_file(vma, realfile);
+
+	old_cred = ovl_override_creds(file_inode(file)->i_sb);
+	ret = call_mmap(vma->vm_file, vma);
+	revert_creds(old_cred);
+	ovl_file_accessed(file);
+
+	return ret;
+}
+
+static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
+{
+	struct inode *inode = file_inode(file);
+	struct fd real;
+	const struct cred *old_cred;
+	int ret;
+
+	inode_lock(inode);
+	/* Update mode */
+	ovl_copyattr(inode);
+	ret = file_remove_privs(file);
+	if (ret)
+		goto out_unlock;
+
+	ret = ovl_real_fdget(file, &real);
+	if (ret)
+		goto out_unlock;
+
+	old_cred = ovl_override_creds(file_inode(file)->i_sb);
+	ret = vfs_fallocate(real.file, mode, offset, len);
+	revert_creds(old_cred);
+
+	/* Update size */
+	ovl_copyattr(inode);
+
+	fdput(real);
+
+out_unlock:
+	inode_unlock(inode);
+
+	return ret;
+}
+
+static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
+{
+	struct fd real;
+	const struct cred *old_cred;
+	int ret;
+
+	ret = ovl_real_fdget(file, &real);
+	if (ret)
+		return ret;
+
+	old_cred = ovl_override_creds(file_inode(file)->i_sb);
+	ret = vfs_fadvise(real.file, offset, len, advice);
+	revert_creds(old_cred);
+
+	fdput(real);
+
+	return ret;
+}
+
+enum ovl_copyop {
+	OVL_COPY,
+	OVL_CLONE,
+	OVL_DEDUPE,
+};
+
+static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
+			    struct file *file_out, loff_t pos_out,
+			    loff_t len, unsigned int flags, enum ovl_copyop op)
+{
+	struct inode *inode_out = file_inode(file_out);
+	struct fd real_in, real_out;
+	const struct cred *old_cred;
+	loff_t ret;
+
+	inode_lock(inode_out);
+	if (op != OVL_DEDUPE) {
+		/* Update mode */
+		ovl_copyattr(inode_out);
+		ret = file_remove_privs(file_out);
+		if (ret)
+			goto out_unlock;
+	}
+
+	ret = ovl_real_fdget(file_out, &real_out);
+	if (ret)
+		goto out_unlock;
+
+	ret = ovl_real_fdget(file_in, &real_in);
+	if (ret) {
+		fdput(real_out);
+		goto out_unlock;
+	}
+
+	old_cred = ovl_override_creds(file_inode(file_out)->i_sb);
+	switch (op) {
+	case OVL_COPY:
+		ret = vfs_copy_file_range(real_in.file, pos_in,
+					  real_out.file, pos_out, len, flags);
+		break;
+
+	case OVL_CLONE:
+		ret = vfs_clone_file_range(real_in.file, pos_in,
+					   real_out.file, pos_out, len, flags);
+		break;
+
+	case OVL_DEDUPE:
+		ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
+						real_out.file, pos_out, len,
+						flags);
+		break;
+	}
+	revert_creds(old_cred);
+
+	/* Update size */
+	ovl_copyattr(inode_out);
+
+	fdput(real_in);
+	fdput(real_out);
+
+out_unlock:
+	inode_unlock(inode_out);
+
+	return ret;
+}
+
+static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
+				   struct file *file_out, loff_t pos_out,
+				   size_t len, unsigned int flags)
+{
+	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
+			    OVL_COPY);
+}
+
+static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
+				   struct file *file_out, loff_t pos_out,
+				   loff_t len, unsigned int remap_flags)
+{
+	enum ovl_copyop op;
+
+	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
+		return -EINVAL;
+
+	if (remap_flags & REMAP_FILE_DEDUP)
+		op = OVL_DEDUPE;
+	else
+		op = OVL_CLONE;
+
+	/*
+	 * Don't copy up because of a dedupe request, this wouldn't make sense
+	 * most of the time (data would be duplicated instead of deduplicated).
+	 */
+	if (op == OVL_DEDUPE &&
+	    (!ovl_inode_upper(file_inode(file_in)) ||
+	     !ovl_inode_upper(file_inode(file_out))))
+		return -EPERM;
+
+	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
+			    remap_flags, op);
+}
+
+static int ovl_flush(struct file *file, fl_owner_t id)
+{
+	struct fd real;
+	const struct cred *old_cred;
+	int err;
+
+	err = ovl_real_fdget(file, &real);
+	if (err)
+		return err;
+
+	if (real.file->f_op->flush) {
+		old_cred = ovl_override_creds(file_inode(file)->i_sb);
+		err = real.file->f_op->flush(real.file, id);
+		revert_creds(old_cred);
+	}
+	fdput(real);
+
+	return err;
+}
+
+const struct file_operations ovl_file_operations = {
+	.open		= ovl_open,
+	.release	= ovl_release,
+	.llseek		= ovl_llseek,
+	.read_iter	= ovl_read_iter,
+	.write_iter	= ovl_write_iter,
+	.fsync		= ovl_fsync,
+	.mmap		= ovl_mmap,
+	.fallocate	= ovl_fallocate,
+	.fadvise	= ovl_fadvise,
+	.flush		= ovl_flush,
+	.splice_read    = generic_file_splice_read,
+	.splice_write   = ovl_splice_write,
+
+	.copy_file_range	= ovl_copy_file_range,
+	.remap_file_range	= ovl_remap_file_range,
+};
+
+int __init ovl_aio_request_cache_init(void)
+{
+	ovl_aio_request_cachep = kmem_cache_create("ovl_aio_req",
+						   sizeof(struct ovl_aio_req),
+						   0, SLAB_HWCACHE_ALIGN, NULL);
+	if (!ovl_aio_request_cachep)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void ovl_aio_request_cache_destroy(void)
+{
+	kmem_cache_destroy(ovl_aio_request_cachep);
+}
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
new file mode 100644
index 000000000..c062f7e2e
--- /dev/null
+++ b/fs/overlayfs/inode.c
@@ -0,0 +1,1285 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright (C) 2011 Novell Inc.
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/cred.h>
+#include <linux/xattr.h>
+#include <linux/posix_acl.h>
+#include <linux/ratelimit.h>
+#include <linux/fiemap.h>
+#include <linux/fileattr.h>
+#include <linux/security.h>
+#include <linux/namei.h>
+#include "overlayfs.h"
+
+
+int ovl_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+		struct iattr *attr)
+{
+	int err;
+	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+	bool full_copy_up = false;
+	struct dentry *upperdentry;
+	const struct cred *old_cred;
+
+	err = setattr_prepare(&init_user_ns, dentry, attr);
+	if (err)
+		return err;
+
+	err = ovl_want_write(dentry);
+	if (err)
+		goto out;
+
+	if (attr->ia_valid & ATTR_SIZE) {
+		/* Truncate should trigger data copy up as well */
+		full_copy_up = true;
+	}
+
+	if (!full_copy_up)
+		err = ovl_copy_up(dentry);
+	else
+		err = ovl_copy_up_with_data(dentry);
+	if (!err) {
+		struct inode *winode = NULL;
+
+		upperdentry = ovl_dentry_upper(dentry);
+
+		if (attr->ia_valid & ATTR_SIZE) {
+			winode = d_inode(upperdentry);
+			err = get_write_access(winode);
+			if (err)
+				goto out_drop_write;
+		}
+
+		if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
+			attr->ia_valid &= ~ATTR_MODE;
+
+		/*
+		 * We might have to translate ovl file into real file object
+		 * once use cases emerge.  For now, simply don't let underlying
+		 * filesystem rely on attr->ia_file
+		 */
+		attr->ia_valid &= ~ATTR_FILE;
+
+		/*
+		 * If open(O_TRUNC) is done, VFS calls ->setattr with ATTR_OPEN
+		 * set.  Overlayfs does not pass O_TRUNC flag to underlying
+		 * filesystem during open -> do not pass ATTR_OPEN.  This
+		 * disables optimization in fuse which assumes open(O_TRUNC)
+		 * already set file size to 0.  But we never passed O_TRUNC to
+		 * fuse.  So by clearing ATTR_OPEN, fuse will be forced to send
+		 * setattr request to server.
+		 */
+		attr->ia_valid &= ~ATTR_OPEN;
+
+		inode_lock(upperdentry->d_inode);
+		old_cred = ovl_override_creds(dentry->d_sb);
+		err = ovl_do_notify_change(ofs, upperdentry, attr);
+		revert_creds(old_cred);
+		if (!err)
+			ovl_copyattr(dentry->d_inode);
+		inode_unlock(upperdentry->d_inode);
+
+		if (winode)
+			put_write_access(winode);
+	}
+out_drop_write:
+	ovl_drop_write(dentry);
+out:
+	return err;
+}
+
+static void ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid)
+{
+	bool samefs = ovl_same_fs(dentry->d_sb);
+	unsigned int xinobits = ovl_xino_bits(dentry->d_sb);
+	unsigned int xinoshift = 64 - xinobits;
+
+	if (samefs) {
+		/*
+		 * When all layers are on the same fs, all real inode
+		 * number are unique, so we use the overlay st_dev,
+		 * which is friendly to du -x.
+		 */
+		stat->dev = dentry->d_sb->s_dev;
+		return;
+	} else if (xinobits) {
+		/*
+		 * All inode numbers of underlying fs should not be using the
+		 * high xinobits, so we use high xinobits to partition the
+		 * overlay st_ino address space. The high bits holds the fsid
+		 * (upper fsid is 0). The lowest xinobit is reserved for mapping
+		 * the non-persistent inode numbers range in case of overflow.
+		 * This way all overlay inode numbers are unique and use the
+		 * overlay st_dev.
+		 */
+		if (likely(!(stat->ino >> xinoshift))) {
+			stat->ino |= ((u64)fsid) << (xinoshift + 1);
+			stat->dev = dentry->d_sb->s_dev;
+			return;
+		} else if (ovl_xino_warn(dentry->d_sb)) {
+			pr_warn_ratelimited("inode number too big (%pd2, ino=%llu, xinobits=%d)\n",
+					    dentry, stat->ino, xinobits);
+		}
+	}
+
+	/* The inode could not be mapped to a unified st_ino address space */
+	if (S_ISDIR(dentry->d_inode->i_mode)) {
+		/*
+		 * Always use the overlay st_dev for directories, so 'find
+		 * -xdev' will scan the entire overlay mount and won't cross the
+		 * overlay mount boundaries.
+		 *
+		 * If not all layers are on the same fs the pair {real st_ino;
+		 * overlay st_dev} is not unique, so use the non persistent
+		 * overlay st_ino for directories.
+		 */
+		stat->dev = dentry->d_sb->s_dev;
+		stat->ino = dentry->d_inode->i_ino;
+	} else {
+		/*
+		 * For non-samefs setup, if we cannot map all layers st_ino
+		 * to a unified address space, we need to make sure that st_dev
+		 * is unique per underlying fs, so we use the unique anonymous
+		 * bdev assigned to the underlying fs.
+		 */
+		stat->dev = OVL_FS(dentry->d_sb)->fs[fsid].pseudo_dev;
+	}
+}
+
+int ovl_getattr(struct user_namespace *mnt_userns, const struct path *path,
+		struct kstat *stat, u32 request_mask, unsigned int flags)
+{
+	struct dentry *dentry = path->dentry;
+	enum ovl_path_type type;
+	struct path realpath;
+	const struct cred *old_cred;
+	struct inode *inode = d_inode(dentry);
+	bool is_dir = S_ISDIR(inode->i_mode);
+	int fsid = 0;
+	int err;
+	bool metacopy_blocks = false;
+
+	metacopy_blocks = ovl_is_metacopy_dentry(dentry);
+
+	type = ovl_path_real(dentry, &realpath);
+	old_cred = ovl_override_creds(dentry->d_sb);
+	err = vfs_getattr(&realpath, stat, request_mask, flags);
+	if (err)
+		goto out;
+
+	/* Report the effective immutable/append-only STATX flags */
+	generic_fill_statx_attr(inode, stat);
+
+	/*
+	 * For non-dir or same fs, we use st_ino of the copy up origin.
+	 * This guaranties constant st_dev/st_ino across copy up.
+	 * With xino feature and non-samefs, we use st_ino of the copy up
+	 * origin masked with high bits that represent the layer id.
+	 *
+	 * If lower filesystem supports NFS file handles, this also guaranties
+	 * persistent st_ino across mount cycle.
+	 */
+	if (!is_dir || ovl_same_dev(dentry->d_sb)) {
+		if (!OVL_TYPE_UPPER(type)) {
+			fsid = ovl_layer_lower(dentry)->fsid;
+		} else if (OVL_TYPE_ORIGIN(type)) {
+			struct kstat lowerstat;
+			u32 lowermask = STATX_INO | STATX_BLOCKS |
+					(!is_dir ? STATX_NLINK : 0);
+
+			ovl_path_lower(dentry, &realpath);
+			err = vfs_getattr(&realpath, &lowerstat,
+					  lowermask, flags);
+			if (err)
+				goto out;
+
+			/*
+			 * Lower hardlinks may be broken on copy up to different
+			 * upper files, so we cannot use the lower origin st_ino
+			 * for those different files, even for the same fs case.
+			 *
+			 * Similarly, several redirected dirs can point to the
+			 * same dir on a lower layer. With the "verify_lower"
+			 * feature, we do not use the lower origin st_ino, if
+			 * we haven't verified that this redirect is unique.
+			 *
+			 * With inodes index enabled, it is safe to use st_ino
+			 * of an indexed origin. The index validates that the
+			 * upper hardlink is not broken and that a redirected
+			 * dir is the only redirect to that origin.
+			 */
+			if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) ||
+			    (!ovl_verify_lower(dentry->d_sb) &&
+			     (is_dir || lowerstat.nlink == 1))) {
+				fsid = ovl_layer_lower(dentry)->fsid;
+				stat->ino = lowerstat.ino;
+			}
+
+			/*
+			 * If we are querying a metacopy dentry and lower
+			 * dentry is data dentry, then use the blocks we
+			 * queried just now. We don't have to do additional
+			 * vfs_getattr(). If lower itself is metacopy, then
+			 * additional vfs_getattr() is unavoidable.
+			 */
+			if (metacopy_blocks &&
+			    realpath.dentry == ovl_dentry_lowerdata(dentry)) {
+				stat->blocks = lowerstat.blocks;
+				metacopy_blocks = false;
+			}
+		}
+
+		if (metacopy_blocks) {
+			/*
+			 * If lower is not same as lowerdata or if there was
+			 * no origin on upper, we can end up here.
+			 */
+			struct kstat lowerdatastat;
+			u32 lowermask = STATX_BLOCKS;
+
+			ovl_path_lowerdata(dentry, &realpath);
+			err = vfs_getattr(&realpath, &lowerdatastat,
+					  lowermask, flags);
+			if (err)
+				goto out;
+			stat->blocks = lowerdatastat.blocks;
+		}
+	}
+
+	ovl_map_dev_ino(dentry, stat, fsid);
+
+	/*
+	 * It's probably not worth it to count subdirs to get the
+	 * correct link count.  nlink=1 seems to pacify 'find' and
+	 * other utilities.
+	 */
+	if (is_dir && OVL_TYPE_MERGE(type))
+		stat->nlink = 1;
+
+	/*
+	 * Return the overlay inode nlinks for indexed upper inodes.
+	 * Overlay inode nlink counts the union of the upper hardlinks
+	 * and non-covered lower hardlinks. It does not include the upper
+	 * index hardlink.
+	 */
+	if (!is_dir && ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+		stat->nlink = dentry->d_inode->i_nlink;
+
+out:
+	revert_creds(old_cred);
+
+	return err;
+}
+
+int ovl_permission(struct user_namespace *mnt_userns,
+		   struct inode *inode, int mask)
+{
+	struct inode *upperinode = ovl_inode_upper(inode);
+	struct inode *realinode;
+	struct path realpath;
+	const struct cred *old_cred;
+	int err;
+
+	/* Careful in RCU walk mode */
+	realinode = ovl_i_path_real(inode, &realpath);
+	if (!realinode) {
+		WARN_ON(!(mask & MAY_NOT_BLOCK));
+		return -ECHILD;
+	}
+
+	/*
+	 * Check overlay inode with the creds of task and underlying inode
+	 * with creds of mounter
+	 */
+	err = generic_permission(&init_user_ns, inode, mask);
+	if (err)
+		return err;
+
+	old_cred = ovl_override_creds(inode->i_sb);
+	if (!upperinode &&
+	    !special_file(realinode->i_mode) && mask & MAY_WRITE) {
+		mask &= ~(MAY_WRITE | MAY_APPEND);
+		/* Make sure mounter can read file for copy up later */
+		mask |= MAY_READ;
+	}
+	err = inode_permission(mnt_user_ns(realpath.mnt), realinode, mask);
+	revert_creds(old_cred);
+
+	return err;
+}
+
+static const char *ovl_get_link(struct dentry *dentry,
+				struct inode *inode,
+				struct delayed_call *done)
+{
+	const struct cred *old_cred;
+	const char *p;
+
+	if (!dentry)
+		return ERR_PTR(-ECHILD);
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	p = vfs_get_link(ovl_dentry_real(dentry), done);
+	revert_creds(old_cred);
+	return p;
+}
+
+bool ovl_is_private_xattr(struct super_block *sb, const char *name)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	if (ofs->config.userxattr)
+		return strncmp(name, OVL_XATTR_USER_PREFIX,
+			       sizeof(OVL_XATTR_USER_PREFIX) - 1) == 0;
+	else
+		return strncmp(name, OVL_XATTR_TRUSTED_PREFIX,
+			       sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1) == 0;
+}
+
+int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
+		  const void *value, size_t size, int flags)
+{
+	int err;
+	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+	struct dentry *upperdentry = ovl_i_dentry_upper(inode);
+	struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry);
+	struct path realpath;
+	const struct cred *old_cred;
+
+	err = ovl_want_write(dentry);
+	if (err)
+		goto out;
+
+	if (!value && !upperdentry) {
+		ovl_path_lower(dentry, &realpath);
+		old_cred = ovl_override_creds(dentry->d_sb);
+		err = vfs_getxattr(mnt_user_ns(realpath.mnt), realdentry, name, NULL, 0);
+		revert_creds(old_cred);
+		if (err < 0)
+			goto out_drop_write;
+	}
+
+	if (!upperdentry) {
+		err = ovl_copy_up(dentry);
+		if (err)
+			goto out_drop_write;
+
+		realdentry = ovl_dentry_upper(dentry);
+	}
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	if (value) {
+		err = ovl_do_setxattr(ofs, realdentry, name, value, size,
+				      flags);
+	} else {
+		WARN_ON(flags != XATTR_REPLACE);
+		err = ovl_do_removexattr(ofs, realdentry, name);
+	}
+	revert_creds(old_cred);
+
+	/* copy c/mtime */
+	ovl_copyattr(inode);
+
+out_drop_write:
+	ovl_drop_write(dentry);
+out:
+	return err;
+}
+
+int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
+		  void *value, size_t size)
+{
+	ssize_t res;
+	const struct cred *old_cred;
+	struct path realpath;
+
+	ovl_i_path_real(inode, &realpath);
+	old_cred = ovl_override_creds(dentry->d_sb);
+	res = vfs_getxattr(mnt_user_ns(realpath.mnt), realpath.dentry, name, value, size);
+	revert_creds(old_cred);
+	return res;
+}
+
+static bool ovl_can_list(struct super_block *sb, const char *s)
+{
+	/* Never list private (.overlay) */
+	if (ovl_is_private_xattr(sb, s))
+		return false;
+
+	/* List all non-trusted xattrs */
+	if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0)
+		return true;
+
+	/* list other trusted for superuser only */
+	return ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
+}
+
+ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
+{
+	struct dentry *realdentry = ovl_dentry_real(dentry);
+	ssize_t res;
+	size_t len;
+	char *s;
+	const struct cred *old_cred;
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	res = vfs_listxattr(realdentry, list, size);
+	revert_creds(old_cred);
+	if (res <= 0 || size == 0)
+		return res;
+
+	/* filter out private xattrs */
+	for (s = list, len = res; len;) {
+		size_t slen = strnlen(s, len) + 1;
+
+		/* underlying fs providing us with an broken xattr list? */
+		if (WARN_ON(slen > len))
+			return -EIO;
+
+		len -= slen;
+		if (!ovl_can_list(dentry->d_sb, s)) {
+			res -= slen;
+			memmove(s, s + slen, len);
+		} else {
+			s += slen;
+		}
+	}
+
+	return res;
+}
+
+#ifdef CONFIG_FS_POSIX_ACL
+/*
+ * Apply the idmapping of the layer to POSIX ACLs. The caller must pass a clone
+ * of the POSIX ACLs retrieved from the lower layer to this function to not
+ * alter the POSIX ACLs for the underlying filesystem.
+ */
+static void ovl_idmap_posix_acl(struct inode *realinode,
+				struct user_namespace *mnt_userns,
+				struct posix_acl *acl)
+{
+	struct user_namespace *fs_userns = i_user_ns(realinode);
+
+	for (unsigned int i = 0; i < acl->a_count; i++) {
+		vfsuid_t vfsuid;
+		vfsgid_t vfsgid;
+
+		struct posix_acl_entry *e = &acl->a_entries[i];
+		switch (e->e_tag) {
+		case ACL_USER:
+			vfsuid = make_vfsuid(mnt_userns, fs_userns, e->e_uid);
+			e->e_uid = vfsuid_into_kuid(vfsuid);
+			break;
+		case ACL_GROUP:
+			vfsgid = make_vfsgid(mnt_userns, fs_userns, e->e_gid);
+			e->e_gid = vfsgid_into_kgid(vfsgid);
+			break;
+		}
+	}
+}
+
+/*
+ * When the relevant layer is an idmapped mount we need to take the idmapping
+ * of the layer into account and translate any ACL_{GROUP,USER} values
+ * according to the idmapped mount.
+ *
+ * We cannot alter the ACLs returned from the relevant layer as that would
+ * alter the cached values filesystem wide for the lower filesystem. Instead we
+ * can clone the ACLs and then apply the relevant idmapping of the layer.
+ *
+ * This is obviously only relevant when idmapped layers are used.
+ */
+struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu)
+{
+	struct inode *realinode;
+	struct posix_acl *acl, *clone;
+	struct path realpath;
+
+	/* Careful in RCU walk mode */
+	realinode = ovl_i_path_real(inode, &realpath);
+	if (!realinode) {
+		WARN_ON(!rcu);
+		return ERR_PTR(-ECHILD);
+	}
+
+	if (!IS_POSIXACL(realinode))
+		return NULL;
+
+	if (rcu) {
+		acl = get_cached_acl_rcu(realinode, type);
+	} else {
+		const struct cred *old_cred;
+
+		old_cred = ovl_override_creds(inode->i_sb);
+		acl = get_acl(realinode, type);
+		revert_creds(old_cred);
+	}
+	/*
+	 * If there are no POSIX ACLs, or we encountered an error,
+	 * or the layer isn't idmapped we don't need to do anything.
+	 */
+	if (!is_idmapped_mnt(realpath.mnt) || IS_ERR_OR_NULL(acl))
+		return acl;
+
+	/*
+	 * We only get here if the layer is idmapped. So drop out of RCU path
+	 * walk so we can clone the ACLs. There's no need to release the ACLs
+	 * since get_cached_acl_rcu() doesn't take a reference on the ACLs.
+	 */
+	if (rcu)
+		return ERR_PTR(-ECHILD);
+
+	clone = posix_acl_clone(acl, GFP_KERNEL);
+	if (!clone)
+		clone = ERR_PTR(-ENOMEM);
+	else
+		ovl_idmap_posix_acl(realinode, mnt_user_ns(realpath.mnt), clone);
+	/*
+	 * Since we're not in RCU path walk we always need to release the
+	 * original ACLs.
+	 */
+	posix_acl_release(acl);
+	return clone;
+}
+#endif
+
+int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags)
+{
+	if (flags & S_ATIME) {
+		struct ovl_fs *ofs = inode->i_sb->s_fs_info;
+		struct path upperpath = {
+			.mnt = ovl_upper_mnt(ofs),
+			.dentry = ovl_upperdentry_dereference(OVL_I(inode)),
+		};
+
+		if (upperpath.dentry) {
+			touch_atime(&upperpath);
+			inode->i_atime = d_inode(upperpath.dentry)->i_atime;
+		}
+	}
+	return 0;
+}
+
+static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+		      u64 start, u64 len)
+{
+	int err;
+	struct inode *realinode = ovl_inode_realdata(inode);
+	const struct cred *old_cred;
+
+	if (!realinode->i_op->fiemap)
+		return -EOPNOTSUPP;
+
+	old_cred = ovl_override_creds(inode->i_sb);
+	err = realinode->i_op->fiemap(realinode, fieinfo, start, len);
+	revert_creds(old_cred);
+
+	return err;
+}
+
+/*
+ * Work around the fact that security_file_ioctl() takes a file argument.
+ * Introducing security_inode_fileattr_get/set() hooks would solve this issue
+ * properly.
+ */
+static int ovl_security_fileattr(const struct path *realpath, struct fileattr *fa,
+				 bool set)
+{
+	struct file *file;
+	unsigned int cmd;
+	int err;
+
+	file = dentry_open(realpath, O_RDONLY, current_cred());
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	if (set)
+		cmd = fa->fsx_valid ? FS_IOC_FSSETXATTR : FS_IOC_SETFLAGS;
+	else
+		cmd = fa->fsx_valid ? FS_IOC_FSGETXATTR : FS_IOC_GETFLAGS;
+
+	err = security_file_ioctl(file, cmd, 0);
+	fput(file);
+
+	return err;
+}
+
+int ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa)
+{
+	int err;
+
+	err = ovl_security_fileattr(realpath, fa, true);
+	if (err)
+		return err;
+
+	return vfs_fileattr_set(mnt_user_ns(realpath->mnt), realpath->dentry, fa);
+}
+
+int ovl_fileattr_set(struct user_namespace *mnt_userns,
+		     struct dentry *dentry, struct fileattr *fa)
+{
+	struct inode *inode = d_inode(dentry);
+	struct path upperpath;
+	const struct cred *old_cred;
+	unsigned int flags;
+	int err;
+
+	err = ovl_want_write(dentry);
+	if (err)
+		goto out;
+
+	err = ovl_copy_up(dentry);
+	if (!err) {
+		ovl_path_real(dentry, &upperpath);
+
+		old_cred = ovl_override_creds(inode->i_sb);
+		/*
+		 * Store immutable/append-only flags in xattr and clear them
+		 * in upper fileattr (in case they were set by older kernel)
+		 * so children of "ovl-immutable" directories lower aliases of
+		 * "ovl-immutable" hardlinks could be copied up.
+		 * Clear xattr when flags are cleared.
+		 */
+		err = ovl_set_protattr(inode, upperpath.dentry, fa);
+		if (!err)
+			err = ovl_real_fileattr_set(&upperpath, fa);
+		revert_creds(old_cred);
+
+		/*
+		 * Merge real inode flags with inode flags read from
+		 * overlay.protattr xattr
+		 */
+		flags = ovl_inode_real(inode)->i_flags & OVL_COPY_I_FLAGS_MASK;
+
+		BUILD_BUG_ON(OVL_PROT_I_FLAGS_MASK & ~OVL_COPY_I_FLAGS_MASK);
+		flags |= inode->i_flags & OVL_PROT_I_FLAGS_MASK;
+		inode_set_flags(inode, flags, OVL_COPY_I_FLAGS_MASK);
+
+		/* Update ctime */
+		ovl_copyattr(inode);
+	}
+	ovl_drop_write(dentry);
+out:
+	return err;
+}
+
+/* Convert inode protection flags to fileattr flags */
+static void ovl_fileattr_prot_flags(struct inode *inode, struct fileattr *fa)
+{
+	BUILD_BUG_ON(OVL_PROT_FS_FLAGS_MASK & ~FS_COMMON_FL);
+	BUILD_BUG_ON(OVL_PROT_FSX_FLAGS_MASK & ~FS_XFLAG_COMMON);
+
+	if (inode->i_flags & S_APPEND) {
+		fa->flags |= FS_APPEND_FL;
+		fa->fsx_xflags |= FS_XFLAG_APPEND;
+	}
+	if (inode->i_flags & S_IMMUTABLE) {
+		fa->flags |= FS_IMMUTABLE_FL;
+		fa->fsx_xflags |= FS_XFLAG_IMMUTABLE;
+	}
+}
+
+int ovl_real_fileattr_get(const struct path *realpath, struct fileattr *fa)
+{
+	int err;
+
+	err = ovl_security_fileattr(realpath, fa, false);
+	if (err)
+		return err;
+
+	err = vfs_fileattr_get(realpath->dentry, fa);
+	if (err == -ENOIOCTLCMD)
+		err = -ENOTTY;
+	return err;
+}
+
+int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa)
+{
+	struct inode *inode = d_inode(dentry);
+	struct path realpath;
+	const struct cred *old_cred;
+	int err;
+
+	ovl_path_real(dentry, &realpath);
+
+	old_cred = ovl_override_creds(inode->i_sb);
+	err = ovl_real_fileattr_get(&realpath, fa);
+	ovl_fileattr_prot_flags(inode, fa);
+	revert_creds(old_cred);
+
+	return err;
+}
+
+static const struct inode_operations ovl_file_inode_operations = {
+	.setattr	= ovl_setattr,
+	.permission	= ovl_permission,
+	.getattr	= ovl_getattr,
+	.listxattr	= ovl_listxattr,
+	.get_acl	= ovl_get_acl,
+	.update_time	= ovl_update_time,
+	.fiemap		= ovl_fiemap,
+	.fileattr_get	= ovl_fileattr_get,
+	.fileattr_set	= ovl_fileattr_set,
+};
+
+static const struct inode_operations ovl_symlink_inode_operations = {
+	.setattr	= ovl_setattr,
+	.get_link	= ovl_get_link,
+	.getattr	= ovl_getattr,
+	.listxattr	= ovl_listxattr,
+	.update_time	= ovl_update_time,
+};
+
+static const struct inode_operations ovl_special_inode_operations = {
+	.setattr	= ovl_setattr,
+	.permission	= ovl_permission,
+	.getattr	= ovl_getattr,
+	.listxattr	= ovl_listxattr,
+	.get_acl	= ovl_get_acl,
+	.update_time	= ovl_update_time,
+};
+
+static const struct address_space_operations ovl_aops = {
+	/* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
+	.direct_IO		= noop_direct_IO,
+};
+
+/*
+ * It is possible to stack overlayfs instance on top of another
+ * overlayfs instance as lower layer. We need to annotate the
+ * stackable i_mutex locks according to stack level of the super
+ * block instance. An overlayfs instance can never be in stack
+ * depth 0 (there is always a real fs below it).  An overlayfs
+ * inode lock will use the lockdep annotation ovl_i_mutex_key[depth].
+ *
+ * For example, here is a snip from /proc/lockdep_chains after
+ * dir_iterate of nested overlayfs:
+ *
+ * [...] &ovl_i_mutex_dir_key[depth]   (stack_depth=2)
+ * [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1)
+ * [...] &type->i_mutex_dir_key        (stack_depth=0)
+ *
+ * Locking order w.r.t ovl_want_write() is important for nested overlayfs.
+ *
+ * This chain is valid:
+ * - inode->i_rwsem			(inode_lock[2])
+ * - upper_mnt->mnt_sb->s_writers	(ovl_want_write[0])
+ * - OVL_I(inode)->lock			(ovl_inode_lock[2])
+ * - OVL_I(lowerinode)->lock		(ovl_inode_lock[1])
+ *
+ * And this chain is valid:
+ * - inode->i_rwsem			(inode_lock[2])
+ * - OVL_I(inode)->lock			(ovl_inode_lock[2])
+ * - lowerinode->i_rwsem		(inode_lock[1])
+ * - OVL_I(lowerinode)->lock		(ovl_inode_lock[1])
+ *
+ * But lowerinode->i_rwsem SHOULD NOT be acquired while ovl_want_write() is
+ * held, because it is in reverse order of the non-nested case using the same
+ * upper fs:
+ * - inode->i_rwsem			(inode_lock[1])
+ * - upper_mnt->mnt_sb->s_writers	(ovl_want_write[0])
+ * - OVL_I(inode)->lock			(ovl_inode_lock[1])
+ */
+#define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH
+
+static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode)
+{
+#ifdef CONFIG_LOCKDEP
+	static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING];
+	static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING];
+	static struct lock_class_key ovl_i_lock_key[OVL_MAX_NESTING];
+
+	int depth = inode->i_sb->s_stack_depth - 1;
+
+	if (WARN_ON_ONCE(depth < 0 || depth >= OVL_MAX_NESTING))
+		depth = 0;
+
+	if (S_ISDIR(inode->i_mode))
+		lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]);
+	else
+		lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]);
+
+	lockdep_set_class(&OVL_I(inode)->lock, &ovl_i_lock_key[depth]);
+#endif
+}
+
+static void ovl_next_ino(struct inode *inode)
+{
+	struct ovl_fs *ofs = inode->i_sb->s_fs_info;
+
+	inode->i_ino = atomic_long_inc_return(&ofs->last_ino);
+	if (unlikely(!inode->i_ino))
+		inode->i_ino = atomic_long_inc_return(&ofs->last_ino);
+}
+
+static void ovl_map_ino(struct inode *inode, unsigned long ino, int fsid)
+{
+	int xinobits = ovl_xino_bits(inode->i_sb);
+	unsigned int xinoshift = 64 - xinobits;
+
+	/*
+	 * When d_ino is consistent with st_ino (samefs or i_ino has enough
+	 * bits to encode layer), set the same value used for st_ino to i_ino,
+	 * so inode number exposed via /proc/locks and a like will be
+	 * consistent with d_ino and st_ino values. An i_ino value inconsistent
+	 * with d_ino also causes nfsd readdirplus to fail.
+	 */
+	inode->i_ino = ino;
+	if (ovl_same_fs(inode->i_sb)) {
+		return;
+	} else if (xinobits && likely(!(ino >> xinoshift))) {
+		inode->i_ino |= (unsigned long)fsid << (xinoshift + 1);
+		return;
+	}
+
+	/*
+	 * For directory inodes on non-samefs with xino disabled or xino
+	 * overflow, we allocate a non-persistent inode number, to be used for
+	 * resolving st_ino collisions in ovl_map_dev_ino().
+	 *
+	 * To avoid ino collision with legitimate xino values from upper
+	 * layer (fsid 0), use the lowest xinobit to map the non
+	 * persistent inode numbers to the unified st_ino address space.
+	 */
+	if (S_ISDIR(inode->i_mode)) {
+		ovl_next_ino(inode);
+		if (xinobits) {
+			inode->i_ino &= ~0UL >> xinobits;
+			inode->i_ino |= 1UL << xinoshift;
+		}
+	}
+}
+
+void ovl_inode_init(struct inode *inode, struct ovl_inode_params *oip,
+		    unsigned long ino, int fsid)
+{
+	struct inode *realinode;
+	struct ovl_inode *oi = OVL_I(inode);
+
+	if (oip->upperdentry)
+		oi->__upperdentry = oip->upperdentry;
+	if (oip->lowerpath && oip->lowerpath->dentry) {
+		oi->lowerpath.dentry = dget(oip->lowerpath->dentry);
+		oi->lowerpath.layer = oip->lowerpath->layer;
+	}
+	if (oip->lowerdata)
+		oi->lowerdata = igrab(d_inode(oip->lowerdata));
+
+	realinode = ovl_inode_real(inode);
+	ovl_copyattr(inode);
+	ovl_copyflags(realinode, inode);
+	ovl_map_ino(inode, ino, fsid);
+}
+
+static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev)
+{
+	inode->i_mode = mode;
+	inode->i_flags |= S_NOCMTIME;
+#ifdef CONFIG_FS_POSIX_ACL
+	inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE;
+#endif
+
+	ovl_lockdep_annotate_inode_mutex_key(inode);
+
+	switch (mode & S_IFMT) {
+	case S_IFREG:
+		inode->i_op = &ovl_file_inode_operations;
+		inode->i_fop = &ovl_file_operations;
+		inode->i_mapping->a_ops = &ovl_aops;
+		break;
+
+	case S_IFDIR:
+		inode->i_op = &ovl_dir_inode_operations;
+		inode->i_fop = &ovl_dir_operations;
+		break;
+
+	case S_IFLNK:
+		inode->i_op = &ovl_symlink_inode_operations;
+		break;
+
+	default:
+		inode->i_op = &ovl_special_inode_operations;
+		init_special_inode(inode, mode, rdev);
+		break;
+	}
+}
+
+/*
+ * With inodes index enabled, an overlay inode nlink counts the union of upper
+ * hardlinks and non-covered lower hardlinks. During the lifetime of a non-pure
+ * upper inode, the following nlink modifying operations can happen:
+ *
+ * 1. Lower hardlink copy up
+ * 2. Upper hardlink created, unlinked or renamed over
+ * 3. Lower hardlink whiteout or renamed over
+ *
+ * For the first, copy up case, the union nlink does not change, whether the
+ * operation succeeds or fails, but the upper inode nlink may change.
+ * Therefore, before copy up, we store the union nlink value relative to the
+ * lower inode nlink in the index inode xattr .overlay.nlink.
+ *
+ * For the second, upper hardlink case, the union nlink should be incremented
+ * or decremented IFF the operation succeeds, aligned with nlink change of the
+ * upper inode. Therefore, before link/unlink/rename, we store the union nlink
+ * value relative to the upper inode nlink in the index inode.
+ *
+ * For the last, lower cover up case, we simplify things by preceding the
+ * whiteout or cover up with copy up. This makes sure that there is an index
+ * upper inode where the nlink xattr can be stored before the copied up upper
+ * entry is unlink.
+ */
+#define OVL_NLINK_ADD_UPPER	(1 << 0)
+
+/*
+ * On-disk format for indexed nlink:
+ *
+ * nlink relative to the upper inode - "U[+-]NUM"
+ * nlink relative to the lower inode - "L[+-]NUM"
+ */
+
+static int ovl_set_nlink_common(struct dentry *dentry,
+				struct dentry *realdentry, const char *format)
+{
+	struct inode *inode = d_inode(dentry);
+	struct inode *realinode = d_inode(realdentry);
+	char buf[13];
+	int len;
+
+	len = snprintf(buf, sizeof(buf), format,
+		       (int) (inode->i_nlink - realinode->i_nlink));
+
+	if (WARN_ON(len >= sizeof(buf)))
+		return -EIO;
+
+	return ovl_setxattr(OVL_FS(inode->i_sb), ovl_dentry_upper(dentry),
+			    OVL_XATTR_NLINK, buf, len);
+}
+
+int ovl_set_nlink_upper(struct dentry *dentry)
+{
+	return ovl_set_nlink_common(dentry, ovl_dentry_upper(dentry), "U%+i");
+}
+
+int ovl_set_nlink_lower(struct dentry *dentry)
+{
+	return ovl_set_nlink_common(dentry, ovl_dentry_lower(dentry), "L%+i");
+}
+
+unsigned int ovl_get_nlink(struct ovl_fs *ofs, struct dentry *lowerdentry,
+			   struct dentry *upperdentry,
+			   unsigned int fallback)
+{
+	int nlink_diff;
+	int nlink;
+	char buf[13];
+	int err;
+
+	if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1)
+		return fallback;
+
+	err = ovl_getxattr_upper(ofs, upperdentry, OVL_XATTR_NLINK,
+				 &buf, sizeof(buf) - 1);
+	if (err < 0)
+		goto fail;
+
+	buf[err] = '\0';
+	if ((buf[0] != 'L' && buf[0] != 'U') ||
+	    (buf[1] != '+' && buf[1] != '-'))
+		goto fail;
+
+	err = kstrtoint(buf + 1, 10, &nlink_diff);
+	if (err < 0)
+		goto fail;
+
+	nlink = d_inode(buf[0] == 'L' ? lowerdentry : upperdentry)->i_nlink;
+	nlink += nlink_diff;
+
+	if (nlink <= 0)
+		goto fail;
+
+	return nlink;
+
+fail:
+	pr_warn_ratelimited("failed to get index nlink (%pd2, err=%i)\n",
+			    upperdentry, err);
+	return fallback;
+}
+
+struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev)
+{
+	struct inode *inode;
+
+	inode = new_inode(sb);
+	if (inode)
+		ovl_fill_inode(inode, mode, rdev);
+
+	return inode;
+}
+
+static int ovl_inode_test(struct inode *inode, void *data)
+{
+	return inode->i_private == data;
+}
+
+static int ovl_inode_set(struct inode *inode, void *data)
+{
+	inode->i_private = data;
+	return 0;
+}
+
+static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry,
+			     struct dentry *upperdentry, bool strict)
+{
+	/*
+	 * For directories, @strict verify from lookup path performs consistency
+	 * checks, so NULL lower/upper in dentry must match NULL lower/upper in
+	 * inode. Non @strict verify from NFS handle decode path passes NULL for
+	 * 'unknown' lower/upper.
+	 */
+	if (S_ISDIR(inode->i_mode) && strict) {
+		/* Real lower dir moved to upper layer under us? */
+		if (!lowerdentry && ovl_inode_lower(inode))
+			return false;
+
+		/* Lookup of an uncovered redirect origin? */
+		if (!upperdentry && ovl_inode_upper(inode))
+			return false;
+	}
+
+	/*
+	 * Allow non-NULL lower inode in ovl_inode even if lowerdentry is NULL.
+	 * This happens when finding a copied up overlay inode for a renamed
+	 * or hardlinked overlay dentry and lower dentry cannot be followed
+	 * by origin because lower fs does not support file handles.
+	 */
+	if (lowerdentry && ovl_inode_lower(inode) != d_inode(lowerdentry))
+		return false;
+
+	/*
+	 * Allow non-NULL __upperdentry in inode even if upperdentry is NULL.
+	 * This happens when finding a lower alias for a copied up hard link.
+	 */
+	if (upperdentry && ovl_inode_upper(inode) != d_inode(upperdentry))
+		return false;
+
+	return true;
+}
+
+struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
+			       bool is_upper)
+{
+	struct inode *inode, *key = d_inode(real);
+
+	inode = ilookup5(sb, (unsigned long) key, ovl_inode_test, key);
+	if (!inode)
+		return NULL;
+
+	if (!ovl_verify_inode(inode, is_upper ? NULL : real,
+			      is_upper ? real : NULL, false)) {
+		iput(inode);
+		return ERR_PTR(-ESTALE);
+	}
+
+	return inode;
+}
+
+bool ovl_lookup_trap_inode(struct super_block *sb, struct dentry *dir)
+{
+	struct inode *key = d_inode(dir);
+	struct inode *trap;
+	bool res;
+
+	trap = ilookup5(sb, (unsigned long) key, ovl_inode_test, key);
+	if (!trap)
+		return false;
+
+	res = IS_DEADDIR(trap) && !ovl_inode_upper(trap) &&
+				  !ovl_inode_lower(trap);
+
+	iput(trap);
+	return res;
+}
+
+/*
+ * Create an inode cache entry for layer root dir, that will intentionally
+ * fail ovl_verify_inode(), so any lookup that will find some layer root
+ * will fail.
+ */
+struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir)
+{
+	struct inode *key = d_inode(dir);
+	struct inode *trap;
+
+	if (!d_is_dir(dir))
+		return ERR_PTR(-ENOTDIR);
+
+	trap = iget5_locked(sb, (unsigned long) key, ovl_inode_test,
+			    ovl_inode_set, key);
+	if (!trap)
+		return ERR_PTR(-ENOMEM);
+
+	if (!(trap->i_state & I_NEW)) {
+		/* Conflicting layer roots? */
+		iput(trap);
+		return ERR_PTR(-ELOOP);
+	}
+
+	trap->i_mode = S_IFDIR;
+	trap->i_flags = S_DEAD;
+	unlock_new_inode(trap);
+
+	return trap;
+}
+
+/*
+ * Does overlay inode need to be hashed by lower inode?
+ */
+static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper,
+			     struct dentry *lower, bool index)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	/* No, if pure upper */
+	if (!lower)
+		return false;
+
+	/* Yes, if already indexed */
+	if (index)
+		return true;
+
+	/* Yes, if won't be copied up */
+	if (!ovl_upper_mnt(ofs))
+		return true;
+
+	/* No, if lower hardlink is or will be broken on copy up */
+	if ((upper || !ovl_indexdir(sb)) &&
+	    !d_is_dir(lower) && d_inode(lower)->i_nlink > 1)
+		return false;
+
+	/* No, if non-indexed upper with NFS export */
+	if (sb->s_export_op && upper)
+		return false;
+
+	/* Otherwise, hash by lower inode for fsnotify */
+	return true;
+}
+
+static struct inode *ovl_iget5(struct super_block *sb, struct inode *newinode,
+			       struct inode *key)
+{
+	return newinode ? inode_insert5(newinode, (unsigned long) key,
+					 ovl_inode_test, ovl_inode_set, key) :
+			  iget5_locked(sb, (unsigned long) key,
+				       ovl_inode_test, ovl_inode_set, key);
+}
+
+struct inode *ovl_get_inode(struct super_block *sb,
+			    struct ovl_inode_params *oip)
+{
+	struct ovl_fs *ofs = OVL_FS(sb);
+	struct dentry *upperdentry = oip->upperdentry;
+	struct ovl_path *lowerpath = oip->lowerpath;
+	struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL;
+	struct inode *inode;
+	struct dentry *lowerdentry = lowerpath ? lowerpath->dentry : NULL;
+	struct path realpath = {
+		.dentry = upperdentry ?: lowerdentry,
+		.mnt = upperdentry ? ovl_upper_mnt(ofs) : lowerpath->layer->mnt,
+	};
+	bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry,
+					oip->index);
+	int fsid = bylower ? lowerpath->layer->fsid : 0;
+	bool is_dir;
+	unsigned long ino = 0;
+	int err = oip->newinode ? -EEXIST : -ENOMEM;
+
+	if (!realinode)
+		realinode = d_inode(lowerdentry);
+
+	/*
+	 * Copy up origin (lower) may exist for non-indexed upper, but we must
+	 * not use lower as hash key if this is a broken hardlink.
+	 */
+	is_dir = S_ISDIR(realinode->i_mode);
+	if (upperdentry || bylower) {
+		struct inode *key = d_inode(bylower ? lowerdentry :
+						      upperdentry);
+		unsigned int nlink = is_dir ? 1 : realinode->i_nlink;
+
+		inode = ovl_iget5(sb, oip->newinode, key);
+		if (!inode)
+			goto out_err;
+		if (!(inode->i_state & I_NEW)) {
+			/*
+			 * Verify that the underlying files stored in the inode
+			 * match those in the dentry.
+			 */
+			if (!ovl_verify_inode(inode, lowerdentry, upperdentry,
+					      true)) {
+				iput(inode);
+				err = -ESTALE;
+				goto out_err;
+			}
+
+			dput(upperdentry);
+			kfree(oip->redirect);
+			goto out;
+		}
+
+		/* Recalculate nlink for non-dir due to indexing */
+		if (!is_dir)
+			nlink = ovl_get_nlink(ofs, lowerdentry, upperdentry,
+					      nlink);
+		set_nlink(inode, nlink);
+		ino = key->i_ino;
+	} else {
+		/* Lower hardlink that will be broken on copy up */
+		inode = new_inode(sb);
+		if (!inode) {
+			err = -ENOMEM;
+			goto out_err;
+		}
+		ino = realinode->i_ino;
+		fsid = lowerpath->layer->fsid;
+	}
+	ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev);
+	ovl_inode_init(inode, oip, ino, fsid);
+
+	if (upperdentry && ovl_is_impuredir(sb, upperdentry))
+		ovl_set_flag(OVL_IMPURE, inode);
+
+	if (oip->index)
+		ovl_set_flag(OVL_INDEX, inode);
+
+	OVL_I(inode)->redirect = oip->redirect;
+
+	if (bylower)
+		ovl_set_flag(OVL_CONST_INO, inode);
+
+	/* Check for non-merge dir that may have whiteouts */
+	if (is_dir) {
+		if (((upperdentry && lowerdentry) || oip->numlower > 1) ||
+		    ovl_path_check_origin_xattr(ofs, &realpath)) {
+			ovl_set_flag(OVL_WHITEOUTS, inode);
+		}
+	}
+
+	/* Check for immutable/append-only inode flags in xattr */
+	if (upperdentry)
+		ovl_check_protattr(inode, upperdentry);
+
+	if (inode->i_state & I_NEW)
+		unlock_new_inode(inode);
+out:
+	return inode;
+
+out_err:
+	pr_warn_ratelimited("failed to get inode (%i)\n", err);
+	inode = ERR_PTR(err);
+	goto out;
+}
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
new file mode 100644
index 000000000..655d08d6f
--- /dev/null
+++ b/fs/overlayfs/namei.c
@@ -0,0 +1,1203 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2011 Novell Inc.
+ * Copyright (C) 2016 Red Hat, Inc.
+ */
+
+#include <linux/fs.h>
+#include <linux/cred.h>
+#include <linux/ctype.h>
+#include <linux/namei.h>
+#include <linux/xattr.h>
+#include <linux/ratelimit.h>
+#include <linux/mount.h>
+#include <linux/exportfs.h>
+#include "overlayfs.h"
+
+struct ovl_lookup_data {
+	struct super_block *sb;
+	struct vfsmount *mnt;
+	struct qstr name;
+	bool is_dir;
+	bool opaque;
+	bool stop;
+	bool last;
+	char *redirect;
+	bool metacopy;
+};
+
+static int ovl_check_redirect(const struct path *path, struct ovl_lookup_data *d,
+			      size_t prelen, const char *post)
+{
+	int res;
+	char *buf;
+	struct ovl_fs *ofs = OVL_FS(d->sb);
+
+	buf = ovl_get_redirect_xattr(ofs, path, prelen + strlen(post));
+	if (IS_ERR_OR_NULL(buf))
+		return PTR_ERR(buf);
+
+	if (buf[0] == '/') {
+		/*
+		 * One of the ancestor path elements in an absolute path
+		 * lookup in ovl_lookup_layer() could have been opaque and
+		 * that will stop further lookup in lower layers (d->stop=true)
+		 * But we have found an absolute redirect in descendant path
+		 * element and that should force continue lookup in lower
+		 * layers (reset d->stop).
+		 */
+		d->stop = false;
+	} else {
+		res = strlen(buf) + 1;
+		memmove(buf + prelen, buf, res);
+		memcpy(buf, d->name.name, prelen);
+	}
+
+	strcat(buf, post);
+	kfree(d->redirect);
+	d->redirect = buf;
+	d->name.name = d->redirect;
+	d->name.len = strlen(d->redirect);
+
+	return 0;
+}
+
+static int ovl_acceptable(void *ctx, struct dentry *dentry)
+{
+	/*
+	 * A non-dir origin may be disconnected, which is fine, because
+	 * we only need it for its unique inode number.
+	 */
+	if (!d_is_dir(dentry))
+		return 1;
+
+	/* Don't decode a deleted empty directory */
+	if (d_unhashed(dentry))
+		return 0;
+
+	/* Check if directory belongs to the layer we are decoding from */
+	return is_subdir(dentry, ((struct vfsmount *)ctx)->mnt_root);
+}
+
+/*
+ * Check validity of an overlay file handle buffer.
+ *
+ * Return 0 for a valid file handle.
+ * Return -ENODATA for "origin unknown".
+ * Return <0 for an invalid file handle.
+ */
+int ovl_check_fb_len(struct ovl_fb *fb, int fb_len)
+{
+	if (fb_len < sizeof(struct ovl_fb) || fb_len < fb->len)
+		return -EINVAL;
+
+	if (fb->magic != OVL_FH_MAGIC)
+		return -EINVAL;
+
+	/* Treat larger version and unknown flags as "origin unknown" */
+	if (fb->version > OVL_FH_VERSION || fb->flags & ~OVL_FH_FLAG_ALL)
+		return -ENODATA;
+
+	/* Treat endianness mismatch as "origin unknown" */
+	if (!(fb->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
+	    (fb->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
+		return -ENODATA;
+
+	return 0;
+}
+
+static struct ovl_fh *ovl_get_fh(struct ovl_fs *ofs, struct dentry *upperdentry,
+				 enum ovl_xattr ox)
+{
+	int res, err;
+	struct ovl_fh *fh = NULL;
+
+	res = ovl_getxattr_upper(ofs, upperdentry, ox, NULL, 0);
+	if (res < 0) {
+		if (res == -ENODATA || res == -EOPNOTSUPP)
+			return NULL;
+		goto fail;
+	}
+	/* Zero size value means "copied up but origin unknown" */
+	if (res == 0)
+		return NULL;
+
+	fh = kzalloc(res + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
+	if (!fh)
+		return ERR_PTR(-ENOMEM);
+
+	res = ovl_getxattr_upper(ofs, upperdentry, ox, fh->buf, res);
+	if (res < 0)
+		goto fail;
+
+	err = ovl_check_fb_len(&fh->fb, res);
+	if (err < 0) {
+		if (err == -ENODATA)
+			goto out;
+		goto invalid;
+	}
+
+	return fh;
+
+out:
+	kfree(fh);
+	return NULL;
+
+fail:
+	pr_warn_ratelimited("failed to get origin (%i)\n", res);
+	goto out;
+invalid:
+	pr_warn_ratelimited("invalid origin (%*phN)\n", res, fh);
+	goto out;
+}
+
+struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
+				  struct vfsmount *mnt, bool connected)
+{
+	struct dentry *real;
+	int bytes;
+
+	if (!capable(CAP_DAC_READ_SEARCH))
+		return NULL;
+
+	/*
+	 * Make sure that the stored uuid matches the uuid of the lower
+	 * layer where file handle will be decoded.
+	 * In case of uuid=off option just make sure that stored uuid is null.
+	 */
+	if (ofs->config.uuid ? !uuid_equal(&fh->fb.uuid, &mnt->mnt_sb->s_uuid) :
+			      !uuid_is_null(&fh->fb.uuid))
+		return NULL;
+
+	bytes = (fh->fb.len - offsetof(struct ovl_fb, fid));
+	real = exportfs_decode_fh(mnt, (struct fid *)fh->fb.fid,
+				  bytes >> 2, (int)fh->fb.type,
+				  connected ? ovl_acceptable : NULL, mnt);
+	if (IS_ERR(real)) {
+		/*
+		 * Treat stale file handle to lower file as "origin unknown".
+		 * upper file handle could become stale when upper file is
+		 * unlinked and this information is needed to handle stale
+		 * index entries correctly.
+		 */
+		if (real == ERR_PTR(-ESTALE) &&
+		    !(fh->fb.flags & OVL_FH_FLAG_PATH_UPPER))
+			real = NULL;
+		return real;
+	}
+
+	if (ovl_dentry_weird(real)) {
+		dput(real);
+		return NULL;
+	}
+
+	return real;
+}
+
+static bool ovl_is_opaquedir(struct ovl_fs *ofs, const struct path *path)
+{
+	return ovl_path_check_dir_xattr(ofs, path, OVL_XATTR_OPAQUE);
+}
+
+static struct dentry *ovl_lookup_positive_unlocked(struct ovl_lookup_data *d,
+						   const char *name,
+						   struct dentry *base, int len,
+						   bool drop_negative)
+{
+	struct dentry *ret = lookup_one_unlocked(mnt_user_ns(d->mnt), name, base, len);
+
+	if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
+		if (drop_negative && ret->d_lockref.count == 1) {
+			spin_lock(&ret->d_lock);
+			/* Recheck condition under lock */
+			if (d_is_negative(ret) && ret->d_lockref.count == 1)
+				__d_drop(ret);
+			spin_unlock(&ret->d_lock);
+		}
+		dput(ret);
+		ret = ERR_PTR(-ENOENT);
+	}
+	return ret;
+}
+
+static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
+			     const char *name, unsigned int namelen,
+			     size_t prelen, const char *post,
+			     struct dentry **ret, bool drop_negative)
+{
+	struct dentry *this;
+	struct path path;
+	int err;
+	bool last_element = !post[0];
+
+	this = ovl_lookup_positive_unlocked(d, name, base, namelen, drop_negative);
+	if (IS_ERR(this)) {
+		err = PTR_ERR(this);
+		this = NULL;
+		if (err == -ENOENT || err == -ENAMETOOLONG)
+			goto out;
+		goto out_err;
+	}
+
+	if (ovl_dentry_weird(this)) {
+		/* Don't support traversing automounts and other weirdness */
+		err = -EREMOTE;
+		goto out_err;
+	}
+	if (ovl_is_whiteout(this)) {
+		d->stop = d->opaque = true;
+		goto put_and_out;
+	}
+	/*
+	 * This dentry should be a regular file if previous layer lookup
+	 * found a metacopy dentry.
+	 */
+	if (last_element && d->metacopy && !d_is_reg(this)) {
+		d->stop = true;
+		goto put_and_out;
+	}
+
+	path.dentry = this;
+	path.mnt = d->mnt;
+	if (!d_can_lookup(this)) {
+		if (d->is_dir || !last_element) {
+			d->stop = true;
+			goto put_and_out;
+		}
+		err = ovl_check_metacopy_xattr(OVL_FS(d->sb), &path);
+		if (err < 0)
+			goto out_err;
+
+		d->metacopy = err;
+		d->stop = !d->metacopy;
+		if (!d->metacopy || d->last)
+			goto out;
+	} else {
+		if (ovl_lookup_trap_inode(d->sb, this)) {
+			/* Caught in a trap of overlapping layers */
+			err = -ELOOP;
+			goto out_err;
+		}
+
+		if (last_element)
+			d->is_dir = true;
+		if (d->last)
+			goto out;
+
+		if (ovl_is_opaquedir(OVL_FS(d->sb), &path)) {
+			d->stop = true;
+			if (last_element)
+				d->opaque = true;
+			goto out;
+		}
+	}
+	err = ovl_check_redirect(&path, d, prelen, post);
+	if (err)
+		goto out_err;
+out:
+	*ret = this;
+	return 0;
+
+put_and_out:
+	dput(this);
+	this = NULL;
+	goto out;
+
+out_err:
+	dput(this);
+	return err;
+}
+
+static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
+			    struct dentry **ret, bool drop_negative)
+{
+	/* Counting down from the end, since the prefix can change */
+	size_t rem = d->name.len - 1;
+	struct dentry *dentry = NULL;
+	int err;
+
+	if (d->name.name[0] != '/')
+		return ovl_lookup_single(base, d, d->name.name, d->name.len,
+					 0, "", ret, drop_negative);
+
+	while (!IS_ERR_OR_NULL(base) && d_can_lookup(base)) {
+		const char *s = d->name.name + d->name.len - rem;
+		const char *next = strchrnul(s, '/');
+		size_t thislen = next - s;
+		bool end = !next[0];
+
+		/* Verify we did not go off the rails */
+		if (WARN_ON(s[-1] != '/'))
+			return -EIO;
+
+		err = ovl_lookup_single(base, d, s, thislen,
+					d->name.len - rem, next, &base,
+					drop_negative);
+		dput(dentry);
+		if (err)
+			return err;
+		dentry = base;
+		if (end)
+			break;
+
+		rem -= thislen + 1;
+
+		if (WARN_ON(rem >= d->name.len))
+			return -EIO;
+	}
+	*ret = dentry;
+	return 0;
+}
+
+
+int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
+			struct dentry *upperdentry, struct ovl_path **stackp)
+{
+	struct dentry *origin = NULL;
+	int i;
+
+	for (i = 1; i < ofs->numlayer; i++) {
+		/*
+		 * If lower fs uuid is not unique among lower fs we cannot match
+		 * fh->uuid to layer.
+		 */
+		if (ofs->layers[i].fsid &&
+		    ofs->layers[i].fs->bad_uuid)
+			continue;
+
+		origin = ovl_decode_real_fh(ofs, fh, ofs->layers[i].mnt,
+					    connected);
+		if (origin)
+			break;
+	}
+
+	if (!origin)
+		return -ESTALE;
+	else if (IS_ERR(origin))
+		return PTR_ERR(origin);
+
+	if (upperdentry && !ovl_is_whiteout(upperdentry) &&
+	    inode_wrong_type(d_inode(upperdentry), d_inode(origin)->i_mode))
+		goto invalid;
+
+	if (!*stackp)
+		*stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL);
+	if (!*stackp) {
+		dput(origin);
+		return -ENOMEM;
+	}
+	**stackp = (struct ovl_path){
+		.dentry = origin,
+		.layer = &ofs->layers[i]
+	};
+
+	return 0;
+
+invalid:
+	pr_warn_ratelimited("invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
+			    upperdentry, d_inode(upperdentry)->i_mode & S_IFMT,
+			    d_inode(origin)->i_mode & S_IFMT);
+	dput(origin);
+	return -ESTALE;
+}
+
+static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
+			    struct ovl_path **stackp)
+{
+	struct ovl_fh *fh = ovl_get_fh(ofs, upperdentry, OVL_XATTR_ORIGIN);
+	int err;
+
+	if (IS_ERR_OR_NULL(fh))
+		return PTR_ERR(fh);
+
+	err = ovl_check_origin_fh(ofs, fh, false, upperdentry, stackp);
+	kfree(fh);
+
+	if (err) {
+		if (err == -ESTALE)
+			return 0;
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Verify that @fh matches the file handle stored in xattr @name.
+ * Return 0 on match, -ESTALE on mismatch, < 0 on error.
+ */
+static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry,
+			 enum ovl_xattr ox, const struct ovl_fh *fh)
+{
+	struct ovl_fh *ofh = ovl_get_fh(ofs, dentry, ox);
+	int err = 0;
+
+	if (!ofh)
+		return -ENODATA;
+
+	if (IS_ERR(ofh))
+		return PTR_ERR(ofh);
+
+	if (fh->fb.len != ofh->fb.len || memcmp(&fh->fb, &ofh->fb, fh->fb.len))
+		err = -ESTALE;
+
+	kfree(ofh);
+	return err;
+}
+
+/*
+ * Verify that @real dentry matches the file handle stored in xattr @name.
+ *
+ * If @set is true and there is no stored file handle, encode @real and store
+ * file handle in xattr @name.
+ *
+ * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error.
+ */
+int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
+		      enum ovl_xattr ox, struct dentry *real, bool is_upper,
+		      bool set)
+{
+	struct inode *inode;
+	struct ovl_fh *fh;
+	int err;
+
+	fh = ovl_encode_real_fh(ofs, real, is_upper);
+	err = PTR_ERR(fh);
+	if (IS_ERR(fh)) {
+		fh = NULL;
+		goto fail;
+	}
+
+	err = ovl_verify_fh(ofs, dentry, ox, fh);
+	if (set && err == -ENODATA)
+		err = ovl_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len);
+	if (err)
+		goto fail;
+
+out:
+	kfree(fh);
+	return err;
+
+fail:
+	inode = d_inode(real);
+	pr_warn_ratelimited("failed to verify %s (%pd2, ino=%lu, err=%i)\n",
+			    is_upper ? "upper" : "origin", real,
+			    inode ? inode->i_ino : 0, err);
+	goto out;
+}
+
+/* Get upper dentry from index */
+struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
+{
+	struct ovl_fh *fh;
+	struct dentry *upper;
+
+	if (!d_is_dir(index))
+		return dget(index);
+
+	fh = ovl_get_fh(ofs, index, OVL_XATTR_UPPER);
+	if (IS_ERR_OR_NULL(fh))
+		return ERR_CAST(fh);
+
+	upper = ovl_decode_real_fh(ofs, fh, ovl_upper_mnt(ofs), true);
+	kfree(fh);
+
+	if (IS_ERR_OR_NULL(upper))
+		return upper ?: ERR_PTR(-ESTALE);
+
+	if (!d_is_dir(upper)) {
+		pr_warn_ratelimited("invalid index upper (%pd2, upper=%pd2).\n",
+				    index, upper);
+		dput(upper);
+		return ERR_PTR(-EIO);
+	}
+
+	return upper;
+}
+
+/*
+ * Verify that an index entry name matches the origin file handle stored in
+ * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
+ * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error.
+ */
+int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
+{
+	struct ovl_fh *fh = NULL;
+	size_t len;
+	struct ovl_path origin = { };
+	struct ovl_path *stack = &origin;
+	struct dentry *upper = NULL;
+	int err;
+
+	if (!d_inode(index))
+		return 0;
+
+	err = -EINVAL;
+	if (index->d_name.len < sizeof(struct ovl_fb)*2)
+		goto fail;
+
+	err = -ENOMEM;
+	len = index->d_name.len / 2;
+	fh = kzalloc(len + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
+	if (!fh)
+		goto fail;
+
+	err = -EINVAL;
+	if (hex2bin(fh->buf, index->d_name.name, len))
+		goto fail;
+
+	err = ovl_check_fb_len(&fh->fb, len);
+	if (err)
+		goto fail;
+
+	/*
+	 * Whiteout index entries are used as an indication that an exported
+	 * overlay file handle should be treated as stale (i.e. after unlink
+	 * of the overlay inode). These entries contain no origin xattr.
+	 */
+	if (ovl_is_whiteout(index))
+		goto out;
+
+	/*
+	 * Verifying directory index entries are not stale is expensive, so
+	 * only verify stale dir index if NFS export is enabled.
+	 */
+	if (d_is_dir(index) && !ofs->config.nfs_export)
+		goto out;
+
+	/*
+	 * Directory index entries should have 'upper' xattr pointing to the
+	 * real upper dir. Non-dir index entries are hardlinks to the upper
+	 * real inode. For non-dir index, we can read the copy up origin xattr
+	 * directly from the index dentry, but for dir index we first need to
+	 * decode the upper directory.
+	 */
+	upper = ovl_index_upper(ofs, index);
+	if (IS_ERR_OR_NULL(upper)) {
+		err = PTR_ERR(upper);
+		/*
+		 * Directory index entries with no 'upper' xattr need to be
+		 * removed. When dir index entry has a stale 'upper' xattr,
+		 * we assume that upper dir was removed and we treat the dir
+		 * index as orphan entry that needs to be whited out.
+		 */
+		if (err == -ESTALE)
+			goto orphan;
+		else if (!err)
+			err = -ESTALE;
+		goto fail;
+	}
+
+	err = ovl_verify_fh(ofs, upper, OVL_XATTR_ORIGIN, fh);
+	dput(upper);
+	if (err)
+		goto fail;
+
+	/* Check if non-dir index is orphan and don't warn before cleaning it */
+	if (!d_is_dir(index) && d_inode(index)->i_nlink == 1) {
+		err = ovl_check_origin_fh(ofs, fh, false, index, &stack);
+		if (err)
+			goto fail;
+
+		if (ovl_get_nlink(ofs, origin.dentry, index, 0) == 0)
+			goto orphan;
+	}
+
+out:
+	dput(origin.dentry);
+	kfree(fh);
+	return err;
+
+fail:
+	pr_warn_ratelimited("failed to verify index (%pd2, ftype=%x, err=%i)\n",
+			    index, d_inode(index)->i_mode & S_IFMT, err);
+	goto out;
+
+orphan:
+	pr_warn_ratelimited("orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
+			    index, d_inode(index)->i_mode & S_IFMT,
+			    d_inode(index)->i_nlink);
+	err = -ENOENT;
+	goto out;
+}
+
+static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name)
+{
+	char *n, *s;
+
+	n = kcalloc(fh->fb.len, 2, GFP_KERNEL);
+	if (!n)
+		return -ENOMEM;
+
+	s  = bin2hex(n, fh->buf, fh->fb.len);
+	*name = (struct qstr) QSTR_INIT(n, s - n);
+
+	return 0;
+
+}
+
+/*
+ * Lookup in indexdir for the index entry of a lower real inode or a copy up
+ * origin inode. The index entry name is the hex representation of the lower
+ * inode file handle.
+ *
+ * If the index dentry in negative, then either no lower aliases have been
+ * copied up yet, or aliases have been copied up in older kernels and are
+ * not indexed.
+ *
+ * If the index dentry for a copy up origin inode is positive, but points
+ * to an inode different than the upper inode, then either the upper inode
+ * has been copied up and not indexed or it was indexed, but since then
+ * index dir was cleared. Either way, that index cannot be used to identify
+ * the overlay inode.
+ */
+int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin,
+		       struct qstr *name)
+{
+	struct ovl_fh *fh;
+	int err;
+
+	fh = ovl_encode_real_fh(ofs, origin, false);
+	if (IS_ERR(fh))
+		return PTR_ERR(fh);
+
+	err = ovl_get_index_name_fh(fh, name);
+
+	kfree(fh);
+	return err;
+}
+
+/* Lookup index by file handle for NFS export */
+struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
+{
+	struct dentry *index;
+	struct qstr name;
+	int err;
+
+	err = ovl_get_index_name_fh(fh, &name);
+	if (err)
+		return ERR_PTR(err);
+
+	index = lookup_positive_unlocked(name.name, ofs->indexdir, name.len);
+	kfree(name.name);
+	if (IS_ERR(index)) {
+		if (PTR_ERR(index) == -ENOENT)
+			index = NULL;
+		return index;
+	}
+
+	if (ovl_is_whiteout(index))
+		err = -ESTALE;
+	else if (ovl_dentry_weird(index))
+		err = -EIO;
+	else
+		return index;
+
+	dput(index);
+	return ERR_PTR(err);
+}
+
+struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
+				struct dentry *origin, bool verify)
+{
+	struct dentry *index;
+	struct inode *inode;
+	struct qstr name;
+	bool is_dir = d_is_dir(origin);
+	int err;
+
+	err = ovl_get_index_name(ofs, origin, &name);
+	if (err)
+		return ERR_PTR(err);
+
+	index = lookup_one_positive_unlocked(ovl_upper_mnt_userns(ofs), name.name,
+					     ofs->indexdir, name.len);
+	if (IS_ERR(index)) {
+		err = PTR_ERR(index);
+		if (err == -ENOENT) {
+			index = NULL;
+			goto out;
+		}
+		pr_warn_ratelimited("failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
+				    "overlayfs: mount with '-o index=off' to disable inodes index.\n",
+				    d_inode(origin)->i_ino, name.len, name.name,
+				    err);
+		goto out;
+	}
+
+	inode = d_inode(index);
+	if (ovl_is_whiteout(index) && !verify) {
+		/*
+		 * When index lookup is called with !verify for decoding an
+		 * overlay file handle, a whiteout index implies that decode
+		 * should treat file handle as stale and no need to print a
+		 * warning about it.
+		 */
+		dput(index);
+		index = ERR_PTR(-ESTALE);
+		goto out;
+	} else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) ||
+		   inode_wrong_type(inode, d_inode(origin)->i_mode)) {
+		/*
+		 * Index should always be of the same file type as origin
+		 * except for the case of a whiteout index. A whiteout
+		 * index should only exist if all lower aliases have been
+		 * unlinked, which means that finding a lower origin on lookup
+		 * whose index is a whiteout should be treated as an error.
+		 */
+		pr_warn_ratelimited("bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
+				    index, d_inode(index)->i_mode & S_IFMT,
+				    d_inode(origin)->i_mode & S_IFMT);
+		goto fail;
+	} else if (is_dir && verify) {
+		if (!upper) {
+			pr_warn_ratelimited("suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
+					    origin, index);
+			goto fail;
+		}
+
+		/* Verify that dir index 'upper' xattr points to upper dir */
+		err = ovl_verify_upper(ofs, index, upper, false);
+		if (err) {
+			if (err == -ESTALE) {
+				pr_warn_ratelimited("suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
+						    upper, origin, index);
+			}
+			goto fail;
+		}
+	} else if (upper && d_inode(upper) != inode) {
+		goto out_dput;
+	}
+out:
+	kfree(name.name);
+	return index;
+
+out_dput:
+	dput(index);
+	index = NULL;
+	goto out;
+
+fail:
+	dput(index);
+	index = ERR_PTR(-EIO);
+	goto out;
+}
+
+/*
+ * Returns next layer in stack starting from top.
+ * Returns -1 if this is the last layer.
+ */
+int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	BUG_ON(idx < 0);
+	if (idx == 0) {
+		ovl_path_upper(dentry, path);
+		if (path->dentry)
+			return oe->numlower ? 1 : -1;
+		idx++;
+	}
+	BUG_ON(idx > oe->numlower);
+	path->dentry = oe->lowerstack[idx - 1].dentry;
+	path->mnt = oe->lowerstack[idx - 1].layer->mnt;
+
+	return (idx < oe->numlower) ? idx + 1 : -1;
+}
+
+/* Fix missing 'origin' xattr */
+static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry,
+			  struct dentry *lower, struct dentry *upper)
+{
+	int err;
+
+	if (ovl_check_origin_xattr(ofs, upper))
+		return 0;
+
+	err = ovl_want_write(dentry);
+	if (err)
+		return err;
+
+	err = ovl_set_origin(ofs, lower, upper);
+	if (!err)
+		err = ovl_set_impure(dentry->d_parent, upper->d_parent);
+
+	ovl_drop_write(dentry);
+	return err;
+}
+
+struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
+			  unsigned int flags)
+{
+	struct ovl_entry *oe;
+	const struct cred *old_cred;
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+	struct ovl_entry *poe = dentry->d_parent->d_fsdata;
+	struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
+	struct ovl_path *stack = NULL, *origin_path = NULL;
+	struct dentry *upperdir, *upperdentry = NULL;
+	struct dentry *origin = NULL;
+	struct dentry *index = NULL;
+	unsigned int ctr = 0;
+	struct inode *inode = NULL;
+	bool upperopaque = false;
+	char *upperredirect = NULL;
+	struct dentry *this;
+	unsigned int i;
+	int err;
+	bool uppermetacopy = false;
+	struct ovl_lookup_data d = {
+		.sb = dentry->d_sb,
+		.name = dentry->d_name,
+		.is_dir = false,
+		.opaque = false,
+		.stop = false,
+		.last = ofs->config.redirect_follow ? false : !poe->numlower,
+		.redirect = NULL,
+		.metacopy = false,
+	};
+
+	if (dentry->d_name.len > ofs->namelen)
+		return ERR_PTR(-ENAMETOOLONG);
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	upperdir = ovl_dentry_upper(dentry->d_parent);
+	if (upperdir) {
+		d.mnt = ovl_upper_mnt(ofs);
+		err = ovl_lookup_layer(upperdir, &d, &upperdentry, true);
+		if (err)
+			goto out;
+
+		if (upperdentry && upperdentry->d_flags & DCACHE_OP_REAL) {
+			dput(upperdentry);
+			err = -EREMOTE;
+			goto out;
+		}
+		if (upperdentry && !d.is_dir) {
+			/*
+			 * Lookup copy up origin by decoding origin file handle.
+			 * We may get a disconnected dentry, which is fine,
+			 * because we only need to hold the origin inode in
+			 * cache and use its inode number.  We may even get a
+			 * connected dentry, that is not under any of the lower
+			 * layers root.  That is also fine for using it's inode
+			 * number - it's the same as if we held a reference
+			 * to a dentry in lower layer that was moved under us.
+			 */
+			err = ovl_check_origin(ofs, upperdentry, &origin_path);
+			if (err)
+				goto out_put_upper;
+
+			if (d.metacopy)
+				uppermetacopy = true;
+		}
+
+		if (d.redirect) {
+			err = -ENOMEM;
+			upperredirect = kstrdup(d.redirect, GFP_KERNEL);
+			if (!upperredirect)
+				goto out_put_upper;
+			if (d.redirect[0] == '/')
+				poe = roe;
+		}
+		upperopaque = d.opaque;
+	}
+
+	if (!d.stop && poe->numlower) {
+		err = -ENOMEM;
+		stack = kcalloc(ofs->numlayer - 1, sizeof(struct ovl_path),
+				GFP_KERNEL);
+		if (!stack)
+			goto out_put_upper;
+	}
+
+	for (i = 0; !d.stop && i < poe->numlower; i++) {
+		struct ovl_path lower = poe->lowerstack[i];
+
+		if (!ofs->config.redirect_follow)
+			d.last = i == poe->numlower - 1;
+		else
+			d.last = lower.layer->idx == roe->numlower;
+
+		d.mnt = lower.layer->mnt;
+		err = ovl_lookup_layer(lower.dentry, &d, &this, false);
+		if (err)
+			goto out_put;
+
+		if (!this)
+			continue;
+
+		if ((uppermetacopy || d.metacopy) && !ofs->config.metacopy) {
+			dput(this);
+			err = -EPERM;
+			pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n", dentry);
+			goto out_put;
+		}
+
+		/*
+		 * If no origin fh is stored in upper of a merge dir, store fh
+		 * of lower dir and set upper parent "impure".
+		 */
+		if (upperdentry && !ctr && !ofs->noxattr && d.is_dir) {
+			err = ovl_fix_origin(ofs, dentry, this, upperdentry);
+			if (err) {
+				dput(this);
+				goto out_put;
+			}
+		}
+
+		/*
+		 * When "verify_lower" feature is enabled, do not merge with a
+		 * lower dir that does not match a stored origin xattr. In any
+		 * case, only verified origin is used for index lookup.
+		 *
+		 * For non-dir dentry, if index=on, then ensure origin
+		 * matches the dentry found using path based lookup,
+		 * otherwise error out.
+		 */
+		if (upperdentry && !ctr &&
+		    ((d.is_dir && ovl_verify_lower(dentry->d_sb)) ||
+		     (!d.is_dir && ofs->config.index && origin_path))) {
+			err = ovl_verify_origin(ofs, upperdentry, this, false);
+			if (err) {
+				dput(this);
+				if (d.is_dir)
+					break;
+				goto out_put;
+			}
+			origin = this;
+		}
+
+		if (d.metacopy && ctr) {
+			/*
+			 * Do not store intermediate metacopy dentries in
+			 * lower chain, except top most lower metacopy dentry.
+			 * Continue the loop so that if there is an absolute
+			 * redirect on this dentry, poe can be reset to roe.
+			 */
+			dput(this);
+			this = NULL;
+		} else {
+			stack[ctr].dentry = this;
+			stack[ctr].layer = lower.layer;
+			ctr++;
+		}
+
+		/*
+		 * Following redirects can have security consequences: it's like
+		 * a symlink into the lower layer without the permission checks.
+		 * This is only a problem if the upper layer is untrusted (e.g
+		 * comes from an USB drive).  This can allow a non-readable file
+		 * or directory to become readable.
+		 *
+		 * Only following redirects when redirects are enabled disables
+		 * this attack vector when not necessary.
+		 */
+		err = -EPERM;
+		if (d.redirect && !ofs->config.redirect_follow) {
+			pr_warn_ratelimited("refusing to follow redirect for (%pd2)\n",
+					    dentry);
+			goto out_put;
+		}
+
+		if (d.stop)
+			break;
+
+		if (d.redirect && d.redirect[0] == '/' && poe != roe) {
+			poe = roe;
+			/* Find the current layer on the root dentry */
+			i = lower.layer->idx - 1;
+		}
+	}
+
+	/*
+	 * For regular non-metacopy upper dentries, there is no lower
+	 * path based lookup, hence ctr will be zero. If a dentry is found
+	 * using ORIGIN xattr on upper, install it in stack.
+	 *
+	 * For metacopy dentry, path based lookup will find lower dentries.
+	 * Just make sure a corresponding data dentry has been found.
+	 */
+	if (d.metacopy || (uppermetacopy && !ctr)) {
+		pr_warn_ratelimited("metacopy with no lower data found - abort lookup (%pd2)\n",
+				    dentry);
+		err = -EIO;
+		goto out_put;
+	} else if (!d.is_dir && upperdentry && !ctr && origin_path) {
+		if (WARN_ON(stack != NULL)) {
+			err = -EIO;
+			goto out_put;
+		}
+		stack = origin_path;
+		ctr = 1;
+		origin = origin_path->dentry;
+		origin_path = NULL;
+	}
+
+	/*
+	 * Always lookup index if there is no-upperdentry.
+	 *
+	 * For the case of upperdentry, we have set origin by now if it
+	 * needed to be set. There are basically three cases.
+	 *
+	 * For directories, lookup index by lower inode and verify it matches
+	 * upper inode. We only trust dir index if we verified that lower dir
+	 * matches origin, otherwise dir index entries may be inconsistent
+	 * and we ignore them.
+	 *
+	 * For regular upper, we already set origin if upper had ORIGIN
+	 * xattr. There is no verification though as there is no path
+	 * based dentry lookup in lower in this case.
+	 *
+	 * For metacopy upper, we set a verified origin already if index
+	 * is enabled and if upper had an ORIGIN xattr.
+	 *
+	 */
+	if (!upperdentry && ctr)
+		origin = stack[0].dentry;
+
+	if (origin && ovl_indexdir(dentry->d_sb) &&
+	    (!d.is_dir || ovl_index_all(dentry->d_sb))) {
+		index = ovl_lookup_index(ofs, upperdentry, origin, true);
+		if (IS_ERR(index)) {
+			err = PTR_ERR(index);
+			index = NULL;
+			goto out_put;
+		}
+	}
+
+	oe = ovl_alloc_entry(ctr);
+	err = -ENOMEM;
+	if (!oe)
+		goto out_put;
+
+	memcpy(oe->lowerstack, stack, sizeof(struct ovl_path) * ctr);
+	dentry->d_fsdata = oe;
+
+	if (upperopaque)
+		ovl_dentry_set_opaque(dentry);
+
+	if (upperdentry)
+		ovl_dentry_set_upper_alias(dentry);
+	else if (index) {
+		struct path upperpath = {
+			.dentry = upperdentry = dget(index),
+			.mnt = ovl_upper_mnt(ofs),
+		};
+
+		upperredirect = ovl_get_redirect_xattr(ofs, &upperpath, 0);
+		if (IS_ERR(upperredirect)) {
+			err = PTR_ERR(upperredirect);
+			upperredirect = NULL;
+			goto out_free_oe;
+		}
+		err = ovl_check_metacopy_xattr(ofs, &upperpath);
+		if (err < 0)
+			goto out_free_oe;
+		uppermetacopy = err;
+	}
+
+	if (upperdentry || ctr) {
+		struct ovl_inode_params oip = {
+			.upperdentry = upperdentry,
+			.lowerpath = stack,
+			.index = index,
+			.numlower = ctr,
+			.redirect = upperredirect,
+			.lowerdata = (ctr > 1 && !d.is_dir) ?
+				      stack[ctr - 1].dentry : NULL,
+		};
+
+		inode = ovl_get_inode(dentry->d_sb, &oip);
+		err = PTR_ERR(inode);
+		if (IS_ERR(inode))
+			goto out_free_oe;
+		if (upperdentry && !uppermetacopy)
+			ovl_set_flag(OVL_UPPERDATA, inode);
+	}
+
+	ovl_dentry_init_reval(dentry, upperdentry);
+
+	revert_creds(old_cred);
+	if (origin_path) {
+		dput(origin_path->dentry);
+		kfree(origin_path);
+	}
+	dput(index);
+	kfree(stack);
+	kfree(d.redirect);
+	return d_splice_alias(inode, dentry);
+
+out_free_oe:
+	dentry->d_fsdata = NULL;
+	kfree(oe);
+out_put:
+	dput(index);
+	for (i = 0; i < ctr; i++)
+		dput(stack[i].dentry);
+	kfree(stack);
+out_put_upper:
+	if (origin_path) {
+		dput(origin_path->dentry);
+		kfree(origin_path);
+	}
+	dput(upperdentry);
+	kfree(upperredirect);
+out:
+	kfree(d.redirect);
+	revert_creds(old_cred);
+	return ERR_PTR(err);
+}
+
+bool ovl_lower_positive(struct dentry *dentry)
+{
+	struct ovl_entry *poe = dentry->d_parent->d_fsdata;
+	const struct qstr *name = &dentry->d_name;
+	const struct cred *old_cred;
+	unsigned int i;
+	bool positive = false;
+	bool done = false;
+
+	/*
+	 * If dentry is negative, then lower is positive iff this is a
+	 * whiteout.
+	 */
+	if (!dentry->d_inode)
+		return ovl_dentry_is_opaque(dentry);
+
+	/* Negative upper -> positive lower */
+	if (!ovl_dentry_upper(dentry))
+		return true;
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	/* Positive upper -> have to look up lower to see whether it exists */
+	for (i = 0; !done && !positive && i < poe->numlower; i++) {
+		struct dentry *this;
+		struct dentry *lowerdir = poe->lowerstack[i].dentry;
+
+		this = lookup_one_positive_unlocked(mnt_user_ns(poe->lowerstack[i].layer->mnt),
+						   name->name, lowerdir, name->len);
+		if (IS_ERR(this)) {
+			switch (PTR_ERR(this)) {
+			case -ENOENT:
+			case -ENAMETOOLONG:
+				break;
+
+			default:
+				/*
+				 * Assume something is there, we just couldn't
+				 * access it.
+				 */
+				positive = true;
+				break;
+			}
+		} else {
+			positive = !ovl_is_whiteout(this);
+			done = true;
+			dput(this);
+		}
+	}
+	revert_creds(old_cred);
+
+	return positive;
+}
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
new file mode 100644
index 000000000..a3c59ac01
--- /dev/null
+++ b/fs/overlayfs/overlayfs.h
@@ -0,0 +1,698 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright (C) 2011 Novell Inc.
+ */
+
+#include <linux/kernel.h>
+#include <linux/uuid.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include "ovl_entry.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) "overlayfs: " fmt
+
+enum ovl_path_type {
+	__OVL_PATH_UPPER	= (1 << 0),
+	__OVL_PATH_MERGE	= (1 << 1),
+	__OVL_PATH_ORIGIN	= (1 << 2),
+};
+
+#define OVL_TYPE_UPPER(type)	((type) & __OVL_PATH_UPPER)
+#define OVL_TYPE_MERGE(type)	((type) & __OVL_PATH_MERGE)
+#define OVL_TYPE_ORIGIN(type)	((type) & __OVL_PATH_ORIGIN)
+
+#define OVL_XATTR_NAMESPACE "overlay."
+#define OVL_XATTR_TRUSTED_PREFIX XATTR_TRUSTED_PREFIX OVL_XATTR_NAMESPACE
+#define OVL_XATTR_USER_PREFIX XATTR_USER_PREFIX OVL_XATTR_NAMESPACE
+
+enum ovl_xattr {
+	OVL_XATTR_OPAQUE,
+	OVL_XATTR_REDIRECT,
+	OVL_XATTR_ORIGIN,
+	OVL_XATTR_IMPURE,
+	OVL_XATTR_NLINK,
+	OVL_XATTR_UPPER,
+	OVL_XATTR_METACOPY,
+	OVL_XATTR_PROTATTR,
+};
+
+enum ovl_inode_flag {
+	/* Pure upper dir that may contain non pure upper entries */
+	OVL_IMPURE,
+	/* Non-merge dir that may contain whiteout entries */
+	OVL_WHITEOUTS,
+	OVL_INDEX,
+	OVL_UPPERDATA,
+	/* Inode number will remain constant over copy up. */
+	OVL_CONST_INO,
+};
+
+enum ovl_entry_flag {
+	OVL_E_UPPER_ALIAS,
+	OVL_E_OPAQUE,
+	OVL_E_CONNECTED,
+};
+
+enum {
+	OVL_XINO_OFF,
+	OVL_XINO_AUTO,
+	OVL_XINO_ON,
+};
+
+/*
+ * The tuple (fh,uuid) is a universal unique identifier for a copy up origin,
+ * where:
+ * origin.fh	- exported file handle of the lower file
+ * origin.uuid	- uuid of the lower filesystem
+ */
+#define OVL_FH_VERSION	0
+#define OVL_FH_MAGIC	0xfb
+
+/* CPU byte order required for fid decoding:  */
+#define OVL_FH_FLAG_BIG_ENDIAN	(1 << 0)
+#define OVL_FH_FLAG_ANY_ENDIAN	(1 << 1)
+/* Is the real inode encoded in fid an upper inode? */
+#define OVL_FH_FLAG_PATH_UPPER	(1 << 2)
+
+#define OVL_FH_FLAG_ALL (OVL_FH_FLAG_BIG_ENDIAN | OVL_FH_FLAG_ANY_ENDIAN | \
+			 OVL_FH_FLAG_PATH_UPPER)
+
+#if defined(__LITTLE_ENDIAN)
+#define OVL_FH_FLAG_CPU_ENDIAN 0
+#elif defined(__BIG_ENDIAN)
+#define OVL_FH_FLAG_CPU_ENDIAN OVL_FH_FLAG_BIG_ENDIAN
+#else
+#error Endianness not defined
+#endif
+
+/* The type used to be returned by overlay exportfs for misaligned fid */
+#define OVL_FILEID_V0	0xfb
+/* The type returned by overlay exportfs for 32bit aligned fid */
+#define OVL_FILEID_V1	0xf8
+
+/* On-disk format for "origin" file handle */
+struct ovl_fb {
+	u8 version;	/* 0 */
+	u8 magic;	/* 0xfb */
+	u8 len;		/* size of this header + size of fid */
+	u8 flags;	/* OVL_FH_FLAG_* */
+	u8 type;	/* fid_type of fid */
+	uuid_t uuid;	/* uuid of filesystem */
+	u32 fid[];	/* file identifier should be 32bit aligned in-memory */
+} __packed;
+
+/* In-memory and on-wire format for overlay file handle */
+struct ovl_fh {
+	u8 padding[3];	/* make sure fb.fid is 32bit aligned */
+	union {
+		struct ovl_fb fb;
+		DECLARE_FLEX_ARRAY(u8, buf);
+	};
+} __packed;
+
+#define OVL_FH_WIRE_OFFSET	offsetof(struct ovl_fh, fb)
+#define OVL_FH_LEN(fh)		(OVL_FH_WIRE_OFFSET + (fh)->fb.len)
+#define OVL_FH_FID_OFFSET	(OVL_FH_WIRE_OFFSET + \
+				 offsetof(struct ovl_fb, fid))
+
+extern const char *const ovl_xattr_table[][2];
+static inline const char *ovl_xattr(struct ovl_fs *ofs, enum ovl_xattr ox)
+{
+	return ovl_xattr_table[ox][ofs->config.userxattr];
+}
+
+/*
+ * When changing ownership of an upper object map the intended ownership
+ * according to the upper layer's idmapping. When an upper mount idmaps files
+ * that are stored on-disk as owned by id 1001 to id 1000 this means stat on
+ * this object will report it as being owned by id 1000 when calling stat via
+ * the upper mount.
+ * In order to change ownership of an object so stat reports id 1000 when
+ * called on an idmapped upper mount the value written to disk - i.e., the
+ * value stored in ia_*id - must 1001. The mount mapping helper will thus take
+ * care to map 1000 to 1001.
+ * The mnt idmapping helpers are nops if the upper layer isn't idmapped.
+ */
+static inline int ovl_do_notify_change(struct ovl_fs *ofs,
+				       struct dentry *upperdentry,
+				       struct iattr *attr)
+{
+	return notify_change(ovl_upper_mnt_userns(ofs), upperdentry, attr, NULL);
+}
+
+static inline int ovl_do_rmdir(struct ovl_fs *ofs,
+			       struct inode *dir, struct dentry *dentry)
+{
+	int err = vfs_rmdir(ovl_upper_mnt_userns(ofs), dir, dentry);
+
+	pr_debug("rmdir(%pd2) = %i\n", dentry, err);
+	return err;
+}
+
+static inline int ovl_do_unlink(struct ovl_fs *ofs, struct inode *dir,
+				struct dentry *dentry)
+{
+	int err = vfs_unlink(ovl_upper_mnt_userns(ofs), dir, dentry, NULL);
+
+	pr_debug("unlink(%pd2) = %i\n", dentry, err);
+	return err;
+}
+
+static inline int ovl_do_link(struct ovl_fs *ofs, struct dentry *old_dentry,
+			      struct inode *dir, struct dentry *new_dentry)
+{
+	int err = vfs_link(old_dentry, ovl_upper_mnt_userns(ofs), dir, new_dentry, NULL);
+
+	pr_debug("link(%pd2, %pd2) = %i\n", old_dentry, new_dentry, err);
+	return err;
+}
+
+static inline int ovl_do_create(struct ovl_fs *ofs,
+				struct inode *dir, struct dentry *dentry,
+				umode_t mode)
+{
+	int err = vfs_create(ovl_upper_mnt_userns(ofs), dir, dentry, mode, true);
+
+	pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err);
+	return err;
+}
+
+static inline int ovl_do_mkdir(struct ovl_fs *ofs,
+			       struct inode *dir, struct dentry *dentry,
+			       umode_t mode)
+{
+	int err = vfs_mkdir(ovl_upper_mnt_userns(ofs), dir, dentry, mode);
+	pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err);
+	return err;
+}
+
+static inline int ovl_do_mknod(struct ovl_fs *ofs,
+			       struct inode *dir, struct dentry *dentry,
+			       umode_t mode, dev_t dev)
+{
+	int err = vfs_mknod(ovl_upper_mnt_userns(ofs), dir, dentry, mode, dev);
+
+	pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n", dentry, mode, dev, err);
+	return err;
+}
+
+static inline int ovl_do_symlink(struct ovl_fs *ofs,
+				 struct inode *dir, struct dentry *dentry,
+				 const char *oldname)
+{
+	int err = vfs_symlink(ovl_upper_mnt_userns(ofs), dir, dentry, oldname);
+
+	pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err);
+	return err;
+}
+
+static inline ssize_t ovl_do_getxattr(const struct path *path, const char *name,
+				      void *value, size_t size)
+{
+	int err, len;
+
+	WARN_ON(path->dentry->d_sb != path->mnt->mnt_sb);
+
+	err = vfs_getxattr(mnt_user_ns(path->mnt), path->dentry,
+			       name, value, size);
+	len = (value && err > 0) ? err : 0;
+
+	pr_debug("getxattr(%pd2, \"%s\", \"%*pE\", %zu, 0) = %i\n",
+		 path->dentry, name, min(len, 48), value, size, err);
+	return err;
+}
+
+static inline ssize_t ovl_getxattr_upper(struct ovl_fs *ofs,
+					 struct dentry *upperdentry,
+					 enum ovl_xattr ox, void *value,
+					 size_t size)
+{
+	struct path upperpath = {
+		.dentry = upperdentry,
+		.mnt = ovl_upper_mnt(ofs),
+	};
+
+	return ovl_do_getxattr(&upperpath, ovl_xattr(ofs, ox), value, size);
+}
+
+static inline ssize_t ovl_path_getxattr(struct ovl_fs *ofs,
+					 const struct path *path,
+					 enum ovl_xattr ox, void *value,
+					 size_t size)
+{
+	return ovl_do_getxattr(path, ovl_xattr(ofs, ox), value, size);
+}
+
+static inline int ovl_do_setxattr(struct ovl_fs *ofs, struct dentry *dentry,
+				  const char *name, const void *value,
+				  size_t size, int flags)
+{
+	int err = vfs_setxattr(ovl_upper_mnt_userns(ofs), dentry, name,
+			       value, size, flags);
+
+	pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, %d) = %i\n",
+		 dentry, name, min((int)size, 48), value, size, flags, err);
+	return err;
+}
+
+static inline int ovl_setxattr(struct ovl_fs *ofs, struct dentry *dentry,
+			       enum ovl_xattr ox, const void *value,
+			       size_t size)
+{
+	return ovl_do_setxattr(ofs, dentry, ovl_xattr(ofs, ox), value, size, 0);
+}
+
+static inline int ovl_do_removexattr(struct ovl_fs *ofs, struct dentry *dentry,
+				     const char *name)
+{
+	int err = vfs_removexattr(ovl_upper_mnt_userns(ofs), dentry, name);
+	pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, err);
+	return err;
+}
+
+static inline int ovl_removexattr(struct ovl_fs *ofs, struct dentry *dentry,
+				  enum ovl_xattr ox)
+{
+	return ovl_do_removexattr(ofs, dentry, ovl_xattr(ofs, ox));
+}
+
+static inline int ovl_do_rename(struct ovl_fs *ofs, struct inode *olddir,
+				struct dentry *olddentry, struct inode *newdir,
+				struct dentry *newdentry, unsigned int flags)
+{
+	int err;
+	struct renamedata rd = {
+		.old_mnt_userns	= ovl_upper_mnt_userns(ofs),
+		.old_dir 	= olddir,
+		.old_dentry 	= olddentry,
+		.new_mnt_userns	= ovl_upper_mnt_userns(ofs),
+		.new_dir 	= newdir,
+		.new_dentry 	= newdentry,
+		.flags 		= flags,
+	};
+
+	pr_debug("rename(%pd2, %pd2, 0x%x)\n", olddentry, newdentry, flags);
+	err = vfs_rename(&rd);
+	if (err) {
+		pr_debug("...rename(%pd2, %pd2, ...) = %i\n",
+			 olddentry, newdentry, err);
+	}
+	return err;
+}
+
+static inline int ovl_do_whiteout(struct ovl_fs *ofs,
+				  struct inode *dir, struct dentry *dentry)
+{
+	int err = vfs_whiteout(ovl_upper_mnt_userns(ofs), dir, dentry);
+	pr_debug("whiteout(%pd2) = %i\n", dentry, err);
+	return err;
+}
+
+static inline struct file *ovl_do_tmpfile(struct ovl_fs *ofs,
+					  struct dentry *dentry, umode_t mode)
+{
+	struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = dentry };
+	struct file *file = vfs_tmpfile_open(ovl_upper_mnt_userns(ofs), &path, mode,
+					O_LARGEFILE | O_WRONLY, current_cred());
+	int err = PTR_ERR_OR_ZERO(file);
+
+	pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err);
+	return file;
+}
+
+static inline struct dentry *ovl_lookup_upper(struct ovl_fs *ofs,
+					      const char *name,
+					      struct dentry *base, int len)
+{
+	return lookup_one(ovl_upper_mnt_userns(ofs), name, base, len);
+}
+
+static inline bool ovl_open_flags_need_copy_up(int flags)
+{
+	if (!flags)
+		return false;
+
+	return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC));
+}
+
+static inline bool ovl_allow_offline_changes(struct ovl_fs *ofs)
+{
+	/*
+	 * To avoid regressions in existing setups with overlay lower offline
+	 * changes, we allow lower changes only if none of the new features
+	 * are used.
+	 */
+	return (!ofs->config.index && !ofs->config.metacopy &&
+		!ofs->config.redirect_dir && ofs->config.xino != OVL_XINO_ON);
+}
+
+
+/* util.c */
+int ovl_want_write(struct dentry *dentry);
+void ovl_drop_write(struct dentry *dentry);
+struct dentry *ovl_workdir(struct dentry *dentry);
+const struct cred *ovl_override_creds(struct super_block *sb);
+int ovl_can_decode_fh(struct super_block *sb);
+struct dentry *ovl_indexdir(struct super_block *sb);
+bool ovl_index_all(struct super_block *sb);
+bool ovl_verify_lower(struct super_block *sb);
+struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
+bool ovl_dentry_remote(struct dentry *dentry);
+void ovl_dentry_update_reval(struct dentry *dentry, struct dentry *realdentry);
+void ovl_dentry_init_reval(struct dentry *dentry, struct dentry *upperdentry);
+void ovl_dentry_init_flags(struct dentry *dentry, struct dentry *upperdentry,
+			   unsigned int mask);
+bool ovl_dentry_weird(struct dentry *dentry);
+enum ovl_path_type ovl_path_type(struct dentry *dentry);
+void ovl_path_upper(struct dentry *dentry, struct path *path);
+void ovl_path_lower(struct dentry *dentry, struct path *path);
+void ovl_path_lowerdata(struct dentry *dentry, struct path *path);
+struct inode *ovl_i_path_real(struct inode *inode, struct path *path);
+enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
+enum ovl_path_type ovl_path_realdata(struct dentry *dentry, struct path *path);
+struct dentry *ovl_dentry_upper(struct dentry *dentry);
+struct dentry *ovl_dentry_lower(struct dentry *dentry);
+struct dentry *ovl_dentry_lowerdata(struct dentry *dentry);
+const struct ovl_layer *ovl_i_layer_lower(struct inode *inode);
+const struct ovl_layer *ovl_layer_lower(struct dentry *dentry);
+struct dentry *ovl_dentry_real(struct dentry *dentry);
+struct dentry *ovl_i_dentry_upper(struct inode *inode);
+struct inode *ovl_inode_upper(struct inode *inode);
+struct inode *ovl_inode_lower(struct inode *inode);
+struct inode *ovl_inode_lowerdata(struct inode *inode);
+struct inode *ovl_inode_real(struct inode *inode);
+struct inode *ovl_inode_realdata(struct inode *inode);
+struct ovl_dir_cache *ovl_dir_cache(struct inode *inode);
+void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache);
+void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry);
+void ovl_dentry_clear_flag(unsigned long flag, struct dentry *dentry);
+bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry);
+bool ovl_dentry_is_opaque(struct dentry *dentry);
+bool ovl_dentry_is_whiteout(struct dentry *dentry);
+void ovl_dentry_set_opaque(struct dentry *dentry);
+bool ovl_dentry_has_upper_alias(struct dentry *dentry);
+void ovl_dentry_set_upper_alias(struct dentry *dentry);
+bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags);
+bool ovl_dentry_needs_data_copy_up_locked(struct dentry *dentry, int flags);
+bool ovl_has_upperdata(struct inode *inode);
+void ovl_set_upperdata(struct inode *inode);
+bool ovl_redirect_dir(struct super_block *sb);
+const char *ovl_dentry_get_redirect(struct dentry *dentry);
+void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect);
+void ovl_inode_update(struct inode *inode, struct dentry *upperdentry);
+void ovl_dir_modified(struct dentry *dentry, bool impurity);
+u64 ovl_dentry_version_get(struct dentry *dentry);
+bool ovl_is_whiteout(struct dentry *dentry);
+struct file *ovl_path_open(const struct path *path, int flags);
+int ovl_copy_up_start(struct dentry *dentry, int flags);
+void ovl_copy_up_end(struct dentry *dentry);
+bool ovl_already_copied_up(struct dentry *dentry, int flags);
+bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path,
+			      enum ovl_xattr ox);
+bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path);
+
+static inline bool ovl_check_origin_xattr(struct ovl_fs *ofs,
+					  struct dentry *upperdentry)
+{
+	struct path upperpath = {
+		.dentry = upperdentry,
+		.mnt = ovl_upper_mnt(ofs),
+	};
+	return ovl_path_check_origin_xattr(ofs, &upperpath);
+}
+
+int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry,
+		       enum ovl_xattr ox, const void *value, size_t size,
+		       int xerr);
+int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry);
+bool ovl_inuse_trylock(struct dentry *dentry);
+void ovl_inuse_unlock(struct dentry *dentry);
+bool ovl_is_inuse(struct dentry *dentry);
+bool ovl_need_index(struct dentry *dentry);
+int ovl_nlink_start(struct dentry *dentry);
+void ovl_nlink_end(struct dentry *dentry);
+int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir);
+int ovl_check_metacopy_xattr(struct ovl_fs *ofs, const struct path *path);
+bool ovl_is_metacopy_dentry(struct dentry *dentry);
+char *ovl_get_redirect_xattr(struct ovl_fs *ofs, const struct path *path, int padding);
+int ovl_sync_status(struct ovl_fs *ofs);
+
+static inline void ovl_set_flag(unsigned long flag, struct inode *inode)
+{
+	set_bit(flag, &OVL_I(inode)->flags);
+}
+
+static inline void ovl_clear_flag(unsigned long flag, struct inode *inode)
+{
+	clear_bit(flag, &OVL_I(inode)->flags);
+}
+
+static inline bool ovl_test_flag(unsigned long flag, struct inode *inode)
+{
+	return test_bit(flag, &OVL_I(inode)->flags);
+}
+
+static inline bool ovl_is_impuredir(struct super_block *sb,
+				    struct dentry *upperdentry)
+{
+	struct ovl_fs *ofs = OVL_FS(sb);
+	struct path upperpath = {
+		.dentry = upperdentry,
+		.mnt = ovl_upper_mnt(ofs),
+	};
+
+	return ovl_path_check_dir_xattr(ofs, &upperpath, OVL_XATTR_IMPURE);
+}
+
+/*
+ * With xino=auto, we do best effort to keep all inodes on same st_dev and
+ * d_ino consistent with st_ino.
+ * With xino=on, we do the same effort but we warn if we failed.
+ */
+static inline bool ovl_xino_warn(struct super_block *sb)
+{
+	return OVL_FS(sb)->config.xino == OVL_XINO_ON;
+}
+
+/* All layers on same fs? */
+static inline bool ovl_same_fs(struct super_block *sb)
+{
+	return OVL_FS(sb)->xino_mode == 0;
+}
+
+/* All overlay inodes have same st_dev? */
+static inline bool ovl_same_dev(struct super_block *sb)
+{
+	return OVL_FS(sb)->xino_mode >= 0;
+}
+
+static inline unsigned int ovl_xino_bits(struct super_block *sb)
+{
+	return ovl_same_dev(sb) ? OVL_FS(sb)->xino_mode : 0;
+}
+
+static inline void ovl_inode_lock(struct inode *inode)
+{
+	mutex_lock(&OVL_I(inode)->lock);
+}
+
+static inline int ovl_inode_lock_interruptible(struct inode *inode)
+{
+	return mutex_lock_interruptible(&OVL_I(inode)->lock);
+}
+
+static inline void ovl_inode_unlock(struct inode *inode)
+{
+	mutex_unlock(&OVL_I(inode)->lock);
+}
+
+
+/* namei.c */
+int ovl_check_fb_len(struct ovl_fb *fb, int fb_len);
+
+static inline int ovl_check_fh_len(struct ovl_fh *fh, int fh_len)
+{
+	if (fh_len < sizeof(struct ovl_fh))
+		return -EINVAL;
+
+	return ovl_check_fb_len(&fh->fb, fh_len - OVL_FH_WIRE_OFFSET);
+}
+
+struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
+				  struct vfsmount *mnt, bool connected);
+int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
+			struct dentry *upperdentry, struct ovl_path **stackp);
+int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
+		      enum ovl_xattr ox, struct dentry *real, bool is_upper,
+		      bool set);
+struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index);
+int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index);
+int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin,
+		       struct qstr *name);
+struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh);
+struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
+				struct dentry *origin, bool verify);
+int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
+struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
+			  unsigned int flags);
+bool ovl_lower_positive(struct dentry *dentry);
+
+static inline int ovl_verify_origin(struct ovl_fs *ofs, struct dentry *upper,
+				    struct dentry *origin, bool set)
+{
+	return ovl_verify_set_fh(ofs, upper, OVL_XATTR_ORIGIN, origin,
+				 false, set);
+}
+
+static inline int ovl_verify_upper(struct ovl_fs *ofs, struct dentry *index,
+				   struct dentry *upper, bool set)
+{
+	return ovl_verify_set_fh(ofs, index, OVL_XATTR_UPPER, upper, true, set);
+}
+
+/* readdir.c */
+extern const struct file_operations ovl_dir_operations;
+struct file *ovl_dir_real_file(const struct file *file, bool want_upper);
+int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
+void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper,
+			   struct list_head *list);
+void ovl_cache_free(struct list_head *list);
+void ovl_dir_cache_free(struct inode *inode);
+int ovl_check_d_type_supported(const struct path *realpath);
+int ovl_workdir_cleanup(struct ovl_fs *ofs, struct inode *dir,
+			struct vfsmount *mnt, struct dentry *dentry, int level);
+int ovl_indexdir_cleanup(struct ovl_fs *ofs);
+
+/*
+ * Can we iterate real dir directly?
+ *
+ * Non-merge dir may contain whiteouts from a time it was a merge upper, before
+ * lower dir was removed under it and possibly before it was rotated from upper
+ * to lower layer.
+ */
+static inline bool ovl_dir_is_real(struct dentry *dir)
+{
+	return !ovl_test_flag(OVL_WHITEOUTS, d_inode(dir));
+}
+
+/* inode.c */
+int ovl_set_nlink_upper(struct dentry *dentry);
+int ovl_set_nlink_lower(struct dentry *dentry);
+unsigned int ovl_get_nlink(struct ovl_fs *ofs, struct dentry *lowerdentry,
+			   struct dentry *upperdentry,
+			   unsigned int fallback);
+int ovl_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+		struct iattr *attr);
+int ovl_getattr(struct user_namespace *mnt_userns, const struct path *path,
+		struct kstat *stat, u32 request_mask, unsigned int flags);
+int ovl_permission(struct user_namespace *mnt_userns, struct inode *inode,
+		   int mask);
+int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
+		  const void *value, size_t size, int flags);
+int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
+		  void *value, size_t size);
+ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
+
+#ifdef CONFIG_FS_POSIX_ACL
+struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu);
+#else
+#define ovl_get_acl	NULL
+#endif
+
+int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags);
+bool ovl_is_private_xattr(struct super_block *sb, const char *name);
+
+struct ovl_inode_params {
+	struct inode *newinode;
+	struct dentry *upperdentry;
+	struct ovl_path *lowerpath;
+	bool index;
+	unsigned int numlower;
+	char *redirect;
+	struct dentry *lowerdata;
+};
+void ovl_inode_init(struct inode *inode, struct ovl_inode_params *oip,
+		    unsigned long ino, int fsid);
+struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev);
+struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
+			       bool is_upper);
+bool ovl_lookup_trap_inode(struct super_block *sb, struct dentry *dir);
+struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir);
+struct inode *ovl_get_inode(struct super_block *sb,
+			    struct ovl_inode_params *oip);
+void ovl_copyattr(struct inode *to);
+
+/* vfs inode flags copied from real to ovl inode */
+#define OVL_COPY_I_FLAGS_MASK	(S_SYNC | S_NOATIME | S_APPEND | S_IMMUTABLE)
+/* vfs inode flags read from overlay.protattr xattr to ovl inode */
+#define OVL_PROT_I_FLAGS_MASK	(S_APPEND | S_IMMUTABLE)
+
+/*
+ * fileattr flags copied from lower to upper inode on copy up.
+ * We cannot copy up immutable/append-only flags, because that would prevent
+ * linking temp inode to upper dir, so we store them in xattr instead.
+ */
+#define OVL_COPY_FS_FLAGS_MASK	(FS_SYNC_FL | FS_NOATIME_FL)
+#define OVL_COPY_FSX_FLAGS_MASK	(FS_XFLAG_SYNC | FS_XFLAG_NOATIME)
+#define OVL_PROT_FS_FLAGS_MASK  (FS_APPEND_FL | FS_IMMUTABLE_FL)
+#define OVL_PROT_FSX_FLAGS_MASK (FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE)
+
+void ovl_check_protattr(struct inode *inode, struct dentry *upper);
+int ovl_set_protattr(struct inode *inode, struct dentry *upper,
+		      struct fileattr *fa);
+
+static inline void ovl_copyflags(struct inode *from, struct inode *to)
+{
+	unsigned int mask = OVL_COPY_I_FLAGS_MASK;
+
+	inode_set_flags(to, from->i_flags & mask, mask);
+}
+
+/* dir.c */
+extern const struct inode_operations ovl_dir_inode_operations;
+int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir,
+			     struct dentry *dentry);
+struct ovl_cattr {
+	dev_t rdev;
+	umode_t mode;
+	const char *link;
+	struct dentry *hardlink;
+};
+
+#define OVL_CATTR(m) (&(struct ovl_cattr) { .mode = (m) })
+
+int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir,
+		   struct dentry **newdentry, umode_t mode);
+struct dentry *ovl_create_real(struct ovl_fs *ofs,
+			       struct inode *dir, struct dentry *newdentry,
+			       struct ovl_cattr *attr);
+int ovl_cleanup(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry);
+struct dentry *ovl_lookup_temp(struct ovl_fs *ofs, struct dentry *workdir);
+struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir,
+			       struct ovl_cattr *attr);
+
+/* file.c */
+extern const struct file_operations ovl_file_operations;
+int __init ovl_aio_request_cache_init(void);
+void ovl_aio_request_cache_destroy(void);
+int ovl_real_fileattr_get(const struct path *realpath, struct fileattr *fa);
+int ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa);
+int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa);
+int ovl_fileattr_set(struct user_namespace *mnt_userns,
+		     struct dentry *dentry, struct fileattr *fa);
+
+/* copy_up.c */
+int ovl_copy_up(struct dentry *dentry);
+int ovl_copy_up_with_data(struct dentry *dentry);
+int ovl_maybe_copy_up(struct dentry *dentry, int flags);
+int ovl_copy_xattr(struct super_block *sb, const struct path *path, struct dentry *new);
+int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upper, struct kstat *stat);
+struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real,
+				  bool is_upper);
+int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower,
+		   struct dentry *upper);
+
+/* export.c */
+extern const struct export_operations ovl_export_operations;
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
new file mode 100644
index 000000000..a479680a5
--- /dev/null
+++ b/fs/overlayfs/ovl_entry.h
@@ -0,0 +1,160 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright (C) 2011 Novell Inc.
+ * Copyright (C) 2016 Red Hat, Inc.
+ */
+
+struct ovl_config {
+	char *lowerdir;
+	char *upperdir;
+	char *workdir;
+	bool default_permissions;
+	bool redirect_dir;
+	bool redirect_follow;
+	const char *redirect_mode;
+	bool index;
+	bool uuid;
+	bool nfs_export;
+	int xino;
+	bool metacopy;
+	bool userxattr;
+	bool ovl_volatile;
+};
+
+struct ovl_sb {
+	struct super_block *sb;
+	dev_t pseudo_dev;
+	/* Unusable (conflicting) uuid */
+	bool bad_uuid;
+	/* Used as a lower layer (but maybe also as upper) */
+	bool is_lower;
+};
+
+struct ovl_layer {
+	/* ovl_free_fs() relies on @mnt being the first member! */
+	struct vfsmount *mnt;
+	/* Trap in ovl inode cache */
+	struct inode *trap;
+	struct ovl_sb *fs;
+	/* Index of this layer in fs root (upper idx == 0) */
+	int idx;
+	/* One fsid per unique underlying sb (upper fsid == 0) */
+	int fsid;
+};
+
+/*
+ * ovl_free_fs() relies on @mnt being the first member when unmounting
+ * the private mounts created for each layer. Let's check both the
+ * offset and type.
+ */
+static_assert(offsetof(struct ovl_layer, mnt) == 0);
+static_assert(__same_type(typeof_member(struct ovl_layer, mnt), struct vfsmount *));
+
+struct ovl_path {
+	const struct ovl_layer *layer;
+	struct dentry *dentry;
+};
+
+/* private information held for overlayfs's superblock */
+struct ovl_fs {
+	unsigned int numlayer;
+	/* Number of unique fs among layers including upper fs */
+	unsigned int numfs;
+	const struct ovl_layer *layers;
+	struct ovl_sb *fs;
+	/* workbasedir is the path at workdir= mount option */
+	struct dentry *workbasedir;
+	/* workdir is the 'work' directory under workbasedir */
+	struct dentry *workdir;
+	/* index directory listing overlay inodes by origin file handle */
+	struct dentry *indexdir;
+	long namelen;
+	/* pathnames of lower and upper dirs, for show_options */
+	struct ovl_config config;
+	/* creds of process who forced instantiation of super block */
+	const struct cred *creator_cred;
+	bool tmpfile;
+	bool noxattr;
+	/* Did we take the inuse lock? */
+	bool upperdir_locked;
+	bool workdir_locked;
+	bool share_whiteout;
+	/* Traps in ovl inode cache */
+	struct inode *workbasedir_trap;
+	struct inode *workdir_trap;
+	struct inode *indexdir_trap;
+	/* -1: disabled, 0: same fs, 1..32: number of unused ino bits */
+	int xino_mode;
+	/* For allocation of non-persistent inode numbers */
+	atomic_long_t last_ino;
+	/* Whiteout dentry cache */
+	struct dentry *whiteout;
+	/* r/o snapshot of upperdir sb's only taken on volatile mounts */
+	errseq_t errseq;
+};
+
+static inline struct vfsmount *ovl_upper_mnt(struct ovl_fs *ofs)
+{
+	return ofs->layers[0].mnt;
+}
+
+static inline struct user_namespace *ovl_upper_mnt_userns(struct ovl_fs *ofs)
+{
+	return mnt_user_ns(ovl_upper_mnt(ofs));
+}
+
+static inline struct ovl_fs *OVL_FS(struct super_block *sb)
+{
+	return (struct ovl_fs *)sb->s_fs_info;
+}
+
+static inline bool ovl_should_sync(struct ovl_fs *ofs)
+{
+	return !ofs->config.ovl_volatile;
+}
+
+/* private information held for every overlayfs dentry */
+struct ovl_entry {
+	union {
+		struct {
+			unsigned long flags;
+		};
+		struct rcu_head rcu;
+	};
+	unsigned numlower;
+	struct ovl_path lowerstack[];
+};
+
+struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
+
+static inline struct ovl_entry *OVL_E(struct dentry *dentry)
+{
+	return (struct ovl_entry *) dentry->d_fsdata;
+}
+
+struct ovl_inode {
+	union {
+		struct ovl_dir_cache *cache;	/* directory */
+		struct inode *lowerdata;	/* regular file */
+	};
+	const char *redirect;
+	u64 version;
+	unsigned long flags;
+	struct inode vfs_inode;
+	struct dentry *__upperdentry;
+	struct ovl_path lowerpath;
+
+	/* synchronize copy up and more */
+	struct mutex lock;
+};
+
+static inline struct ovl_inode *OVL_I(struct inode *inode)
+{
+	return container_of(inode, struct ovl_inode, vfs_inode);
+}
+
+static inline struct dentry *ovl_upperdentry_dereference(struct ovl_inode *oi)
+{
+	return READ_ONCE(oi->__upperdentry);
+}
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
new file mode 100644
index 000000000..2b2106400
--- /dev/null
+++ b/fs/overlayfs/readdir.c
@@ -0,0 +1,1235 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright (C) 2011 Novell Inc.
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/namei.h>
+#include <linux/file.h>
+#include <linux/xattr.h>
+#include <linux/rbtree.h>
+#include <linux/security.h>
+#include <linux/cred.h>
+#include <linux/ratelimit.h>
+#include "overlayfs.h"
+
+struct ovl_cache_entry {
+	unsigned int len;
+	unsigned int type;
+	u64 real_ino;
+	u64 ino;
+	struct list_head l_node;
+	struct rb_node node;
+	struct ovl_cache_entry *next_maybe_whiteout;
+	bool is_upper;
+	bool is_whiteout;
+	char name[];
+};
+
+struct ovl_dir_cache {
+	long refcount;
+	u64 version;
+	struct list_head entries;
+	struct rb_root root;
+};
+
+struct ovl_readdir_data {
+	struct dir_context ctx;
+	struct dentry *dentry;
+	bool is_lowest;
+	struct rb_root *root;
+	struct list_head *list;
+	struct list_head middle;
+	struct ovl_cache_entry *first_maybe_whiteout;
+	int count;
+	int err;
+	bool is_upper;
+	bool d_type_supported;
+};
+
+struct ovl_dir_file {
+	bool is_real;
+	bool is_upper;
+	struct ovl_dir_cache *cache;
+	struct list_head *cursor;
+	struct file *realfile;
+	struct file *upperfile;
+};
+
+static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
+{
+	return rb_entry(n, struct ovl_cache_entry, node);
+}
+
+static bool ovl_cache_entry_find_link(const char *name, int len,
+				      struct rb_node ***link,
+				      struct rb_node **parent)
+{
+	bool found = false;
+	struct rb_node **newp = *link;
+
+	while (!found && *newp) {
+		int cmp;
+		struct ovl_cache_entry *tmp;
+
+		*parent = *newp;
+		tmp = ovl_cache_entry_from_node(*newp);
+		cmp = strncmp(name, tmp->name, len);
+		if (cmp > 0)
+			newp = &tmp->node.rb_right;
+		else if (cmp < 0 || len < tmp->len)
+			newp = &tmp->node.rb_left;
+		else
+			found = true;
+	}
+	*link = newp;
+
+	return found;
+}
+
+static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
+						    const char *name, int len)
+{
+	struct rb_node *node = root->rb_node;
+	int cmp;
+
+	while (node) {
+		struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
+
+		cmp = strncmp(name, p->name, len);
+		if (cmp > 0)
+			node = p->node.rb_right;
+		else if (cmp < 0 || len < p->len)
+			node = p->node.rb_left;
+		else
+			return p;
+	}
+
+	return NULL;
+}
+
+static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
+			   struct ovl_cache_entry *p)
+{
+	/* Don't care if not doing ovl_iter() */
+	if (!rdd->dentry)
+		return false;
+
+	/* Always recalc d_ino when remapping lower inode numbers */
+	if (ovl_xino_bits(rdd->dentry->d_sb))
+		return true;
+
+	/* Always recalc d_ino for parent */
+	if (strcmp(p->name, "..") == 0)
+		return true;
+
+	/* If this is lower, then native d_ino will do */
+	if (!rdd->is_upper)
+		return false;
+
+	/*
+	 * Recalc d_ino for '.' and for all entries if dir is impure (contains
+	 * copied up entries)
+	 */
+	if ((p->name[0] == '.' && p->len == 1) ||
+	    ovl_test_flag(OVL_IMPURE, d_inode(rdd->dentry)))
+		return true;
+
+	return false;
+}
+
+static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
+						   const char *name, int len,
+						   u64 ino, unsigned int d_type)
+{
+	struct ovl_cache_entry *p;
+	size_t size = offsetof(struct ovl_cache_entry, name[len + 1]);
+
+	p = kmalloc(size, GFP_KERNEL);
+	if (!p)
+		return NULL;
+
+	memcpy(p->name, name, len);
+	p->name[len] = '\0';
+	p->len = len;
+	p->type = d_type;
+	p->real_ino = ino;
+	p->ino = ino;
+	/* Defer setting d_ino for upper entry to ovl_iterate() */
+	if (ovl_calc_d_ino(rdd, p))
+		p->ino = 0;
+	p->is_upper = rdd->is_upper;
+	p->is_whiteout = false;
+
+	if (d_type == DT_CHR) {
+		p->next_maybe_whiteout = rdd->first_maybe_whiteout;
+		rdd->first_maybe_whiteout = p;
+	}
+	return p;
+}
+
+static bool ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
+				  const char *name, int len, u64 ino,
+				  unsigned int d_type)
+{
+	struct rb_node **newp = &rdd->root->rb_node;
+	struct rb_node *parent = NULL;
+	struct ovl_cache_entry *p;
+
+	if (ovl_cache_entry_find_link(name, len, &newp, &parent))
+		return true;
+
+	p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
+	if (p == NULL) {
+		rdd->err = -ENOMEM;
+		return false;
+	}
+
+	list_add_tail(&p->l_node, rdd->list);
+	rb_link_node(&p->node, parent, newp);
+	rb_insert_color(&p->node, rdd->root);
+
+	return true;
+}
+
+static bool ovl_fill_lowest(struct ovl_readdir_data *rdd,
+			   const char *name, int namelen,
+			   loff_t offset, u64 ino, unsigned int d_type)
+{
+	struct ovl_cache_entry *p;
+
+	p = ovl_cache_entry_find(rdd->root, name, namelen);
+	if (p) {
+		list_move_tail(&p->l_node, &rdd->middle);
+	} else {
+		p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
+		if (p == NULL)
+			rdd->err = -ENOMEM;
+		else
+			list_add_tail(&p->l_node, &rdd->middle);
+	}
+
+	return rdd->err == 0;
+}
+
+void ovl_cache_free(struct list_head *list)
+{
+	struct ovl_cache_entry *p;
+	struct ovl_cache_entry *n;
+
+	list_for_each_entry_safe(p, n, list, l_node)
+		kfree(p);
+
+	INIT_LIST_HEAD(list);
+}
+
+void ovl_dir_cache_free(struct inode *inode)
+{
+	struct ovl_dir_cache *cache = ovl_dir_cache(inode);
+
+	if (cache) {
+		ovl_cache_free(&cache->entries);
+		kfree(cache);
+	}
+}
+
+static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
+{
+	struct ovl_dir_cache *cache = od->cache;
+
+	WARN_ON(cache->refcount <= 0);
+	cache->refcount--;
+	if (!cache->refcount) {
+		if (ovl_dir_cache(d_inode(dentry)) == cache)
+			ovl_set_dir_cache(d_inode(dentry), NULL);
+
+		ovl_cache_free(&cache->entries);
+		kfree(cache);
+	}
+}
+
+static bool ovl_fill_merge(struct dir_context *ctx, const char *name,
+			  int namelen, loff_t offset, u64 ino,
+			  unsigned int d_type)
+{
+	struct ovl_readdir_data *rdd =
+		container_of(ctx, struct ovl_readdir_data, ctx);
+
+	rdd->count++;
+	if (!rdd->is_lowest)
+		return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
+	else
+		return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
+}
+
+static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd)
+{
+	int err;
+	struct ovl_cache_entry *p;
+	struct dentry *dentry, *dir = path->dentry;
+	const struct cred *old_cred;
+
+	old_cred = ovl_override_creds(rdd->dentry->d_sb);
+
+	err = down_write_killable(&dir->d_inode->i_rwsem);
+	if (!err) {
+		while (rdd->first_maybe_whiteout) {
+			p = rdd->first_maybe_whiteout;
+			rdd->first_maybe_whiteout = p->next_maybe_whiteout;
+			dentry = lookup_one(mnt_user_ns(path->mnt), p->name, dir, p->len);
+			if (!IS_ERR(dentry)) {
+				p->is_whiteout = ovl_is_whiteout(dentry);
+				dput(dentry);
+			}
+		}
+		inode_unlock(dir->d_inode);
+	}
+	revert_creds(old_cred);
+
+	return err;
+}
+
+static inline int ovl_dir_read(const struct path *realpath,
+			       struct ovl_readdir_data *rdd)
+{
+	struct file *realfile;
+	int err;
+
+	realfile = ovl_path_open(realpath, O_RDONLY | O_LARGEFILE);
+	if (IS_ERR(realfile))
+		return PTR_ERR(realfile);
+
+	rdd->first_maybe_whiteout = NULL;
+	rdd->ctx.pos = 0;
+	do {
+		rdd->count = 0;
+		rdd->err = 0;
+		err = iterate_dir(realfile, &rdd->ctx);
+		if (err >= 0)
+			err = rdd->err;
+	} while (!err && rdd->count);
+
+	if (!err && rdd->first_maybe_whiteout && rdd->dentry)
+		err = ovl_check_whiteouts(realpath, rdd);
+
+	fput(realfile);
+
+	return err;
+}
+
+static void ovl_dir_reset(struct file *file)
+{
+	struct ovl_dir_file *od = file->private_data;
+	struct ovl_dir_cache *cache = od->cache;
+	struct dentry *dentry = file->f_path.dentry;
+	bool is_real;
+
+	if (cache && ovl_dentry_version_get(dentry) != cache->version) {
+		ovl_cache_put(od, dentry);
+		od->cache = NULL;
+		od->cursor = NULL;
+	}
+	is_real = ovl_dir_is_real(dentry);
+	if (od->is_real != is_real) {
+		/* is_real can only become false when dir is copied up */
+		if (WARN_ON(is_real))
+			return;
+		od->is_real = false;
+	}
+}
+
+static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
+	struct rb_root *root)
+{
+	int err;
+	struct path realpath;
+	struct ovl_readdir_data rdd = {
+		.ctx.actor = ovl_fill_merge,
+		.dentry = dentry,
+		.list = list,
+		.root = root,
+		.is_lowest = false,
+	};
+	int idx, next;
+
+	for (idx = 0; idx != -1; idx = next) {
+		next = ovl_path_next(idx, dentry, &realpath);
+		rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
+
+		if (next != -1) {
+			err = ovl_dir_read(&realpath, &rdd);
+			if (err)
+				break;
+		} else {
+			/*
+			 * Insert lowest layer entries before upper ones, this
+			 * allows offsets to be reasonably constant
+			 */
+			list_add(&rdd.middle, rdd.list);
+			rdd.is_lowest = true;
+			err = ovl_dir_read(&realpath, &rdd);
+			list_del(&rdd.middle);
+		}
+	}
+	return err;
+}
+
+static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
+{
+	struct list_head *p;
+	loff_t off = 0;
+
+	list_for_each(p, &od->cache->entries) {
+		if (off >= pos)
+			break;
+		off++;
+	}
+	/* Cursor is safe since the cache is stable */
+	od->cursor = p;
+}
+
+static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
+{
+	int res;
+	struct ovl_dir_cache *cache;
+
+	cache = ovl_dir_cache(d_inode(dentry));
+	if (cache && ovl_dentry_version_get(dentry) == cache->version) {
+		WARN_ON(!cache->refcount);
+		cache->refcount++;
+		return cache;
+	}
+	ovl_set_dir_cache(d_inode(dentry), NULL);
+
+	cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
+	if (!cache)
+		return ERR_PTR(-ENOMEM);
+
+	cache->refcount = 1;
+	INIT_LIST_HEAD(&cache->entries);
+	cache->root = RB_ROOT;
+
+	res = ovl_dir_read_merged(dentry, &cache->entries, &cache->root);
+	if (res) {
+		ovl_cache_free(&cache->entries);
+		kfree(cache);
+		return ERR_PTR(res);
+	}
+
+	cache->version = ovl_dentry_version_get(dentry);
+	ovl_set_dir_cache(d_inode(dentry), cache);
+
+	return cache;
+}
+
+/* Map inode number to lower fs unique range */
+static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
+			       const char *name, int namelen, bool warn)
+{
+	unsigned int xinoshift = 64 - xinobits;
+
+	if (unlikely(ino >> xinoshift)) {
+		if (warn) {
+			pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n",
+					    namelen, name, ino, xinobits);
+		}
+		return ino;
+	}
+
+	/*
+	 * The lowest xinobit is reserved for mapping the non-peresistent inode
+	 * numbers range, but this range is only exposed via st_ino, not here.
+	 */
+	return ino | ((u64)fsid) << (xinoshift + 1);
+}
+
+/*
+ * Set d_ino for upper entries. Non-upper entries should always report
+ * the uppermost real inode ino and should not call this function.
+ *
+ * When not all layer are on same fs, report real ino also for upper.
+ *
+ * When all layers are on the same fs, and upper has a reference to
+ * copy up origin, call vfs_getattr() on the overlay entry to make
+ * sure that d_ino will be consistent with st_ino from stat(2).
+ */
+static int ovl_cache_update_ino(const struct path *path, struct ovl_cache_entry *p)
+
+{
+	struct dentry *dir = path->dentry;
+	struct dentry *this = NULL;
+	enum ovl_path_type type;
+	u64 ino = p->real_ino;
+	int xinobits = ovl_xino_bits(dir->d_sb);
+	int err = 0;
+
+	if (!ovl_same_dev(dir->d_sb))
+		goto out;
+
+	if (p->name[0] == '.') {
+		if (p->len == 1) {
+			this = dget(dir);
+			goto get;
+		}
+		if (p->len == 2 && p->name[1] == '.') {
+			/* we shall not be moved */
+			this = dget(dir->d_parent);
+			goto get;
+		}
+	}
+	this = lookup_one(mnt_user_ns(path->mnt), p->name, dir, p->len);
+	if (IS_ERR_OR_NULL(this) || !this->d_inode) {
+		/* Mark a stale entry */
+		p->is_whiteout = true;
+		if (IS_ERR(this)) {
+			err = PTR_ERR(this);
+			this = NULL;
+			goto fail;
+		}
+		goto out;
+	}
+
+get:
+	type = ovl_path_type(this);
+	if (OVL_TYPE_ORIGIN(type)) {
+		struct kstat stat;
+		struct path statpath = *path;
+
+		statpath.dentry = this;
+		err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
+		if (err)
+			goto fail;
+
+		/*
+		 * Directory inode is always on overlay st_dev.
+		 * Non-dir with ovl_same_dev() could be on pseudo st_dev in case
+		 * of xino bits overflow.
+		 */
+		WARN_ON_ONCE(S_ISDIR(stat.mode) &&
+			     dir->d_sb->s_dev != stat.dev);
+		ino = stat.ino;
+	} else if (xinobits && !OVL_TYPE_UPPER(type)) {
+		ino = ovl_remap_lower_ino(ino, xinobits,
+					  ovl_layer_lower(this)->fsid,
+					  p->name, p->len,
+					  ovl_xino_warn(dir->d_sb));
+	}
+
+out:
+	p->ino = ino;
+	dput(this);
+	return err;
+
+fail:
+	pr_warn_ratelimited("failed to look up (%s) for ino (%i)\n",
+			    p->name, err);
+	goto out;
+}
+
+static bool ovl_fill_plain(struct dir_context *ctx, const char *name,
+			  int namelen, loff_t offset, u64 ino,
+			  unsigned int d_type)
+{
+	struct ovl_cache_entry *p;
+	struct ovl_readdir_data *rdd =
+		container_of(ctx, struct ovl_readdir_data, ctx);
+
+	rdd->count++;
+	p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
+	if (p == NULL) {
+		rdd->err = -ENOMEM;
+		return false;
+	}
+	list_add_tail(&p->l_node, rdd->list);
+
+	return true;
+}
+
+static int ovl_dir_read_impure(const struct path *path,  struct list_head *list,
+			       struct rb_root *root)
+{
+	int err;
+	struct path realpath;
+	struct ovl_cache_entry *p, *n;
+	struct ovl_readdir_data rdd = {
+		.ctx.actor = ovl_fill_plain,
+		.list = list,
+		.root = root,
+	};
+
+	INIT_LIST_HEAD(list);
+	*root = RB_ROOT;
+	ovl_path_upper(path->dentry, &realpath);
+
+	err = ovl_dir_read(&realpath, &rdd);
+	if (err)
+		return err;
+
+	list_for_each_entry_safe(p, n, list, l_node) {
+		if (strcmp(p->name, ".") != 0 &&
+		    strcmp(p->name, "..") != 0) {
+			err = ovl_cache_update_ino(path, p);
+			if (err)
+				return err;
+		}
+		if (p->ino == p->real_ino) {
+			list_del(&p->l_node);
+			kfree(p);
+		} else {
+			struct rb_node **newp = &root->rb_node;
+			struct rb_node *parent = NULL;
+
+			if (WARN_ON(ovl_cache_entry_find_link(p->name, p->len,
+							      &newp, &parent)))
+				return -EIO;
+
+			rb_link_node(&p->node, parent, newp);
+			rb_insert_color(&p->node, root);
+		}
+	}
+	return 0;
+}
+
+static struct ovl_dir_cache *ovl_cache_get_impure(const struct path *path)
+{
+	int res;
+	struct dentry *dentry = path->dentry;
+	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+	struct ovl_dir_cache *cache;
+
+	cache = ovl_dir_cache(d_inode(dentry));
+	if (cache && ovl_dentry_version_get(dentry) == cache->version)
+		return cache;
+
+	/* Impure cache is not refcounted, free it here */
+	ovl_dir_cache_free(d_inode(dentry));
+	ovl_set_dir_cache(d_inode(dentry), NULL);
+
+	cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
+	if (!cache)
+		return ERR_PTR(-ENOMEM);
+
+	res = ovl_dir_read_impure(path, &cache->entries, &cache->root);
+	if (res) {
+		ovl_cache_free(&cache->entries);
+		kfree(cache);
+		return ERR_PTR(res);
+	}
+	if (list_empty(&cache->entries)) {
+		/*
+		 * A good opportunity to get rid of an unneeded "impure" flag.
+		 * Removing the "impure" xattr is best effort.
+		 */
+		if (!ovl_want_write(dentry)) {
+			ovl_removexattr(ofs, ovl_dentry_upper(dentry),
+					OVL_XATTR_IMPURE);
+			ovl_drop_write(dentry);
+		}
+		ovl_clear_flag(OVL_IMPURE, d_inode(dentry));
+		kfree(cache);
+		return NULL;
+	}
+
+	cache->version = ovl_dentry_version_get(dentry);
+	ovl_set_dir_cache(d_inode(dentry), cache);
+
+	return cache;
+}
+
+struct ovl_readdir_translate {
+	struct dir_context *orig_ctx;
+	struct ovl_dir_cache *cache;
+	struct dir_context ctx;
+	u64 parent_ino;
+	int fsid;
+	int xinobits;
+	bool xinowarn;
+};
+
+static bool ovl_fill_real(struct dir_context *ctx, const char *name,
+			   int namelen, loff_t offset, u64 ino,
+			   unsigned int d_type)
+{
+	struct ovl_readdir_translate *rdt =
+		container_of(ctx, struct ovl_readdir_translate, ctx);
+	struct dir_context *orig_ctx = rdt->orig_ctx;
+
+	if (rdt->parent_ino && strcmp(name, "..") == 0) {
+		ino = rdt->parent_ino;
+	} else if (rdt->cache) {
+		struct ovl_cache_entry *p;
+
+		p = ovl_cache_entry_find(&rdt->cache->root, name, namelen);
+		if (p)
+			ino = p->ino;
+	} else if (rdt->xinobits) {
+		ino = ovl_remap_lower_ino(ino, rdt->xinobits, rdt->fsid,
+					  name, namelen, rdt->xinowarn);
+	}
+
+	return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
+}
+
+static bool ovl_is_impure_dir(struct file *file)
+{
+	struct ovl_dir_file *od = file->private_data;
+	struct inode *dir = d_inode(file->f_path.dentry);
+
+	/*
+	 * Only upper dir can be impure, but if we are in the middle of
+	 * iterating a lower real dir, dir could be copied up and marked
+	 * impure. We only want the impure cache if we started iterating
+	 * a real upper dir to begin with.
+	 */
+	return od->is_upper && ovl_test_flag(OVL_IMPURE, dir);
+
+}
+
+static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
+{
+	int err;
+	struct ovl_dir_file *od = file->private_data;
+	struct dentry *dir = file->f_path.dentry;
+	const struct ovl_layer *lower_layer = ovl_layer_lower(dir);
+	struct ovl_readdir_translate rdt = {
+		.ctx.actor = ovl_fill_real,
+		.orig_ctx = ctx,
+		.xinobits = ovl_xino_bits(dir->d_sb),
+		.xinowarn = ovl_xino_warn(dir->d_sb),
+	};
+
+	if (rdt.xinobits && lower_layer)
+		rdt.fsid = lower_layer->fsid;
+
+	if (OVL_TYPE_MERGE(ovl_path_type(dir->d_parent))) {
+		struct kstat stat;
+		struct path statpath = file->f_path;
+
+		statpath.dentry = dir->d_parent;
+		err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
+		if (err)
+			return err;
+
+		WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev);
+		rdt.parent_ino = stat.ino;
+	}
+
+	if (ovl_is_impure_dir(file)) {
+		rdt.cache = ovl_cache_get_impure(&file->f_path);
+		if (IS_ERR(rdt.cache))
+			return PTR_ERR(rdt.cache);
+	}
+
+	err = iterate_dir(od->realfile, &rdt.ctx);
+	ctx->pos = rdt.ctx.pos;
+
+	return err;
+}
+
+
+static int ovl_iterate(struct file *file, struct dir_context *ctx)
+{
+	struct ovl_dir_file *od = file->private_data;
+	struct dentry *dentry = file->f_path.dentry;
+	struct ovl_cache_entry *p;
+	const struct cred *old_cred;
+	int err;
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	if (!ctx->pos)
+		ovl_dir_reset(file);
+
+	if (od->is_real) {
+		/*
+		 * If parent is merge, then need to adjust d_ino for '..', if
+		 * dir is impure then need to adjust d_ino for copied up
+		 * entries.
+		 */
+		if (ovl_xino_bits(dentry->d_sb) ||
+		    (ovl_same_fs(dentry->d_sb) &&
+		     (ovl_is_impure_dir(file) ||
+		      OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) {
+			err = ovl_iterate_real(file, ctx);
+		} else {
+			err = iterate_dir(od->realfile, ctx);
+		}
+		goto out;
+	}
+
+	if (!od->cache) {
+		struct ovl_dir_cache *cache;
+
+		cache = ovl_cache_get(dentry);
+		err = PTR_ERR(cache);
+		if (IS_ERR(cache))
+			goto out;
+
+		od->cache = cache;
+		ovl_seek_cursor(od, ctx->pos);
+	}
+
+	while (od->cursor != &od->cache->entries) {
+		p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
+		if (!p->is_whiteout) {
+			if (!p->ino) {
+				err = ovl_cache_update_ino(&file->f_path, p);
+				if (err)
+					goto out;
+			}
+		}
+		/* ovl_cache_update_ino() sets is_whiteout on stale entry */
+		if (!p->is_whiteout) {
+			if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
+				break;
+		}
+		od->cursor = p->l_node.next;
+		ctx->pos++;
+	}
+	err = 0;
+out:
+	revert_creds(old_cred);
+	return err;
+}
+
+static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
+{
+	loff_t res;
+	struct ovl_dir_file *od = file->private_data;
+
+	inode_lock(file_inode(file));
+	if (!file->f_pos)
+		ovl_dir_reset(file);
+
+	if (od->is_real) {
+		res = vfs_llseek(od->realfile, offset, origin);
+		file->f_pos = od->realfile->f_pos;
+	} else {
+		res = -EINVAL;
+
+		switch (origin) {
+		case SEEK_CUR:
+			offset += file->f_pos;
+			break;
+		case SEEK_SET:
+			break;
+		default:
+			goto out_unlock;
+		}
+		if (offset < 0)
+			goto out_unlock;
+
+		if (offset != file->f_pos) {
+			file->f_pos = offset;
+			if (od->cache)
+				ovl_seek_cursor(od, offset);
+		}
+		res = offset;
+	}
+out_unlock:
+	inode_unlock(file_inode(file));
+
+	return res;
+}
+
+static struct file *ovl_dir_open_realfile(const struct file *file,
+					  const struct path *realpath)
+{
+	struct file *res;
+	const struct cred *old_cred;
+
+	old_cred = ovl_override_creds(file_inode(file)->i_sb);
+	res = ovl_path_open(realpath, O_RDONLY | (file->f_flags & O_LARGEFILE));
+	revert_creds(old_cred);
+
+	return res;
+}
+
+/*
+ * Like ovl_real_fdget(), returns upperfile if dir was copied up since open.
+ * Unlike ovl_real_fdget(), this caches upperfile in file->private_data.
+ *
+ * TODO: use same abstract type for file->private_data of dir and file so
+ * upperfile could also be cached for files as well.
+ */
+struct file *ovl_dir_real_file(const struct file *file, bool want_upper)
+{
+
+	struct ovl_dir_file *od = file->private_data;
+	struct dentry *dentry = file->f_path.dentry;
+	struct file *old, *realfile = od->realfile;
+
+	if (!OVL_TYPE_UPPER(ovl_path_type(dentry)))
+		return want_upper ? NULL : realfile;
+
+	/*
+	 * Need to check if we started out being a lower dir, but got copied up
+	 */
+	if (!od->is_upper) {
+		realfile = READ_ONCE(od->upperfile);
+		if (!realfile) {
+			struct path upperpath;
+
+			ovl_path_upper(dentry, &upperpath);
+			realfile = ovl_dir_open_realfile(file, &upperpath);
+			if (IS_ERR(realfile))
+				return realfile;
+
+			old = cmpxchg_release(&od->upperfile, NULL, realfile);
+			if (old) {
+				fput(realfile);
+				realfile = old;
+			}
+		}
+	}
+
+	return realfile;
+}
+
+static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
+			 int datasync)
+{
+	struct file *realfile;
+	int err;
+
+	err = ovl_sync_status(OVL_FS(file->f_path.dentry->d_sb));
+	if (err <= 0)
+		return err;
+
+	realfile = ovl_dir_real_file(file, true);
+	err = PTR_ERR_OR_ZERO(realfile);
+
+	/* Nothing to sync for lower */
+	if (!realfile || err)
+		return err;
+
+	return vfs_fsync_range(realfile, start, end, datasync);
+}
+
+static int ovl_dir_release(struct inode *inode, struct file *file)
+{
+	struct ovl_dir_file *od = file->private_data;
+
+	if (od->cache) {
+		inode_lock(inode);
+		ovl_cache_put(od, file->f_path.dentry);
+		inode_unlock(inode);
+	}
+	fput(od->realfile);
+	if (od->upperfile)
+		fput(od->upperfile);
+	kfree(od);
+
+	return 0;
+}
+
+static int ovl_dir_open(struct inode *inode, struct file *file)
+{
+	struct path realpath;
+	struct file *realfile;
+	struct ovl_dir_file *od;
+	enum ovl_path_type type;
+
+	od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
+	if (!od)
+		return -ENOMEM;
+
+	type = ovl_path_real(file->f_path.dentry, &realpath);
+	realfile = ovl_dir_open_realfile(file, &realpath);
+	if (IS_ERR(realfile)) {
+		kfree(od);
+		return PTR_ERR(realfile);
+	}
+	od->realfile = realfile;
+	od->is_real = ovl_dir_is_real(file->f_path.dentry);
+	od->is_upper = OVL_TYPE_UPPER(type);
+	file->private_data = od;
+
+	return 0;
+}
+
+const struct file_operations ovl_dir_operations = {
+	.read		= generic_read_dir,
+	.open		= ovl_dir_open,
+	.iterate	= ovl_iterate,
+	.llseek		= ovl_dir_llseek,
+	.fsync		= ovl_dir_fsync,
+	.release	= ovl_dir_release,
+};
+
+int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
+{
+	int err;
+	struct ovl_cache_entry *p, *n;
+	struct rb_root root = RB_ROOT;
+	const struct cred *old_cred;
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	err = ovl_dir_read_merged(dentry, list, &root);
+	revert_creds(old_cred);
+	if (err)
+		return err;
+
+	err = 0;
+
+	list_for_each_entry_safe(p, n, list, l_node) {
+		/*
+		 * Select whiteouts in upperdir, they should
+		 * be cleared when deleting this directory.
+		 */
+		if (p->is_whiteout) {
+			if (p->is_upper)
+				continue;
+			goto del_entry;
+		}
+
+		if (p->name[0] == '.') {
+			if (p->len == 1)
+				goto del_entry;
+			if (p->len == 2 && p->name[1] == '.')
+				goto del_entry;
+		}
+		err = -ENOTEMPTY;
+		break;
+
+del_entry:
+		list_del(&p->l_node);
+		kfree(p);
+	}
+
+	return err;
+}
+
+void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper,
+			   struct list_head *list)
+{
+	struct ovl_cache_entry *p;
+
+	inode_lock_nested(upper->d_inode, I_MUTEX_CHILD);
+	list_for_each_entry(p, list, l_node) {
+		struct dentry *dentry;
+
+		if (WARN_ON(!p->is_whiteout || !p->is_upper))
+			continue;
+
+		dentry = ovl_lookup_upper(ofs, p->name, upper, p->len);
+		if (IS_ERR(dentry)) {
+			pr_err("lookup '%s/%.*s' failed (%i)\n",
+			       upper->d_name.name, p->len, p->name,
+			       (int) PTR_ERR(dentry));
+			continue;
+		}
+		if (dentry->d_inode)
+			ovl_cleanup(ofs, upper->d_inode, dentry);
+		dput(dentry);
+	}
+	inode_unlock(upper->d_inode);
+}
+
+static bool ovl_check_d_type(struct dir_context *ctx, const char *name,
+			  int namelen, loff_t offset, u64 ino,
+			  unsigned int d_type)
+{
+	struct ovl_readdir_data *rdd =
+		container_of(ctx, struct ovl_readdir_data, ctx);
+
+	/* Even if d_type is not supported, DT_DIR is returned for . and .. */
+	if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen))
+		return true;
+
+	if (d_type != DT_UNKNOWN)
+		rdd->d_type_supported = true;
+
+	return true;
+}
+
+/*
+ * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values
+ * if error is encountered.
+ */
+int ovl_check_d_type_supported(const struct path *realpath)
+{
+	int err;
+	struct ovl_readdir_data rdd = {
+		.ctx.actor = ovl_check_d_type,
+		.d_type_supported = false,
+	};
+
+	err = ovl_dir_read(realpath, &rdd);
+	if (err)
+		return err;
+
+	return rdd.d_type_supported;
+}
+
+#define OVL_INCOMPATDIR_NAME "incompat"
+
+static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *path,
+				       int level)
+{
+	int err;
+	struct inode *dir = path->dentry->d_inode;
+	LIST_HEAD(list);
+	struct rb_root root = RB_ROOT;
+	struct ovl_cache_entry *p;
+	struct ovl_readdir_data rdd = {
+		.ctx.actor = ovl_fill_merge,
+		.dentry = NULL,
+		.list = &list,
+		.root = &root,
+		.is_lowest = false,
+	};
+	bool incompat = false;
+
+	/*
+	 * The "work/incompat" directory is treated specially - if it is not
+	 * empty, instead of printing a generic error and mounting read-only,
+	 * we will error about incompat features and fail the mount.
+	 *
+	 * When called from ovl_indexdir_cleanup(), path->dentry->d_name.name
+	 * starts with '#'.
+	 */
+	if (level == 2 &&
+	    !strcmp(path->dentry->d_name.name, OVL_INCOMPATDIR_NAME))
+		incompat = true;
+
+	err = ovl_dir_read(path, &rdd);
+	if (err)
+		goto out;
+
+	inode_lock_nested(dir, I_MUTEX_PARENT);
+	list_for_each_entry(p, &list, l_node) {
+		struct dentry *dentry;
+
+		if (p->name[0] == '.') {
+			if (p->len == 1)
+				continue;
+			if (p->len == 2 && p->name[1] == '.')
+				continue;
+		} else if (incompat) {
+			pr_err("overlay with incompat feature '%s' cannot be mounted\n",
+				p->name);
+			err = -EINVAL;
+			break;
+		}
+		dentry = ovl_lookup_upper(ofs, p->name, path->dentry, p->len);
+		if (IS_ERR(dentry))
+			continue;
+		if (dentry->d_inode)
+			err = ovl_workdir_cleanup(ofs, dir, path->mnt, dentry, level);
+		dput(dentry);
+		if (err)
+			break;
+	}
+	inode_unlock(dir);
+out:
+	ovl_cache_free(&list);
+	return err;
+}
+
+int ovl_workdir_cleanup(struct ovl_fs *ofs, struct inode *dir,
+			struct vfsmount *mnt, struct dentry *dentry, int level)
+{
+	int err;
+
+	if (!d_is_dir(dentry) || level > 1) {
+		return ovl_cleanup(ofs, dir, dentry);
+	}
+
+	err = ovl_do_rmdir(ofs, dir, dentry);
+	if (err) {
+		struct path path = { .mnt = mnt, .dentry = dentry };
+
+		inode_unlock(dir);
+		err = ovl_workdir_cleanup_recurse(ofs, &path, level + 1);
+		inode_lock_nested(dir, I_MUTEX_PARENT);
+		if (!err)
+			err = ovl_cleanup(ofs, dir, dentry);
+	}
+
+	return err;
+}
+
+int ovl_indexdir_cleanup(struct ovl_fs *ofs)
+{
+	int err;
+	struct dentry *indexdir = ofs->indexdir;
+	struct dentry *index = NULL;
+	struct inode *dir = indexdir->d_inode;
+	struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = indexdir };
+	LIST_HEAD(list);
+	struct rb_root root = RB_ROOT;
+	struct ovl_cache_entry *p;
+	struct ovl_readdir_data rdd = {
+		.ctx.actor = ovl_fill_merge,
+		.dentry = NULL,
+		.list = &list,
+		.root = &root,
+		.is_lowest = false,
+	};
+
+	err = ovl_dir_read(&path, &rdd);
+	if (err)
+		goto out;
+
+	inode_lock_nested(dir, I_MUTEX_PARENT);
+	list_for_each_entry(p, &list, l_node) {
+		if (p->name[0] == '.') {
+			if (p->len == 1)
+				continue;
+			if (p->len == 2 && p->name[1] == '.')
+				continue;
+		}
+		index = ovl_lookup_upper(ofs, p->name, indexdir, p->len);
+		if (IS_ERR(index)) {
+			err = PTR_ERR(index);
+			index = NULL;
+			break;
+		}
+		/* Cleanup leftover from index create/cleanup attempt */
+		if (index->d_name.name[0] == '#') {
+			err = ovl_workdir_cleanup(ofs, dir, path.mnt, index, 1);
+			if (err)
+				break;
+			goto next;
+		}
+		err = ovl_verify_index(ofs, index);
+		if (!err) {
+			goto next;
+		} else if (err == -ESTALE) {
+			/* Cleanup stale index entries */
+			err = ovl_cleanup(ofs, dir, index);
+		} else if (err != -ENOENT) {
+			/*
+			 * Abort mount to avoid corrupting the index if
+			 * an incompatible index entry was found or on out
+			 * of memory.
+			 */
+			break;
+		} else if (ofs->config.nfs_export) {
+			/*
+			 * Whiteout orphan index to block future open by
+			 * handle after overlay nlink dropped to zero.
+			 */
+			err = ovl_cleanup_and_whiteout(ofs, dir, index);
+		} else {
+			/* Cleanup orphan index entries */
+			err = ovl_cleanup(ofs, dir, index);
+		}
+
+		if (err)
+			break;
+
+next:
+		dput(index);
+		index = NULL;
+	}
+	dput(index);
+	inode_unlock(dir);
+out:
+	ovl_cache_free(&list);
+	if (err)
+		pr_err("failed index dir cleanup (%i)\n", err);
+	return err;
+}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
new file mode 100644
index 000000000..51eec4a8e
--- /dev/null
+++ b/fs/overlayfs/super.c
@@ -0,0 +1,2244 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright (C) 2011 Novell Inc.
+ */
+
+#include <uapi/linux/magic.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/xattr.h>
+#include <linux/mount.h>
+#include <linux/parser.h>
+#include <linux/module.h>
+#include <linux/statfs.h>
+#include <linux/seq_file.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/exportfs.h>
+#include <linux/file.h>
+#include "overlayfs.h"
+
+MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
+MODULE_DESCRIPTION("Overlay filesystem");
+MODULE_LICENSE("GPL");
+
+
+struct ovl_dir_cache;
+
+#define OVL_MAX_STACK 500
+
+static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
+module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
+MODULE_PARM_DESC(redirect_dir,
+		 "Default to on or off for the redirect_dir feature");
+
+static bool ovl_redirect_always_follow =
+	IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
+module_param_named(redirect_always_follow, ovl_redirect_always_follow,
+		   bool, 0644);
+MODULE_PARM_DESC(redirect_always_follow,
+		 "Follow redirects even if redirect_dir feature is turned off");
+
+static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
+module_param_named(index, ovl_index_def, bool, 0644);
+MODULE_PARM_DESC(index,
+		 "Default to on or off for the inodes index feature");
+
+static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT);
+module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
+MODULE_PARM_DESC(nfs_export,
+		 "Default to on or off for the NFS export feature");
+
+static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO);
+module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644);
+MODULE_PARM_DESC(xino_auto,
+		 "Auto enable xino feature");
+
+static void ovl_entry_stack_free(struct ovl_entry *oe)
+{
+	unsigned int i;
+
+	for (i = 0; i < oe->numlower; i++)
+		dput(oe->lowerstack[i].dentry);
+}
+
+static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY);
+module_param_named(metacopy, ovl_metacopy_def, bool, 0644);
+MODULE_PARM_DESC(metacopy,
+		 "Default to on or off for the metadata only copy up feature");
+
+static void ovl_dentry_release(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	if (oe) {
+		ovl_entry_stack_free(oe);
+		kfree_rcu(oe, rcu);
+	}
+}
+
+static struct dentry *ovl_d_real(struct dentry *dentry,
+				 const struct inode *inode)
+{
+	struct dentry *real = NULL, *lower;
+
+	/* It's an overlay file */
+	if (inode && d_inode(dentry) == inode)
+		return dentry;
+
+	if (!d_is_reg(dentry)) {
+		if (!inode || inode == d_inode(dentry))
+			return dentry;
+		goto bug;
+	}
+
+	real = ovl_dentry_upper(dentry);
+	if (real && (inode == d_inode(real)))
+		return real;
+
+	if (real && !inode && ovl_has_upperdata(d_inode(dentry)))
+		return real;
+
+	lower = ovl_dentry_lowerdata(dentry);
+	if (!lower)
+		goto bug;
+	real = lower;
+
+	/* Handle recursion */
+	real = d_real(real, inode);
+
+	if (!inode || inode == d_inode(real))
+		return real;
+bug:
+	WARN(1, "%s(%pd4, %s:%lu): real dentry (%p/%lu) not found\n",
+	     __func__, dentry, inode ? inode->i_sb->s_id : "NULL",
+	     inode ? inode->i_ino : 0, real,
+	     real && d_inode(real) ? d_inode(real)->i_ino : 0);
+	return dentry;
+}
+
+static int ovl_revalidate_real(struct dentry *d, unsigned int flags, bool weak)
+{
+	int ret = 1;
+
+	if (weak) {
+		if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE)
+			ret =  d->d_op->d_weak_revalidate(d, flags);
+	} else if (d->d_flags & DCACHE_OP_REVALIDATE) {
+		ret = d->d_op->d_revalidate(d, flags);
+		if (!ret) {
+			if (!(flags & LOOKUP_RCU))
+				d_invalidate(d);
+			ret = -ESTALE;
+		}
+	}
+	return ret;
+}
+
+static int ovl_dentry_revalidate_common(struct dentry *dentry,
+					unsigned int flags, bool weak)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+	struct inode *inode = d_inode_rcu(dentry);
+	struct dentry *upper;
+	unsigned int i;
+	int ret = 1;
+
+	/* Careful in RCU mode */
+	if (!inode)
+		return -ECHILD;
+
+	upper = ovl_i_dentry_upper(inode);
+	if (upper)
+		ret = ovl_revalidate_real(upper, flags, weak);
+
+	for (i = 0; ret > 0 && i < oe->numlower; i++) {
+		ret = ovl_revalidate_real(oe->lowerstack[i].dentry, flags,
+					  weak);
+	}
+	return ret;
+}
+
+static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
+{
+	return ovl_dentry_revalidate_common(dentry, flags, false);
+}
+
+static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
+{
+	return ovl_dentry_revalidate_common(dentry, flags, true);
+}
+
+static const struct dentry_operations ovl_dentry_operations = {
+	.d_release = ovl_dentry_release,
+	.d_real = ovl_d_real,
+	.d_revalidate = ovl_dentry_revalidate,
+	.d_weak_revalidate = ovl_dentry_weak_revalidate,
+};
+
+static struct kmem_cache *ovl_inode_cachep;
+
+static struct inode *ovl_alloc_inode(struct super_block *sb)
+{
+	struct ovl_inode *oi = alloc_inode_sb(sb, ovl_inode_cachep, GFP_KERNEL);
+
+	if (!oi)
+		return NULL;
+
+	oi->cache = NULL;
+	oi->redirect = NULL;
+	oi->version = 0;
+	oi->flags = 0;
+	oi->__upperdentry = NULL;
+	oi->lowerpath.dentry = NULL;
+	oi->lowerpath.layer = NULL;
+	oi->lowerdata = NULL;
+	mutex_init(&oi->lock);
+
+	return &oi->vfs_inode;
+}
+
+static void ovl_free_inode(struct inode *inode)
+{
+	struct ovl_inode *oi = OVL_I(inode);
+
+	kfree(oi->redirect);
+	mutex_destroy(&oi->lock);
+	kmem_cache_free(ovl_inode_cachep, oi);
+}
+
+static void ovl_destroy_inode(struct inode *inode)
+{
+	struct ovl_inode *oi = OVL_I(inode);
+
+	dput(oi->__upperdentry);
+	dput(oi->lowerpath.dentry);
+	if (S_ISDIR(inode->i_mode))
+		ovl_dir_cache_free(inode);
+	else
+		iput(oi->lowerdata);
+}
+
+static void ovl_free_fs(struct ovl_fs *ofs)
+{
+	struct vfsmount **mounts;
+	unsigned i;
+
+	iput(ofs->workbasedir_trap);
+	iput(ofs->indexdir_trap);
+	iput(ofs->workdir_trap);
+	dput(ofs->whiteout);
+	dput(ofs->indexdir);
+	dput(ofs->workdir);
+	if (ofs->workdir_locked)
+		ovl_inuse_unlock(ofs->workbasedir);
+	dput(ofs->workbasedir);
+	if (ofs->upperdir_locked)
+		ovl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root);
+
+	/* Hack!  Reuse ofs->layers as a vfsmount array before freeing it */
+	mounts = (struct vfsmount **) ofs->layers;
+	for (i = 0; i < ofs->numlayer; i++) {
+		iput(ofs->layers[i].trap);
+		mounts[i] = ofs->layers[i].mnt;
+	}
+	kern_unmount_array(mounts, ofs->numlayer);
+	kfree(ofs->layers);
+	for (i = 0; i < ofs->numfs; i++)
+		free_anon_bdev(ofs->fs[i].pseudo_dev);
+	kfree(ofs->fs);
+
+	kfree(ofs->config.lowerdir);
+	kfree(ofs->config.upperdir);
+	kfree(ofs->config.workdir);
+	kfree(ofs->config.redirect_mode);
+	if (ofs->creator_cred)
+		put_cred(ofs->creator_cred);
+	kfree(ofs);
+}
+
+static void ovl_put_super(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	ovl_free_fs(ofs);
+}
+
+/* Sync real dirty inodes in upper filesystem (if it exists) */
+static int ovl_sync_fs(struct super_block *sb, int wait)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+	struct super_block *upper_sb;
+	int ret;
+
+	ret = ovl_sync_status(ofs);
+	/*
+	 * We have to always set the err, because the return value isn't
+	 * checked in syncfs, and instead indirectly return an error via
+	 * the sb's writeback errseq, which VFS inspects after this call.
+	 */
+	if (ret < 0) {
+		errseq_set(&sb->s_wb_err, -EIO);
+		return -EIO;
+	}
+
+	if (!ret)
+		return ret;
+
+	/*
+	 * Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC).
+	 * All the super blocks will be iterated, including upper_sb.
+	 *
+	 * If this is a syncfs(2) call, then we do need to call
+	 * sync_filesystem() on upper_sb, but enough if we do it when being
+	 * called with wait == 1.
+	 */
+	if (!wait)
+		return 0;
+
+	upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
+
+	down_read(&upper_sb->s_umount);
+	ret = sync_filesystem(upper_sb);
+	up_read(&upper_sb->s_umount);
+
+	return ret;
+}
+
+/**
+ * ovl_statfs
+ * @dentry: The dentry to query
+ * @buf: The struct kstatfs to fill in with stats
+ *
+ * Get the filesystem statistics.  As writes always target the upper layer
+ * filesystem pass the statfs to the upper filesystem (if it exists)
+ */
+static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+	struct dentry *root_dentry = dentry->d_sb->s_root;
+	struct path path;
+	int err;
+
+	ovl_path_real(root_dentry, &path);
+
+	err = vfs_statfs(&path, buf);
+	if (!err) {
+		buf->f_namelen = ofs->namelen;
+		buf->f_type = OVERLAYFS_SUPER_MAGIC;
+	}
+
+	return err;
+}
+
+/* Will this overlay be forced to mount/remount ro? */
+static bool ovl_force_readonly(struct ovl_fs *ofs)
+{
+	return (!ovl_upper_mnt(ofs) || !ofs->workdir);
+}
+
+static const char *ovl_redirect_mode_def(void)
+{
+	return ovl_redirect_dir_def ? "on" : "off";
+}
+
+static const char * const ovl_xino_str[] = {
+	"off",
+	"auto",
+	"on",
+};
+
+static inline int ovl_xino_def(void)
+{
+	return ovl_xino_auto_def ? OVL_XINO_AUTO : OVL_XINO_OFF;
+}
+
+/**
+ * ovl_show_options
+ * @m: the seq_file handle
+ * @dentry: The dentry to query
+ *
+ * Prints the mount options for a given superblock.
+ * Returns zero; does not fail.
+ */
+static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
+{
+	struct super_block *sb = dentry->d_sb;
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	seq_show_option(m, "lowerdir", ofs->config.lowerdir);
+	if (ofs->config.upperdir) {
+		seq_show_option(m, "upperdir", ofs->config.upperdir);
+		seq_show_option(m, "workdir", ofs->config.workdir);
+	}
+	if (ofs->config.default_permissions)
+		seq_puts(m, ",default_permissions");
+	if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0)
+		seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
+	if (ofs->config.index != ovl_index_def)
+		seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
+	if (!ofs->config.uuid)
+		seq_puts(m, ",uuid=off");
+	if (ofs->config.nfs_export != ovl_nfs_export_def)
+		seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
+						"on" : "off");
+	if (ofs->config.xino != ovl_xino_def() && !ovl_same_fs(sb))
+		seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]);
+	if (ofs->config.metacopy != ovl_metacopy_def)
+		seq_printf(m, ",metacopy=%s",
+			   ofs->config.metacopy ? "on" : "off");
+	if (ofs->config.ovl_volatile)
+		seq_puts(m, ",volatile");
+	if (ofs->config.userxattr)
+		seq_puts(m, ",userxattr");
+	return 0;
+}
+
+static int ovl_remount(struct super_block *sb, int *flags, char *data)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+	struct super_block *upper_sb;
+	int ret = 0;
+
+	if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs))
+		return -EROFS;
+
+	if (*flags & SB_RDONLY && !sb_rdonly(sb)) {
+		upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
+		if (ovl_should_sync(ofs)) {
+			down_read(&upper_sb->s_umount);
+			ret = sync_filesystem(upper_sb);
+			up_read(&upper_sb->s_umount);
+		}
+	}
+
+	return ret;
+}
+
+static const struct super_operations ovl_super_operations = {
+	.alloc_inode	= ovl_alloc_inode,
+	.free_inode	= ovl_free_inode,
+	.destroy_inode	= ovl_destroy_inode,
+	.drop_inode	= generic_delete_inode,
+	.put_super	= ovl_put_super,
+	.sync_fs	= ovl_sync_fs,
+	.statfs		= ovl_statfs,
+	.show_options	= ovl_show_options,
+	.remount_fs	= ovl_remount,
+};
+
+enum {
+	OPT_LOWERDIR,
+	OPT_UPPERDIR,
+	OPT_WORKDIR,
+	OPT_DEFAULT_PERMISSIONS,
+	OPT_REDIRECT_DIR,
+	OPT_INDEX_ON,
+	OPT_INDEX_OFF,
+	OPT_UUID_ON,
+	OPT_UUID_OFF,
+	OPT_NFS_EXPORT_ON,
+	OPT_USERXATTR,
+	OPT_NFS_EXPORT_OFF,
+	OPT_XINO_ON,
+	OPT_XINO_OFF,
+	OPT_XINO_AUTO,
+	OPT_METACOPY_ON,
+	OPT_METACOPY_OFF,
+	OPT_VOLATILE,
+	OPT_ERR,
+};
+
+static const match_table_t ovl_tokens = {
+	{OPT_LOWERDIR,			"lowerdir=%s"},
+	{OPT_UPPERDIR,			"upperdir=%s"},
+	{OPT_WORKDIR,			"workdir=%s"},
+	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
+	{OPT_REDIRECT_DIR,		"redirect_dir=%s"},
+	{OPT_INDEX_ON,			"index=on"},
+	{OPT_INDEX_OFF,			"index=off"},
+	{OPT_USERXATTR,			"userxattr"},
+	{OPT_UUID_ON,			"uuid=on"},
+	{OPT_UUID_OFF,			"uuid=off"},
+	{OPT_NFS_EXPORT_ON,		"nfs_export=on"},
+	{OPT_NFS_EXPORT_OFF,		"nfs_export=off"},
+	{OPT_XINO_ON,			"xino=on"},
+	{OPT_XINO_OFF,			"xino=off"},
+	{OPT_XINO_AUTO,			"xino=auto"},
+	{OPT_METACOPY_ON,		"metacopy=on"},
+	{OPT_METACOPY_OFF,		"metacopy=off"},
+	{OPT_VOLATILE,			"volatile"},
+	{OPT_ERR,			NULL}
+};
+
+static char *ovl_next_opt(char **s)
+{
+	char *sbegin = *s;
+	char *p;
+
+	if (sbegin == NULL)
+		return NULL;
+
+	for (p = sbegin; *p; p++) {
+		if (*p == '\\') {
+			p++;
+			if (!*p)
+				break;
+		} else if (*p == ',') {
+			*p = '\0';
+			*s = p + 1;
+			return sbegin;
+		}
+	}
+	*s = NULL;
+	return sbegin;
+}
+
+static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
+{
+	if (strcmp(mode, "on") == 0) {
+		config->redirect_dir = true;
+		/*
+		 * Does not make sense to have redirect creation without
+		 * redirect following.
+		 */
+		config->redirect_follow = true;
+	} else if (strcmp(mode, "follow") == 0) {
+		config->redirect_follow = true;
+	} else if (strcmp(mode, "off") == 0) {
+		if (ovl_redirect_always_follow)
+			config->redirect_follow = true;
+	} else if (strcmp(mode, "nofollow") != 0) {
+		pr_err("bad mount option \"redirect_dir=%s\"\n",
+		       mode);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ovl_parse_opt(char *opt, struct ovl_config *config)
+{
+	char *p;
+	int err;
+	bool metacopy_opt = false, redirect_opt = false;
+	bool nfs_export_opt = false, index_opt = false;
+
+	config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
+	if (!config->redirect_mode)
+		return -ENOMEM;
+
+	while ((p = ovl_next_opt(&opt)) != NULL) {
+		int token;
+		substring_t args[MAX_OPT_ARGS];
+
+		if (!*p)
+			continue;
+
+		token = match_token(p, ovl_tokens, args);
+		switch (token) {
+		case OPT_UPPERDIR:
+			kfree(config->upperdir);
+			config->upperdir = match_strdup(&args[0]);
+			if (!config->upperdir)
+				return -ENOMEM;
+			break;
+
+		case OPT_LOWERDIR:
+			kfree(config->lowerdir);
+			config->lowerdir = match_strdup(&args[0]);
+			if (!config->lowerdir)
+				return -ENOMEM;
+			break;
+
+		case OPT_WORKDIR:
+			kfree(config->workdir);
+			config->workdir = match_strdup(&args[0]);
+			if (!config->workdir)
+				return -ENOMEM;
+			break;
+
+		case OPT_DEFAULT_PERMISSIONS:
+			config->default_permissions = true;
+			break;
+
+		case OPT_REDIRECT_DIR:
+			kfree(config->redirect_mode);
+			config->redirect_mode = match_strdup(&args[0]);
+			if (!config->redirect_mode)
+				return -ENOMEM;
+			redirect_opt = true;
+			break;
+
+		case OPT_INDEX_ON:
+			config->index = true;
+			index_opt = true;
+			break;
+
+		case OPT_INDEX_OFF:
+			config->index = false;
+			index_opt = true;
+			break;
+
+		case OPT_UUID_ON:
+			config->uuid = true;
+			break;
+
+		case OPT_UUID_OFF:
+			config->uuid = false;
+			break;
+
+		case OPT_NFS_EXPORT_ON:
+			config->nfs_export = true;
+			nfs_export_opt = true;
+			break;
+
+		case OPT_NFS_EXPORT_OFF:
+			config->nfs_export = false;
+			nfs_export_opt = true;
+			break;
+
+		case OPT_XINO_ON:
+			config->xino = OVL_XINO_ON;
+			break;
+
+		case OPT_XINO_OFF:
+			config->xino = OVL_XINO_OFF;
+			break;
+
+		case OPT_XINO_AUTO:
+			config->xino = OVL_XINO_AUTO;
+			break;
+
+		case OPT_METACOPY_ON:
+			config->metacopy = true;
+			metacopy_opt = true;
+			break;
+
+		case OPT_METACOPY_OFF:
+			config->metacopy = false;
+			metacopy_opt = true;
+			break;
+
+		case OPT_VOLATILE:
+			config->ovl_volatile = true;
+			break;
+
+		case OPT_USERXATTR:
+			config->userxattr = true;
+			break;
+
+		default:
+			pr_err("unrecognized mount option \"%s\" or missing value\n",
+					p);
+			return -EINVAL;
+		}
+	}
+
+	/* Workdir/index are useless in non-upper mount */
+	if (!config->upperdir) {
+		if (config->workdir) {
+			pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
+				config->workdir);
+			kfree(config->workdir);
+			config->workdir = NULL;
+		}
+		if (config->index && index_opt) {
+			pr_info("option \"index=on\" is useless in a non-upper mount, ignore\n");
+			index_opt = false;
+		}
+		config->index = false;
+	}
+
+	if (!config->upperdir && config->ovl_volatile) {
+		pr_info("option \"volatile\" is meaningless in a non-upper mount, ignoring it.\n");
+		config->ovl_volatile = false;
+	}
+
+	err = ovl_parse_redirect_mode(config, config->redirect_mode);
+	if (err)
+		return err;
+
+	/*
+	 * This is to make the logic below simpler.  It doesn't make any other
+	 * difference, since config->redirect_dir is only used for upper.
+	 */
+	if (!config->upperdir && config->redirect_follow)
+		config->redirect_dir = true;
+
+	/* Resolve metacopy -> redirect_dir dependency */
+	if (config->metacopy && !config->redirect_dir) {
+		if (metacopy_opt && redirect_opt) {
+			pr_err("conflicting options: metacopy=on,redirect_dir=%s\n",
+			       config->redirect_mode);
+			return -EINVAL;
+		}
+		if (redirect_opt) {
+			/*
+			 * There was an explicit redirect_dir=... that resulted
+			 * in this conflict.
+			 */
+			pr_info("disabling metacopy due to redirect_dir=%s\n",
+				config->redirect_mode);
+			config->metacopy = false;
+		} else {
+			/* Automatically enable redirect otherwise. */
+			config->redirect_follow = config->redirect_dir = true;
+		}
+	}
+
+	/* Resolve nfs_export -> index dependency */
+	if (config->nfs_export && !config->index) {
+		if (!config->upperdir && config->redirect_follow) {
+			pr_info("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
+			config->nfs_export = false;
+		} else if (nfs_export_opt && index_opt) {
+			pr_err("conflicting options: nfs_export=on,index=off\n");
+			return -EINVAL;
+		} else if (index_opt) {
+			/*
+			 * There was an explicit index=off that resulted
+			 * in this conflict.
+			 */
+			pr_info("disabling nfs_export due to index=off\n");
+			config->nfs_export = false;
+		} else {
+			/* Automatically enable index otherwise. */
+			config->index = true;
+		}
+	}
+
+	/* Resolve nfs_export -> !metacopy dependency */
+	if (config->nfs_export && config->metacopy) {
+		if (nfs_export_opt && metacopy_opt) {
+			pr_err("conflicting options: nfs_export=on,metacopy=on\n");
+			return -EINVAL;
+		}
+		if (metacopy_opt) {
+			/*
+			 * There was an explicit metacopy=on that resulted
+			 * in this conflict.
+			 */
+			pr_info("disabling nfs_export due to metacopy=on\n");
+			config->nfs_export = false;
+		} else {
+			/*
+			 * There was an explicit nfs_export=on that resulted
+			 * in this conflict.
+			 */
+			pr_info("disabling metacopy due to nfs_export=on\n");
+			config->metacopy = false;
+		}
+	}
+
+
+	/* Resolve userxattr -> !redirect && !metacopy dependency */
+	if (config->userxattr) {
+		if (config->redirect_follow && redirect_opt) {
+			pr_err("conflicting options: userxattr,redirect_dir=%s\n",
+			       config->redirect_mode);
+			return -EINVAL;
+		}
+		if (config->metacopy && metacopy_opt) {
+			pr_err("conflicting options: userxattr,metacopy=on\n");
+			return -EINVAL;
+		}
+		/*
+		 * Silently disable default setting of redirect and metacopy.
+		 * This shall be the default in the future as well: these
+		 * options must be explicitly enabled if used together with
+		 * userxattr.
+		 */
+		config->redirect_dir = config->redirect_follow = false;
+		config->metacopy = false;
+	}
+
+	return 0;
+}
+
+#define OVL_WORKDIR_NAME "work"
+#define OVL_INDEXDIR_NAME "index"
+
+static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
+					 const char *name, bool persist)
+{
+	struct inode *dir =  ofs->workbasedir->d_inode;
+	struct vfsmount *mnt = ovl_upper_mnt(ofs);
+	struct dentry *work;
+	int err;
+	bool retried = false;
+
+	inode_lock_nested(dir, I_MUTEX_PARENT);
+retry:
+	work = ovl_lookup_upper(ofs, name, ofs->workbasedir, strlen(name));
+
+	if (!IS_ERR(work)) {
+		struct iattr attr = {
+			.ia_valid = ATTR_MODE,
+			.ia_mode = S_IFDIR | 0,
+		};
+
+		if (work->d_inode) {
+			err = -EEXIST;
+			if (retried)
+				goto out_dput;
+
+			if (persist)
+				goto out_unlock;
+
+			retried = true;
+			err = ovl_workdir_cleanup(ofs, dir, mnt, work, 0);
+			dput(work);
+			if (err == -EINVAL) {
+				work = ERR_PTR(err);
+				goto out_unlock;
+			}
+			goto retry;
+		}
+
+		err = ovl_mkdir_real(ofs, dir, &work, attr.ia_mode);
+		if (err)
+			goto out_dput;
+
+		/* Weird filesystem returning with hashed negative (kernfs)? */
+		err = -EINVAL;
+		if (d_really_is_negative(work))
+			goto out_dput;
+
+		/*
+		 * Try to remove POSIX ACL xattrs from workdir.  We are good if:
+		 *
+		 * a) success (there was a POSIX ACL xattr and was removed)
+		 * b) -ENODATA (there was no POSIX ACL xattr)
+		 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported)
+		 *
+		 * There are various other error values that could effectively
+		 * mean that the xattr doesn't exist (e.g. -ERANGE is returned
+		 * if the xattr name is too long), but the set of filesystems
+		 * allowed as upper are limited to "normal" ones, where checking
+		 * for the above two errors is sufficient.
+		 */
+		err = ovl_do_removexattr(ofs, work,
+					 XATTR_NAME_POSIX_ACL_DEFAULT);
+		if (err && err != -ENODATA && err != -EOPNOTSUPP)
+			goto out_dput;
+
+		err = ovl_do_removexattr(ofs, work,
+					 XATTR_NAME_POSIX_ACL_ACCESS);
+		if (err && err != -ENODATA && err != -EOPNOTSUPP)
+			goto out_dput;
+
+		/* Clear any inherited mode bits */
+		inode_lock(work->d_inode);
+		err = ovl_do_notify_change(ofs, work, &attr);
+		inode_unlock(work->d_inode);
+		if (err)
+			goto out_dput;
+	} else {
+		err = PTR_ERR(work);
+		goto out_err;
+	}
+out_unlock:
+	inode_unlock(dir);
+	return work;
+
+out_dput:
+	dput(work);
+out_err:
+	pr_warn("failed to create directory %s/%s (errno: %i); mounting read-only\n",
+		ofs->config.workdir, name, -err);
+	work = NULL;
+	goto out_unlock;
+}
+
+static void ovl_unescape(char *s)
+{
+	char *d = s;
+
+	for (;; s++, d++) {
+		if (*s == '\\')
+			s++;
+		*d = *s;
+		if (!*s)
+			break;
+	}
+}
+
+static int ovl_mount_dir_noesc(const char *name, struct path *path)
+{
+	int err = -EINVAL;
+
+	if (!*name) {
+		pr_err("empty lowerdir\n");
+		goto out;
+	}
+	err = kern_path(name, LOOKUP_FOLLOW, path);
+	if (err) {
+		pr_err("failed to resolve '%s': %i\n", name, err);
+		goto out;
+	}
+	err = -EINVAL;
+	if (ovl_dentry_weird(path->dentry)) {
+		pr_err("filesystem on '%s' not supported\n", name);
+		goto out_put;
+	}
+	if (!d_is_dir(path->dentry)) {
+		pr_err("'%s' not a directory\n", name);
+		goto out_put;
+	}
+	return 0;
+
+out_put:
+	path_put_init(path);
+out:
+	return err;
+}
+
+static int ovl_mount_dir(const char *name, struct path *path)
+{
+	int err = -ENOMEM;
+	char *tmp = kstrdup(name, GFP_KERNEL);
+
+	if (tmp) {
+		ovl_unescape(tmp);
+		err = ovl_mount_dir_noesc(tmp, path);
+
+		if (!err && path->dentry->d_flags & DCACHE_OP_REAL) {
+			pr_err("filesystem on '%s' not supported as upperdir\n",
+			       tmp);
+			path_put_init(path);
+			err = -EINVAL;
+		}
+		kfree(tmp);
+	}
+	return err;
+}
+
+static int ovl_check_namelen(const struct path *path, struct ovl_fs *ofs,
+			     const char *name)
+{
+	struct kstatfs statfs;
+	int err = vfs_statfs(path, &statfs);
+
+	if (err)
+		pr_err("statfs failed on '%s'\n", name);
+	else
+		ofs->namelen = max(ofs->namelen, statfs.f_namelen);
+
+	return err;
+}
+
+static int ovl_lower_dir(const char *name, struct path *path,
+			 struct ovl_fs *ofs, int *stack_depth)
+{
+	int fh_type;
+	int err;
+
+	err = ovl_mount_dir_noesc(name, path);
+	if (err)
+		return err;
+
+	err = ovl_check_namelen(path, ofs, name);
+	if (err)
+		return err;
+
+	*stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth);
+
+	/*
+	 * The inodes index feature and NFS export need to encode and decode
+	 * file handles, so they require that all layers support them.
+	 */
+	fh_type = ovl_can_decode_fh(path->dentry->d_sb);
+	if ((ofs->config.nfs_export ||
+	     (ofs->config.index && ofs->config.upperdir)) && !fh_type) {
+		ofs->config.index = false;
+		ofs->config.nfs_export = false;
+		pr_warn("fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
+			name);
+	}
+	/*
+	 * Decoding origin file handle is required for persistent st_ino.
+	 * Without persistent st_ino, xino=auto falls back to xino=off.
+	 */
+	if (ofs->config.xino == OVL_XINO_AUTO &&
+	    ofs->config.upperdir && !fh_type) {
+		ofs->config.xino = OVL_XINO_OFF;
+		pr_warn("fs on '%s' does not support file handles, falling back to xino=off.\n",
+			name);
+	}
+
+	/* Check if lower fs has 32bit inode numbers */
+	if (fh_type != FILEID_INO32_GEN)
+		ofs->xino_mode = -1;
+
+	return 0;
+}
+
+/* Workdir should not be subdir of upperdir and vice versa */
+static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
+{
+	bool ok = false;
+
+	if (workdir != upperdir) {
+		ok = (lock_rename(workdir, upperdir) == NULL);
+		unlock_rename(workdir, upperdir);
+	}
+	return ok;
+}
+
+static unsigned int ovl_split_lowerdirs(char *str)
+{
+	unsigned int ctr = 1;
+	char *s, *d;
+
+	for (s = d = str;; s++, d++) {
+		if (*s == '\\') {
+			s++;
+		} else if (*s == ':') {
+			*d = '\0';
+			ctr++;
+			continue;
+		}
+		*d = *s;
+		if (!*s)
+			break;
+	}
+	return ctr;
+}
+
+static int __maybe_unused
+ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
+			struct dentry *dentry, struct inode *inode,
+			const char *name, void *buffer, size_t size)
+{
+	return ovl_xattr_get(dentry, inode, handler->name, buffer, size);
+}
+
+static int __maybe_unused
+ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
+			struct user_namespace *mnt_userns,
+			struct dentry *dentry, struct inode *inode,
+			const char *name, const void *value,
+			size_t size, int flags)
+{
+	struct dentry *workdir = ovl_workdir(dentry);
+	struct inode *realinode = ovl_inode_real(inode);
+	struct posix_acl *acl = NULL;
+	int err;
+
+	/* Check that everything is OK before copy-up */
+	if (value) {
+		/* The above comment can be understood in two ways:
+		 *
+		 * 1. We just want to check whether the basic POSIX ACL format
+		 *    is ok. For example, if the header is correct and the size
+		 *    is sane.
+		 * 2. We want to know whether the ACL_{GROUP,USER} entries can
+		 *    be mapped according to the underlying filesystem.
+		 *
+		 * Currently, we only check 1. If we wanted to check 2. we
+		 * would need to pass the mnt_userns and the fs_userns of the
+		 * underlying filesystem. But frankly, I think checking 1. is
+		 * enough to start the copy-up.
+		 */
+		acl = vfs_set_acl_prepare(&init_user_ns, &init_user_ns, value, size);
+		if (IS_ERR(acl))
+			return PTR_ERR(acl);
+	}
+	err = -EOPNOTSUPP;
+	if (!IS_POSIXACL(d_inode(workdir)))
+		goto out_acl_release;
+	if (!realinode->i_op->set_acl)
+		goto out_acl_release;
+	if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) {
+		err = acl ? -EACCES : 0;
+		goto out_acl_release;
+	}
+	err = -EPERM;
+	if (!inode_owner_or_capable(&init_user_ns, inode))
+		goto out_acl_release;
+
+	posix_acl_release(acl);
+
+	/*
+	 * Check if sgid bit needs to be cleared (actual setacl operation will
+	 * be done with mounter's capabilities and so that won't do it for us).
+	 */
+	if (unlikely(inode->i_mode & S_ISGID) &&
+	    handler->flags == ACL_TYPE_ACCESS &&
+	    !in_group_p(inode->i_gid) &&
+	    !capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID)) {
+		struct iattr iattr = { .ia_valid = ATTR_KILL_SGID };
+
+		err = ovl_setattr(&init_user_ns, dentry, &iattr);
+		if (err)
+			return err;
+	}
+
+	err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags);
+	return err;
+
+out_acl_release:
+	posix_acl_release(acl);
+	return err;
+}
+
+static int ovl_own_xattr_get(const struct xattr_handler *handler,
+			     struct dentry *dentry, struct inode *inode,
+			     const char *name, void *buffer, size_t size)
+{
+	return -EOPNOTSUPP;
+}
+
+static int ovl_own_xattr_set(const struct xattr_handler *handler,
+			     struct user_namespace *mnt_userns,
+			     struct dentry *dentry, struct inode *inode,
+			     const char *name, const void *value,
+			     size_t size, int flags)
+{
+	return -EOPNOTSUPP;
+}
+
+static int ovl_other_xattr_get(const struct xattr_handler *handler,
+			       struct dentry *dentry, struct inode *inode,
+			       const char *name, void *buffer, size_t size)
+{
+	return ovl_xattr_get(dentry, inode, name, buffer, size);
+}
+
+static int ovl_other_xattr_set(const struct xattr_handler *handler,
+			       struct user_namespace *mnt_userns,
+			       struct dentry *dentry, struct inode *inode,
+			       const char *name, const void *value,
+			       size_t size, int flags)
+{
+	return ovl_xattr_set(dentry, inode, name, value, size, flags);
+}
+
+static const struct xattr_handler __maybe_unused
+ovl_posix_acl_access_xattr_handler = {
+	.name = XATTR_NAME_POSIX_ACL_ACCESS,
+	.flags = ACL_TYPE_ACCESS,
+	.get = ovl_posix_acl_xattr_get,
+	.set = ovl_posix_acl_xattr_set,
+};
+
+static const struct xattr_handler __maybe_unused
+ovl_posix_acl_default_xattr_handler = {
+	.name = XATTR_NAME_POSIX_ACL_DEFAULT,
+	.flags = ACL_TYPE_DEFAULT,
+	.get = ovl_posix_acl_xattr_get,
+	.set = ovl_posix_acl_xattr_set,
+};
+
+static const struct xattr_handler ovl_own_trusted_xattr_handler = {
+	.prefix	= OVL_XATTR_TRUSTED_PREFIX,
+	.get = ovl_own_xattr_get,
+	.set = ovl_own_xattr_set,
+};
+
+static const struct xattr_handler ovl_own_user_xattr_handler = {
+	.prefix	= OVL_XATTR_USER_PREFIX,
+	.get = ovl_own_xattr_get,
+	.set = ovl_own_xattr_set,
+};
+
+static const struct xattr_handler ovl_other_xattr_handler = {
+	.prefix	= "", /* catch all */
+	.get = ovl_other_xattr_get,
+	.set = ovl_other_xattr_set,
+};
+
+static const struct xattr_handler *ovl_trusted_xattr_handlers[] = {
+#ifdef CONFIG_FS_POSIX_ACL
+	&ovl_posix_acl_access_xattr_handler,
+	&ovl_posix_acl_default_xattr_handler,
+#endif
+	&ovl_own_trusted_xattr_handler,
+	&ovl_other_xattr_handler,
+	NULL
+};
+
+static const struct xattr_handler *ovl_user_xattr_handlers[] = {
+#ifdef CONFIG_FS_POSIX_ACL
+	&ovl_posix_acl_access_xattr_handler,
+	&ovl_posix_acl_default_xattr_handler,
+#endif
+	&ovl_own_user_xattr_handler,
+	&ovl_other_xattr_handler,
+	NULL
+};
+
+static int ovl_setup_trap(struct super_block *sb, struct dentry *dir,
+			  struct inode **ptrap, const char *name)
+{
+	struct inode *trap;
+	int err;
+
+	trap = ovl_get_trap_inode(sb, dir);
+	err = PTR_ERR_OR_ZERO(trap);
+	if (err) {
+		if (err == -ELOOP)
+			pr_err("conflicting %s path\n", name);
+		return err;
+	}
+
+	*ptrap = trap;
+	return 0;
+}
+
+/*
+ * Determine how we treat concurrent use of upperdir/workdir based on the
+ * index feature. This is papering over mount leaks of container runtimes,
+ * for example, an old overlay mount is leaked and now its upperdir is
+ * attempted to be used as a lower layer in a new overlay mount.
+ */
+static int ovl_report_in_use(struct ovl_fs *ofs, const char *name)
+{
+	if (ofs->config.index) {
+		pr_err("%s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n",
+		       name);
+		return -EBUSY;
+	} else {
+		pr_warn("%s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n",
+			name);
+		return 0;
+	}
+}
+
+static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
+			 struct ovl_layer *upper_layer, struct path *upperpath)
+{
+	struct vfsmount *upper_mnt;
+	int err;
+
+	err = ovl_mount_dir(ofs->config.upperdir, upperpath);
+	if (err)
+		goto out;
+
+	/* Upperdir path should not be r/o */
+	if (__mnt_is_readonly(upperpath->mnt)) {
+		pr_err("upper fs is r/o, try multi-lower layers mount\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir);
+	if (err)
+		goto out;
+
+	err = ovl_setup_trap(sb, upperpath->dentry, &upper_layer->trap,
+			     "upperdir");
+	if (err)
+		goto out;
+
+	upper_mnt = clone_private_mount(upperpath);
+	err = PTR_ERR(upper_mnt);
+	if (IS_ERR(upper_mnt)) {
+		pr_err("failed to clone upperpath\n");
+		goto out;
+	}
+
+	/* Don't inherit atime flags */
+	upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
+	upper_layer->mnt = upper_mnt;
+	upper_layer->idx = 0;
+	upper_layer->fsid = 0;
+
+	/*
+	 * Inherit SB_NOSEC flag from upperdir.
+	 *
+	 * This optimization changes behavior when a security related attribute
+	 * (suid/sgid/security.*) is changed on an underlying layer.  This is
+	 * okay because we don't yet have guarantees in that case, but it will
+	 * need careful treatment once we want to honour changes to underlying
+	 * filesystems.
+	 */
+	if (upper_mnt->mnt_sb->s_flags & SB_NOSEC)
+		sb->s_flags |= SB_NOSEC;
+
+	if (ovl_inuse_trylock(ovl_upper_mnt(ofs)->mnt_root)) {
+		ofs->upperdir_locked = true;
+	} else {
+		err = ovl_report_in_use(ofs, "upperdir");
+		if (err)
+			goto out;
+	}
+
+	err = 0;
+out:
+	return err;
+}
+
+/*
+ * Returns 1 if RENAME_WHITEOUT is supported, 0 if not supported and
+ * negative values if error is encountered.
+ */
+static int ovl_check_rename_whiteout(struct ovl_fs *ofs)
+{
+	struct dentry *workdir = ofs->workdir;
+	struct inode *dir = d_inode(workdir);
+	struct dentry *temp;
+	struct dentry *dest;
+	struct dentry *whiteout;
+	struct name_snapshot name;
+	int err;
+
+	inode_lock_nested(dir, I_MUTEX_PARENT);
+
+	temp = ovl_create_temp(ofs, workdir, OVL_CATTR(S_IFREG | 0));
+	err = PTR_ERR(temp);
+	if (IS_ERR(temp))
+		goto out_unlock;
+
+	dest = ovl_lookup_temp(ofs, workdir);
+	err = PTR_ERR(dest);
+	if (IS_ERR(dest)) {
+		dput(temp);
+		goto out_unlock;
+	}
+
+	/* Name is inline and stable - using snapshot as a copy helper */
+	take_dentry_name_snapshot(&name, temp);
+	err = ovl_do_rename(ofs, dir, temp, dir, dest, RENAME_WHITEOUT);
+	if (err) {
+		if (err == -EINVAL)
+			err = 0;
+		goto cleanup_temp;
+	}
+
+	whiteout = ovl_lookup_upper(ofs, name.name.name, workdir, name.name.len);
+	err = PTR_ERR(whiteout);
+	if (IS_ERR(whiteout))
+		goto cleanup_temp;
+
+	err = ovl_is_whiteout(whiteout);
+
+	/* Best effort cleanup of whiteout and temp file */
+	if (err)
+		ovl_cleanup(ofs, dir, whiteout);
+	dput(whiteout);
+
+cleanup_temp:
+	ovl_cleanup(ofs, dir, temp);
+	release_dentry_name_snapshot(&name);
+	dput(temp);
+	dput(dest);
+
+out_unlock:
+	inode_unlock(dir);
+
+	return err;
+}
+
+static struct dentry *ovl_lookup_or_create(struct ovl_fs *ofs,
+					   struct dentry *parent,
+					   const char *name, umode_t mode)
+{
+	size_t len = strlen(name);
+	struct dentry *child;
+
+	inode_lock_nested(parent->d_inode, I_MUTEX_PARENT);
+	child = ovl_lookup_upper(ofs, name, parent, len);
+	if (!IS_ERR(child) && !child->d_inode)
+		child = ovl_create_real(ofs, parent->d_inode, child,
+					OVL_CATTR(mode));
+	inode_unlock(parent->d_inode);
+	dput(parent);
+
+	return child;
+}
+
+/*
+ * Creates $workdir/work/incompat/volatile/dirty file if it is not already
+ * present.
+ */
+static int ovl_create_volatile_dirty(struct ovl_fs *ofs)
+{
+	unsigned int ctr;
+	struct dentry *d = dget(ofs->workbasedir);
+	static const char *const volatile_path[] = {
+		OVL_WORKDIR_NAME, "incompat", "volatile", "dirty"
+	};
+	const char *const *name = volatile_path;
+
+	for (ctr = ARRAY_SIZE(volatile_path); ctr; ctr--, name++) {
+		d = ovl_lookup_or_create(ofs, d, *name, ctr > 1 ? S_IFDIR : S_IFREG);
+		if (IS_ERR(d))
+			return PTR_ERR(d);
+	}
+	dput(d);
+	return 0;
+}
+
+static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
+			    const struct path *workpath)
+{
+	struct vfsmount *mnt = ovl_upper_mnt(ofs);
+	struct dentry *workdir;
+	struct file *tmpfile;
+	bool rename_whiteout;
+	bool d_type;
+	int fh_type;
+	int err;
+
+	err = mnt_want_write(mnt);
+	if (err)
+		return err;
+
+	workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
+	err = PTR_ERR(workdir);
+	if (IS_ERR_OR_NULL(workdir))
+		goto out;
+
+	ofs->workdir = workdir;
+
+	err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir");
+	if (err)
+		goto out;
+
+	/*
+	 * Upper should support d_type, else whiteouts are visible.  Given
+	 * workdir and upper are on same fs, we can do iterate_dir() on
+	 * workdir. This check requires successful creation of workdir in
+	 * previous step.
+	 */
+	err = ovl_check_d_type_supported(workpath);
+	if (err < 0)
+		goto out;
+
+	d_type = err;
+	if (!d_type)
+		pr_warn("upper fs needs to support d_type.\n");
+
+	/* Check if upper/work fs supports O_TMPFILE */
+	tmpfile = ovl_do_tmpfile(ofs, ofs->workdir, S_IFREG | 0);
+	ofs->tmpfile = !IS_ERR(tmpfile);
+	if (ofs->tmpfile)
+		fput(tmpfile);
+	else
+		pr_warn("upper fs does not support tmpfile.\n");
+
+
+	/* Check if upper/work fs supports RENAME_WHITEOUT */
+	err = ovl_check_rename_whiteout(ofs);
+	if (err < 0)
+		goto out;
+
+	rename_whiteout = err;
+	if (!rename_whiteout)
+		pr_warn("upper fs does not support RENAME_WHITEOUT.\n");
+
+	/*
+	 * Check if upper/work fs supports (trusted|user).overlay.* xattr
+	 */
+	err = ovl_setxattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE, "0", 1);
+	if (err) {
+		pr_warn("failed to set xattr on upper\n");
+		ofs->noxattr = true;
+		if (ofs->config.index || ofs->config.metacopy) {
+			ofs->config.index = false;
+			ofs->config.metacopy = false;
+			pr_warn("...falling back to index=off,metacopy=off.\n");
+		}
+		/*
+		 * xattr support is required for persistent st_ino.
+		 * Without persistent st_ino, xino=auto falls back to xino=off.
+		 */
+		if (ofs->config.xino == OVL_XINO_AUTO) {
+			ofs->config.xino = OVL_XINO_OFF;
+			pr_warn("...falling back to xino=off.\n");
+		}
+		if (err == -EPERM && !ofs->config.userxattr)
+			pr_info("try mounting with 'userxattr' option\n");
+		err = 0;
+	} else {
+		ovl_removexattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE);
+	}
+
+	/*
+	 * We allowed sub-optimal upper fs configuration and don't want to break
+	 * users over kernel upgrade, but we never allowed remote upper fs, so
+	 * we can enforce strict requirements for remote upper fs.
+	 */
+	if (ovl_dentry_remote(ofs->workdir) &&
+	    (!d_type || !rename_whiteout || ofs->noxattr)) {
+		pr_err("upper fs missing required features.\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	/*
+	 * For volatile mount, create a incompat/volatile/dirty file to keep
+	 * track of it.
+	 */
+	if (ofs->config.ovl_volatile) {
+		err = ovl_create_volatile_dirty(ofs);
+		if (err < 0) {
+			pr_err("Failed to create volatile/dirty file.\n");
+			goto out;
+		}
+	}
+
+	/* Check if upper/work fs supports file handles */
+	fh_type = ovl_can_decode_fh(ofs->workdir->d_sb);
+	if (ofs->config.index && !fh_type) {
+		ofs->config.index = false;
+		pr_warn("upper fs does not support file handles, falling back to index=off.\n");
+	}
+
+	/* Check if upper fs has 32bit inode numbers */
+	if (fh_type != FILEID_INO32_GEN)
+		ofs->xino_mode = -1;
+
+	/* NFS export of r/w mount depends on index */
+	if (ofs->config.nfs_export && !ofs->config.index) {
+		pr_warn("NFS export requires \"index=on\", falling back to nfs_export=off.\n");
+		ofs->config.nfs_export = false;
+	}
+out:
+	mnt_drop_write(mnt);
+	return err;
+}
+
+static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs,
+			   const struct path *upperpath)
+{
+	int err;
+	struct path workpath = { };
+
+	err = ovl_mount_dir(ofs->config.workdir, &workpath);
+	if (err)
+		goto out;
+
+	err = -EINVAL;
+	if (upperpath->mnt != workpath.mnt) {
+		pr_err("workdir and upperdir must reside under the same mount\n");
+		goto out;
+	}
+	if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) {
+		pr_err("workdir and upperdir must be separate subtrees\n");
+		goto out;
+	}
+
+	ofs->workbasedir = dget(workpath.dentry);
+
+	if (ovl_inuse_trylock(ofs->workbasedir)) {
+		ofs->workdir_locked = true;
+	} else {
+		err = ovl_report_in_use(ofs, "workdir");
+		if (err)
+			goto out;
+	}
+
+	err = ovl_setup_trap(sb, ofs->workbasedir, &ofs->workbasedir_trap,
+			     "workdir");
+	if (err)
+		goto out;
+
+	err = ovl_make_workdir(sb, ofs, &workpath);
+
+out:
+	path_put(&workpath);
+
+	return err;
+}
+
+static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
+			    struct ovl_entry *oe, const struct path *upperpath)
+{
+	struct vfsmount *mnt = ovl_upper_mnt(ofs);
+	struct dentry *indexdir;
+	int err;
+
+	err = mnt_want_write(mnt);
+	if (err)
+		return err;
+
+	/* Verify lower root is upper root origin */
+	err = ovl_verify_origin(ofs, upperpath->dentry,
+				oe->lowerstack[0].dentry, true);
+	if (err) {
+		pr_err("failed to verify upper root origin\n");
+		goto out;
+	}
+
+	/* index dir will act also as workdir */
+	iput(ofs->workdir_trap);
+	ofs->workdir_trap = NULL;
+	dput(ofs->workdir);
+	ofs->workdir = NULL;
+	indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
+	if (IS_ERR(indexdir)) {
+		err = PTR_ERR(indexdir);
+	} else if (indexdir) {
+		ofs->indexdir = indexdir;
+		ofs->workdir = dget(indexdir);
+
+		err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap,
+				     "indexdir");
+		if (err)
+			goto out;
+
+		/*
+		 * Verify upper root is exclusively associated with index dir.
+		 * Older kernels stored upper fh in ".overlay.origin"
+		 * xattr. If that xattr exists, verify that it is a match to
+		 * upper dir file handle. In any case, verify or set xattr
+		 * ".overlay.upper" to indicate that index may have
+		 * directory entries.
+		 */
+		if (ovl_check_origin_xattr(ofs, ofs->indexdir)) {
+			err = ovl_verify_set_fh(ofs, ofs->indexdir,
+						OVL_XATTR_ORIGIN,
+						upperpath->dentry, true, false);
+			if (err)
+				pr_err("failed to verify index dir 'origin' xattr\n");
+		}
+		err = ovl_verify_upper(ofs, ofs->indexdir, upperpath->dentry,
+				       true);
+		if (err)
+			pr_err("failed to verify index dir 'upper' xattr\n");
+
+		/* Cleanup bad/stale/orphan index entries */
+		if (!err)
+			err = ovl_indexdir_cleanup(ofs);
+	}
+	if (err || !ofs->indexdir)
+		pr_warn("try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
+
+out:
+	mnt_drop_write(mnt);
+	return err;
+}
+
+static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
+{
+	unsigned int i;
+
+	if (!ofs->config.nfs_export && !ovl_upper_mnt(ofs))
+		return true;
+
+	/*
+	 * We allow using single lower with null uuid for index and nfs_export
+	 * for example to support those features with single lower squashfs.
+	 * To avoid regressions in setups of overlay with re-formatted lower
+	 * squashfs, do not allow decoding origin with lower null uuid unless
+	 * user opted-in to one of the new features that require following the
+	 * lower inode of non-dir upper.
+	 */
+	if (ovl_allow_offline_changes(ofs) && uuid_is_null(uuid))
+		return false;
+
+	for (i = 0; i < ofs->numfs; i++) {
+		/*
+		 * We use uuid to associate an overlay lower file handle with a
+		 * lower layer, so we can accept lower fs with null uuid as long
+		 * as all lower layers with null uuid are on the same fs.
+		 * if we detect multiple lower fs with the same uuid, we
+		 * disable lower file handle decoding on all of them.
+		 */
+		if (ofs->fs[i].is_lower &&
+		    uuid_equal(&ofs->fs[i].sb->s_uuid, uuid)) {
+			ofs->fs[i].bad_uuid = true;
+			return false;
+		}
+	}
+	return true;
+}
+
+/* Get a unique fsid for the layer */
+static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
+{
+	struct super_block *sb = path->mnt->mnt_sb;
+	unsigned int i;
+	dev_t dev;
+	int err;
+	bool bad_uuid = false;
+	bool warn = false;
+
+	for (i = 0; i < ofs->numfs; i++) {
+		if (ofs->fs[i].sb == sb)
+			return i;
+	}
+
+	if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) {
+		bad_uuid = true;
+		if (ofs->config.xino == OVL_XINO_AUTO) {
+			ofs->config.xino = OVL_XINO_OFF;
+			warn = true;
+		}
+		if (ofs->config.index || ofs->config.nfs_export) {
+			ofs->config.index = false;
+			ofs->config.nfs_export = false;
+			warn = true;
+		}
+		if (warn) {
+			pr_warn("%s uuid detected in lower fs '%pd2', falling back to xino=%s,index=off,nfs_export=off.\n",
+				uuid_is_null(&sb->s_uuid) ? "null" :
+							    "conflicting",
+				path->dentry, ovl_xino_str[ofs->config.xino]);
+		}
+	}
+
+	err = get_anon_bdev(&dev);
+	if (err) {
+		pr_err("failed to get anonymous bdev for lowerpath\n");
+		return err;
+	}
+
+	ofs->fs[ofs->numfs].sb = sb;
+	ofs->fs[ofs->numfs].pseudo_dev = dev;
+	ofs->fs[ofs->numfs].bad_uuid = bad_uuid;
+
+	return ofs->numfs++;
+}
+
+static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
+			  struct path *stack, unsigned int numlower,
+			  struct ovl_layer *layers)
+{
+	int err;
+	unsigned int i;
+
+	err = -ENOMEM;
+	ofs->fs = kcalloc(numlower + 1, sizeof(struct ovl_sb), GFP_KERNEL);
+	if (ofs->fs == NULL)
+		goto out;
+
+	/* idx/fsid 0 are reserved for upper fs even with lower only overlay */
+	ofs->numfs++;
+
+	/*
+	 * All lower layers that share the same fs as upper layer, use the same
+	 * pseudo_dev as upper layer.  Allocate fs[0].pseudo_dev even for lower
+	 * only overlay to simplify ovl_fs_free().
+	 * is_lower will be set if upper fs is shared with a lower layer.
+	 */
+	err = get_anon_bdev(&ofs->fs[0].pseudo_dev);
+	if (err) {
+		pr_err("failed to get anonymous bdev for upper fs\n");
+		goto out;
+	}
+
+	if (ovl_upper_mnt(ofs)) {
+		ofs->fs[0].sb = ovl_upper_mnt(ofs)->mnt_sb;
+		ofs->fs[0].is_lower = false;
+	}
+
+	for (i = 0; i < numlower; i++) {
+		struct vfsmount *mnt;
+		struct inode *trap;
+		int fsid;
+
+		err = fsid = ovl_get_fsid(ofs, &stack[i]);
+		if (err < 0)
+			goto out;
+
+		/*
+		 * Check if lower root conflicts with this overlay layers before
+		 * checking if it is in-use as upperdir/workdir of "another"
+		 * mount, because we do not bother to check in ovl_is_inuse() if
+		 * the upperdir/workdir is in fact in-use by our
+		 * upperdir/workdir.
+		 */
+		err = ovl_setup_trap(sb, stack[i].dentry, &trap, "lowerdir");
+		if (err)
+			goto out;
+
+		if (ovl_is_inuse(stack[i].dentry)) {
+			err = ovl_report_in_use(ofs, "lowerdir");
+			if (err) {
+				iput(trap);
+				goto out;
+			}
+		}
+
+		mnt = clone_private_mount(&stack[i]);
+		err = PTR_ERR(mnt);
+		if (IS_ERR(mnt)) {
+			pr_err("failed to clone lowerpath\n");
+			iput(trap);
+			goto out;
+		}
+
+		/*
+		 * Make lower layers R/O.  That way fchmod/fchown on lower file
+		 * will fail instead of modifying lower fs.
+		 */
+		mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;
+
+		layers[ofs->numlayer].trap = trap;
+		layers[ofs->numlayer].mnt = mnt;
+		layers[ofs->numlayer].idx = ofs->numlayer;
+		layers[ofs->numlayer].fsid = fsid;
+		layers[ofs->numlayer].fs = &ofs->fs[fsid];
+		ofs->numlayer++;
+		ofs->fs[fsid].is_lower = true;
+	}
+
+	/*
+	 * When all layers on same fs, overlay can use real inode numbers.
+	 * With mount option "xino=<on|auto>", mounter declares that there are
+	 * enough free high bits in underlying fs to hold the unique fsid.
+	 * If overlayfs does encounter underlying inodes using the high xino
+	 * bits reserved for fsid, it emits a warning and uses the original
+	 * inode number or a non persistent inode number allocated from a
+	 * dedicated range.
+	 */
+	if (ofs->numfs - !ovl_upper_mnt(ofs) == 1) {
+		if (ofs->config.xino == OVL_XINO_ON)
+			pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n");
+		ofs->xino_mode = 0;
+	} else if (ofs->config.xino == OVL_XINO_OFF) {
+		ofs->xino_mode = -1;
+	} else if (ofs->xino_mode < 0) {
+		/*
+		 * This is a roundup of number of bits needed for encoding
+		 * fsid, where fsid 0 is reserved for upper fs (even with
+		 * lower only overlay) +1 extra bit is reserved for the non
+		 * persistent inode number range that is used for resolving
+		 * xino lower bits overflow.
+		 */
+		BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 30);
+		ofs->xino_mode = ilog2(ofs->numfs - 1) + 2;
+	}
+
+	if (ofs->xino_mode > 0) {
+		pr_info("\"xino\" feature enabled using %d upper inode bits.\n",
+			ofs->xino_mode);
+	}
+
+	err = 0;
+out:
+	return err;
+}
+
+static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
+				const char *lower, unsigned int numlower,
+				struct ovl_fs *ofs, struct ovl_layer *layers)
+{
+	int err;
+	struct path *stack = NULL;
+	unsigned int i;
+	struct ovl_entry *oe;
+
+	if (!ofs->config.upperdir && numlower == 1) {
+		pr_err("at least 2 lowerdir are needed while upperdir nonexistent\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	stack = kcalloc(numlower, sizeof(struct path), GFP_KERNEL);
+	if (!stack)
+		return ERR_PTR(-ENOMEM);
+
+	err = -EINVAL;
+	for (i = 0; i < numlower; i++) {
+		err = ovl_lower_dir(lower, &stack[i], ofs, &sb->s_stack_depth);
+		if (err)
+			goto out_err;
+
+		lower = strchr(lower, '\0') + 1;
+	}
+
+	err = -EINVAL;
+	sb->s_stack_depth++;
+	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
+		pr_err("maximum fs stacking depth exceeded\n");
+		goto out_err;
+	}
+
+	err = ovl_get_layers(sb, ofs, stack, numlower, layers);
+	if (err)
+		goto out_err;
+
+	err = -ENOMEM;
+	oe = ovl_alloc_entry(numlower);
+	if (!oe)
+		goto out_err;
+
+	for (i = 0; i < numlower; i++) {
+		oe->lowerstack[i].dentry = dget(stack[i].dentry);
+		oe->lowerstack[i].layer = &ofs->layers[i+1];
+	}
+
+out:
+	for (i = 0; i < numlower; i++)
+		path_put(&stack[i]);
+	kfree(stack);
+
+	return oe;
+
+out_err:
+	oe = ERR_PTR(err);
+	goto out;
+}
+
+/*
+ * Check if this layer root is a descendant of:
+ * - another layer of this overlayfs instance
+ * - upper/work dir of any overlayfs instance
+ */
+static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs,
+			   struct dentry *dentry, const char *name,
+			   bool is_lower)
+{
+	struct dentry *next = dentry, *parent;
+	int err = 0;
+
+	if (!dentry)
+		return 0;
+
+	parent = dget_parent(next);
+
+	/* Walk back ancestors to root (inclusive) looking for traps */
+	while (!err && parent != next) {
+		if (is_lower && ovl_lookup_trap_inode(sb, parent)) {
+			err = -ELOOP;
+			pr_err("overlapping %s path\n", name);
+		} else if (ovl_is_inuse(parent)) {
+			err = ovl_report_in_use(ofs, name);
+		}
+		next = parent;
+		parent = dget_parent(next);
+		dput(next);
+	}
+
+	dput(parent);
+
+	return err;
+}
+
+/*
+ * Check if any of the layers or work dirs overlap.
+ */
+static int ovl_check_overlapping_layers(struct super_block *sb,
+					struct ovl_fs *ofs)
+{
+	int i, err;
+
+	if (ovl_upper_mnt(ofs)) {
+		err = ovl_check_layer(sb, ofs, ovl_upper_mnt(ofs)->mnt_root,
+				      "upperdir", false);
+		if (err)
+			return err;
+
+		/*
+		 * Checking workbasedir avoids hitting ovl_is_inuse(parent) of
+		 * this instance and covers overlapping work and index dirs,
+		 * unless work or index dir have been moved since created inside
+		 * workbasedir.  In that case, we already have their traps in
+		 * inode cache and we will catch that case on lookup.
+		 */
+		err = ovl_check_layer(sb, ofs, ofs->workbasedir, "workdir",
+				      false);
+		if (err)
+			return err;
+	}
+
+	for (i = 1; i < ofs->numlayer; i++) {
+		err = ovl_check_layer(sb, ofs,
+				      ofs->layers[i].mnt->mnt_root,
+				      "lowerdir", true);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static struct dentry *ovl_get_root(struct super_block *sb,
+				   struct dentry *upperdentry,
+				   struct ovl_entry *oe)
+{
+	struct dentry *root;
+	struct ovl_path *lowerpath = &oe->lowerstack[0];
+	unsigned long ino = d_inode(lowerpath->dentry)->i_ino;
+	int fsid = lowerpath->layer->fsid;
+	struct ovl_inode_params oip = {
+		.upperdentry = upperdentry,
+		.lowerpath = lowerpath,
+	};
+
+	root = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
+	if (!root)
+		return NULL;
+
+	root->d_fsdata = oe;
+
+	if (upperdentry) {
+		/* Root inode uses upper st_ino/i_ino */
+		ino = d_inode(upperdentry)->i_ino;
+		fsid = 0;
+		ovl_dentry_set_upper_alias(root);
+		if (ovl_is_impuredir(sb, upperdentry))
+			ovl_set_flag(OVL_IMPURE, d_inode(root));
+	}
+
+	/* Root is always merge -> can have whiteouts */
+	ovl_set_flag(OVL_WHITEOUTS, d_inode(root));
+	ovl_dentry_set_flag(OVL_E_CONNECTED, root);
+	ovl_set_upperdata(d_inode(root));
+	ovl_inode_init(d_inode(root), &oip, ino, fsid);
+	ovl_dentry_init_flags(root, upperdentry, DCACHE_OP_WEAK_REVALIDATE);
+
+	return root;
+}
+
+static int ovl_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct path upperpath = { };
+	struct dentry *root_dentry;
+	struct ovl_entry *oe;
+	struct ovl_fs *ofs;
+	struct ovl_layer *layers;
+	struct cred *cred;
+	char *splitlower = NULL;
+	unsigned int numlower;
+	int err;
+
+	err = -EIO;
+	if (WARN_ON(sb->s_user_ns != current_user_ns()))
+		goto out;
+
+	sb->s_d_op = &ovl_dentry_operations;
+
+	err = -ENOMEM;
+	ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
+	if (!ofs)
+		goto out;
+
+	err = -ENOMEM;
+	ofs->creator_cred = cred = prepare_creds();
+	if (!cred)
+		goto out_err;
+
+	/* Is there a reason anyone would want not to share whiteouts? */
+	ofs->share_whiteout = true;
+
+	ofs->config.index = ovl_index_def;
+	ofs->config.uuid = true;
+	ofs->config.nfs_export = ovl_nfs_export_def;
+	ofs->config.xino = ovl_xino_def();
+	ofs->config.metacopy = ovl_metacopy_def;
+	err = ovl_parse_opt((char *) data, &ofs->config);
+	if (err)
+		goto out_err;
+
+	err = -EINVAL;
+	if (!ofs->config.lowerdir) {
+		if (!silent)
+			pr_err("missing 'lowerdir'\n");
+		goto out_err;
+	}
+
+	err = -ENOMEM;
+	splitlower = kstrdup(ofs->config.lowerdir, GFP_KERNEL);
+	if (!splitlower)
+		goto out_err;
+
+	err = -EINVAL;
+	numlower = ovl_split_lowerdirs(splitlower);
+	if (numlower > OVL_MAX_STACK) {
+		pr_err("too many lower directories, limit is %d\n",
+		       OVL_MAX_STACK);
+		goto out_err;
+	}
+
+	err = -ENOMEM;
+	layers = kcalloc(numlower + 1, sizeof(struct ovl_layer), GFP_KERNEL);
+	if (!layers)
+		goto out_err;
+
+	ofs->layers = layers;
+	/* Layer 0 is reserved for upper even if there's no upper */
+	ofs->numlayer = 1;
+
+	sb->s_stack_depth = 0;
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	atomic_long_set(&ofs->last_ino, 1);
+	/* Assume underlying fs uses 32bit inodes unless proven otherwise */
+	if (ofs->config.xino != OVL_XINO_OFF) {
+		ofs->xino_mode = BITS_PER_LONG - 32;
+		if (!ofs->xino_mode) {
+			pr_warn("xino not supported on 32bit kernel, falling back to xino=off.\n");
+			ofs->config.xino = OVL_XINO_OFF;
+		}
+	}
+
+	/* alloc/destroy_inode needed for setting up traps in inode cache */
+	sb->s_op = &ovl_super_operations;
+
+	if (ofs->config.upperdir) {
+		struct super_block *upper_sb;
+
+		err = -EINVAL;
+		if (!ofs->config.workdir) {
+			pr_err("missing 'workdir'\n");
+			goto out_err;
+		}
+
+		err = ovl_get_upper(sb, ofs, &layers[0], &upperpath);
+		if (err)
+			goto out_err;
+
+		upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
+		if (!ovl_should_sync(ofs)) {
+			ofs->errseq = errseq_sample(&upper_sb->s_wb_err);
+			if (errseq_check(&upper_sb->s_wb_err, ofs->errseq)) {
+				err = -EIO;
+				pr_err("Cannot mount volatile when upperdir has an unseen error. Sync upperdir fs to clear state.\n");
+				goto out_err;
+			}
+		}
+
+		err = ovl_get_workdir(sb, ofs, &upperpath);
+		if (err)
+			goto out_err;
+
+		if (!ofs->workdir)
+			sb->s_flags |= SB_RDONLY;
+
+		sb->s_stack_depth = upper_sb->s_stack_depth;
+		sb->s_time_gran = upper_sb->s_time_gran;
+	}
+	oe = ovl_get_lowerstack(sb, splitlower, numlower, ofs, layers);
+	err = PTR_ERR(oe);
+	if (IS_ERR(oe))
+		goto out_err;
+
+	/* If the upper fs is nonexistent, we mark overlayfs r/o too */
+	if (!ovl_upper_mnt(ofs))
+		sb->s_flags |= SB_RDONLY;
+
+	if (!ofs->config.uuid && ofs->numfs > 1) {
+		pr_warn("The uuid=off requires a single fs for lower and upper, falling back to uuid=on.\n");
+		ofs->config.uuid = true;
+	}
+
+	if (!ovl_force_readonly(ofs) && ofs->config.index) {
+		err = ovl_get_indexdir(sb, ofs, oe, &upperpath);
+		if (err)
+			goto out_free_oe;
+
+		/* Force r/o mount with no index dir */
+		if (!ofs->indexdir)
+			sb->s_flags |= SB_RDONLY;
+	}
+
+	err = ovl_check_overlapping_layers(sb, ofs);
+	if (err)
+		goto out_free_oe;
+
+	/* Show index=off in /proc/mounts for forced r/o mount */
+	if (!ofs->indexdir) {
+		ofs->config.index = false;
+		if (ovl_upper_mnt(ofs) && ofs->config.nfs_export) {
+			pr_warn("NFS export requires an index dir, falling back to nfs_export=off.\n");
+			ofs->config.nfs_export = false;
+		}
+	}
+
+	if (ofs->config.metacopy && ofs->config.nfs_export) {
+		pr_warn("NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n");
+		ofs->config.nfs_export = false;
+	}
+
+	if (ofs->config.nfs_export)
+		sb->s_export_op = &ovl_export_operations;
+
+	/* Never override disk quota limits or use reserved space */
+	cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
+
+	sb->s_magic = OVERLAYFS_SUPER_MAGIC;
+	sb->s_xattr = ofs->config.userxattr ? ovl_user_xattr_handlers :
+		ovl_trusted_xattr_handlers;
+	sb->s_fs_info = ofs;
+	sb->s_flags |= SB_POSIXACL;
+	sb->s_iflags |= SB_I_SKIP_SYNC;
+
+	err = -ENOMEM;
+	root_dentry = ovl_get_root(sb, upperpath.dentry, oe);
+	if (!root_dentry)
+		goto out_free_oe;
+
+	mntput(upperpath.mnt);
+	kfree(splitlower);
+
+	sb->s_root = root_dentry;
+
+	return 0;
+
+out_free_oe:
+	ovl_entry_stack_free(oe);
+	kfree(oe);
+out_err:
+	kfree(splitlower);
+	path_put(&upperpath);
+	ovl_free_fs(ofs);
+out:
+	return err;
+}
+
+static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
+				const char *dev_name, void *raw_data)
+{
+	return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
+}
+
+static struct file_system_type ovl_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "overlay",
+	.fs_flags	= FS_USERNS_MOUNT,
+	.mount		= ovl_mount,
+	.kill_sb	= kill_anon_super,
+};
+MODULE_ALIAS_FS("overlay");
+
+static void ovl_inode_init_once(void *foo)
+{
+	struct ovl_inode *oi = foo;
+
+	inode_init_once(&oi->vfs_inode);
+}
+
+static int __init ovl_init(void)
+{
+	int err;
+
+	ovl_inode_cachep = kmem_cache_create("ovl_inode",
+					     sizeof(struct ovl_inode), 0,
+					     (SLAB_RECLAIM_ACCOUNT|
+					      SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+					     ovl_inode_init_once);
+	if (ovl_inode_cachep == NULL)
+		return -ENOMEM;
+
+	err = ovl_aio_request_cache_init();
+	if (!err) {
+		err = register_filesystem(&ovl_fs_type);
+		if (!err)
+			return 0;
+
+		ovl_aio_request_cache_destroy();
+	}
+	kmem_cache_destroy(ovl_inode_cachep);
+
+	return err;
+}
+
+static void __exit ovl_exit(void)
+{
+	unregister_filesystem(&ovl_fs_type);
+
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
+	kmem_cache_destroy(ovl_inode_cachep);
+	ovl_aio_request_cache_destroy();
+}
+
+module_init(ovl_init);
+module_exit(ovl_exit);
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
new file mode 100644
index 000000000..0d8f96168
--- /dev/null
+++ b/fs/overlayfs/util.c
@@ -0,0 +1,1136 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2011 Novell Inc.
+ * Copyright (C) 2016 Red Hat, Inc.
+ */
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/slab.h>
+#include <linux/cred.h>
+#include <linux/xattr.h>
+#include <linux/exportfs.h>
+#include <linux/fileattr.h>
+#include <linux/uuid.h>
+#include <linux/namei.h>
+#include <linux/ratelimit.h>
+#include "overlayfs.h"
+
+int ovl_want_write(struct dentry *dentry)
+{
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+	return mnt_want_write(ovl_upper_mnt(ofs));
+}
+
+void ovl_drop_write(struct dentry *dentry)
+{
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+	mnt_drop_write(ovl_upper_mnt(ofs));
+}
+
+struct dentry *ovl_workdir(struct dentry *dentry)
+{
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+	return ofs->workdir;
+}
+
+const struct cred *ovl_override_creds(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	return override_creds(ofs->creator_cred);
+}
+
+/*
+ * Check if underlying fs supports file handles and try to determine encoding
+ * type, in order to deduce maximum inode number used by fs.
+ *
+ * Return 0 if file handles are not supported.
+ * Return 1 (FILEID_INO32_GEN) if fs uses the default 32bit inode encoding.
+ * Return -1 if fs uses a non default encoding with unknown inode size.
+ */
+int ovl_can_decode_fh(struct super_block *sb)
+{
+	if (!capable(CAP_DAC_READ_SEARCH))
+		return 0;
+
+	if (!sb->s_export_op || !sb->s_export_op->fh_to_dentry)
+		return 0;
+
+	return sb->s_export_op->encode_fh ? -1 : FILEID_INO32_GEN;
+}
+
+struct dentry *ovl_indexdir(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	return ofs->indexdir;
+}
+
+/* Index all files on copy up. For now only enabled for NFS export */
+bool ovl_index_all(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	return ofs->config.nfs_export && ofs->config.index;
+}
+
+/* Verify lower origin on lookup. For now only enabled for NFS export */
+bool ovl_verify_lower(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	return ofs->config.nfs_export && ofs->config.index;
+}
+
+struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
+{
+	size_t size = offsetof(struct ovl_entry, lowerstack[numlower]);
+	struct ovl_entry *oe = kzalloc(size, GFP_KERNEL);
+
+	if (oe)
+		oe->numlower = numlower;
+
+	return oe;
+}
+
+#define OVL_D_REVALIDATE (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE)
+
+bool ovl_dentry_remote(struct dentry *dentry)
+{
+	return dentry->d_flags & OVL_D_REVALIDATE;
+}
+
+void ovl_dentry_update_reval(struct dentry *dentry, struct dentry *realdentry)
+{
+	if (!ovl_dentry_remote(realdentry))
+		return;
+
+	spin_lock(&dentry->d_lock);
+	dentry->d_flags |= realdentry->d_flags & OVL_D_REVALIDATE;
+	spin_unlock(&dentry->d_lock);
+}
+
+void ovl_dentry_init_reval(struct dentry *dentry, struct dentry *upperdentry)
+{
+	return ovl_dentry_init_flags(dentry, upperdentry, OVL_D_REVALIDATE);
+}
+
+void ovl_dentry_init_flags(struct dentry *dentry, struct dentry *upperdentry,
+			   unsigned int mask)
+{
+	struct ovl_entry *oe = OVL_E(dentry);
+	unsigned int i, flags = 0;
+
+	if (upperdentry)
+		flags |= upperdentry->d_flags;
+	for (i = 0; i < oe->numlower; i++)
+		flags |= oe->lowerstack[i].dentry->d_flags;
+
+	spin_lock(&dentry->d_lock);
+	dentry->d_flags &= ~mask;
+	dentry->d_flags |= flags & mask;
+	spin_unlock(&dentry->d_lock);
+}
+
+bool ovl_dentry_weird(struct dentry *dentry)
+{
+	return dentry->d_flags & (DCACHE_NEED_AUTOMOUNT |
+				  DCACHE_MANAGE_TRANSIT |
+				  DCACHE_OP_HASH |
+				  DCACHE_OP_COMPARE);
+}
+
+enum ovl_path_type ovl_path_type(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+	enum ovl_path_type type = 0;
+
+	if (ovl_dentry_upper(dentry)) {
+		type = __OVL_PATH_UPPER;
+
+		/*
+		 * Non-dir dentry can hold lower dentry of its copy up origin.
+		 */
+		if (oe->numlower) {
+			if (ovl_test_flag(OVL_CONST_INO, d_inode(dentry)))
+				type |= __OVL_PATH_ORIGIN;
+			if (d_is_dir(dentry) ||
+			    !ovl_has_upperdata(d_inode(dentry)))
+				type |= __OVL_PATH_MERGE;
+		}
+	} else {
+		if (oe->numlower > 1)
+			type |= __OVL_PATH_MERGE;
+	}
+	return type;
+}
+
+void ovl_path_upper(struct dentry *dentry, struct path *path)
+{
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+
+	path->mnt = ovl_upper_mnt(ofs);
+	path->dentry = ovl_dentry_upper(dentry);
+}
+
+void ovl_path_lower(struct dentry *dentry, struct path *path)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	if (oe->numlower) {
+		path->mnt = oe->lowerstack[0].layer->mnt;
+		path->dentry = oe->lowerstack[0].dentry;
+	} else {
+		*path = (struct path) { };
+	}
+}
+
+void ovl_path_lowerdata(struct dentry *dentry, struct path *path)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	if (oe->numlower) {
+		path->mnt = oe->lowerstack[oe->numlower - 1].layer->mnt;
+		path->dentry = oe->lowerstack[oe->numlower - 1].dentry;
+	} else {
+		*path = (struct path) { };
+	}
+}
+
+enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
+{
+	enum ovl_path_type type = ovl_path_type(dentry);
+
+	if (!OVL_TYPE_UPPER(type))
+		ovl_path_lower(dentry, path);
+	else
+		ovl_path_upper(dentry, path);
+
+	return type;
+}
+
+enum ovl_path_type ovl_path_realdata(struct dentry *dentry, struct path *path)
+{
+	enum ovl_path_type type = ovl_path_type(dentry);
+
+	WARN_ON_ONCE(d_is_dir(dentry));
+
+	if (!OVL_TYPE_UPPER(type) || OVL_TYPE_MERGE(type))
+		ovl_path_lowerdata(dentry, path);
+	else
+		ovl_path_upper(dentry, path);
+
+	return type;
+}
+
+struct dentry *ovl_dentry_upper(struct dentry *dentry)
+{
+	return ovl_upperdentry_dereference(OVL_I(d_inode(dentry)));
+}
+
+struct dentry *ovl_dentry_lower(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	return oe->numlower ? oe->lowerstack[0].dentry : NULL;
+}
+
+const struct ovl_layer *ovl_layer_lower(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	return oe->numlower ? oe->lowerstack[0].layer : NULL;
+}
+
+/*
+ * ovl_dentry_lower() could return either a data dentry or metacopy dentry
+ * depending on what is stored in lowerstack[0]. At times we need to find
+ * lower dentry which has data (and not metacopy dentry). This helper
+ * returns the lower data dentry.
+ */
+struct dentry *ovl_dentry_lowerdata(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	return oe->numlower ? oe->lowerstack[oe->numlower - 1].dentry : NULL;
+}
+
+struct dentry *ovl_dentry_real(struct dentry *dentry)
+{
+	return ovl_dentry_upper(dentry) ?: ovl_dentry_lower(dentry);
+}
+
+struct dentry *ovl_i_dentry_upper(struct inode *inode)
+{
+	return ovl_upperdentry_dereference(OVL_I(inode));
+}
+
+struct inode *ovl_i_path_real(struct inode *inode, struct path *path)
+{
+	path->dentry = ovl_i_dentry_upper(inode);
+	if (!path->dentry) {
+		path->dentry = OVL_I(inode)->lowerpath.dentry;
+		path->mnt = OVL_I(inode)->lowerpath.layer->mnt;
+	} else {
+		path->mnt = ovl_upper_mnt(OVL_FS(inode->i_sb));
+	}
+
+	return path->dentry ? d_inode_rcu(path->dentry) : NULL;
+}
+
+struct inode *ovl_inode_upper(struct inode *inode)
+{
+	struct dentry *upperdentry = ovl_i_dentry_upper(inode);
+
+	return upperdentry ? d_inode(upperdentry) : NULL;
+}
+
+struct inode *ovl_inode_lower(struct inode *inode)
+{
+	struct dentry *lowerdentry = OVL_I(inode)->lowerpath.dentry;
+
+	return lowerdentry ? d_inode(lowerdentry) : NULL;
+}
+
+struct inode *ovl_inode_real(struct inode *inode)
+{
+	return ovl_inode_upper(inode) ?: ovl_inode_lower(inode);
+}
+
+/* Return inode which contains lower data. Do not return metacopy */
+struct inode *ovl_inode_lowerdata(struct inode *inode)
+{
+	if (WARN_ON(!S_ISREG(inode->i_mode)))
+		return NULL;
+
+	return OVL_I(inode)->lowerdata ?: ovl_inode_lower(inode);
+}
+
+/* Return real inode which contains data. Does not return metacopy inode */
+struct inode *ovl_inode_realdata(struct inode *inode)
+{
+	struct inode *upperinode;
+
+	upperinode = ovl_inode_upper(inode);
+	if (upperinode && ovl_has_upperdata(inode))
+		return upperinode;
+
+	return ovl_inode_lowerdata(inode);
+}
+
+struct ovl_dir_cache *ovl_dir_cache(struct inode *inode)
+{
+	return OVL_I(inode)->cache;
+}
+
+void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache)
+{
+	OVL_I(inode)->cache = cache;
+}
+
+void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry)
+{
+	set_bit(flag, &OVL_E(dentry)->flags);
+}
+
+void ovl_dentry_clear_flag(unsigned long flag, struct dentry *dentry)
+{
+	clear_bit(flag, &OVL_E(dentry)->flags);
+}
+
+bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry)
+{
+	return test_bit(flag, &OVL_E(dentry)->flags);
+}
+
+bool ovl_dentry_is_opaque(struct dentry *dentry)
+{
+	return ovl_dentry_test_flag(OVL_E_OPAQUE, dentry);
+}
+
+bool ovl_dentry_is_whiteout(struct dentry *dentry)
+{
+	return !dentry->d_inode && ovl_dentry_is_opaque(dentry);
+}
+
+void ovl_dentry_set_opaque(struct dentry *dentry)
+{
+	ovl_dentry_set_flag(OVL_E_OPAQUE, dentry);
+}
+
+/*
+ * For hard links and decoded file handles, it's possible for ovl_dentry_upper()
+ * to return positive, while there's no actual upper alias for the inode.
+ * Copy up code needs to know about the existence of the upper alias, so it
+ * can't use ovl_dentry_upper().
+ */
+bool ovl_dentry_has_upper_alias(struct dentry *dentry)
+{
+	return ovl_dentry_test_flag(OVL_E_UPPER_ALIAS, dentry);
+}
+
+void ovl_dentry_set_upper_alias(struct dentry *dentry)
+{
+	ovl_dentry_set_flag(OVL_E_UPPER_ALIAS, dentry);
+}
+
+static bool ovl_should_check_upperdata(struct inode *inode)
+{
+	if (!S_ISREG(inode->i_mode))
+		return false;
+
+	if (!ovl_inode_lower(inode))
+		return false;
+
+	return true;
+}
+
+bool ovl_has_upperdata(struct inode *inode)
+{
+	if (!ovl_should_check_upperdata(inode))
+		return true;
+
+	if (!ovl_test_flag(OVL_UPPERDATA, inode))
+		return false;
+	/*
+	 * Pairs with smp_wmb() in ovl_set_upperdata(). Main user of
+	 * ovl_has_upperdata() is ovl_copy_up_meta_inode_data(). Make sure
+	 * if setting of OVL_UPPERDATA is visible, then effects of writes
+	 * before that are visible too.
+	 */
+	smp_rmb();
+	return true;
+}
+
+void ovl_set_upperdata(struct inode *inode)
+{
+	/*
+	 * Pairs with smp_rmb() in ovl_has_upperdata(). Make sure
+	 * if OVL_UPPERDATA flag is visible, then effects of write operations
+	 * before it are visible as well.
+	 */
+	smp_wmb();
+	ovl_set_flag(OVL_UPPERDATA, inode);
+}
+
+/* Caller should hold ovl_inode->lock */
+bool ovl_dentry_needs_data_copy_up_locked(struct dentry *dentry, int flags)
+{
+	if (!ovl_open_flags_need_copy_up(flags))
+		return false;
+
+	return !ovl_test_flag(OVL_UPPERDATA, d_inode(dentry));
+}
+
+bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags)
+{
+	if (!ovl_open_flags_need_copy_up(flags))
+		return false;
+
+	return !ovl_has_upperdata(d_inode(dentry));
+}
+
+bool ovl_redirect_dir(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	return ofs->config.redirect_dir && !ofs->noxattr;
+}
+
+const char *ovl_dentry_get_redirect(struct dentry *dentry)
+{
+	return OVL_I(d_inode(dentry))->redirect;
+}
+
+void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect)
+{
+	struct ovl_inode *oi = OVL_I(d_inode(dentry));
+
+	kfree(oi->redirect);
+	oi->redirect = redirect;
+}
+
+void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
+{
+	struct inode *upperinode = d_inode(upperdentry);
+
+	WARN_ON(OVL_I(inode)->__upperdentry);
+
+	/*
+	 * Make sure upperdentry is consistent before making it visible
+	 */
+	smp_wmb();
+	OVL_I(inode)->__upperdentry = upperdentry;
+	if (inode_unhashed(inode)) {
+		inode->i_private = upperinode;
+		__insert_inode_hash(inode, (unsigned long) upperinode);
+	}
+}
+
+static void ovl_dir_version_inc(struct dentry *dentry, bool impurity)
+{
+	struct inode *inode = d_inode(dentry);
+
+	WARN_ON(!inode_is_locked(inode));
+	WARN_ON(!d_is_dir(dentry));
+	/*
+	 * Version is used by readdir code to keep cache consistent.
+	 * For merge dirs (or dirs with origin) all changes need to be noted.
+	 * For non-merge dirs, cache contains only impure entries (i.e. ones
+	 * which have been copied up and have origins), so only need to note
+	 * changes to impure entries.
+	 */
+	if (!ovl_dir_is_real(dentry) || impurity)
+		OVL_I(inode)->version++;
+}
+
+void ovl_dir_modified(struct dentry *dentry, bool impurity)
+{
+	/* Copy mtime/ctime */
+	ovl_copyattr(d_inode(dentry));
+
+	ovl_dir_version_inc(dentry, impurity);
+}
+
+u64 ovl_dentry_version_get(struct dentry *dentry)
+{
+	struct inode *inode = d_inode(dentry);
+
+	WARN_ON(!inode_is_locked(inode));
+	return OVL_I(inode)->version;
+}
+
+bool ovl_is_whiteout(struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+
+	return inode && IS_WHITEOUT(inode);
+}
+
+struct file *ovl_path_open(const struct path *path, int flags)
+{
+	struct inode *inode = d_inode(path->dentry);
+	struct user_namespace *real_mnt_userns = mnt_user_ns(path->mnt);
+	int err, acc_mode;
+
+	if (flags & ~(O_ACCMODE | O_LARGEFILE))
+		BUG();
+
+	switch (flags & O_ACCMODE) {
+	case O_RDONLY:
+		acc_mode = MAY_READ;
+		break;
+	case O_WRONLY:
+		acc_mode = MAY_WRITE;
+		break;
+	default:
+		BUG();
+	}
+
+	err = inode_permission(real_mnt_userns, inode, acc_mode | MAY_OPEN);
+	if (err)
+		return ERR_PTR(err);
+
+	/* O_NOATIME is an optimization, don't fail if not permitted */
+	if (inode_owner_or_capable(real_mnt_userns, inode))
+		flags |= O_NOATIME;
+
+	return dentry_open(path, flags, current_cred());
+}
+
+/* Caller should hold ovl_inode->lock */
+static bool ovl_already_copied_up_locked(struct dentry *dentry, int flags)
+{
+	bool disconnected = dentry->d_flags & DCACHE_DISCONNECTED;
+
+	if (ovl_dentry_upper(dentry) &&
+	    (ovl_dentry_has_upper_alias(dentry) || disconnected) &&
+	    !ovl_dentry_needs_data_copy_up_locked(dentry, flags))
+		return true;
+
+	return false;
+}
+
+bool ovl_already_copied_up(struct dentry *dentry, int flags)
+{
+	bool disconnected = dentry->d_flags & DCACHE_DISCONNECTED;
+
+	/*
+	 * Check if copy-up has happened as well as for upper alias (in
+	 * case of hard links) is there.
+	 *
+	 * Both checks are lockless:
+	 *  - false negatives: will recheck under oi->lock
+	 *  - false positives:
+	 *    + ovl_dentry_upper() uses memory barriers to ensure the
+	 *      upper dentry is up-to-date
+	 *    + ovl_dentry_has_upper_alias() relies on locking of
+	 *      upper parent i_rwsem to prevent reordering copy-up
+	 *      with rename.
+	 */
+	if (ovl_dentry_upper(dentry) &&
+	    (ovl_dentry_has_upper_alias(dentry) || disconnected) &&
+	    !ovl_dentry_needs_data_copy_up(dentry, flags))
+		return true;
+
+	return false;
+}
+
+int ovl_copy_up_start(struct dentry *dentry, int flags)
+{
+	struct inode *inode = d_inode(dentry);
+	int err;
+
+	err = ovl_inode_lock_interruptible(inode);
+	if (!err && ovl_already_copied_up_locked(dentry, flags)) {
+		err = 1; /* Already copied up */
+		ovl_inode_unlock(inode);
+	}
+
+	return err;
+}
+
+void ovl_copy_up_end(struct dentry *dentry)
+{
+	ovl_inode_unlock(d_inode(dentry));
+}
+
+bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path)
+{
+	int res;
+
+	res = ovl_path_getxattr(ofs, path, OVL_XATTR_ORIGIN, NULL, 0);
+
+	/* Zero size value means "copied up but origin unknown" */
+	if (res >= 0)
+		return true;
+
+	return false;
+}
+
+bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path,
+			       enum ovl_xattr ox)
+{
+	int res;
+	char val;
+
+	if (!d_is_dir(path->dentry))
+		return false;
+
+	res = ovl_path_getxattr(ofs, path, ox, &val, 1);
+	if (res == 1 && val == 'y')
+		return true;
+
+	return false;
+}
+
+#define OVL_XATTR_OPAQUE_POSTFIX	"opaque"
+#define OVL_XATTR_REDIRECT_POSTFIX	"redirect"
+#define OVL_XATTR_ORIGIN_POSTFIX	"origin"
+#define OVL_XATTR_IMPURE_POSTFIX	"impure"
+#define OVL_XATTR_NLINK_POSTFIX		"nlink"
+#define OVL_XATTR_UPPER_POSTFIX		"upper"
+#define OVL_XATTR_METACOPY_POSTFIX	"metacopy"
+#define OVL_XATTR_PROTATTR_POSTFIX	"protattr"
+
+#define OVL_XATTR_TAB_ENTRY(x) \
+	[x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \
+		[true] = OVL_XATTR_USER_PREFIX x ## _POSTFIX }
+
+const char *const ovl_xattr_table[][2] = {
+	OVL_XATTR_TAB_ENTRY(OVL_XATTR_OPAQUE),
+	OVL_XATTR_TAB_ENTRY(OVL_XATTR_REDIRECT),
+	OVL_XATTR_TAB_ENTRY(OVL_XATTR_ORIGIN),
+	OVL_XATTR_TAB_ENTRY(OVL_XATTR_IMPURE),
+	OVL_XATTR_TAB_ENTRY(OVL_XATTR_NLINK),
+	OVL_XATTR_TAB_ENTRY(OVL_XATTR_UPPER),
+	OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY),
+	OVL_XATTR_TAB_ENTRY(OVL_XATTR_PROTATTR),
+};
+
+int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry,
+		       enum ovl_xattr ox, const void *value, size_t size,
+		       int xerr)
+{
+	int err;
+
+	if (ofs->noxattr)
+		return xerr;
+
+	err = ovl_setxattr(ofs, upperdentry, ox, value, size);
+
+	if (err == -EOPNOTSUPP) {
+		pr_warn("cannot set %s xattr on upper\n", ovl_xattr(ofs, ox));
+		ofs->noxattr = true;
+		return xerr;
+	}
+
+	return err;
+}
+
+int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry)
+{
+	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+	int err;
+
+	if (ovl_test_flag(OVL_IMPURE, d_inode(dentry)))
+		return 0;
+
+	/*
+	 * Do not fail when upper doesn't support xattrs.
+	 * Upper inodes won't have origin nor redirect xattr anyway.
+	 */
+	err = ovl_check_setxattr(ofs, upperdentry, OVL_XATTR_IMPURE, "y", 1, 0);
+	if (!err)
+		ovl_set_flag(OVL_IMPURE, d_inode(dentry));
+
+	return err;
+}
+
+
+#define OVL_PROTATTR_MAX 32 /* Reserved for future flags */
+
+void ovl_check_protattr(struct inode *inode, struct dentry *upper)
+{
+	struct ovl_fs *ofs = OVL_FS(inode->i_sb);
+	u32 iflags = inode->i_flags & OVL_PROT_I_FLAGS_MASK;
+	char buf[OVL_PROTATTR_MAX+1];
+	int res, n;
+
+	res = ovl_getxattr_upper(ofs, upper, OVL_XATTR_PROTATTR, buf,
+				 OVL_PROTATTR_MAX);
+	if (res < 0)
+		return;
+
+	/*
+	 * Initialize inode flags from overlay.protattr xattr and upper inode
+	 * flags.  If upper inode has those fileattr flags set (i.e. from old
+	 * kernel), we do not clear them on ovl_get_inode(), but we will clear
+	 * them on next fileattr_set().
+	 */
+	for (n = 0; n < res; n++) {
+		if (buf[n] == 'a')
+			iflags |= S_APPEND;
+		else if (buf[n] == 'i')
+			iflags |= S_IMMUTABLE;
+		else
+			break;
+	}
+
+	if (!res || n < res) {
+		pr_warn_ratelimited("incompatible overlay.protattr format (%pd2, len=%d)\n",
+				    upper, res);
+	} else {
+		inode_set_flags(inode, iflags, OVL_PROT_I_FLAGS_MASK);
+	}
+}
+
+int ovl_set_protattr(struct inode *inode, struct dentry *upper,
+		      struct fileattr *fa)
+{
+	struct ovl_fs *ofs = OVL_FS(inode->i_sb);
+	char buf[OVL_PROTATTR_MAX];
+	int len = 0, err = 0;
+	u32 iflags = 0;
+
+	BUILD_BUG_ON(HWEIGHT32(OVL_PROT_FS_FLAGS_MASK) > OVL_PROTATTR_MAX);
+
+	if (fa->flags & FS_APPEND_FL) {
+		buf[len++] = 'a';
+		iflags |= S_APPEND;
+	}
+	if (fa->flags & FS_IMMUTABLE_FL) {
+		buf[len++] = 'i';
+		iflags |= S_IMMUTABLE;
+	}
+
+	/*
+	 * Do not allow to set protection flags when upper doesn't support
+	 * xattrs, because we do not set those fileattr flags on upper inode.
+	 * Remove xattr if it exist and all protection flags are cleared.
+	 */
+	if (len) {
+		err = ovl_check_setxattr(ofs, upper, OVL_XATTR_PROTATTR,
+					 buf, len, -EPERM);
+	} else if (inode->i_flags & OVL_PROT_I_FLAGS_MASK) {
+		err = ovl_removexattr(ofs, upper, OVL_XATTR_PROTATTR);
+		if (err == -EOPNOTSUPP || err == -ENODATA)
+			err = 0;
+	}
+	if (err)
+		return err;
+
+	inode_set_flags(inode, iflags, OVL_PROT_I_FLAGS_MASK);
+
+	/* Mask out the fileattr flags that should not be set in upper inode */
+	fa->flags &= ~OVL_PROT_FS_FLAGS_MASK;
+	fa->fsx_xflags &= ~OVL_PROT_FSX_FLAGS_MASK;
+
+	return 0;
+}
+
+/**
+ * Caller must hold a reference to inode to prevent it from being freed while
+ * it is marked inuse.
+ */
+bool ovl_inuse_trylock(struct dentry *dentry)
+{
+	struct inode *inode = d_inode(dentry);
+	bool locked = false;
+
+	spin_lock(&inode->i_lock);
+	if (!(inode->i_state & I_OVL_INUSE)) {
+		inode->i_state |= I_OVL_INUSE;
+		locked = true;
+	}
+	spin_unlock(&inode->i_lock);
+
+	return locked;
+}
+
+void ovl_inuse_unlock(struct dentry *dentry)
+{
+	if (dentry) {
+		struct inode *inode = d_inode(dentry);
+
+		spin_lock(&inode->i_lock);
+		WARN_ON(!(inode->i_state & I_OVL_INUSE));
+		inode->i_state &= ~I_OVL_INUSE;
+		spin_unlock(&inode->i_lock);
+	}
+}
+
+bool ovl_is_inuse(struct dentry *dentry)
+{
+	struct inode *inode = d_inode(dentry);
+	bool inuse;
+
+	spin_lock(&inode->i_lock);
+	inuse = (inode->i_state & I_OVL_INUSE);
+	spin_unlock(&inode->i_lock);
+
+	return inuse;
+}
+
+/*
+ * Does this overlay dentry need to be indexed on copy up?
+ */
+bool ovl_need_index(struct dentry *dentry)
+{
+	struct dentry *lower = ovl_dentry_lower(dentry);
+
+	if (!lower || !ovl_indexdir(dentry->d_sb))
+		return false;
+
+	/* Index all files for NFS export and consistency verification */
+	if (ovl_index_all(dentry->d_sb))
+		return true;
+
+	/* Index only lower hardlinks on copy up */
+	if (!d_is_dir(lower) && d_inode(lower)->i_nlink > 1)
+		return true;
+
+	return false;
+}
+
+/* Caller must hold OVL_I(inode)->lock */
+static void ovl_cleanup_index(struct dentry *dentry)
+{
+	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+	struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
+	struct inode *dir = indexdir->d_inode;
+	struct dentry *lowerdentry = ovl_dentry_lower(dentry);
+	struct dentry *upperdentry = ovl_dentry_upper(dentry);
+	struct dentry *index = NULL;
+	struct inode *inode;
+	struct qstr name = { };
+	int err;
+
+	err = ovl_get_index_name(ofs, lowerdentry, &name);
+	if (err)
+		goto fail;
+
+	inode = d_inode(upperdentry);
+	if (!S_ISDIR(inode->i_mode) && inode->i_nlink != 1) {
+		pr_warn_ratelimited("cleanup linked index (%pd2, ino=%lu, nlink=%u)\n",
+				    upperdentry, inode->i_ino, inode->i_nlink);
+		/*
+		 * We either have a bug with persistent union nlink or a lower
+		 * hardlink was added while overlay is mounted. Adding a lower
+		 * hardlink and then unlinking all overlay hardlinks would drop
+		 * overlay nlink to zero before all upper inodes are unlinked.
+		 * As a safety measure, when that situation is detected, set
+		 * the overlay nlink to the index inode nlink minus one for the
+		 * index entry itself.
+		 */
+		set_nlink(d_inode(dentry), inode->i_nlink - 1);
+		ovl_set_nlink_upper(dentry);
+		goto out;
+	}
+
+	inode_lock_nested(dir, I_MUTEX_PARENT);
+	index = ovl_lookup_upper(ofs, name.name, indexdir, name.len);
+	err = PTR_ERR(index);
+	if (IS_ERR(index)) {
+		index = NULL;
+	} else if (ovl_index_all(dentry->d_sb)) {
+		/* Whiteout orphan index to block future open by handle */
+		err = ovl_cleanup_and_whiteout(OVL_FS(dentry->d_sb),
+					       dir, index);
+	} else {
+		/* Cleanup orphan index entries */
+		err = ovl_cleanup(ofs, dir, index);
+	}
+
+	inode_unlock(dir);
+	if (err)
+		goto fail;
+
+out:
+	kfree(name.name);
+	dput(index);
+	return;
+
+fail:
+	pr_err("cleanup index of '%pd2' failed (%i)\n", dentry, err);
+	goto out;
+}
+
+/*
+ * Operations that change overlay inode and upper inode nlink need to be
+ * synchronized with copy up for persistent nlink accounting.
+ */
+int ovl_nlink_start(struct dentry *dentry)
+{
+	struct inode *inode = d_inode(dentry);
+	const struct cred *old_cred;
+	int err;
+
+	if (WARN_ON(!inode))
+		return -ENOENT;
+
+	/*
+	 * With inodes index is enabled, we store the union overlay nlink
+	 * in an xattr on the index inode. When whiting out an indexed lower,
+	 * we need to decrement the overlay persistent nlink, but before the
+	 * first copy up, we have no upper index inode to store the xattr.
+	 *
+	 * As a workaround, before whiteout/rename over an indexed lower,
+	 * copy up to create the upper index. Creating the upper index will
+	 * initialize the overlay nlink, so it could be dropped if unlink
+	 * or rename succeeds.
+	 *
+	 * TODO: implement metadata only index copy up when called with
+	 *       ovl_copy_up_flags(dentry, O_PATH).
+	 */
+	if (ovl_need_index(dentry) && !ovl_dentry_has_upper_alias(dentry)) {
+		err = ovl_copy_up(dentry);
+		if (err)
+			return err;
+	}
+
+	err = ovl_inode_lock_interruptible(inode);
+	if (err)
+		return err;
+
+	if (d_is_dir(dentry) || !ovl_test_flag(OVL_INDEX, inode))
+		goto out;
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	/*
+	 * The overlay inode nlink should be incremented/decremented IFF the
+	 * upper operation succeeds, along with nlink change of upper inode.
+	 * Therefore, before link/unlink/rename, we store the union nlink
+	 * value relative to the upper inode nlink in an upper inode xattr.
+	 */
+	err = ovl_set_nlink_upper(dentry);
+	revert_creds(old_cred);
+
+out:
+	if (err)
+		ovl_inode_unlock(inode);
+
+	return err;
+}
+
+void ovl_nlink_end(struct dentry *dentry)
+{
+	struct inode *inode = d_inode(dentry);
+
+	if (ovl_test_flag(OVL_INDEX, inode) && inode->i_nlink == 0) {
+		const struct cred *old_cred;
+
+		old_cred = ovl_override_creds(dentry->d_sb);
+		ovl_cleanup_index(dentry);
+		revert_creds(old_cred);
+	}
+
+	ovl_inode_unlock(inode);
+}
+
+int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir)
+{
+	/* Workdir should not be the same as upperdir */
+	if (workdir == upperdir)
+		goto err;
+
+	/* Workdir should not be subdir of upperdir and vice versa */
+	if (lock_rename(workdir, upperdir) != NULL)
+		goto err_unlock;
+
+	return 0;
+
+err_unlock:
+	unlock_rename(workdir, upperdir);
+err:
+	pr_err("failed to lock workdir+upperdir\n");
+	return -EIO;
+}
+
+/* err < 0, 0 if no metacopy xattr, 1 if metacopy xattr found */
+int ovl_check_metacopy_xattr(struct ovl_fs *ofs, const struct path *path)
+{
+	int res;
+
+	/* Only regular files can have metacopy xattr */
+	if (!S_ISREG(d_inode(path->dentry)->i_mode))
+		return 0;
+
+	res = ovl_path_getxattr(ofs, path, OVL_XATTR_METACOPY, NULL, 0);
+	if (res < 0) {
+		if (res == -ENODATA || res == -EOPNOTSUPP)
+			return 0;
+		/*
+		 * getxattr on user.* may fail with EACCES in case there's no
+		 * read permission on the inode.  Not much we can do, other than
+		 * tell the caller that this is not a metacopy inode.
+		 */
+		if (ofs->config.userxattr && res == -EACCES)
+			return 0;
+		goto out;
+	}
+
+	return 1;
+out:
+	pr_warn_ratelimited("failed to get metacopy (%i)\n", res);
+	return res;
+}
+
+bool ovl_is_metacopy_dentry(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	if (!d_is_reg(dentry))
+		return false;
+
+	if (ovl_dentry_upper(dentry)) {
+		if (!ovl_has_upperdata(d_inode(dentry)))
+			return true;
+		return false;
+	}
+
+	return (oe->numlower > 1);
+}
+
+char *ovl_get_redirect_xattr(struct ovl_fs *ofs, const struct path *path, int padding)
+{
+	int res;
+	char *s, *next, *buf = NULL;
+
+	res = ovl_path_getxattr(ofs, path, OVL_XATTR_REDIRECT, NULL, 0);
+	if (res == -ENODATA || res == -EOPNOTSUPP)
+		return NULL;
+	if (res < 0)
+		goto fail;
+	if (res == 0)
+		goto invalid;
+
+	buf = kzalloc(res + padding + 1, GFP_KERNEL);
+	if (!buf)
+		return ERR_PTR(-ENOMEM);
+
+	res = ovl_path_getxattr(ofs, path, OVL_XATTR_REDIRECT, buf, res);
+	if (res < 0)
+		goto fail;
+	if (res == 0)
+		goto invalid;
+
+	if (buf[0] == '/') {
+		for (s = buf; *s++ == '/'; s = next) {
+			next = strchrnul(s, '/');
+			if (s == next)
+				goto invalid;
+		}
+	} else {
+		if (strchr(buf, '/') != NULL)
+			goto invalid;
+	}
+
+	return buf;
+invalid:
+	pr_warn_ratelimited("invalid redirect (%s)\n", buf);
+	res = -EINVAL;
+	goto err_free;
+fail:
+	pr_warn_ratelimited("failed to get redirect (%i)\n", res);
+err_free:
+	kfree(buf);
+	return ERR_PTR(res);
+}
+
+/*
+ * ovl_sync_status() - Check fs sync status for volatile mounts
+ *
+ * Returns 1 if this is not a volatile mount and a real sync is required.
+ *
+ * Returns 0 if syncing can be skipped because mount is volatile, and no errors
+ * have occurred on the upperdir since the mount.
+ *
+ * Returns -errno if it is a volatile mount, and the error that occurred since
+ * the last mount. If the error code changes, it'll return the latest error
+ * code.
+ */
+
+int ovl_sync_status(struct ovl_fs *ofs)
+{
+	struct vfsmount *mnt;
+
+	if (ovl_should_sync(ofs))
+		return 1;
+
+	mnt = ovl_upper_mnt(ofs);
+	if (!mnt)
+		return 0;
+
+	return errseq_check(&mnt->mnt_sb->s_wb_err, ofs->errseq);
+}
+
+/*
+ * ovl_copyattr() - copy inode attributes from layer to ovl inode
+ *
+ * When overlay copies inode information from an upper or lower layer to the
+ * relevant overlay inode it will apply the idmapping of the upper or lower
+ * layer when doing so ensuring that the ovl inode ownership will correctly
+ * reflect the ownership of the idmapped upper or lower layer. For example, an
+ * idmapped upper or lower layer mapping id 1001 to id 1000 will take care to
+ * map any lower or upper inode owned by id 1001 to id 1000. These mapping
+ * helpers are nops when the relevant layer isn't idmapped.
+ */
+void ovl_copyattr(struct inode *inode)
+{
+	struct path realpath;
+	struct inode *realinode;
+	struct user_namespace *real_mnt_userns;
+
+	realinode = ovl_i_path_real(inode, &realpath);
+	real_mnt_userns = mnt_user_ns(realpath.mnt);
+
+	inode->i_uid = i_uid_into_mnt(real_mnt_userns, realinode);
+	inode->i_gid = i_gid_into_mnt(real_mnt_userns, realinode);
+	inode->i_mode = realinode->i_mode;
+	inode->i_atime = realinode->i_atime;
+	inode->i_mtime = realinode->i_mtime;
+	inode->i_ctime = realinode->i_ctime;
+	i_size_write(inode, i_size_read(realinode));
+}
-- 
cgit v1.2.3