summaryrefslogtreecommitdiffstats
path: root/fs/d_path.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/d_path.c')
-rw-r--r--fs/d_path.c447
1 files changed, 447 insertions, 0 deletions
diff --git a/fs/d_path.c b/fs/d_path.c
new file mode 100644
index 000000000..56a6ee4c6
--- /dev/null
+++ b/fs/d_path.c
@@ -0,0 +1,447 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/syscalls.h>
+#include <linux/export.h>
+#include <linux/uaccess.h>
+#include <linux/fs_struct.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/prefetch.h>
+#include "mount.h"
+
+struct prepend_buffer {
+ char *buf;
+ int len;
+};
+#define DECLARE_BUFFER(__name, __buf, __len) \
+ struct prepend_buffer __name = {.buf = __buf + __len, .len = __len}
+
+static char *extract_string(struct prepend_buffer *p)
+{
+ if (likely(p->len >= 0))
+ return p->buf;
+ return ERR_PTR(-ENAMETOOLONG);
+}
+
+static bool prepend_char(struct prepend_buffer *p, unsigned char c)
+{
+ if (likely(p->len > 0)) {
+ p->len--;
+ *--p->buf = c;
+ return true;
+ }
+ p->len = -1;
+ return false;
+}
+
+/*
+ * The source of the prepend data can be an optimistic load
+ * of a dentry name and length. And because we don't hold any
+ * locks, the length and the pointer to the name may not be
+ * in sync if a concurrent rename happens, and the kernel
+ * copy might fault as a result.
+ *
+ * The end result will correct itself when we check the
+ * rename sequence count, but we need to be able to handle
+ * the fault gracefully.
+ */
+static bool prepend_copy(void *dst, const void *src, int len)
+{
+ if (unlikely(copy_from_kernel_nofault(dst, src, len))) {
+ memset(dst, 'x', len);
+ return false;
+ }
+ return true;
+}
+
+static bool prepend(struct prepend_buffer *p, const char *str, int namelen)
+{
+ // Already overflowed?
+ if (p->len < 0)
+ return false;
+
+ // Will overflow?
+ if (p->len < namelen) {
+ // Fill as much as possible from the end of the name
+ str += namelen - p->len;
+ p->buf -= p->len;
+ prepend_copy(p->buf, str, p->len);
+ p->len = -1;
+ return false;
+ }
+
+ // Fits fully
+ p->len -= namelen;
+ p->buf -= namelen;
+ return prepend_copy(p->buf, str, namelen);
+}
+
+/**
+ * prepend_name - prepend a pathname in front of current buffer pointer
+ * @p: prepend buffer which contains buffer pointer and allocated length
+ * @name: name string and length qstr structure
+ *
+ * With RCU path tracing, it may race with d_move(). Use READ_ONCE() to
+ * make sure that either the old or the new name pointer and length are
+ * fetched. However, there may be mismatch between length and pointer.
+ * But since the length cannot be trusted, we need to copy the name very
+ * carefully when doing the prepend_copy(). It also prepends "/" at
+ * the beginning of the name. The sequence number check at the caller will
+ * retry it again when a d_move() does happen. So any garbage in the buffer
+ * due to mismatched pointer and length will be discarded.
+ *
+ * Load acquire is needed to make sure that we see the new name data even
+ * if we might get the length wrong.
+ */
+static bool prepend_name(struct prepend_buffer *p, const struct qstr *name)
+{
+ const char *dname = smp_load_acquire(&name->name); /* ^^^ */
+ u32 dlen = READ_ONCE(name->len);
+
+ return prepend(p, dname, dlen) && prepend_char(p, '/');
+}
+
+static int __prepend_path(const struct dentry *dentry, const struct mount *mnt,
+ const struct path *root, struct prepend_buffer *p)
+{
+ while (dentry != root->dentry || &mnt->mnt != root->mnt) {
+ const struct dentry *parent = READ_ONCE(dentry->d_parent);
+
+ if (dentry == mnt->mnt.mnt_root) {
+ struct mount *m = READ_ONCE(mnt->mnt_parent);
+ struct mnt_namespace *mnt_ns;
+
+ if (likely(mnt != m)) {
+ dentry = READ_ONCE(mnt->mnt_mountpoint);
+ mnt = m;
+ continue;
+ }
+ /* Global root */
+ mnt_ns = READ_ONCE(mnt->mnt_ns);
+ /* open-coded is_mounted() to use local mnt_ns */
+ if (!IS_ERR_OR_NULL(mnt_ns) && !is_anon_ns(mnt_ns))
+ return 1; // absolute root
+ else
+ return 2; // detached or not attached yet
+ }
+
+ if (unlikely(dentry == parent))
+ /* Escaped? */
+ return 3;
+
+ prefetch(parent);
+ if (!prepend_name(p, &dentry->d_name))
+ break;
+ dentry = parent;
+ }
+ return 0;
+}
+
+/**
+ * prepend_path - Prepend path string to a buffer
+ * @path: the dentry/vfsmount to report
+ * @root: root vfsmnt/dentry
+ * @p: prepend buffer which contains buffer pointer and allocated length
+ *
+ * The function will first try to write out the pathname without taking any
+ * lock other than the RCU read lock to make sure that dentries won't go away.
+ * It only checks the sequence number of the global rename_lock as any change
+ * in the dentry's d_seq will be preceded by changes in the rename_lock
+ * sequence number. If the sequence number had been changed, it will restart
+ * the whole pathname back-tracing sequence again by taking the rename_lock.
+ * In this case, there is no need to take the RCU read lock as the recursive
+ * parent pointer references will keep the dentry chain alive as long as no
+ * rename operation is performed.
+ */
+static int prepend_path(const struct path *path,
+ const struct path *root,
+ struct prepend_buffer *p)
+{
+ unsigned seq, m_seq = 0;
+ struct prepend_buffer b;
+ int error;
+
+ rcu_read_lock();
+restart_mnt:
+ read_seqbegin_or_lock(&mount_lock, &m_seq);
+ seq = 0;
+ rcu_read_lock();
+restart:
+ b = *p;
+ read_seqbegin_or_lock(&rename_lock, &seq);
+ error = __prepend_path(path->dentry, real_mount(path->mnt), root, &b);
+ if (!(seq & 1))
+ rcu_read_unlock();
+ if (need_seqretry(&rename_lock, seq)) {
+ seq = 1;
+ goto restart;
+ }
+ done_seqretry(&rename_lock, seq);
+
+ if (!(m_seq & 1))
+ rcu_read_unlock();
+ if (need_seqretry(&mount_lock, m_seq)) {
+ m_seq = 1;
+ goto restart_mnt;
+ }
+ done_seqretry(&mount_lock, m_seq);
+
+ if (unlikely(error == 3))
+ b = *p;
+
+ if (b.len == p->len)
+ prepend_char(&b, '/');
+
+ *p = b;
+ return error;
+}
+
+/**
+ * __d_path - return the path of a dentry
+ * @path: the dentry/vfsmount to report
+ * @root: root vfsmnt/dentry
+ * @buf: buffer to return value in
+ * @buflen: buffer length
+ *
+ * Convert a dentry into an ASCII path name.
+ *
+ * Returns a pointer into the buffer or an error code if the
+ * path was too long.
+ *
+ * "buflen" should be positive.
+ *
+ * If the path is not reachable from the supplied root, return %NULL.
+ */
+char *__d_path(const struct path *path,
+ const struct path *root,
+ char *buf, int buflen)
+{
+ DECLARE_BUFFER(b, buf, buflen);
+
+ prepend_char(&b, 0);
+ if (unlikely(prepend_path(path, root, &b) > 0))
+ return NULL;
+ return extract_string(&b);
+}
+
+char *d_absolute_path(const struct path *path,
+ char *buf, int buflen)
+{
+ struct path root = {};
+ DECLARE_BUFFER(b, buf, buflen);
+
+ prepend_char(&b, 0);
+ if (unlikely(prepend_path(path, &root, &b) > 1))
+ return ERR_PTR(-EINVAL);
+ return extract_string(&b);
+}
+
+static void get_fs_root_rcu(struct fs_struct *fs, struct path *root)
+{
+ unsigned seq;
+
+ do {
+ seq = read_seqcount_begin(&fs->seq);
+ *root = fs->root;
+ } while (read_seqcount_retry(&fs->seq, seq));
+}
+
+/**
+ * d_path - return the path of a dentry
+ * @path: path to report
+ * @buf: buffer to return value in
+ * @buflen: buffer length
+ *
+ * Convert a dentry into an ASCII path name. If the entry has been deleted
+ * the string " (deleted)" is appended. Note that this is ambiguous.
+ *
+ * Returns a pointer into the buffer or an error code if the path was
+ * too long. Note: Callers should use the returned pointer, not the passed
+ * in buffer, to use the name! The implementation often starts at an offset
+ * into the buffer, and may leave 0 bytes at the start.
+ *
+ * "buflen" should be positive.
+ */
+char *d_path(const struct path *path, char *buf, int buflen)
+{
+ DECLARE_BUFFER(b, buf, buflen);
+ struct path root;
+
+ /*
+ * We have various synthetic filesystems that never get mounted. On
+ * these filesystems dentries are never used for lookup purposes, and
+ * thus don't need to be hashed. They also don't need a name until a
+ * user wants to identify the object in /proc/pid/fd/. The little hack
+ * below allows us to generate a name for these objects on demand:
+ *
+ * Some pseudo inodes are mountable. When they are mounted
+ * path->dentry == path->mnt->mnt_root. In that case don't call d_dname
+ * and instead have d_path return the mounted path.
+ */
+ if (path->dentry->d_op && path->dentry->d_op->d_dname &&
+ (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root))
+ return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
+
+ rcu_read_lock();
+ get_fs_root_rcu(current->fs, &root);
+ if (unlikely(d_unlinked(path->dentry)))
+ prepend(&b, " (deleted)", 11);
+ else
+ prepend_char(&b, 0);
+ prepend_path(path, &root, &b);
+ rcu_read_unlock();
+
+ return extract_string(&b);
+}
+EXPORT_SYMBOL(d_path);
+
+/*
+ * Helper function for dentry_operations.d_dname() members
+ */
+char *dynamic_dname(char *buffer, int buflen, const char *fmt, ...)
+{
+ va_list args;
+ char temp[64];
+ int sz;
+
+ va_start(args, fmt);
+ sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
+ va_end(args);
+
+ if (sz > sizeof(temp) || sz > buflen)
+ return ERR_PTR(-ENAMETOOLONG);
+
+ buffer += buflen - sz;
+ return memcpy(buffer, temp, sz);
+}
+
+char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
+{
+ DECLARE_BUFFER(b, buffer, buflen);
+ /* these dentries are never renamed, so d_lock is not needed */
+ prepend(&b, " (deleted)", 11);
+ prepend(&b, dentry->d_name.name, dentry->d_name.len);
+ prepend_char(&b, '/');
+ return extract_string(&b);
+}
+
+/*
+ * Write full pathname from the root of the filesystem into the buffer.
+ */
+static char *__dentry_path(const struct dentry *d, struct prepend_buffer *p)
+{
+ const struct dentry *dentry;
+ struct prepend_buffer b;
+ int seq = 0;
+
+ rcu_read_lock();
+restart:
+ dentry = d;
+ b = *p;
+ read_seqbegin_or_lock(&rename_lock, &seq);
+ while (!IS_ROOT(dentry)) {
+ const struct dentry *parent = dentry->d_parent;
+
+ prefetch(parent);
+ if (!prepend_name(&b, &dentry->d_name))
+ break;
+ dentry = parent;
+ }
+ if (!(seq & 1))
+ rcu_read_unlock();
+ if (need_seqretry(&rename_lock, seq)) {
+ seq = 1;
+ goto restart;
+ }
+ done_seqretry(&rename_lock, seq);
+ if (b.len == p->len)
+ prepend_char(&b, '/');
+ return extract_string(&b);
+}
+
+char *dentry_path_raw(const struct dentry *dentry, char *buf, int buflen)
+{
+ DECLARE_BUFFER(b, buf, buflen);
+
+ prepend_char(&b, 0);
+ return __dentry_path(dentry, &b);
+}
+EXPORT_SYMBOL(dentry_path_raw);
+
+char *dentry_path(const struct dentry *dentry, char *buf, int buflen)
+{
+ DECLARE_BUFFER(b, buf, buflen);
+
+ if (unlikely(d_unlinked(dentry)))
+ prepend(&b, "//deleted", 10);
+ else
+ prepend_char(&b, 0);
+ return __dentry_path(dentry, &b);
+}
+
+static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root,
+ struct path *pwd)
+{
+ unsigned seq;
+
+ do {
+ seq = read_seqcount_begin(&fs->seq);
+ *root = fs->root;
+ *pwd = fs->pwd;
+ } while (read_seqcount_retry(&fs->seq, seq));
+}
+
+/*
+ * NOTE! The user-level library version returns a
+ * character pointer. The kernel system call just
+ * returns the length of the buffer filled (which
+ * includes the ending '\0' character), or a negative
+ * error value. So libc would do something like
+ *
+ * char *getcwd(char * buf, size_t size)
+ * {
+ * int retval;
+ *
+ * retval = sys_getcwd(buf, size);
+ * if (retval >= 0)
+ * return buf;
+ * errno = -retval;
+ * return NULL;
+ * }
+ */
+SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
+{
+ int error;
+ struct path pwd, root;
+ char *page = __getname();
+
+ if (!page)
+ return -ENOMEM;
+
+ rcu_read_lock();
+ get_fs_root_and_pwd_rcu(current->fs, &root, &pwd);
+
+ if (unlikely(d_unlinked(pwd.dentry))) {
+ rcu_read_unlock();
+ error = -ENOENT;
+ } else {
+ unsigned len;
+ DECLARE_BUFFER(b, page, PATH_MAX);
+
+ prepend_char(&b, 0);
+ if (unlikely(prepend_path(&pwd, &root, &b) > 0))
+ prepend(&b, "(unreachable)", 13);
+ rcu_read_unlock();
+
+ len = PATH_MAX - b.len;
+ if (unlikely(len > PATH_MAX))
+ error = -ENAMETOOLONG;
+ else if (unlikely(len > size))
+ error = -ERANGE;
+ else if (copy_to_user(buf, b.buf, len))
+ error = -EFAULT;
+ else
+ error = len;
+ }
+ __putname(page);
+ return error;
+}