diff options
Diffstat (limited to '')
-rw-r--r-- | fs/namespace.c | 637 |
1 files changed, 534 insertions, 103 deletions
diff --git a/fs/namespace.c b/fs/namespace.c index 3ffae74652..5a51315c66 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -32,6 +32,7 @@ #include <linux/fs_context.h> #include <linux/shmem_fs.h> #include <linux/mnt_idmapping.h> +#include <linux/nospec.h> #include "pnode.h" #include "internal.h" @@ -68,6 +69,9 @@ static u64 event; static DEFINE_IDA(mnt_id_ida); static DEFINE_IDA(mnt_group_ida); +/* Don't allow confusion with old 32bit mount ID */ +static atomic64_t mnt_id_ctr = ATOMIC64_INIT(1ULL << 32); + static struct hlist_head *mount_hashtable __ro_after_init; static struct hlist_head *mountpoint_hashtable __ro_after_init; static struct kmem_cache *mnt_cache __ro_after_init; @@ -131,6 +135,7 @@ static int mnt_alloc_id(struct mount *mnt) if (res < 0) return res; mnt->mnt_id = res; + mnt->mnt_id_unique = atomic64_inc_return(&mnt_id_ctr); return 0; } @@ -730,21 +735,6 @@ struct vfsmount *lookup_mnt(const struct path *path) return m; } -static inline void lock_ns_list(struct mnt_namespace *ns) -{ - spin_lock(&ns->ns_lock); -} - -static inline void unlock_ns_list(struct mnt_namespace *ns) -{ - spin_unlock(&ns->ns_lock); -} - -static inline bool mnt_is_cursor(struct mount *mnt) -{ - return mnt->mnt.mnt_flags & MNT_CURSOR; -} - /* * __is_local_mountpoint - Test to see if dentry is a mountpoint in the * current mount namespace. @@ -763,19 +753,15 @@ static inline bool mnt_is_cursor(struct mount *mnt) bool __is_local_mountpoint(struct dentry *dentry) { struct mnt_namespace *ns = current->nsproxy->mnt_ns; - struct mount *mnt; + struct mount *mnt, *n; bool is_covered = false; down_read(&namespace_sem); - lock_ns_list(ns); - list_for_each_entry(mnt, &ns->list, mnt_list) { - if (mnt_is_cursor(mnt)) - continue; + rbtree_postorder_for_each_entry_safe(mnt, n, &ns->mounts, mnt_node) { is_covered = (mnt->mnt_mountpoint == dentry); if (is_covered) break; } - unlock_ns_list(ns); up_read(&namespace_sem); return is_covered; @@ -1022,6 +1008,30 @@ void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct m mnt_add_count(old_parent, -1); } +static inline struct mount *node_to_mount(struct rb_node *node) +{ + return node ? rb_entry(node, struct mount, mnt_node) : NULL; +} + +static void mnt_add_to_ns(struct mnt_namespace *ns, struct mount *mnt) +{ + struct rb_node **link = &ns->mounts.rb_node; + struct rb_node *parent = NULL; + + WARN_ON(mnt->mnt.mnt_flags & MNT_ONRB); + mnt->mnt_ns = ns; + while (*link) { + parent = *link; + if (mnt->mnt_id_unique < node_to_mount(parent)->mnt_id_unique) + link = &parent->rb_left; + else + link = &parent->rb_right; + } + rb_link_node(&mnt->mnt_node, parent, link); + rb_insert_color(&mnt->mnt_node, &ns->mounts); + mnt->mnt.mnt_flags |= MNT_ONRB; +} + /* * vfsmount lock must be held for write */ @@ -1035,12 +1045,13 @@ static void commit_tree(struct mount *mnt) BUG_ON(parent == mnt); list_add_tail(&head, &mnt->mnt_list); - list_for_each_entry(m, &head, mnt_list) - m->mnt_ns = n; + while (!list_empty(&head)) { + m = list_first_entry(&head, typeof(*m), mnt_list); + list_del(&m->mnt_list); - list_splice(&head, n->list.prev); - - n->mounts += n->pending_mounts; + mnt_add_to_ns(n, m); + } + n->nr_mounts += n->pending_mounts; n->pending_mounts = 0; __attach_mnt(mnt, parent); @@ -1188,7 +1199,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, } mnt->mnt.mnt_flags = old->mnt.mnt_flags; - mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL); + mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL|MNT_ONRB); atomic_inc(&sb->s_active); mnt->mnt.mnt_idmap = mnt_idmap_get(mnt_idmap(&old->mnt)); @@ -1413,65 +1424,57 @@ struct vfsmount *mnt_clone_internal(const struct path *path) return &p->mnt; } -#ifdef CONFIG_PROC_FS -static struct mount *mnt_list_next(struct mnt_namespace *ns, - struct list_head *p) +/* + * Returns the mount which either has the specified mnt_id, or has the next + * smallest id afer the specified one. + */ +static struct mount *mnt_find_id_at(struct mnt_namespace *ns, u64 mnt_id) { - struct mount *mnt, *ret = NULL; + struct rb_node *node = ns->mounts.rb_node; + struct mount *ret = NULL; - lock_ns_list(ns); - list_for_each_continue(p, &ns->list) { - mnt = list_entry(p, typeof(*mnt), mnt_list); - if (!mnt_is_cursor(mnt)) { - ret = mnt; - break; + while (node) { + struct mount *m = node_to_mount(node); + + if (mnt_id <= m->mnt_id_unique) { + ret = node_to_mount(node); + if (mnt_id == m->mnt_id_unique) + break; + node = node->rb_left; + } else { + node = node->rb_right; } } - unlock_ns_list(ns); - return ret; } +#ifdef CONFIG_PROC_FS + /* iterator; we want it to have access to namespace_sem, thus here... */ static void *m_start(struct seq_file *m, loff_t *pos) { struct proc_mounts *p = m->private; - struct list_head *prev; down_read(&namespace_sem); - if (!*pos) { - prev = &p->ns->list; - } else { - prev = &p->cursor.mnt_list; - /* Read after we'd reached the end? */ - if (list_empty(prev)) - return NULL; - } - - return mnt_list_next(p->ns, prev); + return mnt_find_id_at(p->ns, *pos); } static void *m_next(struct seq_file *m, void *v, loff_t *pos) { - struct proc_mounts *p = m->private; - struct mount *mnt = v; + struct mount *next = NULL, *mnt = v; + struct rb_node *node = rb_next(&mnt->mnt_node); ++*pos; - return mnt_list_next(p->ns, &mnt->mnt_list); + if (node) { + next = node_to_mount(node); + *pos = next->mnt_id_unique; + } + return next; } static void m_stop(struct seq_file *m, void *v) { - struct proc_mounts *p = m->private; - struct mount *mnt = v; - - lock_ns_list(p->ns); - if (mnt) - list_move_tail(&p->cursor.mnt_list, &mnt->mnt_list); - else - list_del_init(&p->cursor.mnt_list); - unlock_ns_list(p->ns); up_read(&namespace_sem); } @@ -1489,14 +1492,6 @@ const struct seq_operations mounts_op = { .show = m_show, }; -void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor) -{ - down_read(&namespace_sem); - lock_ns_list(ns); - list_del(&cursor->mnt_list); - unlock_ns_list(ns); - up_read(&namespace_sem); -} #endif /* CONFIG_PROC_FS */ /** @@ -1638,7 +1633,10 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) /* Gather the mounts to umount */ for (p = mnt; p; p = next_mnt(p, mnt)) { p->mnt.mnt_flags |= MNT_UMOUNT; - list_move(&p->mnt_list, &tmp_list); + if (p->mnt.mnt_flags & MNT_ONRB) + move_from_ns(p, &tmp_list); + else + list_move(&p->mnt_list, &tmp_list); } /* Hide the mounts from mnt_mounts */ @@ -1658,7 +1656,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) list_del_init(&p->mnt_list); ns = p->mnt_ns; if (ns) { - ns->mounts--; + ns->nr_mounts--; __touch_mnt_namespace(ns); } p->mnt_ns = NULL; @@ -1784,14 +1782,16 @@ static int do_umount(struct mount *mnt, int flags) event++; if (flags & MNT_DETACH) { - if (!list_empty(&mnt->mnt_list)) + if (mnt->mnt.mnt_flags & MNT_ONRB || + !list_empty(&mnt->mnt_list)) umount_tree(mnt, UMOUNT_PROPAGATE); retval = 0; } else { shrink_submounts(mnt); retval = -EBUSY; if (!propagate_mount_busy(mnt, 2)) { - if (!list_empty(&mnt->mnt_list)) + if (mnt->mnt.mnt_flags & MNT_ONRB || + !list_empty(&mnt->mnt_list)) umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC); retval = 0; } @@ -2209,9 +2209,9 @@ int count_mounts(struct mnt_namespace *ns, struct mount *mnt) unsigned int mounts = 0; struct mount *p; - if (ns->mounts >= max) + if (ns->nr_mounts >= max) return -ENOSPC; - max -= ns->mounts; + max -= ns->nr_mounts; if (ns->pending_mounts >= max) return -ENOSPC; max -= ns->pending_mounts; @@ -2355,8 +2355,12 @@ static int attach_recursive_mnt(struct mount *source_mnt, touch_mnt_namespace(source_mnt->mnt_ns); } else { if (source_mnt->mnt_ns) { + LIST_HEAD(head); + /* move from anon - the caller will destroy */ - list_del_init(&source_mnt->mnt_ns->list); + for (p = source_mnt; p; p = next_mnt(p, source_mnt)) + move_from_ns(p, &head); + list_del_init(&head); } if (beneath) mnt_set_mountpoint_beneath(source_mnt, top_mnt, smp); @@ -2667,11 +2671,10 @@ static struct file *open_detached_copy(struct path *path, bool recursive) lock_mount_hash(); for (p = mnt; p; p = next_mnt(p, mnt)) { - p->mnt_ns = ns; - ns->mounts++; + mnt_add_to_ns(ns, p); + ns->nr_mounts++; } ns->root = mnt; - list_add_tail(&ns->list, &mnt->mnt_list); mntget(&mnt->mnt); unlock_mount_hash(); namespace_unlock(); @@ -3031,6 +3034,7 @@ static inline bool path_overmounted(const struct path *path) * can_move_mount_beneath - check that we can mount beneath the top mount * @from: mount to mount beneath * @to: mount under which to mount + * @mp: mountpoint of @to * * - Make sure that @to->dentry is actually the root of a mount under * which we can mount another mount. @@ -3745,9 +3749,8 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a if (!anon) new_ns->seq = atomic64_add_return(1, &mnt_ns_seq); refcount_set(&new_ns->ns.count, 1); - INIT_LIST_HEAD(&new_ns->list); + new_ns->mounts = RB_ROOT; init_waitqueue_head(&new_ns->poll); - spin_lock_init(&new_ns->ns_lock); new_ns->user_ns = get_user_ns(user_ns); new_ns->ucounts = ucounts; return new_ns; @@ -3794,7 +3797,6 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, unlock_mount_hash(); } new_ns->root = new; - list_add_tail(&new_ns->list, &new->mnt_list); /* * Second pass: switch the tsk->fs->* elements and mark new vfsmounts @@ -3804,8 +3806,8 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, p = old; q = new; while (p) { - q->mnt_ns = new_ns; - new_ns->mounts++; + mnt_add_to_ns(new_ns, q); + new_ns->nr_mounts++; if (new_fs) { if (&p->mnt == new_fs->root.mnt) { new_fs->root.mnt = mntget(&q->mnt); @@ -3847,10 +3849,9 @@ struct dentry *mount_subtree(struct vfsmount *m, const char *name) mntput(m); return ERR_CAST(ns); } - mnt->mnt_ns = ns; ns->root = mnt; - ns->mounts++; - list_add(&mnt->mnt_list, &ns->list); + ns->nr_mounts++; + mnt_add_to_ns(ns, mnt); err = vfs_path_lookup(m->mnt_root, m, name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path); @@ -4028,10 +4029,9 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags, goto err_path; } mnt = real_mount(newmount.mnt); - mnt->mnt_ns = ns; ns->root = mnt; - ns->mounts = 1; - list_add(&mnt->mnt_list, &ns->list); + ns->nr_mounts = 1; + mnt_add_to_ns(ns, mnt); mntget(newmount.mnt); /* Attach to an apparent O_PATH fd with a note that we need to unmount @@ -4299,7 +4299,7 @@ static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt) * Creating an idmapped mount with the filesystem wide idmapping * doesn't make sense so block that. We don't allow mushy semantics. */ - if (!check_fsmapping(kattr->mnt_idmap, m->mnt_sb)) + if (kattr->mnt_userns == m->mnt_sb->s_user_ns) return -EINVAL; /* @@ -4692,6 +4692,444 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, return err; } +int show_path(struct seq_file *m, struct dentry *root) +{ + if (root->d_sb->s_op->show_path) + return root->d_sb->s_op->show_path(m, root); + + seq_dentry(m, root, " \t\n\\"); + return 0; +} + +static struct vfsmount *lookup_mnt_in_ns(u64 id, struct mnt_namespace *ns) +{ + struct mount *mnt = mnt_find_id_at(ns, id); + + if (!mnt || mnt->mnt_id_unique != id) + return NULL; + + return &mnt->mnt; +} + +struct kstatmount { + struct statmount __user *buf; + size_t bufsize; + struct vfsmount *mnt; + u64 mask; + struct path root; + struct statmount sm; + struct seq_file seq; +}; + +static u64 mnt_to_attr_flags(struct vfsmount *mnt) +{ + unsigned int mnt_flags = READ_ONCE(mnt->mnt_flags); + u64 attr_flags = 0; + + if (mnt_flags & MNT_READONLY) + attr_flags |= MOUNT_ATTR_RDONLY; + if (mnt_flags & MNT_NOSUID) + attr_flags |= MOUNT_ATTR_NOSUID; + if (mnt_flags & MNT_NODEV) + attr_flags |= MOUNT_ATTR_NODEV; + if (mnt_flags & MNT_NOEXEC) + attr_flags |= MOUNT_ATTR_NOEXEC; + if (mnt_flags & MNT_NODIRATIME) + attr_flags |= MOUNT_ATTR_NODIRATIME; + if (mnt_flags & MNT_NOSYMFOLLOW) + attr_flags |= MOUNT_ATTR_NOSYMFOLLOW; + + if (mnt_flags & MNT_NOATIME) + attr_flags |= MOUNT_ATTR_NOATIME; + else if (mnt_flags & MNT_RELATIME) + attr_flags |= MOUNT_ATTR_RELATIME; + else + attr_flags |= MOUNT_ATTR_STRICTATIME; + + if (is_idmapped_mnt(mnt)) + attr_flags |= MOUNT_ATTR_IDMAP; + + return attr_flags; +} + +static u64 mnt_to_propagation_flags(struct mount *m) +{ + u64 propagation = 0; + + if (IS_MNT_SHARED(m)) + propagation |= MS_SHARED; + if (IS_MNT_SLAVE(m)) + propagation |= MS_SLAVE; + if (IS_MNT_UNBINDABLE(m)) + propagation |= MS_UNBINDABLE; + if (!propagation) + propagation |= MS_PRIVATE; + + return propagation; +} + +static void statmount_sb_basic(struct kstatmount *s) +{ + struct super_block *sb = s->mnt->mnt_sb; + + s->sm.mask |= STATMOUNT_SB_BASIC; + s->sm.sb_dev_major = MAJOR(sb->s_dev); + s->sm.sb_dev_minor = MINOR(sb->s_dev); + s->sm.sb_magic = sb->s_magic; + s->sm.sb_flags = sb->s_flags & (SB_RDONLY|SB_SYNCHRONOUS|SB_DIRSYNC|SB_LAZYTIME); +} + +static void statmount_mnt_basic(struct kstatmount *s) +{ + struct mount *m = real_mount(s->mnt); + + s->sm.mask |= STATMOUNT_MNT_BASIC; + s->sm.mnt_id = m->mnt_id_unique; + s->sm.mnt_parent_id = m->mnt_parent->mnt_id_unique; + s->sm.mnt_id_old = m->mnt_id; + s->sm.mnt_parent_id_old = m->mnt_parent->mnt_id; + s->sm.mnt_attr = mnt_to_attr_flags(&m->mnt); + s->sm.mnt_propagation = mnt_to_propagation_flags(m); + s->sm.mnt_peer_group = IS_MNT_SHARED(m) ? m->mnt_group_id : 0; + s->sm.mnt_master = IS_MNT_SLAVE(m) ? m->mnt_master->mnt_group_id : 0; +} + +static void statmount_propagate_from(struct kstatmount *s) +{ + struct mount *m = real_mount(s->mnt); + + s->sm.mask |= STATMOUNT_PROPAGATE_FROM; + if (IS_MNT_SLAVE(m)) + s->sm.propagate_from = get_dominating_id(m, ¤t->fs->root); +} + +static int statmount_mnt_root(struct kstatmount *s, struct seq_file *seq) +{ + int ret; + size_t start = seq->count; + + ret = show_path(seq, s->mnt->mnt_root); + if (ret) + return ret; + + if (unlikely(seq_has_overflowed(seq))) + return -EAGAIN; + + /* + * Unescape the result. It would be better if supplied string was not + * escaped in the first place, but that's a pretty invasive change. + */ + seq->buf[seq->count] = '\0'; + seq->count = start; + seq_commit(seq, string_unescape_inplace(seq->buf + start, UNESCAPE_OCTAL)); + return 0; +} + +static int statmount_mnt_point(struct kstatmount *s, struct seq_file *seq) +{ + struct vfsmount *mnt = s->mnt; + struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; + int err; + + err = seq_path_root(seq, &mnt_path, &s->root, ""); + return err == SEQ_SKIP ? 0 : err; +} + +static int statmount_fs_type(struct kstatmount *s, struct seq_file *seq) +{ + struct super_block *sb = s->mnt->mnt_sb; + + seq_puts(seq, sb->s_type->name); + return 0; +} + +static int statmount_string(struct kstatmount *s, u64 flag) +{ + int ret; + size_t kbufsize; + struct seq_file *seq = &s->seq; + struct statmount *sm = &s->sm; + + switch (flag) { + case STATMOUNT_FS_TYPE: + sm->fs_type = seq->count; + ret = statmount_fs_type(s, seq); + break; + case STATMOUNT_MNT_ROOT: + sm->mnt_root = seq->count; + ret = statmount_mnt_root(s, seq); + break; + case STATMOUNT_MNT_POINT: + sm->mnt_point = seq->count; + ret = statmount_mnt_point(s, seq); + break; + default: + WARN_ON_ONCE(true); + return -EINVAL; + } + + if (unlikely(check_add_overflow(sizeof(*sm), seq->count, &kbufsize))) + return -EOVERFLOW; + if (kbufsize >= s->bufsize) + return -EOVERFLOW; + + /* signal a retry */ + if (unlikely(seq_has_overflowed(seq))) + return -EAGAIN; + + if (ret) + return ret; + + seq->buf[seq->count++] = '\0'; + sm->mask |= flag; + return 0; +} + +static int copy_statmount_to_user(struct kstatmount *s) +{ + struct statmount *sm = &s->sm; + struct seq_file *seq = &s->seq; + char __user *str = ((char __user *)s->buf) + sizeof(*sm); + size_t copysize = min_t(size_t, s->bufsize, sizeof(*sm)); + + if (seq->count && copy_to_user(str, seq->buf, seq->count)) + return -EFAULT; + + /* Return the number of bytes copied to the buffer */ + sm->size = copysize + seq->count; + if (copy_to_user(s->buf, sm, copysize)) + return -EFAULT; + + return 0; +} + +static int do_statmount(struct kstatmount *s) +{ + struct mount *m = real_mount(s->mnt); + int err; + + /* + * Don't trigger audit denials. We just want to determine what + * mounts to show users. + */ + if (!is_path_reachable(m, m->mnt.mnt_root, &s->root) && + !ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN)) + return -EPERM; + + err = security_sb_statfs(s->mnt->mnt_root); + if (err) + return err; + + if (s->mask & STATMOUNT_SB_BASIC) + statmount_sb_basic(s); + + if (s->mask & STATMOUNT_MNT_BASIC) + statmount_mnt_basic(s); + + if (s->mask & STATMOUNT_PROPAGATE_FROM) + statmount_propagate_from(s); + + if (s->mask & STATMOUNT_FS_TYPE) + err = statmount_string(s, STATMOUNT_FS_TYPE); + + if (!err && s->mask & STATMOUNT_MNT_ROOT) + err = statmount_string(s, STATMOUNT_MNT_ROOT); + + if (!err && s->mask & STATMOUNT_MNT_POINT) + err = statmount_string(s, STATMOUNT_MNT_POINT); + + if (err) + return err; + + return 0; +} + +static inline bool retry_statmount(const long ret, size_t *seq_size) +{ + if (likely(ret != -EAGAIN)) + return false; + if (unlikely(check_mul_overflow(*seq_size, 2, seq_size))) + return false; + if (unlikely(*seq_size > MAX_RW_COUNT)) + return false; + return true; +} + +static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq, + struct statmount __user *buf, size_t bufsize, + size_t seq_size) +{ + if (!access_ok(buf, bufsize)) + return -EFAULT; + + memset(ks, 0, sizeof(*ks)); + ks->mask = kreq->param; + ks->buf = buf; + ks->bufsize = bufsize; + ks->seq.size = seq_size; + ks->seq.buf = kvmalloc(seq_size, GFP_KERNEL_ACCOUNT); + if (!ks->seq.buf) + return -ENOMEM; + return 0; +} + +static int copy_mnt_id_req(const struct mnt_id_req __user *req, + struct mnt_id_req *kreq) +{ + int ret; + size_t usize; + + BUILD_BUG_ON(sizeof(struct mnt_id_req) != MNT_ID_REQ_SIZE_VER0); + + ret = get_user(usize, &req->size); + if (ret) + return -EFAULT; + if (unlikely(usize > PAGE_SIZE)) + return -E2BIG; + if (unlikely(usize < MNT_ID_REQ_SIZE_VER0)) + return -EINVAL; + memset(kreq, 0, sizeof(*kreq)); + ret = copy_struct_from_user(kreq, sizeof(*kreq), req, usize); + if (ret) + return ret; + if (kreq->spare != 0) + return -EINVAL; + return 0; +} + +SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req, + struct statmount __user *, buf, size_t, bufsize, + unsigned int, flags) +{ + struct vfsmount *mnt; + struct mnt_id_req kreq; + struct kstatmount ks; + /* We currently support retrieval of 3 strings. */ + size_t seq_size = 3 * PATH_MAX; + int ret; + + if (flags) + return -EINVAL; + + ret = copy_mnt_id_req(req, &kreq); + if (ret) + return ret; + +retry: + ret = prepare_kstatmount(&ks, &kreq, buf, bufsize, seq_size); + if (ret) + return ret; + + down_read(&namespace_sem); + mnt = lookup_mnt_in_ns(kreq.mnt_id, current->nsproxy->mnt_ns); + if (!mnt) { + up_read(&namespace_sem); + kvfree(ks.seq.buf); + return -ENOENT; + } + + ks.mnt = mnt; + get_fs_root(current->fs, &ks.root); + ret = do_statmount(&ks); + path_put(&ks.root); + up_read(&namespace_sem); + + if (!ret) + ret = copy_statmount_to_user(&ks); + kvfree(ks.seq.buf); + if (retry_statmount(ret, &seq_size)) + goto retry; + return ret; +} + +static struct mount *listmnt_next(struct mount *curr) +{ + return node_to_mount(rb_next(&curr->mnt_node)); +} + +static ssize_t do_listmount(struct mount *first, struct path *orig, + u64 mnt_parent_id, u64 __user *mnt_ids, + size_t nr_mnt_ids, const struct path *root) +{ + struct mount *r; + ssize_t ret; + + /* + * Don't trigger audit denials. We just want to determine what + * mounts to show users. + */ + if (!is_path_reachable(real_mount(orig->mnt), orig->dentry, root) && + !ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN)) + return -EPERM; + + ret = security_sb_statfs(orig->dentry); + if (ret) + return ret; + + for (ret = 0, r = first; r && nr_mnt_ids; r = listmnt_next(r)) { + if (r->mnt_id_unique == mnt_parent_id) + continue; + if (!is_path_reachable(r, r->mnt.mnt_root, orig)) + continue; + if (put_user(r->mnt_id_unique, mnt_ids)) + return -EFAULT; + mnt_ids++; + nr_mnt_ids--; + ret++; + } + return ret; +} + +SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req, u64 __user *, + mnt_ids, size_t, nr_mnt_ids, unsigned int, flags) +{ + struct mnt_namespace *ns = current->nsproxy->mnt_ns; + struct mnt_id_req kreq; + struct mount *first; + struct path root, orig; + u64 mnt_parent_id, last_mnt_id; + const size_t maxcount = (size_t)-1 >> 3; + ssize_t ret; + + if (flags) + return -EINVAL; + + if (unlikely(nr_mnt_ids > maxcount)) + return -EFAULT; + + if (!access_ok(mnt_ids, nr_mnt_ids * sizeof(*mnt_ids))) + return -EFAULT; + + ret = copy_mnt_id_req(req, &kreq); + if (ret) + return ret; + mnt_parent_id = kreq.mnt_id; + last_mnt_id = kreq.param; + + down_read(&namespace_sem); + get_fs_root(current->fs, &root); + if (mnt_parent_id == LSMT_ROOT) { + orig = root; + } else { + ret = -ENOENT; + orig.mnt = lookup_mnt_in_ns(mnt_parent_id, ns); + if (!orig.mnt) + goto err; + orig.dentry = orig.mnt->mnt_root; + } + if (!last_mnt_id) + first = node_to_mount(rb_first(&ns->mounts)); + else + first = mnt_find_id_at(ns, last_mnt_id + 1); + + ret = do_listmount(first, &orig, mnt_parent_id, mnt_ids, nr_mnt_ids, &root); +err: + path_put(&root); + up_read(&namespace_sem); + return ret; +} + + static void __init init_mount_tree(void) { struct vfsmount *mnt; @@ -4707,10 +5145,9 @@ static void __init init_mount_tree(void) if (IS_ERR(ns)) panic("Can't allocate initial namespace"); m = real_mount(mnt); - m->mnt_ns = ns; ns->root = m; - ns->mounts = 1; - list_add(&m->mnt_list, &ns->list); + ns->nr_mounts = 1; + mnt_add_to_ns(ns, m); init_task.nsproxy->mnt_ns = ns; get_mnt_ns(ns); @@ -4837,18 +5274,14 @@ static bool mnt_already_visible(struct mnt_namespace *ns, int *new_mnt_flags) { int new_flags = *new_mnt_flags; - struct mount *mnt; + struct mount *mnt, *n; bool visible = false; down_read(&namespace_sem); - lock_ns_list(ns); - list_for_each_entry(mnt, &ns->list, mnt_list) { + rbtree_postorder_for_each_entry_safe(mnt, n, &ns->mounts, mnt_node) { struct mount *child; int mnt_flags; - if (mnt_is_cursor(mnt)) - continue; - if (mnt->mnt.mnt_sb->s_type != sb->s_type) continue; @@ -4896,7 +5329,6 @@ static bool mnt_already_visible(struct mnt_namespace *ns, next: ; } found: - unlock_ns_list(ns); up_read(&namespace_sem); return visible; } @@ -5026,7 +5458,6 @@ static struct ctl_table fs_namespace_sysctls[] = { .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE, }, - { } }; static int __init init_fs_namespace_sysctls(void) |