diff options
Diffstat (limited to 'kernel/pid.c')
-rw-r--r-- | kernel/pid.c | 55 |
1 files changed, 38 insertions, 17 deletions
diff --git a/kernel/pid.c b/kernel/pid.c index b52b108654..da76ed1873 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -42,6 +42,7 @@ #include <linux/sched/signal.h> #include <linux/sched/task.h> #include <linux/idr.h> +#include <linux/pidfs.h> #include <net/sock.h> #include <uapi/linux/pidfd.h> @@ -61,10 +62,13 @@ struct pid init_struct_pid = { int pid_max = PID_MAX_DEFAULT; -#define RESERVED_PIDS 300 - int pid_max_min = RESERVED_PIDS + 1; int pid_max_max = PID_MAX_LIMIT; +/* + * Pseudo filesystems start inode numbering after one. We use Reserved + * PIDs as a natural offset. + */ +static u64 pidfs_ino = RESERVED_PIDS; /* * PID-map pages start out as NULL, they get allocated upon @@ -272,6 +276,8 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, spin_lock_irq(&pidmap_lock); if (!(ns->pid_allocated & PIDNS_ADDING)) goto out_unlock; + pid->stashed = NULL; + pid->ino = ++pidfs_ino; for ( ; upid >= pid->numbers; --upid) { /* Make the PID visible to find_pid_ns. */ idr_replace(&upid->ns->idr, pid, upid->nr); @@ -349,6 +355,11 @@ static void __change_pid(struct task_struct *task, enum pid_type type, hlist_del_rcu(&task->pid_links[type]); *pid_ptr = new; + if (type == PIDTYPE_PID) { + WARN_ON_ONCE(pid_has_task(pid, PIDTYPE_PID)); + wake_up_all(&pid->wait_pidfd); + } + for (tmp = PIDTYPE_MAX; --tmp >= 0; ) if (pid_has_task(pid, tmp)) return; @@ -391,8 +402,7 @@ void exchange_tids(struct task_struct *left, struct task_struct *right) void transfer_pid(struct task_struct *old, struct task_struct *new, enum pid_type type) { - if (type == PIDTYPE_PID) - new->thread_pid = old->thread_pid; + WARN_ON_ONCE(type == PIDTYPE_PID); hlist_replace_rcu(&old->pid_links[type], &new->pid_links[type]); } @@ -552,11 +562,6 @@ struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags) * Return the task associated with @pidfd. The function takes a reference on * the returned task. The caller is responsible for releasing that reference. * - * Currently, the process identified by @pidfd is always a thread-group leader. - * This restriction currently exists for all aspects of pidfds including pidfd - * creation (CLONE_PIDFD cannot be used with CLONE_THREAD) and pidfd polling - * (only supports thread group leaders). - * * Return: On success, the task_struct associated with the pidfd. * On error, a negative errno number will be returned. */ @@ -595,7 +600,7 @@ struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags) * Return: On success, a cloexec pidfd is returned. * On error, a negative errno number will be returned. */ -int pidfd_create(struct pid *pid, unsigned int flags) +static int pidfd_create(struct pid *pid, unsigned int flags) { int pidfd; struct file *pidfd_file; @@ -615,11 +620,8 @@ int pidfd_create(struct pid *pid, unsigned int flags) * @flags: flags to pass * * This creates a new pid file descriptor with the O_CLOEXEC flag set for - * the process identified by @pid. Currently, the process identified by - * @pid must be a thread-group leader. This restriction currently exists - * for all aspects of pidfds including pidfd creation (CLONE_PIDFD cannot - * be used with CLONE_THREAD) and pidfd polling (only supports thread group - * leaders). + * the task identified by @pid. Without PIDFD_THREAD flag the target task + * must be a thread-group leader. * * Return: On success, a cloexec pidfd is returned. * On error, a negative errno number will be returned. @@ -629,7 +631,7 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags) int fd; struct pid *p; - if (flags & ~PIDFD_NONBLOCK) + if (flags & ~(PIDFD_NONBLOCK | PIDFD_THREAD)) return -EINVAL; if (pid <= 0) @@ -682,7 +684,26 @@ static struct file *__pidfd_fget(struct task_struct *task, int fd) up_read(&task->signal->exec_update_lock); - return file ?: ERR_PTR(-EBADF); + if (!file) { + /* + * It is possible that the target thread is exiting; it can be + * either: + * 1. before exit_signals(), which gives a real fd + * 2. before exit_files() takes the task_lock() gives a real fd + * 3. after exit_files() releases task_lock(), ->files is NULL; + * this has PF_EXITING, since it was set in exit_signals(), + * __pidfd_fget() returns EBADF. + * In case 3 we get EBADF, but that really means ESRCH, since + * the task is currently exiting and has freed its files + * struct, so we fix it up. + */ + if (task->flags & PF_EXITING) + file = ERR_PTR(-ESRCH); + else + file = ERR_PTR(-EBADF); + } + + return file; } static int pidfd_getfd(struct pid *pid, int fd) |