diff options
Diffstat (limited to 'io_uring')
-rw-r--r-- | io_uring/filetable.c | 10 | ||||
-rw-r--r-- | io_uring/io_uring.c | 44 | ||||
-rw-r--r-- | io_uring/io_uring.h | 1 | ||||
-rw-r--r-- | io_uring/net.c | 263 | ||||
-rw-r--r-- | io_uring/rsrc.c | 151 | ||||
-rw-r--r-- | io_uring/rsrc.h | 15 |
6 files changed, 157 insertions, 327 deletions
diff --git a/io_uring/filetable.c b/io_uring/filetable.c index b80614e7d..4660cb89e 100644 --- a/io_uring/filetable.c +++ b/io_uring/filetable.c @@ -95,12 +95,10 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file, needs_switch = true; } - ret = io_scm_file_account(ctx, file); - if (!ret) { - *io_get_tag_slot(ctx->file_data, slot_index) = 0; - io_fixed_file_set(file_slot, file); - io_file_bitmap_set(&ctx->file_table, slot_index); - } + *io_get_tag_slot(ctx->file_data, slot_index) = 0; + io_fixed_file_set(file_slot, file); + io_file_bitmap_set(&ctx->file_table, slot_index); + return 0; err: if (needs_switch) io_rsrc_node_switch(ctx, ctx->file_data); diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 35894955b..415248c1f 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -60,7 +60,6 @@ #include <linux/net.h> #include <net/sock.h> #include <net/af_unix.h> -#include <net/scm.h> #include <linux/anon_inodes.h> #include <linux/sched/mm.h> #include <linux/uaccess.h> @@ -153,19 +152,6 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx); static struct kmem_cache *req_cachep; -struct sock *io_uring_get_socket(struct file *file) -{ -#if defined(CONFIG_UNIX) - if (io_is_uring_fops(file)) { - struct io_ring_ctx *ctx = file->private_data; - - return ctx->ring_sock->sk; - } -#endif - return NULL; -} -EXPORT_SYMBOL(io_uring_get_socket); - static inline void io_submit_flush_completions(struct io_ring_ctx *ctx) { if (!wq_list_empty(&ctx->submit_state.compl_reqs)) @@ -2641,12 +2627,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list)); WARN_ON_ONCE(!llist_empty(&ctx->rsrc_put_llist)); -#if defined(CONFIG_UNIX) - if (ctx->ring_sock) { - ctx->ring_sock->file = NULL; /* so that iput() is called */ - sock_release(ctx->ring_sock); - } -#endif WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list)); if (ctx->mm_account) { @@ -3451,32 +3431,12 @@ static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file) /* * Allocate an anonymous fd, this is what constitutes the application * visible backing of an io_uring instance. The application mmaps this - * fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled, - * we have to tie this fd to a socket for file garbage collection purposes. + * fd to gain access to the SQ/CQ ring details. */ static struct file *io_uring_get_file(struct io_ring_ctx *ctx) { - struct file *file; -#if defined(CONFIG_UNIX) - int ret; - - ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP, - &ctx->ring_sock); - if (ret) - return ERR_PTR(ret); -#endif - - file = anon_inode_getfile_secure("[io_uring]", &io_uring_fops, ctx, + return anon_inode_getfile_secure("[io_uring]", &io_uring_fops, ctx, O_RDWR | O_CLOEXEC, NULL); -#if defined(CONFIG_UNIX) - if (IS_ERR(file)) { - sock_release(ctx->ring_sock); - ctx->ring_sock = NULL; - } else { - ctx->ring_sock->file = file; - } -#endif - return file; } static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 019600570..59e6f755f 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -52,7 +52,6 @@ static inline bool io_req_ffs_set(struct io_kiocb *req) } void __io_req_task_work_add(struct io_kiocb *req, bool allow_local); -bool io_is_uring_fops(struct file *file); bool io_alloc_async_data(struct io_kiocb *req); void io_req_task_queue(struct io_kiocb *req); void io_queue_iowq(struct io_kiocb *req, bool *dont_use); diff --git a/io_uring/net.c b/io_uring/net.c index c062ce66a..b1b564c04 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -183,16 +183,115 @@ static int io_setup_async_msg(struct io_kiocb *req, return -EAGAIN; } +#ifdef CONFIG_COMPAT +static int io_compat_msg_copy_hdr(struct io_kiocb *req, + struct io_async_msghdr *iomsg, + struct compat_msghdr *msg, int ddir) +{ + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); + struct compat_iovec __user *uiov; + int ret; + + if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg))) + return -EFAULT; + + uiov = compat_ptr(msg->msg_iov); + if (req->flags & REQ_F_BUFFER_SELECT) { + compat_ssize_t clen; + + iomsg->free_iov = NULL; + if (msg->msg_iovlen == 0) { + sr->len = 0; + } else if (msg->msg_iovlen > 1) { + return -EINVAL; + } else { + if (!access_ok(uiov, sizeof(*uiov))) + return -EFAULT; + if (__get_user(clen, &uiov->iov_len)) + return -EFAULT; + if (clen < 0) + return -EINVAL; + sr->len = clen; + } + + return 0; + } + + iomsg->free_iov = iomsg->fast_iov; + ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg->msg_iovlen, + UIO_FASTIOV, &iomsg->free_iov, + &iomsg->msg.msg_iter, true); + if (unlikely(ret < 0)) + return ret; + + return 0; +} +#endif + +static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg, + struct user_msghdr *msg, int ddir) +{ + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); + int ret; + + if (copy_from_user(msg, sr->umsg, sizeof(*sr->umsg))) + return -EFAULT; + + if (req->flags & REQ_F_BUFFER_SELECT) { + if (msg->msg_iovlen == 0) { + sr->len = iomsg->fast_iov[0].iov_len = 0; + iomsg->fast_iov[0].iov_base = NULL; + iomsg->free_iov = NULL; + } else if (msg->msg_iovlen > 1) { + return -EINVAL; + } else { + if (copy_from_user(iomsg->fast_iov, msg->msg_iov, + sizeof(*msg->msg_iov))) + return -EFAULT; + sr->len = iomsg->fast_iov[0].iov_len; + iomsg->free_iov = NULL; + } + + return 0; + } + + iomsg->free_iov = iomsg->fast_iov; + ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, UIO_FASTIOV, + &iomsg->free_iov, &iomsg->msg.msg_iter, false); + if (unlikely(ret < 0)) + return ret; + + return 0; +} + static int io_sendmsg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); + struct user_msghdr msg; int ret; iomsg->msg.msg_name = &iomsg->addr; - iomsg->free_iov = iomsg->fast_iov; - ret = sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags, - &iomsg->free_iov); + iomsg->msg.msg_iter.nr_segs = 0; + +#ifdef CONFIG_COMPAT + if (unlikely(req->ctx->compat)) { + struct compat_msghdr cmsg; + + ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_SOURCE); + if (unlikely(ret)) + return ret; + + return __get_compat_msghdr(&iomsg->msg, &cmsg, NULL); + } +#endif + + ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE); + if (unlikely(ret)) + return ret; + + ret = __copy_msghdr(&iomsg->msg, &msg, NULL); + /* save msg_control as sys_sendmsg() overwrites it */ sr->msg_control = iomsg->msg.msg_control_user; return ret; @@ -415,142 +514,77 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags) return IOU_OK; } -static bool io_recvmsg_multishot_overflow(struct io_async_msghdr *iomsg) +static int io_recvmsg_mshot_prep(struct io_kiocb *req, + struct io_async_msghdr *iomsg, + int namelen, size_t controllen) { - int hdr; - - if (iomsg->namelen < 0) - return true; - if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out), - iomsg->namelen, &hdr)) - return true; - if (check_add_overflow(hdr, (int)iomsg->controllen, &hdr)) - return true; + if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) == + (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) { + int hdr; + + if (unlikely(namelen < 0)) + return -EOVERFLOW; + if (check_add_overflow(sizeof(struct io_uring_recvmsg_out), + namelen, &hdr)) + return -EOVERFLOW; + if (check_add_overflow(hdr, controllen, &hdr)) + return -EOVERFLOW; + + iomsg->namelen = namelen; + iomsg->controllen = controllen; + return 0; + } - return false; + return 0; } -static int __io_recvmsg_copy_hdr(struct io_kiocb *req, - struct io_async_msghdr *iomsg) +static int io_recvmsg_copy_hdr(struct io_kiocb *req, + struct io_async_msghdr *iomsg) { - struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); struct user_msghdr msg; int ret; - if (copy_from_user(&msg, sr->umsg, sizeof(*sr->umsg))) - return -EFAULT; - - ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr); - if (ret) - return ret; - - if (req->flags & REQ_F_BUFFER_SELECT) { - if (msg.msg_iovlen == 0) { - sr->len = iomsg->fast_iov[0].iov_len = 0; - iomsg->fast_iov[0].iov_base = NULL; - iomsg->free_iov = NULL; - } else if (msg.msg_iovlen > 1) { - return -EINVAL; - } else { - if (copy_from_user(iomsg->fast_iov, msg.msg_iov, sizeof(*msg.msg_iov))) - return -EFAULT; - sr->len = iomsg->fast_iov[0].iov_len; - iomsg->free_iov = NULL; - } - - if (req->flags & REQ_F_APOLL_MULTISHOT) { - iomsg->namelen = msg.msg_namelen; - iomsg->controllen = msg.msg_controllen; - if (io_recvmsg_multishot_overflow(iomsg)) - return -EOVERFLOW; - } - } else { - iomsg->free_iov = iomsg->fast_iov; - ret = __import_iovec(ITER_DEST, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV, - &iomsg->free_iov, &iomsg->msg.msg_iter, - false); - if (ret > 0) - ret = 0; - } - - return ret; -} + iomsg->msg.msg_name = &iomsg->addr; + iomsg->msg.msg_iter.nr_segs = 0; #ifdef CONFIG_COMPAT -static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req, - struct io_async_msghdr *iomsg) -{ - struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - struct compat_msghdr msg; - struct compat_iovec __user *uiov; - int ret; + if (unlikely(req->ctx->compat)) { + struct compat_msghdr cmsg; - if (copy_from_user(&msg, sr->umsg_compat, sizeof(msg))) - return -EFAULT; - - ret = __get_compat_msghdr(&iomsg->msg, &msg, &iomsg->uaddr); - if (ret) - return ret; - - uiov = compat_ptr(msg.msg_iov); - if (req->flags & REQ_F_BUFFER_SELECT) { - compat_ssize_t clen; - - iomsg->free_iov = NULL; - if (msg.msg_iovlen == 0) { - sr->len = 0; - } else if (msg.msg_iovlen > 1) { - return -EINVAL; - } else { - if (!access_ok(uiov, sizeof(*uiov))) - return -EFAULT; - if (__get_user(clen, &uiov->iov_len)) - return -EFAULT; - if (clen < 0) - return -EINVAL; - sr->len = clen; - } + ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_DEST); + if (unlikely(ret)) + return ret; - if (req->flags & REQ_F_APOLL_MULTISHOT) { - iomsg->namelen = msg.msg_namelen; - iomsg->controllen = msg.msg_controllen; - if (io_recvmsg_multishot_overflow(iomsg)) - return -EOVERFLOW; - } - } else { - iomsg->free_iov = iomsg->fast_iov; - ret = __import_iovec(ITER_DEST, (struct iovec __user *)uiov, msg.msg_iovlen, - UIO_FASTIOV, &iomsg->free_iov, - &iomsg->msg.msg_iter, true); - if (ret < 0) + ret = __get_compat_msghdr(&iomsg->msg, &cmsg, &iomsg->uaddr); + if (unlikely(ret)) return ret; - } - return 0; -} + return io_recvmsg_mshot_prep(req, iomsg, cmsg.msg_namelen, + cmsg.msg_controllen); + } #endif -static int io_recvmsg_copy_hdr(struct io_kiocb *req, - struct io_async_msghdr *iomsg) -{ - iomsg->msg.msg_name = &iomsg->addr; - iomsg->msg.msg_iter.nr_segs = 0; + ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST); + if (unlikely(ret)) + return ret; -#ifdef CONFIG_COMPAT - if (req->ctx->compat) - return __io_compat_recvmsg_copy_hdr(req, iomsg); -#endif + ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr); + if (unlikely(ret)) + return ret; - return __io_recvmsg_copy_hdr(req, iomsg); + return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen, + msg.msg_controllen); } int io_recvmsg_prep_async(struct io_kiocb *req) { + struct io_async_msghdr *iomsg; int ret; if (!io_msg_alloc_async_prep(req)) return -ENOMEM; - ret = io_recvmsg_copy_hdr(req, req->async_data); + iomsg = req->async_data; + ret = io_recvmsg_copy_hdr(req, iomsg); if (!ret) req->flags |= REQ_F_NEED_CLEANUP; return ret; @@ -842,7 +876,8 @@ retry_multishot: kfree(kmsg->free_iov); io_netmsg_recycle(req, issue_flags); req->flags &= ~REQ_F_NEED_CLEANUP; - } + } else if (ret == -EAGAIN) + return io_setup_async_msg(req, kmsg, issue_flags); return ret; } diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 7ada0339b..ac658cfa8 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -494,11 +494,6 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, err = -EBADF; break; } - err = io_scm_file_account(ctx, file); - if (err) { - fput(file); - break; - } *io_get_tag_slot(data, i) = tag; io_fixed_file_set(file_slot, file); io_file_bitmap_set(&ctx->file_table, i); @@ -762,22 +757,12 @@ void __io_sqe_files_unregister(struct io_ring_ctx *ctx) for (i = 0; i < ctx->nr_user_files; i++) { struct file *file = io_file_from_index(&ctx->file_table, i); - /* skip scm accounted files, they'll be freed by ->ring_sock */ - if (!file || io_file_need_scm(file)) + if (!file) continue; io_file_bitmap_clear(&ctx->file_table, i); fput(file); } -#if defined(CONFIG_UNIX) - if (ctx->ring_sock) { - struct sock *sock = ctx->ring_sock->sk; - struct sk_buff *skb; - - while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL) - kfree_skb(skb); - } -#endif io_free_file_tables(&ctx->file_table); io_file_table_set_alloc_range(ctx, 0, 0); io_rsrc_data_free(ctx->file_data); @@ -805,134 +790,11 @@ int io_sqe_files_unregister(struct io_ring_ctx *ctx) return ret; } -/* - * Ensure the UNIX gc is aware of our file set, so we are certain that - * the io_uring can be safely unregistered on process exit, even if we have - * loops in the file referencing. We account only files that can hold other - * files because otherwise they can't form a loop and so are not interesting - * for GC. - */ -int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file) -{ -#if defined(CONFIG_UNIX) - struct sock *sk = ctx->ring_sock->sk; - struct sk_buff_head *head = &sk->sk_receive_queue; - struct scm_fp_list *fpl; - struct sk_buff *skb; - - if (likely(!io_file_need_scm(file))) - return 0; - - /* - * See if we can merge this file into an existing skb SCM_RIGHTS - * file set. If there's no room, fall back to allocating a new skb - * and filling it in. - */ - spin_lock_irq(&head->lock); - skb = skb_peek(head); - if (skb && UNIXCB(skb).fp->count < SCM_MAX_FD) - __skb_unlink(skb, head); - else - skb = NULL; - spin_unlock_irq(&head->lock); - - if (!skb) { - fpl = kzalloc(sizeof(*fpl), GFP_KERNEL); - if (!fpl) - return -ENOMEM; - - skb = alloc_skb(0, GFP_KERNEL); - if (!skb) { - kfree(fpl); - return -ENOMEM; - } - - fpl->user = get_uid(current_user()); - fpl->max = SCM_MAX_FD; - fpl->count = 0; - - UNIXCB(skb).fp = fpl; - skb->sk = sk; - skb->scm_io_uring = 1; - skb->destructor = unix_destruct_scm; - refcount_add(skb->truesize, &sk->sk_wmem_alloc); - } - - fpl = UNIXCB(skb).fp; - fpl->fp[fpl->count++] = get_file(file); - unix_inflight(fpl->user, file); - skb_queue_head(head, skb); - fput(file); -#endif - return 0; -} - static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc) { struct file *file = prsrc->file; -#if defined(CONFIG_UNIX) - struct sock *sock = ctx->ring_sock->sk; - struct sk_buff_head list, *head = &sock->sk_receive_queue; - struct sk_buff *skb; - int i; - - if (!io_file_need_scm(file)) { - fput(file); - return; - } - - __skb_queue_head_init(&list); - - /* - * Find the skb that holds this file in its SCM_RIGHTS. When found, - * remove this entry and rearrange the file array. - */ - skb = skb_dequeue(head); - while (skb) { - struct scm_fp_list *fp; - fp = UNIXCB(skb).fp; - for (i = 0; i < fp->count; i++) { - int left; - - if (fp->fp[i] != file) - continue; - - unix_notinflight(fp->user, fp->fp[i]); - left = fp->count - 1 - i; - if (left) { - memmove(&fp->fp[i], &fp->fp[i + 1], - left * sizeof(struct file *)); - } - fp->count--; - if (!fp->count) { - kfree_skb(skb); - skb = NULL; - } else { - __skb_queue_tail(&list, skb); - } - fput(file); - file = NULL; - break; - } - - if (!file) - break; - - __skb_queue_tail(&list, skb); - - skb = skb_dequeue(head); - } - - if (skb_peek(&list)) { - spin_lock_irq(&head->lock); - while ((skb = __skb_dequeue(&list)) != NULL) - __skb_queue_tail(head, skb); - spin_unlock_irq(&head->lock); - } -#else fput(file); -#endif } int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, @@ -986,21 +848,12 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, goto fail; /* - * Don't allow io_uring instances to be registered. If UNIX - * isn't enabled, then this causes a reference cycle and this - * instance can never get freed. If UNIX is enabled we'll - * handle it just fine, but there's still no point in allowing - * a ring fd as it doesn't support regular read/write anyway. + * Don't allow io_uring instances to be registered. */ if (io_is_uring_fops(file)) { fput(file); goto fail; } - ret = io_scm_file_account(ctx, file); - if (ret) { - fput(file); - goto fail; - } file_slot = io_fixed_file_slot(&ctx->file_table, i); io_fixed_file_set(file_slot, file); io_file_bitmap_set(&ctx->file_table, i); diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index acaf8dad0..85f145607 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -77,21 +77,6 @@ int io_sqe_files_unregister(struct io_ring_ctx *ctx); int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args, u64 __user *tags); -int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file); - -static inline bool io_file_need_scm(struct file *filp) -{ - return false; -} - -static inline int io_scm_file_account(struct io_ring_ctx *ctx, - struct file *file) -{ - if (likely(!io_file_need_scm(file))) - return 0; - return __io_scm_file_account(ctx, file); -} - int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args); int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg, |