summaryrefslogtreecommitdiffstats
path: root/io_uring/net.c
diff options
context:
space:
mode:
Diffstat (limited to 'io_uring/net.c')
-rw-r--r--io_uring/net.c360
1 files changed, 204 insertions, 156 deletions
diff --git a/io_uring/net.c b/io_uring/net.c
index 75d494dad..dbabe0058 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -60,6 +60,7 @@ struct io_sr_msg {
unsigned len;
unsigned done_io;
unsigned msg_flags;
+ unsigned nr_multishot_loops;
u16 flags;
/* initialised and used only by !msg send variants */
u16 addr_len;
@@ -70,18 +71,12 @@ struct io_sr_msg {
struct io_kiocb *notif;
};
-static inline bool io_check_multishot(struct io_kiocb *req,
- unsigned int issue_flags)
-{
- /*
- * When ->locked_cq is set we only allow to post CQEs from the original
- * task context. Usual request completions will be handled in other
- * generic paths but multipoll may decide to post extra cqes.
- */
- return !(issue_flags & IO_URING_F_IOWQ) ||
- !(issue_flags & IO_URING_F_MULTISHOT) ||
- !req->ctx->task_complete;
-}
+/*
+ * Number of times we'll try and do receives if there's more data. If we
+ * exceed this limit, then add us to the back of the queue and retry from
+ * there. This helps fairness between flooding clients.
+ */
+#define MULTISHOT_MAX_RETRY 32
int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
@@ -196,16 +191,115 @@ static int io_setup_async_msg(struct io_kiocb *req,
return -EAGAIN;
}
+#ifdef CONFIG_COMPAT
+static int io_compat_msg_copy_hdr(struct io_kiocb *req,
+ struct io_async_msghdr *iomsg,
+ struct compat_msghdr *msg, int ddir)
+{
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ struct compat_iovec __user *uiov;
+ int ret;
+
+ if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
+ return -EFAULT;
+
+ uiov = compat_ptr(msg->msg_iov);
+ if (req->flags & REQ_F_BUFFER_SELECT) {
+ compat_ssize_t clen;
+
+ iomsg->free_iov = NULL;
+ if (msg->msg_iovlen == 0) {
+ sr->len = 0;
+ } else if (msg->msg_iovlen > 1) {
+ return -EINVAL;
+ } else {
+ if (!access_ok(uiov, sizeof(*uiov)))
+ return -EFAULT;
+ if (__get_user(clen, &uiov->iov_len))
+ return -EFAULT;
+ if (clen < 0)
+ return -EINVAL;
+ sr->len = clen;
+ }
+
+ return 0;
+ }
+
+ iomsg->free_iov = iomsg->fast_iov;
+ ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg->msg_iovlen,
+ UIO_FASTIOV, &iomsg->free_iov,
+ &iomsg->msg.msg_iter, true);
+ if (unlikely(ret < 0))
+ return ret;
+
+ return 0;
+}
+#endif
+
+static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
+ struct user_msghdr *msg, int ddir)
+{
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ int ret;
+
+ if (copy_from_user(msg, sr->umsg, sizeof(*sr->umsg)))
+ return -EFAULT;
+
+ if (req->flags & REQ_F_BUFFER_SELECT) {
+ if (msg->msg_iovlen == 0) {
+ sr->len = iomsg->fast_iov[0].iov_len = 0;
+ iomsg->fast_iov[0].iov_base = NULL;
+ iomsg->free_iov = NULL;
+ } else if (msg->msg_iovlen > 1) {
+ return -EINVAL;
+ } else {
+ if (copy_from_user(iomsg->fast_iov, msg->msg_iov,
+ sizeof(*msg->msg_iov)))
+ return -EFAULT;
+ sr->len = iomsg->fast_iov[0].iov_len;
+ iomsg->free_iov = NULL;
+ }
+
+ return 0;
+ }
+
+ iomsg->free_iov = iomsg->fast_iov;
+ ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, UIO_FASTIOV,
+ &iomsg->free_iov, &iomsg->msg.msg_iter, false);
+ if (unlikely(ret < 0))
+ return ret;
+
+ return 0;
+}
+
static int io_sendmsg_copy_hdr(struct io_kiocb *req,
struct io_async_msghdr *iomsg)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ struct user_msghdr msg;
int ret;
iomsg->msg.msg_name = &iomsg->addr;
- iomsg->free_iov = iomsg->fast_iov;
- ret = sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags,
- &iomsg->free_iov);
+ iomsg->msg.msg_iter.nr_segs = 0;
+
+#ifdef CONFIG_COMPAT
+ if (unlikely(req->ctx->compat)) {
+ struct compat_msghdr cmsg;
+
+ ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_SOURCE);
+ if (unlikely(ret))
+ return ret;
+
+ return __get_compat_msghdr(&iomsg->msg, &cmsg, NULL);
+ }
+#endif
+
+ ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE);
+ if (unlikely(ret))
+ return ret;
+
+ ret = __copy_msghdr(&iomsg->msg, &msg, NULL);
+
/* save msg_control as sys_sendmsg() overwrites it */
sr->msg_control = iomsg->msg.msg_control_user;
return ret;
@@ -217,7 +311,10 @@ int io_send_prep_async(struct io_kiocb *req)
struct io_async_msghdr *io;
int ret;
- if (!zc->addr || req_has_async_data(req))
+ if (req_has_async_data(req))
+ return 0;
+ zc->done_io = 0;
+ if (!zc->addr)
return 0;
io = io_msg_alloc_async_prep(req);
if (!io)
@@ -244,8 +341,10 @@ static int io_setup_async_addr(struct io_kiocb *req,
int io_sendmsg_prep_async(struct io_kiocb *req)
{
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
int ret;
+ sr->done_io = 0;
if (!io_msg_alloc_async_prep(req))
return -ENOMEM;
ret = io_sendmsg_copy_hdr(req, req->async_data);
@@ -265,6 +364,8 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ sr->done_io = 0;
+
if (req->opcode == IORING_OP_SEND) {
if (READ_ONCE(sqe->__pad3[0]))
return -EINVAL;
@@ -287,7 +388,6 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (req->ctx->compat)
sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
- sr->done_io = 0;
return 0;
}
@@ -427,142 +527,79 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags)
return IOU_OK;
}
-static bool io_recvmsg_multishot_overflow(struct io_async_msghdr *iomsg)
+static int io_recvmsg_mshot_prep(struct io_kiocb *req,
+ struct io_async_msghdr *iomsg,
+ int namelen, size_t controllen)
{
- int hdr;
-
- if (iomsg->namelen < 0)
- return true;
- if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out),
- iomsg->namelen, &hdr))
- return true;
- if (check_add_overflow(hdr, (int)iomsg->controllen, &hdr))
- return true;
+ if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) ==
+ (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) {
+ int hdr;
+
+ if (unlikely(namelen < 0))
+ return -EOVERFLOW;
+ if (check_add_overflow(sizeof(struct io_uring_recvmsg_out),
+ namelen, &hdr))
+ return -EOVERFLOW;
+ if (check_add_overflow(hdr, controllen, &hdr))
+ return -EOVERFLOW;
+
+ iomsg->namelen = namelen;
+ iomsg->controllen = controllen;
+ return 0;
+ }
- return false;
+ return 0;
}
-static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
- struct io_async_msghdr *iomsg)
+static int io_recvmsg_copy_hdr(struct io_kiocb *req,
+ struct io_async_msghdr *iomsg)
{
- struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
struct user_msghdr msg;
int ret;
- if (copy_from_user(&msg, sr->umsg, sizeof(*sr->umsg)))
- return -EFAULT;
-
- ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
- if (ret)
- return ret;
-
- if (req->flags & REQ_F_BUFFER_SELECT) {
- if (msg.msg_iovlen == 0) {
- sr->len = iomsg->fast_iov[0].iov_len = 0;
- iomsg->fast_iov[0].iov_base = NULL;
- iomsg->free_iov = NULL;
- } else if (msg.msg_iovlen > 1) {
- return -EINVAL;
- } else {
- if (copy_from_user(iomsg->fast_iov, msg.msg_iov, sizeof(*msg.msg_iov)))
- return -EFAULT;
- sr->len = iomsg->fast_iov[0].iov_len;
- iomsg->free_iov = NULL;
- }
-
- if (req->flags & REQ_F_APOLL_MULTISHOT) {
- iomsg->namelen = msg.msg_namelen;
- iomsg->controllen = msg.msg_controllen;
- if (io_recvmsg_multishot_overflow(iomsg))
- return -EOVERFLOW;
- }
- } else {
- iomsg->free_iov = iomsg->fast_iov;
- ret = __import_iovec(ITER_DEST, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV,
- &iomsg->free_iov, &iomsg->msg.msg_iter,
- false);
- if (ret > 0)
- ret = 0;
- }
-
- return ret;
-}
+ iomsg->msg.msg_name = &iomsg->addr;
+ iomsg->msg.msg_iter.nr_segs = 0;
#ifdef CONFIG_COMPAT
-static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
- struct io_async_msghdr *iomsg)
-{
- struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
- struct compat_msghdr msg;
- struct compat_iovec __user *uiov;
- int ret;
-
- if (copy_from_user(&msg, sr->umsg_compat, sizeof(msg)))
- return -EFAULT;
-
- ret = __get_compat_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
- if (ret)
- return ret;
-
- uiov = compat_ptr(msg.msg_iov);
- if (req->flags & REQ_F_BUFFER_SELECT) {
- compat_ssize_t clen;
+ if (unlikely(req->ctx->compat)) {
+ struct compat_msghdr cmsg;
- iomsg->free_iov = NULL;
- if (msg.msg_iovlen == 0) {
- sr->len = 0;
- } else if (msg.msg_iovlen > 1) {
- return -EINVAL;
- } else {
- if (!access_ok(uiov, sizeof(*uiov)))
- return -EFAULT;
- if (__get_user(clen, &uiov->iov_len))
- return -EFAULT;
- if (clen < 0)
- return -EINVAL;
- sr->len = clen;
- }
+ ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_DEST);
+ if (unlikely(ret))
+ return ret;
- if (req->flags & REQ_F_APOLL_MULTISHOT) {
- iomsg->namelen = msg.msg_namelen;
- iomsg->controllen = msg.msg_controllen;
- if (io_recvmsg_multishot_overflow(iomsg))
- return -EOVERFLOW;
- }
- } else {
- iomsg->free_iov = iomsg->fast_iov;
- ret = __import_iovec(ITER_DEST, (struct iovec __user *)uiov, msg.msg_iovlen,
- UIO_FASTIOV, &iomsg->free_iov,
- &iomsg->msg.msg_iter, true);
- if (ret < 0)
+ ret = __get_compat_msghdr(&iomsg->msg, &cmsg, &iomsg->uaddr);
+ if (unlikely(ret))
return ret;
- }
- return 0;
-}
+ return io_recvmsg_mshot_prep(req, iomsg, cmsg.msg_namelen,
+ cmsg.msg_controllen);
+ }
#endif
-static int io_recvmsg_copy_hdr(struct io_kiocb *req,
- struct io_async_msghdr *iomsg)
-{
- iomsg->msg.msg_name = &iomsg->addr;
- iomsg->msg.msg_iter.nr_segs = 0;
+ ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST);
+ if (unlikely(ret))
+ return ret;
-#ifdef CONFIG_COMPAT
- if (req->ctx->compat)
- return __io_compat_recvmsg_copy_hdr(req, iomsg);
-#endif
+ ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
+ if (unlikely(ret))
+ return ret;
- return __io_recvmsg_copy_hdr(req, iomsg);
+ return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
+ msg.msg_controllen);
}
int io_recvmsg_prep_async(struct io_kiocb *req)
{
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ struct io_async_msghdr *iomsg;
int ret;
+ sr->done_io = 0;
if (!io_msg_alloc_async_prep(req))
return -ENOMEM;
- ret = io_recvmsg_copy_hdr(req, req->async_data);
+ iomsg = req->async_data;
+ ret = io_recvmsg_copy_hdr(req, iomsg);
if (!ret)
req->flags |= REQ_F_NEED_CLEANUP;
return ret;
@@ -574,6 +611,8 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ sr->done_io = 0;
+
if (unlikely(sqe->file_index || sqe->addr2))
return -EINVAL;
@@ -610,7 +649,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (req->ctx->compat)
sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
- sr->done_io = 0;
+ sr->nr_multishot_loops = 0;
return 0;
}
@@ -645,23 +684,35 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
return true;
}
- if (!mshot_finished) {
- if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
- *ret, cflags | IORING_CQE_F_MORE)) {
- io_recv_prep_retry(req);
- /* Known not-empty or unknown state, retry */
- if (cflags & IORING_CQE_F_SOCK_NONEMPTY ||
- msg->msg_inq == -1)
+ if (mshot_finished)
+ goto finish;
+
+ /*
+ * Fill CQE for this receive and see if we should keep trying to
+ * receive from this socket.
+ */
+ if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
+ *ret, cflags | IORING_CQE_F_MORE)) {
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE;
+
+ io_recv_prep_retry(req);
+ /* Known not-empty or unknown state, retry */
+ if (cflags & IORING_CQE_F_SOCK_NONEMPTY || msg->msg_inq == -1) {
+ if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY)
return false;
- if (issue_flags & IO_URING_F_MULTISHOT)
- *ret = IOU_ISSUE_SKIP_COMPLETE;
- else
- *ret = -EAGAIN;
- return true;
+ /* mshot retries exceeded, force a requeue */
+ sr->nr_multishot_loops = 0;
+ mshot_retry_ret = IOU_REQUEUE;
}
- /* Otherwise stop multishot but use the current result. */
+ if (issue_flags & IO_URING_F_MULTISHOT)
+ *ret = mshot_retry_ret;
+ else
+ *ret = -EAGAIN;
+ return true;
}
-
+ /* Otherwise stop multishot but use the current result. */
+finish:
io_req_set_res(req, *ret, cflags);
if (issue_flags & IO_URING_F_MULTISHOT)
@@ -782,9 +833,6 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
(sr->flags & IORING_RECVSEND_POLL_FIRST))
return io_setup_async_msg(req, kmsg, issue_flags);
- if (!io_check_multishot(req, issue_flags))
- return io_setup_async_msg(req, kmsg, issue_flags);
-
retry_multishot:
if (io_do_buffer_select(req)) {
void __user *buf;
@@ -860,7 +908,8 @@ retry_multishot:
kfree(kmsg->free_iov);
io_netmsg_recycle(req, issue_flags);
req->flags &= ~REQ_F_NEED_CLEANUP;
- }
+ } else if (ret == -EAGAIN)
+ return io_setup_async_msg(req, kmsg, issue_flags);
return ret;
}
@@ -879,9 +928,6 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags)
(sr->flags & IORING_RECVSEND_POLL_FIRST))
return -EAGAIN;
- if (!io_check_multishot(req, issue_flags))
- return -EAGAIN;
-
sock = sock_from_file(req->file);
if (unlikely(!sock))
return -ENOTSOCK;
@@ -902,6 +948,7 @@ retry_multishot:
if (!buf)
return -ENOBUFS;
sr->buf = buf;
+ sr->len = len;
}
ret = import_ubuf(ITER_DEST, sr->buf, len, &msg.msg_iter);
@@ -981,6 +1028,9 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
struct io_ring_ctx *ctx = req->ctx;
struct io_kiocb *notif;
+ zc->done_io = 0;
+ req->flags |= REQ_F_POLL_NO_LAZY;
+
if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
return -EINVAL;
/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
@@ -1033,8 +1083,6 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (zc->msg_flags & MSG_DONTWAIT)
req->flags |= REQ_F_NOWAIT;
- zc->done_io = 0;
-
#ifdef CONFIG_COMPAT
if (req->ctx->compat)
zc->msg_flags |= MSG_CMSG_COMPAT;
@@ -1217,6 +1265,7 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
if (req_has_async_data(req)) {
kmsg = req->async_data;
+ kmsg->msg.msg_control_user = sr->msg_control;
} else {
ret = io_sendmsg_copy_hdr(req, &iomsg);
if (ret)
@@ -1279,7 +1328,7 @@ void io_sendrecv_fail(struct io_kiocb *req)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
- if (req->flags & REQ_F_PARTIAL_IO)
+ if (sr->done_io)
req->cqe.res = sr->done_io;
if ((req->flags & REQ_F_NEED_CLEANUP) &&
@@ -1329,8 +1378,6 @@ int io_accept(struct io_kiocb *req, unsigned int issue_flags)
struct file *file;
int ret, fd;
- if (!io_check_multishot(req, issue_flags))
- return -EAGAIN;
retry:
if (!fixed) {
fd = __get_unused_fd_flags(accept->flags, accept->nofile);
@@ -1350,7 +1397,7 @@ retry:
* has already been done
*/
if (issue_flags & IO_URING_F_MULTISHOT)
- ret = IOU_ISSUE_SKIP_COMPLETE;
+ return IOU_ISSUE_SKIP_COMPLETE;
return ret;
}
if (ret == -ERESTARTSYS)
@@ -1375,7 +1422,8 @@ retry:
ret, IORING_CQE_F_MORE))
goto retry;
- return -ECANCELED;
+ io_req_set_res(req, ret, 0);
+ return IOU_STOP_MULTISHOT;
}
int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)