summaryrefslogtreecommitdiffstats
path: root/io_uring/rw.c
diff options
context:
space:
mode:
Diffstat (limited to 'io_uring/rw.c')
-rw-r--r--io_uring/rw.c229
1 files changed, 182 insertions, 47 deletions
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 0a0c1c9db..c3c154790 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -10,7 +10,7 @@
#include <linux/poll.h>
#include <linux/nospec.h>
#include <linux/compat.h>
-#include <linux/io_uring.h>
+#include <linux/io_uring/cmd.h>
#include <uapi/linux/io_uring.h>
@@ -18,6 +18,7 @@
#include "opdef.h"
#include "kbuf.h"
#include "rsrc.h"
+#include "poll.h"
#include "rw.h"
struct io_rw {
@@ -83,18 +84,6 @@ int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
/* used for fixed read/write too - just read unconditionally */
req->buf_index = READ_ONCE(sqe->buf_index);
- if (req->opcode == IORING_OP_READ_FIXED ||
- req->opcode == IORING_OP_WRITE_FIXED) {
- struct io_ring_ctx *ctx = req->ctx;
- u16 index;
-
- if (unlikely(req->buf_index >= ctx->nr_user_bufs))
- return -EFAULT;
- index = array_index_nospec(req->buf_index, ctx->nr_user_bufs);
- req->imu = ctx->user_bufs[index];
- io_req_set_rsrc_node(req, ctx, 0);
- }
-
ioprio = READ_ONCE(sqe->ioprio);
if (ioprio) {
ret = ioprio_check_cap(ioprio);
@@ -110,45 +99,74 @@ int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
rw->addr = READ_ONCE(sqe->addr);
rw->len = READ_ONCE(sqe->len);
rw->flags = READ_ONCE(sqe->rw_flags);
+ return 0;
+}
+
+int io_prep_rwv(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ int ret;
+
+ ret = io_prep_rw(req, sqe);
+ if (unlikely(ret))
+ return ret;
- /* Have to do this validation here, as this is in io_read() rw->len might
- * have chanaged due to buffer selection
+ /*
+ * Have to do this validation here, as this is in io_read() rw->len
+ * might have chanaged due to buffer selection
*/
- if (req->opcode == IORING_OP_READV && req->flags & REQ_F_BUFFER_SELECT) {
- ret = io_iov_buffer_select_prep(req);
- if (ret)
- return ret;
- }
+ if (req->flags & REQ_F_BUFFER_SELECT)
+ return io_iov_buffer_select_prep(req);
return 0;
}
-void io_readv_writev_cleanup(struct io_kiocb *req)
+int io_prep_rw_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- struct io_async_rw *io = req->async_data;
+ struct io_ring_ctx *ctx = req->ctx;
+ u16 index;
+ int ret;
- kfree(io->free_iovec);
+ ret = io_prep_rw(req, sqe);
+ if (unlikely(ret))
+ return ret;
+
+ if (unlikely(req->buf_index >= ctx->nr_user_bufs))
+ return -EFAULT;
+ index = array_index_nospec(req->buf_index, ctx->nr_user_bufs);
+ req->imu = ctx->user_bufs[index];
+ io_req_set_rsrc_node(req, ctx, 0);
+ return 0;
}
-static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
+/*
+ * Multishot read is prepared just like a normal read/write request, only
+ * difference is that we set the MULTISHOT flag.
+ */
+int io_read_mshot_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- switch (ret) {
- case -EIOCBQUEUED:
- break;
- case -ERESTARTSYS:
- case -ERESTARTNOINTR:
- case -ERESTARTNOHAND:
- case -ERESTART_RESTARTBLOCK:
- /*
- * We can't just restart the syscall, since previously
- * submitted sqes may already be in progress. Just fail this
- * IO with EINTR.
- */
- ret = -EINTR;
- fallthrough;
- default:
- kiocb->ki_complete(kiocb, ret);
- }
+ struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+ int ret;
+
+ /* must be used with provided buffers */
+ if (!(req->flags & REQ_F_BUFFER_SELECT))
+ return -EINVAL;
+
+ ret = io_prep_rw(req, sqe);
+ if (unlikely(ret))
+ return ret;
+
+ if (rw->addr || rw->len)
+ return -EINVAL;
+
+ req->flags |= REQ_F_APOLL_MULTISHOT;
+ return 0;
+}
+
+void io_readv_writev_cleanup(struct io_kiocb *req)
+{
+ struct io_async_rw *io = req->async_data;
+
+ kfree(io->free_iovec);
}
static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req)
@@ -333,6 +351,33 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
smp_store_release(&req->iopoll_completed, 1);
}
+static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
+{
+ /* IO was queued async, completion will happen later */
+ if (ret == -EIOCBQUEUED)
+ return;
+
+ /* transform internal restart error codes */
+ if (unlikely(ret < 0)) {
+ switch (ret) {
+ case -ERESTARTSYS:
+ case -ERESTARTNOINTR:
+ case -ERESTARTNOHAND:
+ case -ERESTART_RESTARTBLOCK:
+ /*
+ * We can't just restart the syscall, since previously
+ * submitted sqes may already be in progress. Just fail
+ * this IO with EINTR.
+ */
+ ret = -EINTR;
+ break;
+ }
+ }
+
+ INDIRECT_CALL_2(kiocb->ki_complete, io_complete_rw_iopoll,
+ io_complete_rw, kiocb, ret);
+}
+
static int kiocb_done(struct io_kiocb *req, ssize_t ret,
unsigned int issue_flags)
{
@@ -388,8 +433,7 @@ static struct iovec *__io_import_iovec(int ddir, struct io_kiocb *req,
buf = u64_to_user_ptr(rw->addr);
sqe_len = rw->len;
- if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE ||
- (req->flags & REQ_F_BUFFER_SELECT)) {
+ if (!io_issue_defs[opcode].vectored || req->flags & REQ_F_BUFFER_SELECT) {
if (io_do_buffer_select(req)) {
buf = io_buffer_select(req, &sqe_len, issue_flags);
if (!buf)
@@ -527,6 +571,9 @@ static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec,
{
if (!force && !io_cold_defs[req->opcode].prep_async)
return 0;
+ /* opcode type doesn't need async data */
+ if (!io_cold_defs[req->opcode].async_size)
+ return 0;
if (!req_has_async_data(req)) {
struct io_async_rw *iorw;
@@ -712,7 +759,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode)
return 0;
}
-int io_read(struct io_kiocb *req, unsigned int issue_flags)
+static int __io_read(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
struct io_rw_state __s, *s = &__s;
@@ -780,8 +827,11 @@ int io_read(struct io_kiocb *req, unsigned int issue_flags)
if (ret == -EAGAIN || (req->flags & REQ_F_REISSUE)) {
req->flags &= ~REQ_F_REISSUE;
- /* if we can poll, just do that */
- if (req->opcode == IORING_OP_READ && file_can_poll(req->file))
+ /*
+ * If we can poll, just do that. For a vectored read, we'll
+ * need to copy state first.
+ */
+ if (file_can_poll(req->file) && !io_issue_defs[req->opcode].vectored)
return -EAGAIN;
/* IOPOLL retry should happen for io-wq threads */
if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL))
@@ -857,7 +907,92 @@ done:
/* it's faster to check here then delegate to kfree */
if (iovec)
kfree(iovec);
- return kiocb_done(req, ret, issue_flags);
+ return ret;
+}
+
+int io_read(struct io_kiocb *req, unsigned int issue_flags)
+{
+ int ret;
+
+ ret = __io_read(req, issue_flags);
+ if (ret >= 0)
+ return kiocb_done(req, ret, issue_flags);
+
+ return ret;
+}
+
+int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+ unsigned int cflags = 0;
+ int ret;
+
+ /*
+ * Multishot MUST be used on a pollable file
+ */
+ if (!file_can_poll(req->file))
+ return -EBADFD;
+
+ ret = __io_read(req, issue_flags);
+
+ /*
+ * If the file doesn't support proper NOWAIT, then disable multishot
+ * and stay in single shot mode.
+ */
+ if (!io_file_supports_nowait(req))
+ req->flags &= ~REQ_F_APOLL_MULTISHOT;
+
+ /*
+ * If we get -EAGAIN, recycle our buffer and just let normal poll
+ * handling arm it.
+ */
+ if (ret == -EAGAIN) {
+ /*
+ * Reset rw->len to 0 again to avoid clamping future mshot
+ * reads, in case the buffer size varies.
+ */
+ if (io_kbuf_recycle(req, issue_flags))
+ rw->len = 0;
+ if (issue_flags & IO_URING_F_MULTISHOT)
+ return IOU_ISSUE_SKIP_COMPLETE;
+ return -EAGAIN;
+ }
+
+ /*
+ * Any successful return value will keep the multishot read armed.
+ */
+ if (ret > 0 && req->flags & REQ_F_APOLL_MULTISHOT) {
+ /*
+ * Put our buffer and post a CQE. If we fail to post a CQE, then
+ * jump to the termination path. This request is then done.
+ */
+ cflags = io_put_kbuf(req, issue_flags);
+ rw->len = 0; /* similarly to above, reset len to 0 */
+
+ if (io_fill_cqe_req_aux(req,
+ issue_flags & IO_URING_F_COMPLETE_DEFER,
+ ret, cflags | IORING_CQE_F_MORE)) {
+ if (issue_flags & IO_URING_F_MULTISHOT) {
+ /*
+ * Force retry, as we might have more data to
+ * be read and otherwise it won't get retried
+ * until (if ever) another poll is triggered.
+ */
+ io_poll_multishot_retry(req);
+ return IOU_ISSUE_SKIP_COMPLETE;
+ }
+ return -EAGAIN;
+ }
+ }
+
+ /*
+ * Either an error, or we've hit overflow posting the CQE. For any
+ * multishot request, hitting overflow will terminate it.
+ */
+ io_req_set_res(req, ret, cflags);
+ if (issue_flags & IO_URING_F_MULTISHOT)
+ return IOU_STOP_MULTISHOT;
+ return IOU_OK;
}
int io_write(struct io_kiocb *req, unsigned int issue_flags)