summaryrefslogtreecommitdiffstats
path: root/fs/netfs/io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/netfs/io.c')
-rw-r--r--fs/netfs/io.c217
1 files changed, 172 insertions, 45 deletions
diff --git a/fs/netfs/io.c b/fs/netfs/io.c
index 7f753380e0..4261ad6c55 100644
--- a/fs/netfs/io.c
+++ b/fs/netfs/io.c
@@ -21,12 +21,7 @@
*/
static void netfs_clear_unread(struct netfs_io_subrequest *subreq)
{
- struct iov_iter iter;
-
- iov_iter_xarray(&iter, ITER_DEST, &subreq->rreq->mapping->i_pages,
- subreq->start + subreq->transferred,
- subreq->len - subreq->transferred);
- iov_iter_zero(iov_iter_count(&iter), &iter);
+ iov_iter_zero(iov_iter_count(&subreq->io_iter), &subreq->io_iter);
}
static void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error,
@@ -46,14 +41,9 @@ static void netfs_read_from_cache(struct netfs_io_request *rreq,
enum netfs_read_from_hole read_hole)
{
struct netfs_cache_resources *cres = &rreq->cache_resources;
- struct iov_iter iter;
netfs_stat(&netfs_n_rh_read);
- iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages,
- subreq->start + subreq->transferred,
- subreq->len - subreq->transferred);
-
- cres->ops->read(cres, subreq->start, &iter, read_hole,
+ cres->ops->read(cres, subreq->start, &subreq->io_iter, read_hole,
netfs_cache_read_terminated, subreq);
}
@@ -88,6 +78,13 @@ static void netfs_read_from_server(struct netfs_io_request *rreq,
struct netfs_io_subrequest *subreq)
{
netfs_stat(&netfs_n_rh_download);
+
+ if (rreq->origin != NETFS_DIO_READ &&
+ iov_iter_count(&subreq->io_iter) != subreq->len - subreq->transferred)
+ pr_warn("R=%08x[%u] ITER PRE-MISMATCH %zx != %zx-%zx %lx\n",
+ rreq->debug_id, subreq->debug_index,
+ iov_iter_count(&subreq->io_iter), subreq->len,
+ subreq->transferred, subreq->flags);
rreq->netfs_ops->issue_read(subreq);
}
@@ -127,9 +124,10 @@ static void netfs_rreq_unmark_after_write(struct netfs_io_request *rreq,
/* We might have multiple writes from the same huge
* folio, but we mustn't unlock a folio more than once.
*/
- if (have_unlocked && folio_index(folio) <= unlocked)
+ if (have_unlocked && folio->index <= unlocked)
continue;
- unlocked = folio_index(folio);
+ unlocked = folio_next_index(folio) - 1;
+ trace_netfs_folio(folio, netfs_folio_trace_end_copy);
folio_end_fscache(folio);
have_unlocked = true;
}
@@ -201,7 +199,7 @@ static void netfs_rreq_do_write_to_cache(struct netfs_io_request *rreq)
}
ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len,
- rreq->i_size, true);
+ subreq->len, rreq->i_size, true);
if (ret < 0) {
trace_netfs_failure(rreq, subreq, ret, netfs_fail_prepare_write);
trace_netfs_sreq(subreq, netfs_sreq_trace_write_skip);
@@ -260,6 +258,30 @@ static void netfs_rreq_short_read(struct netfs_io_request *rreq,
}
/*
+ * Reset the subrequest iterator prior to resubmission.
+ */
+static void netfs_reset_subreq_iter(struct netfs_io_request *rreq,
+ struct netfs_io_subrequest *subreq)
+{
+ size_t remaining = subreq->len - subreq->transferred;
+ size_t count = iov_iter_count(&subreq->io_iter);
+
+ if (count == remaining)
+ return;
+
+ _debug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n",
+ rreq->debug_id, subreq->debug_index,
+ iov_iter_count(&subreq->io_iter), subreq->transferred,
+ subreq->len, rreq->i_size,
+ subreq->io_iter.iter_type);
+
+ if (count < remaining)
+ iov_iter_revert(&subreq->io_iter, remaining - count);
+ else
+ iov_iter_advance(&subreq->io_iter, count - remaining);
+}
+
+/*
* Resubmit any short or failed operations. Returns true if we got the rreq
* ref back.
*/
@@ -287,6 +309,7 @@ static bool netfs_rreq_perform_resubmissions(struct netfs_io_request *rreq)
trace_netfs_sreq(subreq, netfs_sreq_trace_download_instead);
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
atomic_inc(&rreq->nr_outstanding);
+ netfs_reset_subreq_iter(rreq, subreq);
netfs_read_from_server(rreq, subreq);
} else if (test_bit(NETFS_SREQ_SHORT_IO, &subreq->flags)) {
netfs_rreq_short_read(rreq, subreq);
@@ -321,6 +344,43 @@ static void netfs_rreq_is_still_valid(struct netfs_io_request *rreq)
}
/*
+ * Determine how much we can admit to having read from a DIO read.
+ */
+static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
+{
+ struct netfs_io_subrequest *subreq;
+ unsigned int i;
+ size_t transferred = 0;
+
+ for (i = 0; i < rreq->direct_bv_count; i++)
+ flush_dcache_page(rreq->direct_bv[i].bv_page);
+
+ list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
+ if (subreq->error || subreq->transferred == 0)
+ break;
+ transferred += subreq->transferred;
+ if (subreq->transferred < subreq->len)
+ break;
+ }
+
+ for (i = 0; i < rreq->direct_bv_count; i++)
+ flush_dcache_page(rreq->direct_bv[i].bv_page);
+
+ rreq->transferred = transferred;
+ task_io_account_read(transferred);
+
+ if (rreq->iocb) {
+ rreq->iocb->ki_pos += transferred;
+ if (rreq->iocb->ki_complete)
+ rreq->iocb->ki_complete(
+ rreq->iocb, rreq->error ? rreq->error : transferred);
+ }
+ if (rreq->netfs_ops->done)
+ rreq->netfs_ops->done(rreq);
+ inode_dio_end(rreq->inode);
+}
+
+/*
* Assess the state of a read request and decide what to do next.
*
* Note that we could be in an ordinary kernel thread, on a workqueue or in
@@ -340,8 +400,12 @@ again:
return;
}
- netfs_rreq_unlock_folios(rreq);
+ if (rreq->origin != NETFS_DIO_READ)
+ netfs_rreq_unlock_folios(rreq);
+ else
+ netfs_rreq_assess_dio(rreq);
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip);
clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS);
@@ -399,9 +463,9 @@ void netfs_subreq_terminated(struct netfs_io_subrequest *subreq,
struct netfs_io_request *rreq = subreq->rreq;
int u;
- _enter("[%u]{%llx,%lx},%zd",
- subreq->debug_index, subreq->start, subreq->flags,
- transferred_or_error);
+ _enter("R=%x[%x]{%llx,%lx},%zd",
+ rreq->debug_id, subreq->debug_index,
+ subreq->start, subreq->flags, transferred_or_error);
switch (subreq->source) {
case NETFS_READ_FROM_CACHE:
@@ -501,15 +565,20 @@ static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_subrequest
*/
static enum netfs_io_source
netfs_rreq_prepare_read(struct netfs_io_request *rreq,
- struct netfs_io_subrequest *subreq)
+ struct netfs_io_subrequest *subreq,
+ struct iov_iter *io_iter)
{
- enum netfs_io_source source;
+ enum netfs_io_source source = NETFS_DOWNLOAD_FROM_SERVER;
+ struct netfs_inode *ictx = netfs_inode(rreq->inode);
+ size_t lsize;
_enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size);
- source = netfs_cache_prepare_read(subreq, rreq->i_size);
- if (source == NETFS_INVALID_READ)
- goto out;
+ if (rreq->origin != NETFS_DIO_READ) {
+ source = netfs_cache_prepare_read(subreq, rreq->i_size);
+ if (source == NETFS_INVALID_READ)
+ goto out;
+ }
if (source == NETFS_DOWNLOAD_FROM_SERVER) {
/* Call out to the netfs to let it shrink the request to fit
@@ -518,19 +587,52 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq,
* to make serial calls, it can indicate a short read and then
* we will call it again.
*/
+ if (rreq->origin != NETFS_DIO_READ) {
+ if (subreq->start >= ictx->zero_point) {
+ source = NETFS_FILL_WITH_ZEROES;
+ goto set;
+ }
+ if (subreq->len > ictx->zero_point - subreq->start)
+ subreq->len = ictx->zero_point - subreq->start;
+ }
if (subreq->len > rreq->i_size - subreq->start)
subreq->len = rreq->i_size - subreq->start;
+ if (rreq->rsize && subreq->len > rreq->rsize)
+ subreq->len = rreq->rsize;
if (rreq->netfs_ops->clamp_length &&
!rreq->netfs_ops->clamp_length(subreq)) {
source = NETFS_INVALID_READ;
goto out;
}
+
+ if (subreq->max_nr_segs) {
+ lsize = netfs_limit_iter(io_iter, 0, subreq->len,
+ subreq->max_nr_segs);
+ if (subreq->len > lsize) {
+ subreq->len = lsize;
+ trace_netfs_sreq(subreq, netfs_sreq_trace_limited);
+ }
+ }
}
- if (WARN_ON(subreq->len == 0))
+set:
+ if (subreq->len > rreq->len)
+ pr_warn("R=%08x[%u] SREQ>RREQ %zx > %zx\n",
+ rreq->debug_id, subreq->debug_index,
+ subreq->len, rreq->len);
+
+ if (WARN_ON(subreq->len == 0)) {
source = NETFS_INVALID_READ;
+ goto out;
+ }
+ subreq->source = source;
+ trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
+
+ subreq->io_iter = *io_iter;
+ iov_iter_truncate(&subreq->io_iter, subreq->len);
+ iov_iter_advance(io_iter, subreq->len);
out:
subreq->source = source;
trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
@@ -541,6 +643,7 @@ out:
* Slice off a piece of a read request and submit an I/O request for it.
*/
static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq,
+ struct iov_iter *io_iter,
unsigned int *_debug_index)
{
struct netfs_io_subrequest *subreq;
@@ -552,7 +655,7 @@ static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq,
subreq->debug_index = (*_debug_index)++;
subreq->start = rreq->start + rreq->submitted;
- subreq->len = rreq->len - rreq->submitted;
+ subreq->len = io_iter->count;
_debug("slice %llx,%zx,%zx", subreq->start, subreq->len, rreq->submitted);
list_add_tail(&subreq->rreq_link, &rreq->subrequests);
@@ -565,7 +668,7 @@ static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq,
* (the starts must coincide), in which case, we go around the loop
* again and ask it to download the next piece.
*/
- source = netfs_rreq_prepare_read(rreq, subreq);
+ source = netfs_rreq_prepare_read(rreq, subreq, io_iter);
if (source == NETFS_INVALID_READ)
goto subreq_failed;
@@ -603,6 +706,7 @@ subreq_failed:
*/
int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
{
+ struct iov_iter io_iter;
unsigned int debug_index = 0;
int ret;
@@ -611,50 +715,73 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
if (rreq->len == 0) {
pr_err("Zero-sized read [R=%x]\n", rreq->debug_id);
- netfs_put_request(rreq, false, netfs_rreq_trace_put_zero_len);
return -EIO;
}
- INIT_WORK(&rreq->work, netfs_rreq_work);
+ if (rreq->origin == NETFS_DIO_READ)
+ inode_dio_begin(rreq->inode);
- if (sync)
- netfs_get_request(rreq, netfs_rreq_trace_get_hold);
+ // TODO: Use bounce buffer if requested
+ rreq->io_iter = rreq->iter;
+
+ INIT_WORK(&rreq->work, netfs_rreq_work);
/* Chop the read into slices according to what the cache and the netfs
* want and submit each one.
*/
+ netfs_get_request(rreq, netfs_rreq_trace_get_for_outstanding);
atomic_set(&rreq->nr_outstanding, 1);
+ io_iter = rreq->io_iter;
do {
- if (!netfs_rreq_submit_slice(rreq, &debug_index))
+ _debug("submit %llx + %zx >= %llx",
+ rreq->start, rreq->submitted, rreq->i_size);
+ if (rreq->origin == NETFS_DIO_READ &&
+ rreq->start + rreq->submitted >= rreq->i_size)
+ break;
+ if (!netfs_rreq_submit_slice(rreq, &io_iter, &debug_index))
+ break;
+ if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) &&
+ test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags))
break;
} while (rreq->submitted < rreq->len);
+ if (!rreq->submitted) {
+ netfs_put_request(rreq, false, netfs_rreq_trace_put_no_submit);
+ if (rreq->origin == NETFS_DIO_READ)
+ inode_dio_end(rreq->inode);
+ ret = 0;
+ goto out;
+ }
+
if (sync) {
- /* Keep nr_outstanding incremented so that the ref always belongs to
- * us, and the service code isn't punted off to a random thread pool to
- * process.
+ /* Keep nr_outstanding incremented so that the ref always
+ * belongs to us, and the service code isn't punted off to a
+ * random thread pool to process. Note that this might start
+ * further work, such as writing to the cache.
*/
- for (;;) {
- wait_var_event(&rreq->nr_outstanding,
- atomic_read(&rreq->nr_outstanding) == 1);
+ wait_var_event(&rreq->nr_outstanding,
+ atomic_read(&rreq->nr_outstanding) == 1);
+ if (atomic_dec_and_test(&rreq->nr_outstanding))
netfs_rreq_assess(rreq, false);
- if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
- break;
- cond_resched();
- }
+
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip);
+ wait_on_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS,
+ TASK_UNINTERRUPTIBLE);
ret = rreq->error;
- if (ret == 0 && rreq->submitted < rreq->len) {
+ if (ret == 0 && rreq->submitted < rreq->len &&
+ rreq->origin != NETFS_DIO_READ) {
trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
ret = -EIO;
}
- netfs_put_request(rreq, false, netfs_rreq_trace_put_hold);
} else {
/* If we decrement nr_outstanding to 0, the ref belongs to us. */
if (atomic_dec_and_test(&rreq->nr_outstanding))
netfs_rreq_assess(rreq, false);
- ret = 0;
+ ret = -EIOCBQUEUED;
}
+
+out:
return ret;
}