From e6d73558dbbb7041d93e28ac0e75bfb196851eb3 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Mon, 8 Apr 2024 18:58:16 +0200
Subject: Merging upstream version 6.1.82.

Signed-off-by: Daniel Baumann
---
 block/bio.c        |  2 +-
 block/blk-core.c   | 11 ++++++++++-
 block/blk-iocost.c |  7 +++++++
 block/blk-map.c    | 13 ++++++++++---
 block/blk-mq.c     | 25 +++++++++++++++++++++++--
 5 files changed, 51 insertions(+), 7 deletions(-)

(limited to 'block')

diff --git a/block/bio.c b/block/bio.c
index 6c22dd7b6..74c2818c7 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -927,7 +927,7 @@ static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio,
 
 	if ((addr1 | mask) != (addr2 | mask))
 		return false;
-	if (bv->bv_len + len > queue_max_segment_size(q))
+	if (len > queue_max_segment_size(q) - bv->bv_len)
 		return false;
 	return __bio_try_merge_page(bio, page, len, offset, same_page);
 }
diff --git a/block/blk-core.c b/block/blk-core.c
index 6eaf2b0ad..aefdf07bd 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -864,7 +864,16 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
 	 */
 	blk_flush_plug(current->plug, false);
 
-	if (bio_queue_enter(bio))
+	/*
+	 * We need to be able to enter a frozen queue, similar to how
+	 * timeouts also need to do that. If that is blocked, then we can
+	 * have pending IO when a queue freeze is started, and then the
+	 * wait for the freeze to finish will wait for polled requests to
+	 * timeout as the poller is prevented from entering the queue and
+	 * completing them. As long as we prevent new IO from being queued,
+	 * that should be all that matters.
+	 */
+	if (!percpu_ref_tryget(&q->q_usage_counter))
 		return 0;
 	if (queue_is_mq(q)) {
 		ret = blk_mq_poll(q, cookie, iob, flags);
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 7dd6a33e1..e6557024e 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -1337,6 +1337,13 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
 
 	lockdep_assert_held(&iocg->waitq.lock);
 
+	/*
+	 * If the delay is set by another CPU, we may be in the past. No need to
+	 * change anything if so. This avoids decay calculation underflow.
+	 */
+	if (time_before64(now->now, iocg->delay_at))
+		return false;
+
 	/* calculate the current delay in effect - 1/2 every second */
 	tdelta = now->now - iocg->delay_at;
 	if (iocg->delay)
diff --git a/block/blk-map.c b/block/blk-map.c
index 66da9e2b1..b337ae347 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -203,12 +203,19 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
 	/*
 	 * success
 	 */
-	if ((iov_iter_rw(iter) == WRITE &&
-	     (!map_data || !map_data->null_mapped)) ||
-	    (map_data && map_data->from_user)) {
+	if (iov_iter_rw(iter) == WRITE &&
+	    (!map_data || !map_data->null_mapped)) {
 		ret = bio_copy_from_iter(bio, iter);
 		if (ret)
 			goto cleanup;
+	} else if (map_data && map_data->from_user) {
+		struct iov_iter iter2 = *iter;
+
+		/* This is the copy-in part of SG_DXFER_TO_FROM_DEV. */
+		iter2.data_source = ITER_SOURCE;
+		ret = bio_copy_from_iter(bio, &iter2);
+		if (ret)
+			goto cleanup;
 	} else {
 		if (bmd->is_our_pages)
 			zero_fill_bio(bio);
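Note on the hunks above: the bio.c and blk-iocost.c changes share a theme of keeping unsigned arithmetic from wrapping. The old bio.c check "bv->bv_len + len > queue_max_segment_size(q)" can overflow when the segment size limit is very large (for example UINT_MAX), so an oversized merge is silently accepted; "len > limit - bv_len" performs no addition and cannot wrap, since bv_len never exceeds the limit. The blk-iocost.c hunk applies the same idea to time, bailing out with time_before64() before "now->now - iocg->delay_at" can underflow when another CPU set a delay in the future. The blk-core.c hunk, per its own comment, lets bio_poll() enter a queue that is being frozen via percpu_ref_tryget() so pending polled requests can still be completed, and the blk-map.c hunk gives the copy-in leg of SG_DXFER_TO_FROM_DEV its own iterator view marked as ITER_SOURCE. Below is a minimal userspace sketch of the overflow point only; the type alias and the values are illustrative stand-ins, not the kernel's actual limits:

#include <stdio.h>

typedef unsigned int u32;	/* stand-in for the kernel's u32 */

int main(void)
{
	u32 max_seg = 0xffffffffu;	/* a very large segment size limit, e.g. "unlimited" */
	u32 bv_len  = 0x80000000u;	/* bytes already in the last bvec */
	u32 len     = 0x80000001u;	/* bytes we are trying to merge */

	/* Old form: the 32-bit sum wraps to 1, so the oversized merge is accepted. */
	if (bv_len + len > max_seg)
		printf("old check: rejected\n");
	else
		printf("old check: accepted despite overflow\n");

	/* New form: no addition, so no wraparound (bv_len <= max_seg by construction). */
	if (len > max_seg - bv_len)
		printf("new check: rejected\n");
	else
		printf("new check: accepted\n");

	return 0;
}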
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b3f99dda4..7ed6b9469 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -747,11 +747,16 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
 		/*
 		 * Partial zone append completions cannot be supported as the
 		 * BIO fragments may end up not being written sequentially.
+		 * For such a case, force the completed nbytes to be equal to
+		 * the BIO size so that bio_advance() sets the BIO remaining
+		 * size to 0 and we end up calling bio_endio() before returning.
 		 */
-		if (bio->bi_iter.bi_size != nbytes)
+		if (bio->bi_iter.bi_size != nbytes) {
 			bio->bi_status = BLK_STS_IOERR;
-		else
+			nbytes = bio->bi_iter.bi_size;
+		} else {
 			bio->bi_iter.bi_sector = rq->__sector;
+		}
 	}
 
 	bio_advance(bio, nbytes);
@@ -1859,6 +1864,22 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
 	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
 	__add_wait_queue(wq, wait);
 
+	/*
+	 * Add one explicit barrier since blk_mq_get_driver_tag() may
+	 * not imply a barrier in case of failure.
+	 *
+	 * Order adding us to the wait queue and allocating the driver tag.
+	 *
+	 * The pair is the one implied in sbitmap_queue_wake_up() which
+	 * orders clearing sbitmap tag bits and waitqueue_active() in
+	 * __sbitmap_queue_wake_up(), since waitqueue_active() is lockless.
+	 *
+	 * Otherwise, re-ordering of adding to the wait queue and getting the
+	 * driver tag may cause __sbitmap_queue_wake_up() to wake up nothing
+	 * because waitqueue_active() may not observe us in the wait queue.
+	 */
+	smp_mb();
+
 	/*
 	 * It's possible that a tag was freed in the window between the
 	 * allocation failure and adding the hardware queue to the wait
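Note on the blk-mq.c hunks: the req_bio_endio() change forces a failed partial zone append completion to consume the whole BIO so that bio_advance() drops the remaining size to zero and bio_endio() runs, as the added comment explains. The smp_mb() added to blk_mq_mark_tag_wait() is one half of a classic store/load pairing: the waiter must be visible on the wait queue before it re-checks for a free tag, while the waker (sbitmap_queue_wake_up()) frees the tag before it checks waitqueue_active(). With a full barrier on both sides, at least one of the two re-checks must observe the other side, so the wakeup cannot be lost. Below is a minimal userspace sketch of that pairing using C11 atomics; the flags tag_free and on_waitqueue and the printed outcomes are illustrative stand-ins for the sbitmap tag bit and the wait queue, not the block layer's actual data structures:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool tag_free;		/* stands in for a freed sbitmap tag bit */
static atomic_bool on_waitqueue;	/* stands in for waitqueue_active() seeing us */

static void *waiter(void *arg)
{
	(void)arg;

	/* Tag allocation failed; add ourselves to the wait queue. */
	atomic_store_explicit(&on_waitqueue, true, memory_order_relaxed);

	/*
	 * Full barrier, like the smp_mb() added to blk_mq_mark_tag_wait():
	 * the store above must be visible before the re-check below.
	 */
	atomic_thread_fence(memory_order_seq_cst);

	/* Retry the tag after queueing, mirroring the driver-tag retry. */
	if (atomic_load_explicit(&tag_free, memory_order_relaxed))
		printf("waiter: got the tag on the retry\n");
	else
		printf("waiter: sleeping, relying on the wakeup\n");
	return NULL;
}

static void *waker(void *arg)
{
	(void)arg;

	/* Free the tag... */
	atomic_store_explicit(&tag_free, true, memory_order_relaxed);

	/* ...full barrier, like the one implied in sbitmap_queue_wake_up()... */
	atomic_thread_fence(memory_order_seq_cst);

	/* ...and only issue a wakeup if a waiter is visible on the queue. */
	if (atomic_load_explicit(&on_waitqueue, memory_order_relaxed))
		printf("waker: waking the waiter\n");
	else
		printf("waker: no waiter visible, skipping the wakeup\n");
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, waiter, NULL);
	pthread_create(&b, NULL, waker, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}

With both fences in place, "waiter: sleeping" and "waker: skipping the wakeup" can never be printed together; drop the waiter-side fence and that lost-wakeup combination becomes possible, which is the IO hang the hunk closes.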