author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 17:35:05 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 17:39:31 +0000 |
commit | 85c675d0d09a45a135bddd15d7b385f8758c32fb (patch) | |
tree | 76267dbc9b9a130337be3640948fe397b04ac629 /block/blk-mq.c | |
parent | Adding upstream version 6.6.15. (diff) | |
download | linux-85c675d0d09a45a135bddd15d7b385f8758c32fb.tar.xz linux-85c675d0d09a45a135bddd15d7b385f8758c32fb.zip |
Adding upstream version 6.7.7. (tag: upstream/6.7.7)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'block/blk-mq.c')
-rw-r--r-- | block/blk-mq.c | 70 |
1 file changed, 37 insertions(+), 33 deletions(-)
```diff
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6041e17492..a02d3d922c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -426,6 +426,8 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data)
 		rq_list_add(data->cached_rq, rq);
 		nr++;
 	}
+	if (!(data->rq_flags & RQF_SCHED_TAGS))
+		blk_mq_add_active_requests(data->hctx, nr);
 	/* caller already holds a reference, add for remainder */
 	percpu_ref_get_many(&data->q->q_usage_counter, nr - 1);
 	data->nr_tags -= nr;
@@ -510,6 +512,8 @@ retry:
 		goto retry;
 	}
 
+	if (!(data->rq_flags & RQF_SCHED_TAGS))
+		blk_mq_inc_active_requests(data->hctx);
 	rq = blk_mq_rq_ctx_init(data, blk_mq_tags_from_data(data), tag);
 	blk_mq_rq_time_init(rq, alloc_time_ns);
 	return rq;
@@ -669,6 +673,8 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
 	tag = blk_mq_get_tag(&data);
 	if (tag == BLK_MQ_NO_TAG)
 		goto out_queue_exit;
+	if (!(data.rq_flags & RQF_SCHED_TAGS))
+		blk_mq_inc_active_requests(data.hctx);
 	rq = blk_mq_rq_ctx_init(&data, blk_mq_tags_from_data(&data), tag);
 	blk_mq_rq_time_init(rq, alloc_time_ns);
 	rq->__data_len = 0;
@@ -708,11 +714,10 @@ static void __blk_mq_free_request(struct request *rq)
 	blk_pm_mark_last_busy(rq);
 	rq->mq_hctx = NULL;
 
-	if (rq->rq_flags & RQF_MQ_INFLIGHT)
-		__blk_mq_dec_active_requests(hctx);
-
-	if (rq->tag != BLK_MQ_NO_TAG)
+	if (rq->tag != BLK_MQ_NO_TAG) {
+		blk_mq_dec_active_requests(hctx);
 		blk_mq_put_tag(hctx->tags, ctx, rq->tag);
+	}
 	if (sched_tag != BLK_MQ_NO_TAG)
 		blk_mq_put_tag(hctx->sched_tags, ctx, sched_tag);
 	blk_mq_sched_restart(hctx);
@@ -767,11 +772,16 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
 		/*
 		 * Partial zone append completions cannot be supported as the
 		 * BIO fragments may end up not being written sequentially.
+		 * For such case, force the completed nbytes to be equal to
+		 * the BIO size so that bio_advance() sets the BIO remaining
+		 * size to 0 and we end up calling bio_endio() before returning.
 		 */
-		if (bio->bi_iter.bi_size != nbytes)
+		if (bio->bi_iter.bi_size != nbytes) {
 			bio->bi_status = BLK_STS_IOERR;
-		else
+			nbytes = bio->bi_iter.bi_size;
+		} else {
 			bio->bi_iter.bi_sector = rq->__sector;
+		}
 	}
 
 	bio_advance(bio, nbytes);
@@ -1061,12 +1071,7 @@ static inline void blk_mq_flush_tag_batch(struct blk_mq_hw_ctx *hctx,
 {
 	struct request_queue *q = hctx->queue;
 
-	/*
-	 * All requests should have been marked as RQF_MQ_INFLIGHT, so
-	 * update hctx->nr_active in batch
-	 */
-	if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
-		__blk_mq_sub_active_requests(hctx, nr_tags);
+	blk_mq_sub_active_requests(hctx, nr_tags);
 
 	blk_mq_put_tags(hctx->tags, tag_array, nr_tags);
 	percpu_ref_put_many(&q->q_usage_counter, nr_tags);
@@ -1259,6 +1264,7 @@ void blk_mq_start_request(struct request *rq)
 
 	blk_add_timer(rq);
 	WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT);
+	rq->mq_hctx->tags->rqs[rq->tag] = rq;
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 	if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE)
@@ -1760,7 +1766,7 @@ struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
 	return data.rq;
 }
 
-static bool __blk_mq_alloc_driver_tag(struct request *rq)
+bool __blk_mq_alloc_driver_tag(struct request *rq)
 {
 	struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags;
 	unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags;
@@ -1781,20 +1787,7 @@ static bool __blk_mq_alloc_driver_tag(struct request *rq)
 		return false;
 
 	rq->tag = tag + tag_offset;
-	return true;
-}
-
-bool __blk_mq_get_driver_tag(struct blk_mq_hw_ctx *hctx, struct request *rq)
-{
-	if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_alloc_driver_tag(rq))
-		return false;
-
-	if ((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) &&
-			!(rq->rq_flags & RQF_MQ_INFLIGHT)) {
-		rq->rq_flags |= RQF_MQ_INFLIGHT;
-		__blk_mq_inc_active_requests(hctx);
-	}
-	hctx->tags->rqs[rq->tag] = rq;
+	blk_mq_inc_active_requests(rq->mq_hctx);
 	return true;
 }
 
@@ -1871,6 +1864,22 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
 	__add_wait_queue(wq, wait);
 
 	/*
+	 * Add one explicit barrier since blk_mq_get_driver_tag() may
+	 * not imply barrier in case of failure.
+	 *
+	 * Order adding us to wait queue and allocating driver tag.
+	 *
+	 * The pair is the one implied in sbitmap_queue_wake_up() which
+	 * orders clearing sbitmap tag bits and waitqueue_active() in
+	 * __sbitmap_queue_wake_up(), since waitqueue_active() is lockless
+	 *
+	 * Otherwise, re-order of adding wait queue and getting driver tag
+	 * may cause __sbitmap_queue_wake_up() to wake up nothing because
+	 * the waitqueue_active() may not observe us in wait queue.
+	 */
+	smp_mb();
+
+	/*
 	 * It's possible that a tag was freed in the window between the
 	 * allocation failure and adding the hardware queue to the wait
 	 * queue.
@@ -2806,13 +2815,8 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 	 * If we do, we can dispatch the whole plug list in one go. We
 	 * already know at this point that all requests belong to the
 	 * same queue, caller must ensure that's the case.
-	 *
-	 * Since we pass off the full list to the driver at this point,
-	 * we do not increment the active request count for the queue.
-	 * Bypass shared tags for now because of that.
 	 */
-	if (q->mq_ops->queue_rqs &&
-	    !(rq->mq_hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) {
+	if (q->mq_ops->queue_rqs) {
 		blk_mq_run_dispatch_ops(q,
 				__blk_mq_flush_plug_list(q, plug));
 		if (rq_list_empty(plug->mq_list))
```
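Most of the hunks above retire the RQF_MQ_INFLIGHT flag and instead tie active-request accounting to tag ownership: the count is bumped where a tag is obtained (the batched allocator, the two allocation slow paths, and __blk_mq_alloc_driver_tag()) and dropped where the tag is put back in __blk_mq_free_request() and blk_mq_flush_tag_batch(). The sketch below is a minimal userspace rendering of that model; the names (fake_hctx, fake_get_driver_tag, ...) are invented for illustration, and the shared-tag check mirrors the BLK_MQ_F_TAG_QUEUE_SHARED condition visible in the removed code, not the bodies of the new blk_mq_*_active_requests() helpers, which are outside this diff.

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for a hardware queue context; only the accounting bits. */
struct fake_hctx {
	bool tag_queue_shared;	/* like BLK_MQ_F_TAG_QUEUE_SHARED */
	atomic_int nr_active;	/* like hctx->nr_active */
};

/* Only account when the tag space is shared and fairness matters. */
static void fake_inc_active(struct fake_hctx *hctx)
{
	if (hctx->tag_queue_shared)
		atomic_fetch_add(&hctx->nr_active, 1);
}

static void fake_dec_active(struct fake_hctx *hctx)
{
	if (hctx->tag_queue_shared)
		atomic_fetch_sub(&hctx->nr_active, 1);
}

/* The count follows the tag: bump it where the tag is allocated ... */
static void fake_get_driver_tag(struct fake_hctx *hctx)
{
	fake_inc_active(hctx);
}

/* ... and drop it where the tag is released; no per-request flag needed. */
static void fake_put_driver_tag(struct fake_hctx *hctx)
{
	fake_dec_active(hctx);
}

int main(void)
{
	struct fake_hctx hctx = { .tag_queue_shared = true };

	fake_get_driver_tag(&hctx);
	printf("in flight: %d\n", atomic_load(&hctx.nr_active));
	fake_put_driver_tag(&hctx);
	printf("after free: %d\n", atomic_load(&hctx.nr_active));
	return 0;
}
```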
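The req_bio_endio() hunk spells out why a short zone-append completion is converted into a full-length, failed one: raising nbytes to the remaining BIO size makes bio_advance() drain the BIO, so bio_endio() runs immediately instead of leaving an unfinishable fragment behind. Below is a stripped-down rendering of that control flow, using simplified stand-in types (fake_bio and its fields are illustrative, not the kernel's struct bio):

```c
#include <stdbool.h>
#include <stdio.h>

struct fake_bio {
	unsigned int remaining;	/* like bio->bi_iter.bi_size */
	bool error;		/* like bio->bi_status != BLK_STS_OK */
	bool ended;		/* set once "bio_endio()" has run */
};

static void fake_zone_append_complete(struct fake_bio *bio, unsigned int nbytes)
{
	if (bio->remaining != nbytes) {
		/* Partial completion: fail the bio and consume it entirely. */
		bio->error = true;
		nbytes = bio->remaining;
	}
	bio->remaining -= nbytes;	/* bio_advance() */
	if (bio->remaining == 0)
		bio->ended = true;	/* bio_endio() */
}

int main(void)
{
	struct fake_bio bio = { .remaining = 8192 };

	fake_zone_append_complete(&bio, 4096);	/* short completion */
	printf("ended=%d error=%d remaining=%u\n",
	       bio.ended, bio.error, bio.remaining);
	return 0;
}
```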
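The comment added in blk_mq_mark_tag_wait() describes a store-buffering hazard: the waiter must publish its presence on the wait queue before re-trying the tag allocation, while the freeing side (via the barrier implied by __sbitmap_queue_wake_up()) publishes the cleared tag bit before its lockless waitqueue_active() check; without the new smp_mb(), both sides could miss each other and the wakeup would be lost. The following is a userspace analogy using C11 fences; tag_free and waiter_queued are invented stand-ins, not kernel state:

```c
#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool tag_free;       /* stands in for a free bit in the sbitmap */
static atomic_bool waiter_queued;  /* stands in for waitqueue_active() */
static bool waiter_saw_tag, waker_saw_waiter;

/* Waiter side: roughly blk_mq_mark_tag_wait() after __add_wait_queue(). */
static void *waiter_side(void *arg)
{
	(void)arg;
	atomic_store_explicit(&waiter_queued, true, memory_order_relaxed);
	/* Full barrier: publish "we are queued" before re-reading tag state
	 * (the role of the added smp_mb()). */
	atomic_thread_fence(memory_order_seq_cst);
	waiter_saw_tag = atomic_load_explicit(&tag_free, memory_order_relaxed);
	return NULL;
}

/* Waker side: roughly the barrier implied by __sbitmap_queue_wake_up(). */
static void *waker_side(void *arg)
{
	(void)arg;
	atomic_store_explicit(&tag_free, true, memory_order_relaxed);
	/* Full barrier: publish the freed tag before checking for waiters. */
	atomic_thread_fence(memory_order_seq_cst);
	waker_saw_waiter = atomic_load_explicit(&waiter_queued, memory_order_relaxed);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, waiter_side, NULL);
	pthread_create(&b, NULL, waker_side, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	/* With both fences in place, at least one side must see the other. */
	assert(waiter_saw_tag || waker_saw_waiter);
	printf("waiter_saw_tag=%d waker_saw_waiter=%d\n",
	       waiter_saw_tag, waker_saw_waiter);
	return 0;
}
```

With both fences present the assertion cannot fail: at least one side observes the other, which is the guarantee the added smp_mb() restores when blk_mq_get_driver_tag() fails without implying a barrier.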