From e54def4ad8144ab15f826416e2e0f290ef1901b4 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 19 Jun 2024 23:00:30 +0200 Subject: Adding upstream version 6.9.2. Signed-off-by: Daniel Baumann --- drivers/block/amiflop.c | 2 +- drivers/block/aoe/aoeblk.c | 15 +- drivers/block/ataflop.c | 2 +- drivers/block/brd.c | 26 +- drivers/block/drbd/drbd_int.h | 4 +- drivers/block/drbd/drbd_main.c | 17 +- drivers/block/drbd/drbd_nl.c | 268 ++++++++-------- drivers/block/drbd/drbd_state.c | 24 +- drivers/block/drbd/drbd_state_change.h | 8 +- drivers/block/floppy.c | 18 +- drivers/block/loop.c | 75 ++--- drivers/block/mtip32xx/mtip32xx.c | 13 +- drivers/block/n64cart.c | 12 +- drivers/block/nbd.c | 43 ++- drivers/block/null_blk/main.c | 541 +++++++++------------------------ drivers/block/null_blk/null_blk.h | 24 +- drivers/block/null_blk/trace.h | 5 +- drivers/block/null_blk/zoned.c | 25 +- drivers/block/pktcdvd.c | 109 ++++--- drivers/block/ps3disk.c | 17 +- drivers/block/ps3vram.c | 6 +- drivers/block/rbd.c | 31 +- drivers/block/rnbd/rnbd-clt.c | 64 ++-- drivers/block/rnbd/rnbd-srv.c | 28 +- drivers/block/rnbd/rnbd-srv.h | 2 +- drivers/block/sunvdc.c | 20 +- drivers/block/swim.c | 8 +- drivers/block/swim3.c | 2 +- drivers/block/ublk_drv.c | 112 ++++--- drivers/block/virtio_blk.c | 303 +++++++++--------- drivers/block/xen-blkback/blkback.c | 4 +- drivers/block/xen-blkback/common.h | 4 +- drivers/block/xen-blkback/xenbus.c | 37 ++- drivers/block/xen-blkfront.c | 53 ++-- drivers/block/z2ram.c | 2 +- drivers/block/zram/zcomp.c | 5 +- drivers/block/zram/zcomp.h | 1 - drivers/block/zram/zram_drv.c | 79 +++-- drivers/block/zram/zram_drv.h | 2 +- 39 files changed, 873 insertions(+), 1138 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 2b98114a9..a25414228 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1779,7 +1779,7 @@ static int fd_alloc_disk(int drive, int system) struct gendisk *disk; int err; - disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL); + disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL, NULL); if (IS_ERR(disk)) return PTR_ERR(disk); diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index b1b47d88f..b6dac8cee 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -24,8 +24,8 @@ static DEFINE_MUTEX(aoeblk_mutex); static struct kmem_cache *buf_pool_cache; static struct dentry *aoe_debugfs_dir; -/* GPFS needs a larger value than the default. */ -static int aoe_maxsectors; +/* random default picked from the historic block max_sectors cap */ +static int aoe_maxsectors = 2560; module_param(aoe_maxsectors, int, 0644); MODULE_PARM_DESC(aoe_maxsectors, "When nonzero, set the maximum number of sectors per I/O request"); @@ -334,6 +334,10 @@ aoeblk_gdalloc(void *vp) mempool_t *mp; struct blk_mq_tag_set *set; sector_t ssize; + struct queue_limits lim = { + .max_hw_sectors = aoe_maxsectors, + .io_opt = SZ_2M, + }; ulong flags; int late = 0; int err; @@ -371,7 +375,7 @@ aoeblk_gdalloc(void *vp) goto err_mempool; } - gd = blk_mq_alloc_disk(set, d); + gd = blk_mq_alloc_disk(set, &lim, d); if (IS_ERR(gd)) { pr_err("aoe: cannot allocate block queue for %ld.%d\n", d->aoemajor, d->aoeminor); @@ -384,14 +388,9 @@ aoeblk_gdalloc(void *vp) WARN_ON(d->flags & DEVFL_TKILL); WARN_ON(d->gd); WARN_ON(d->flags & DEVFL_UP); - /* random number picked from the history block max_sectors cap */ - blk_queue_max_hw_sectors(gd->queue, 2560u); - blk_queue_io_opt(gd->queue, SZ_2M); d->bufpool = mp; d->blkq = gd->queue; d->gd = gd; - if (aoe_maxsectors) - blk_queue_max_hw_sectors(gd->queue, aoe_maxsectors); gd->major = AOE_MAJOR; gd->first_minor = d->sysminor; gd->minors = AOE_PARTITIONS; diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 509492077..cacc4ba94 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1994,7 +1994,7 @@ static int ataflop_alloc_disk(unsigned int drive, unsigned int type) { struct gendisk *disk; - disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL); + disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL, NULL); if (IS_ERR(disk)) return PTR_ERR(disk); diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 970bd6ff3..e322cef65 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -318,6 +318,16 @@ static int brd_alloc(int i) struct gendisk *disk; char buf[DISK_NAME_LEN]; int err = -ENOMEM; + struct queue_limits lim = { + /* + * This is so fdisk will align partitions on 4k, because of + * direct_access API needing 4k alignment, returning a PFN + * (This is only a problem on very small devices <= 4M, + * otherwise fdisk will align on 1M. Regardless this call + * is harmless) + */ + .physical_block_size = PAGE_SIZE, + }; list_for_each_entry(brd, &brd_devices, brd_list) if (brd->brd_number == i) @@ -335,10 +345,11 @@ static int brd_alloc(int i) debugfs_create_u64(buf, 0444, brd_debugfs_dir, &brd->brd_nr_pages); - disk = brd->brd_disk = blk_alloc_disk(NUMA_NO_NODE); - if (!disk) + disk = brd->brd_disk = blk_alloc_disk(&lim, NUMA_NO_NODE); + if (IS_ERR(disk)) { + err = PTR_ERR(disk); goto out_free_dev; - + } disk->major = RAMDISK_MAJOR; disk->first_minor = i * max_part; disk->minors = max_part; @@ -347,15 +358,6 @@ static int brd_alloc(int i) strscpy(disk->disk_name, buf, DISK_NAME_LEN); set_capacity(disk, rd_size * 2); - /* - * This is so fdisk will align partitions on 4k, because of - * direct_access API needing 4k alignment, returning a PFN - * (This is only a problem on very small devices <= 4M, - * otherwise fdisk will align on 1M. Regardless this call - * is harmless) - */ - blk_queue_physical_block_size(disk->queue, PAGE_SIZE); - /* Tell the block layer that this is not a rotational device */ blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, disk->queue); diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c21e37327..94dc0a235 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -524,9 +524,9 @@ struct drbd_md { struct drbd_backing_dev { struct block_device *backing_bdev; - struct bdev_handle *backing_bdev_handle; + struct file *backing_bdev_file; struct block_device *md_bdev; - struct bdev_handle *md_bdev_handle; + struct file *f_md_bdev; struct drbd_md md; struct disk_conf *disk_conf; /* RCU, for updates: resource->conf_update */ sector_t known_size; /* last known size of that backing device */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 6bc86106c..113b441d4 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2690,6 +2690,14 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig int id; int vnr = adm_ctx->volume; enum drbd_ret_code err = ERR_NOMEM; + struct queue_limits lim = { + /* + * Setting the max_hw_sectors to an odd value of 8kibyte here. + * This triggers a max_bio_size message upon first attach or + * connect. + */ + .max_hw_sectors = DRBD_MAX_BIO_SIZE_SAFE >> 8, + }; device = minor_to_device(minor); if (device) @@ -2708,9 +2716,11 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_init_set_defaults(device); - disk = blk_alloc_disk(NUMA_NO_NODE); - if (!disk) + disk = blk_alloc_disk(&lim, NUMA_NO_NODE); + if (IS_ERR(disk)) { + err = PTR_ERR(disk); goto out_no_disk; + } device->vdisk = disk; device->rq_queue = disk->queue; @@ -2727,9 +2737,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue); blk_queue_write_cache(disk->queue, true, true); - /* Setting the max_hw_sectors to an odd value of 8kibyte here - This triggers a max_bio_size message upon first attach or connect */ - blk_queue_max_hw_sectors(disk->queue, DRBD_MAX_BIO_SIZE_SAFE >> 8); device->md_io.page = alloc_page(GFP_KERNEL); if (!device->md_io.page) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 43747a1aa..5d65c9754 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1189,9 +1189,31 @@ static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc) return 0; } -static void blk_queue_discard_granularity(struct request_queue *q, unsigned int granularity) +static unsigned int drbd_max_peer_bio_size(struct drbd_device *device) { - q->limits.discard_granularity = granularity; + /* + * We may ignore peer limits if the peer is modern enough. From 8.3.8 + * onwards the peer can use multiple BIOs for a single peer_request. + */ + if (device->state.conn < C_WF_REPORT_PARAMS) + return device->peer_max_bio_size; + + if (first_peer_device(device)->connection->agreed_pro_version < 94) + return min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET); + + /* + * Correct old drbd (up to 8.3.7) if it believes it can do more than + * 32KiB. + */ + if (first_peer_device(device)->connection->agreed_pro_version == 94) + return DRBD_MAX_SIZE_H80_PACKET; + + /* + * drbd 8.3.8 onwards, before 8.4.0 + */ + if (first_peer_device(device)->connection->agreed_pro_version < 100) + return DRBD_MAX_BIO_SIZE_P95; + return DRBD_MAX_BIO_SIZE; } static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection) @@ -1204,149 +1226,119 @@ static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection) return AL_EXTENT_SIZE >> 9; } -static void decide_on_discard_support(struct drbd_device *device, +static bool drbd_discard_supported(struct drbd_connection *connection, struct drbd_backing_dev *bdev) { - struct drbd_connection *connection = - first_peer_device(device)->connection; - struct request_queue *q = device->rq_queue; - unsigned int max_discard_sectors; - if (bdev && !bdev_max_discard_sectors(bdev->backing_bdev)) - goto not_supported; + return false; if (connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_TRIM)) { drbd_info(connection, "peer DRBD too old, does not support TRIM: disabling discards\n"); - goto not_supported; + return false; } - /* - * We don't care for the granularity, really. - * - * Stacking limits below should fix it for the local device. Whether or - * not it is a suitable granularity on the remote device is not our - * problem, really. If you care, you need to use devices with similar - * topology on all peers. - */ - blk_queue_discard_granularity(q, 512); - max_discard_sectors = drbd_max_discard_sectors(connection); - blk_queue_max_discard_sectors(q, max_discard_sectors); - blk_queue_max_write_zeroes_sectors(q, max_discard_sectors); - return; - -not_supported: - blk_queue_discard_granularity(q, 0); - blk_queue_max_discard_sectors(q, 0); + return true; } -static void fixup_write_zeroes(struct drbd_device *device, struct request_queue *q) +/* This is the workaround for "bio would need to, but cannot, be split" */ +static unsigned int drbd_backing_dev_max_segments(struct drbd_device *device) { - /* Fixup max_write_zeroes_sectors after blk_stack_limits(): - * if we can handle "zeroes" efficiently on the protocol, - * we want to do that, even if our backend does not announce - * max_write_zeroes_sectors itself. */ - struct drbd_connection *connection = first_peer_device(device)->connection; - /* If the peer announces WZEROES support, use it. Otherwise, rather - * send explicit zeroes than rely on some discard-zeroes-data magic. */ - if (connection->agreed_features & DRBD_FF_WZEROES) - q->limits.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS; - else - q->limits.max_write_zeroes_sectors = 0; -} + unsigned int max_segments; -static void fixup_discard_support(struct drbd_device *device, struct request_queue *q) -{ - unsigned int max_discard = device->rq_queue->limits.max_discard_sectors; - unsigned int discard_granularity = - device->rq_queue->limits.discard_granularity >> SECTOR_SHIFT; + rcu_read_lock(); + max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs; + rcu_read_unlock(); - if (discard_granularity > max_discard) { - blk_queue_discard_granularity(q, 0); - blk_queue_max_discard_sectors(q, 0); - } + if (!max_segments) + return BLK_MAX_SEGMENTS; + return max_segments; } -static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev, - unsigned int max_bio_size, struct o_qlim *o) +void drbd_reconsider_queue_parameters(struct drbd_device *device, + struct drbd_backing_dev *bdev, struct o_qlim *o) { + struct drbd_connection *connection = + first_peer_device(device)->connection; struct request_queue * const q = device->rq_queue; - unsigned int max_hw_sectors = max_bio_size >> 9; - unsigned int max_segments = 0; + unsigned int now = queue_max_hw_sectors(q) << 9; + struct queue_limits lim; struct request_queue *b = NULL; - struct disk_conf *dc; + unsigned int new; if (bdev) { b = bdev->backing_bdev->bd_disk->queue; - max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); - rcu_read_lock(); - dc = rcu_dereference(device->ldev->disk_conf); - max_segments = dc->max_bio_bvecs; - rcu_read_unlock(); - - blk_set_stacking_limits(&q->limits); + device->local_max_bio_size = + queue_max_hw_sectors(b) << SECTOR_SHIFT; } - blk_queue_max_hw_sectors(q, max_hw_sectors); - /* This is the workaround for "bio would need to, but cannot, be split" */ - blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS); - blk_queue_segment_boundary(q, PAGE_SIZE-1); - decide_on_discard_support(device, bdev); - - if (b) { - blk_stack_limits(&q->limits, &b->limits, 0); - disk_update_readahead(device->vdisk); + /* + * We may later detach and re-attach on a disconnected Primary. Avoid + * decreasing the value in this case. + * + * We want to store what we know the peer DRBD can handle, not what the + * peer IO backend can handle. + */ + new = min3(DRBD_MAX_BIO_SIZE, device->local_max_bio_size, + max(drbd_max_peer_bio_size(device), device->peer_max_bio_size)); + if (new != now) { + if (device->state.role == R_PRIMARY && new < now) + drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", + new, now); + drbd_info(device, "max BIO size = %u\n", new); } - fixup_write_zeroes(device, q); - fixup_discard_support(device, q); -} - -void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o) -{ - unsigned int now, new, local, peer; - - now = queue_max_hw_sectors(device->rq_queue) << 9; - local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */ - peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */ + lim = queue_limits_start_update(q); if (bdev) { - local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9; - device->local_max_bio_size = local; + blk_set_stacking_limits(&lim); + lim.max_segments = drbd_backing_dev_max_segments(device); + } else { + lim.max_segments = BLK_MAX_SEGMENTS; } - local = min(local, DRBD_MAX_BIO_SIZE); - /* We may ignore peer limits if the peer is modern enough. - Because new from 8.3.8 onwards the peer can use multiple - BIOs for a single peer_request */ - if (device->state.conn >= C_WF_REPORT_PARAMS) { - if (first_peer_device(device)->connection->agreed_pro_version < 94) - peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET); - /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */ - else if (first_peer_device(device)->connection->agreed_pro_version == 94) - peer = DRBD_MAX_SIZE_H80_PACKET; - else if (first_peer_device(device)->connection->agreed_pro_version < 100) - peer = DRBD_MAX_BIO_SIZE_P95; /* drbd 8.3.8 onwards, before 8.4.0 */ - else - peer = DRBD_MAX_BIO_SIZE; + lim.max_hw_sectors = new >> SECTOR_SHIFT; + lim.seg_boundary_mask = PAGE_SIZE - 1; - /* We may later detach and re-attach on a disconnected Primary. - * Avoid this setting to jump back in that case. - * We want to store what we know the peer DRBD can handle, - * not what the peer IO backend can handle. */ - if (peer > device->peer_max_bio_size) - device->peer_max_bio_size = peer; + /* + * We don't care for the granularity, really. + * + * Stacking limits below should fix it for the local device. Whether or + * not it is a suitable granularity on the remote device is not our + * problem, really. If you care, you need to use devices with similar + * topology on all peers. + */ + if (drbd_discard_supported(connection, bdev)) { + lim.discard_granularity = 512; + lim.max_hw_discard_sectors = + drbd_max_discard_sectors(connection); + } else { + lim.discard_granularity = 0; + lim.max_hw_discard_sectors = 0; } - new = min(local, peer); - if (device->state.role == R_PRIMARY && new < now) - drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now); + if (bdev) + blk_stack_limits(&lim, &b->limits, 0); - if (new != now) - drbd_info(device, "max BIO size = %u\n", new); + /* + * If we can handle "zeroes" efficiently on the protocol, we want to do + * that, even if our backend does not announce max_write_zeroes_sectors + * itself. + */ + if (connection->agreed_features & DRBD_FF_WZEROES) + lim.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS; + else + lim.max_write_zeroes_sectors = 0; + + if ((lim.discard_granularity >> SECTOR_SHIFT) > + lim.max_hw_discard_sectors) { + lim.discard_granularity = 0; + lim.max_hw_discard_sectors = 0; + } - drbd_setup_queue_param(device, bdev, new, o); + if (queue_limits_commit_update(q, &lim)) + drbd_err(device, "setting new queue limits failed\n"); } /* Starts the worker thread */ @@ -1635,45 +1627,45 @@ success: return 0; } -static struct bdev_handle *open_backing_dev(struct drbd_device *device, +static struct file *open_backing_dev(struct drbd_device *device, const char *bdev_path, void *claim_ptr, bool do_bd_link) { - struct bdev_handle *handle; + struct file *file; int err = 0; - handle = bdev_open_by_path(bdev_path, BLK_OPEN_READ | BLK_OPEN_WRITE, - claim_ptr, NULL); - if (IS_ERR(handle)) { + file = bdev_file_open_by_path(bdev_path, BLK_OPEN_READ | BLK_OPEN_WRITE, + claim_ptr, NULL); + if (IS_ERR(file)) { drbd_err(device, "open(\"%s\") failed with %ld\n", - bdev_path, PTR_ERR(handle)); - return handle; + bdev_path, PTR_ERR(file)); + return file; } if (!do_bd_link) - return handle; + return file; - err = bd_link_disk_holder(handle->bdev, device->vdisk); + err = bd_link_disk_holder(file_bdev(file), device->vdisk); if (err) { - bdev_release(handle); + fput(file); drbd_err(device, "bd_link_disk_holder(\"%s\", ...) failed with %d\n", bdev_path, err); - handle = ERR_PTR(err); + file = ERR_PTR(err); } - return handle; + return file; } static int open_backing_devices(struct drbd_device *device, struct disk_conf *new_disk_conf, struct drbd_backing_dev *nbc) { - struct bdev_handle *handle; + struct file *file; - handle = open_backing_dev(device, new_disk_conf->backing_dev, device, + file = open_backing_dev(device, new_disk_conf->backing_dev, device, true); - if (IS_ERR(handle)) + if (IS_ERR(file)) return ERR_OPEN_DISK; - nbc->backing_bdev = handle->bdev; - nbc->backing_bdev_handle = handle; + nbc->backing_bdev = file_bdev(file); + nbc->backing_bdev_file = file; /* * meta_dev_idx >= 0: external fixed size, possibly multiple @@ -1683,7 +1675,7 @@ static int open_backing_devices(struct drbd_device *device, * should check it for you already; but if you don't, or * someone fooled it, we need to double check here) */ - handle = open_backing_dev(device, new_disk_conf->meta_dev, + file = open_backing_dev(device, new_disk_conf->meta_dev, /* claim ptr: device, if claimed exclusively; shared drbd_m_holder, * if potentially shared with other drbd minors */ (new_disk_conf->meta_dev_idx < 0) ? (void*)device : (void*)drbd_m_holder, @@ -1691,21 +1683,21 @@ static int open_backing_devices(struct drbd_device *device, * as would happen with internal metadata. */ (new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_FLEX_INT && new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_INTERNAL)); - if (IS_ERR(handle)) + if (IS_ERR(file)) return ERR_OPEN_MD_DISK; - nbc->md_bdev = handle->bdev; - nbc->md_bdev_handle = handle; + nbc->md_bdev = file_bdev(file); + nbc->f_md_bdev = file; return NO_ERROR; } static void close_backing_dev(struct drbd_device *device, - struct bdev_handle *handle, bool do_bd_unlink) + struct file *bdev_file, bool do_bd_unlink) { - if (!handle) + if (!bdev_file) return; if (do_bd_unlink) - bd_unlink_disk_holder(handle->bdev, device->vdisk); - bdev_release(handle); + bd_unlink_disk_holder(file_bdev(bdev_file), device->vdisk); + fput(bdev_file); } void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev) @@ -1713,9 +1705,9 @@ void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev * if (ldev == NULL) return; - close_backing_dev(device, ldev->md_bdev_handle, + close_backing_dev(device, ldev->f_md_bdev, ldev->md_bdev != ldev->backing_bdev); - close_backing_dev(device, ldev->backing_bdev_handle, true); + close_backing_dev(device, ldev->backing_bdev_file, true); kfree(ldev->disk_conf); kfree(ldev); @@ -2131,9 +2123,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) fail: conn_reconfig_done(connection); if (nbc) { - close_backing_dev(device, nbc->md_bdev_handle, + close_backing_dev(device, nbc->f_md_bdev, nbc->md_bdev != nbc->backing_bdev); - close_backing_dev(device, nbc->backing_bdev_handle, true); + close_backing_dev(device, nbc->backing_bdev_file, true); kfree(nbc); } kfree(new_disk_conf); diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 287a8d1d3..e858e7e03 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1542,9 +1542,10 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device, int notify_resource_state_change(struct sk_buff *skb, unsigned int seq, - struct drbd_resource_state_change *resource_state_change, + void *state_change, enum drbd_notification_type type) { + struct drbd_resource_state_change *resource_state_change = state_change; struct drbd_resource *resource = resource_state_change->resource; struct resource_info resource_info = { .res_role = resource_state_change->role[NEW], @@ -1558,13 +1559,14 @@ int notify_resource_state_change(struct sk_buff *skb, int notify_connection_state_change(struct sk_buff *skb, unsigned int seq, - struct drbd_connection_state_change *connection_state_change, + void *state_change, enum drbd_notification_type type) { - struct drbd_connection *connection = connection_state_change->connection; + struct drbd_connection_state_change *p = state_change; + struct drbd_connection *connection = p->connection; struct connection_info connection_info = { - .conn_connection_state = connection_state_change->cstate[NEW], - .conn_role = connection_state_change->peer_role[NEW], + .conn_connection_state = p->cstate[NEW], + .conn_role = p->peer_role[NEW], }; return notify_connection_state(skb, seq, connection, &connection_info, type); @@ -1572,9 +1574,10 @@ int notify_connection_state_change(struct sk_buff *skb, int notify_device_state_change(struct sk_buff *skb, unsigned int seq, - struct drbd_device_state_change *device_state_change, + void *state_change, enum drbd_notification_type type) { + struct drbd_device_state_change *device_state_change = state_change; struct drbd_device *device = device_state_change->device; struct device_info device_info = { .dev_disk_state = device_state_change->disk_state[NEW], @@ -1585,9 +1588,10 @@ int notify_device_state_change(struct sk_buff *skb, int notify_peer_device_state_change(struct sk_buff *skb, unsigned int seq, - struct drbd_peer_device_state_change *p, + void *state_change, enum drbd_notification_type type) { + struct drbd_peer_device_state_change *p = state_change; struct drbd_peer_device *peer_device = p->peer_device; struct peer_device_info peer_device_info = { .peer_repl_state = p->repl_state[NEW], @@ -1605,8 +1609,8 @@ static void broadcast_state_change(struct drbd_state_change *state_change) struct drbd_resource_state_change *resource_state_change = &state_change->resource[0]; bool resource_state_has_changed; unsigned int n_device, n_connection, n_peer_device, n_peer_devices; - int (*last_func)(struct sk_buff *, unsigned int, void *, - enum drbd_notification_type) = NULL; + int (*last_func)(struct sk_buff *, unsigned int, + void *, enum drbd_notification_type) = NULL; void *last_arg = NULL; #define HAS_CHANGED(state) ((state)[OLD] != (state)[NEW]) @@ -1616,7 +1620,7 @@ static void broadcast_state_change(struct drbd_state_change *state_change) }) #define REMEMBER_STATE_CHANGE(func, arg, type) \ ({ FINAL_STATE_CHANGE(type | NOTIFY_CONTINUES); \ - last_func = (typeof(last_func))func; \ + last_func = func; \ last_arg = arg; \ }) diff --git a/drivers/block/drbd/drbd_state_change.h b/drivers/block/drbd/drbd_state_change.h index 9d78d8e39..a56a57d67 100644 --- a/drivers/block/drbd/drbd_state_change.h +++ b/drivers/block/drbd/drbd_state_change.h @@ -46,19 +46,19 @@ extern void forget_state_change(struct drbd_state_change *); extern int notify_resource_state_change(struct sk_buff *, unsigned int, - struct drbd_resource_state_change *, + void *, enum drbd_notification_type type); extern int notify_connection_state_change(struct sk_buff *, unsigned int, - struct drbd_connection_state_change *, + void *, enum drbd_notification_type type); extern int notify_device_state_change(struct sk_buff *, unsigned int, - struct drbd_device_state_change *, + void *, enum drbd_notification_type type); extern int notify_peer_device_state_change(struct sk_buff *, unsigned int, - struct drbd_peer_device_state_change *, + void *, enum drbd_notification_type type); #endif /* DRBD_STATE_CHANGE_H */ diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index d0e41d52d..25c9d8566 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -530,14 +530,13 @@ static struct format_descr format_req; static char *floppy_track_buffer; static int max_buffer_sectors; -typedef void (*done_f)(int); static const struct cont_t { void (*interrupt)(void); /* this is called after the interrupt of the * main command */ void (*redo)(void); /* this is called to retry the operation */ void (*error)(void); /* this is called to tally an error */ - done_f done; /* this is called to say if the operation has + void (*done)(int); /* this is called to say if the operation has * succeeded/failed */ } *cont; @@ -985,6 +984,10 @@ static void empty(void) { } +static void empty_done(int result) +{ +} + static void (*floppy_work_fn)(void); static void floppy_work_workfn(struct work_struct *work) @@ -1998,14 +2001,14 @@ static const struct cont_t wakeup_cont = { .interrupt = empty, .redo = do_wakeup, .error = empty, - .done = (done_f)empty + .done = empty_done, }; static const struct cont_t intr_cont = { .interrupt = empty, .redo = process_fd_request, .error = empty, - .done = (done_f)empty + .done = empty_done, }; /* schedules handler, waiting for completion. May be interrupted, will then @@ -2784,7 +2787,6 @@ do_request: pending = set_next_request(); spin_unlock_irq(&floppy_lock); if (!pending) { - do_floppy = NULL; unlock_fdc(); return; } @@ -4513,13 +4515,15 @@ static bool floppy_available(int drive) static int floppy_alloc_disk(unsigned int drive, unsigned int type) { + struct queue_limits lim = { + .max_hw_sectors = 64, + }; struct gendisk *disk; - disk = blk_mq_alloc_disk(&tag_sets[drive], NULL); + disk = blk_mq_alloc_disk(&tag_sets[drive], &lim, NULL); if (IS_ERR(disk)) return PTR_ERR(disk); - blk_queue_max_hw_sectors(disk->queue, 64); disk->major = FLOPPY_MAJOR; disk->first_minor = TOMINOR(drive) | (type << 2); disk->minors = 1; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f8145499d..28a95fd36 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -750,12 +750,13 @@ static void loop_sysfs_exit(struct loop_device *lo) &loop_attribute_group); } -static void loop_config_discard(struct loop_device *lo) +static void loop_config_discard(struct loop_device *lo, + struct queue_limits *lim) { struct file *file = lo->lo_backing_file; struct inode *inode = file->f_mapping->host; - struct request_queue *q = lo->lo_queue; - u32 granularity, max_discard_sectors; + u32 granularity = 0, max_discard_sectors = 0; + struct kstatfs sbuf; /* * If the backing device is a block device, mirror its zeroing @@ -775,29 +776,17 @@ static void loop_config_discard(struct loop_device *lo) * We use punch hole to reclaim the free space used by the * image a.k.a. discard. */ - } else if (!file->f_op->fallocate) { - max_discard_sectors = 0; - granularity = 0; - - } else { - struct kstatfs sbuf; - + } else if (file->f_op->fallocate && !vfs_statfs(&file->f_path, &sbuf)) { max_discard_sectors = UINT_MAX >> 9; - if (!vfs_statfs(&file->f_path, &sbuf)) - granularity = sbuf.f_bsize; - else - max_discard_sectors = 0; + granularity = sbuf.f_bsize; } - if (max_discard_sectors) { - q->limits.discard_granularity = granularity; - blk_queue_max_discard_sectors(q, max_discard_sectors); - blk_queue_max_write_zeroes_sectors(q, max_discard_sectors); - } else { - q->limits.discard_granularity = 0; - blk_queue_max_discard_sectors(q, 0); - blk_queue_max_write_zeroes_sectors(q, 0); - } + lim->max_hw_discard_sectors = max_discard_sectors; + lim->max_write_zeroes_sectors = max_discard_sectors; + if (max_discard_sectors) + lim->discard_granularity = granularity; + else + lim->discard_granularity = 0; } struct loop_worker { @@ -986,6 +975,20 @@ loop_set_status_from_info(struct loop_device *lo, return 0; } +static int loop_reconfigure_limits(struct loop_device *lo, unsigned short bsize, + bool update_discard_settings) +{ + struct queue_limits lim; + + lim = queue_limits_start_update(lo->lo_queue); + lim.logical_block_size = bsize; + lim.physical_block_size = bsize; + lim.io_min = bsize; + if (update_discard_settings) + loop_config_discard(lo, &lim); + return queue_limits_commit_update(lo->lo_queue, &lim); +} + static int loop_configure(struct loop_device *lo, blk_mode_t mode, struct block_device *bdev, const struct loop_config *config) @@ -1083,11 +1086,10 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode, else bsize = 512; - blk_queue_logical_block_size(lo->lo_queue, bsize); - blk_queue_physical_block_size(lo->lo_queue, bsize); - blk_queue_io_min(lo->lo_queue, bsize); + error = loop_reconfigure_limits(lo, bsize, true); + if (WARN_ON_ONCE(error)) + goto out_unlock; - loop_config_discard(lo); loop_update_rotational(lo); loop_update_dio(lo); loop_sysfs_init(lo); @@ -1154,9 +1156,7 @@ static void __loop_clr_fd(struct loop_device *lo, bool release) lo->lo_offset = 0; lo->lo_sizelimit = 0; memset(lo->lo_file_name, 0, LO_NAME_SIZE); - blk_queue_logical_block_size(lo->lo_queue, 512); - blk_queue_physical_block_size(lo->lo_queue, 512); - blk_queue_io_min(lo->lo_queue, 512); + loop_reconfigure_limits(lo, 512, false); invalidate_disk(lo->lo_disk); loop_sysfs_exit(lo); /* let user-space know about this change */ @@ -1488,9 +1488,7 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) invalidate_bdev(lo->lo_device); blk_mq_freeze_queue(lo->lo_queue); - blk_queue_logical_block_size(lo->lo_queue, arg); - blk_queue_physical_block_size(lo->lo_queue, arg); - blk_queue_io_min(lo->lo_queue, arg); + err = loop_reconfigure_limits(lo, arg, false); loop_update_dio(lo); blk_mq_unfreeze_queue(lo->lo_queue); @@ -1982,6 +1980,12 @@ static const struct blk_mq_ops loop_mq_ops = { static int loop_add(int i) { + struct queue_limits lim = { + /* + * Random number picked from the historic block max_sectors cap. + */ + .max_hw_sectors = 2560u, + }; struct loop_device *lo; struct gendisk *disk; int err; @@ -2025,16 +2029,13 @@ static int loop_add(int i) if (err) goto out_free_idr; - disk = lo->lo_disk = blk_mq_alloc_disk(&lo->tag_set, lo); + disk = lo->lo_disk = blk_mq_alloc_disk(&lo->tag_set, &lim, lo); if (IS_ERR(disk)) { err = PTR_ERR(disk); goto out_cleanup_tags; } lo->lo_queue = lo->lo_disk->queue; - /* random number picked from the history block max_sectors cap */ - blk_queue_max_hw_sectors(lo->lo_queue, 2560u); - /* * By default, we do buffer IO, so it doesn't make sense to enable * merge because the I/O submitted to backing file is handled page by diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index b200950e8..43a187609 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3401,6 +3401,12 @@ static const struct blk_mq_ops mtip_mq_ops = { */ static int mtip_block_initialize(struct driver_data *dd) { + struct queue_limits lim = { + .physical_block_size = 4096, + .max_hw_sectors = 0xffff, + .max_segments = MTIP_MAX_SG, + .max_segment_size = 0x400000, + }; int rv = 0, wait_for_rebuild = 0; sector_t capacity; unsigned int index = 0; @@ -3431,7 +3437,7 @@ static int mtip_block_initialize(struct driver_data *dd) goto block_queue_alloc_tag_error; } - dd->disk = blk_mq_alloc_disk(&dd->tags, dd); + dd->disk = blk_mq_alloc_disk(&dd->tags, &lim, dd); if (IS_ERR(dd->disk)) { dev_err(&dd->pdev->dev, "Unable to allocate request queue\n"); @@ -3481,12 +3487,7 @@ skip_create_disk: /* Set device limits. */ blk_queue_flag_set(QUEUE_FLAG_NONROT, dd->queue); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, dd->queue); - blk_queue_max_segments(dd->queue, MTIP_MAX_SG); - blk_queue_physical_block_size(dd->queue, 4096); - blk_queue_max_hw_sectors(dd->queue, 0xffff); - blk_queue_max_segment_size(dd->queue, 0x400000); dma_set_max_seg_size(&dd->pdev->dev, 0x400000); - blk_queue_io_min(dd->queue, 4096); /* Set the capacity of the device in 512 byte sectors. */ if (!(mtip_hw_get_capacity(dd, &capacity))) { diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c index d914156db..27b2187e7 100644 --- a/drivers/block/n64cart.c +++ b/drivers/block/n64cart.c @@ -114,6 +114,10 @@ static const struct block_device_operations n64cart_fops = { */ static int __init n64cart_probe(struct platform_device *pdev) { + struct queue_limits lim = { + .physical_block_size = 4096, + .logical_block_size = 4096, + }; struct gendisk *disk; int err = -ENOMEM; @@ -131,9 +135,11 @@ static int __init n64cart_probe(struct platform_device *pdev) if (IS_ERR(reg_base)) return PTR_ERR(reg_base); - disk = blk_alloc_disk(NUMA_NO_NODE); - if (!disk) + disk = blk_alloc_disk(&lim, NUMA_NO_NODE); + if (IS_ERR(disk)) { + err = PTR_ERR(disk); goto out; + } disk->first_minor = 0; disk->flags = GENHD_FL_NO_PART; @@ -145,8 +151,6 @@ static int __init n64cart_probe(struct platform_device *pdev) set_disk_ro(disk, 1); blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); - blk_queue_physical_block_size(disk->queue, 4096); - blk_queue_logical_block_size(disk->queue, 4096); err = add_disk(disk); if (err) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index b7c332528..9d4ec9273 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -316,9 +316,12 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock, nsock->sent = 0; } -static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize, +static int __nbd_set_size(struct nbd_device *nbd, loff_t bytesize, loff_t blksize) { + struct queue_limits lim; + int error; + if (!blksize) blksize = 1u << NBD_DEF_BLKSIZE_BITS; @@ -334,10 +337,16 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize, if (!nbd->pid) return 0; + lim = queue_limits_start_update(nbd->disk->queue); if (nbd->config->flags & NBD_FLAG_SEND_TRIM) - blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX); - blk_queue_logical_block_size(nbd->disk->queue, blksize); - blk_queue_physical_block_size(nbd->disk->queue, blksize); + lim.max_hw_discard_sectors = UINT_MAX; + else + lim.max_hw_discard_sectors = 0; + lim.logical_block_size = blksize; + lim.physical_block_size = blksize; + error = queue_limits_commit_update(nbd->disk->queue, &lim); + if (error) + return error; if (max_part) set_bit(GD_NEED_PART_SCAN, &nbd->disk->state); @@ -346,6 +355,18 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize, return 0; } +static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize, + loff_t blksize) +{ + int error; + + blk_mq_freeze_queue(nbd->disk->queue); + error = __nbd_set_size(nbd, bytesize, blksize); + blk_mq_unfreeze_queue(nbd->disk->queue); + + return error; +} + static void nbd_complete_rq(struct request *req) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); @@ -1351,7 +1372,6 @@ static void nbd_config_put(struct nbd_device *nbd) nbd->config = NULL; nbd->tag_set.timeout = 0; - blk_queue_max_discard_sectors(nbd->disk->queue, 0); mutex_unlock(&nbd->config_lock); nbd_put(nbd); @@ -1783,6 +1803,12 @@ static const struct blk_mq_ops nbd_mq_ops = { static struct nbd_device *nbd_dev_add(int index, unsigned int refs) { + struct queue_limits lim = { + .max_hw_sectors = 65536, + .max_user_sectors = 256, + .max_segments = USHRT_MAX, + .max_segment_size = UINT_MAX, + }; struct nbd_device *nbd; struct gendisk *disk; int err = -ENOMEM; @@ -1823,7 +1849,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) if (err < 0) goto out_free_tags; - disk = blk_mq_alloc_disk(&nbd->tag_set, NULL); + disk = blk_mq_alloc_disk(&nbd->tag_set, &lim, NULL); if (IS_ERR(disk)) { err = PTR_ERR(disk); goto out_free_idr; @@ -1843,11 +1869,6 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) * Tell the block layer that we are not a rotational device */ blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); - blk_queue_max_discard_sectors(disk->queue, 0); - blk_queue_max_segment_size(disk->queue, UINT_MAX); - blk_queue_max_segments(disk->queue, USHRT_MAX); - blk_queue_max_hw_sectors(disk->queue, 65536); - disk->queue->limits.max_sectors = 256; mutex_init(&nbd->config_lock); refcount_set(&nbd->config_refs, 0); diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 3584f389b..ed33cf719 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -115,6 +115,18 @@ module_param_string(init_hctx, g_init_hctx_str, sizeof(g_init_hctx_str), 0444); MODULE_PARM_DESC(init_hctx, "Fault injection to fail hctx init. init_hctx=,,,"); #endif +/* + * Historic queue modes. + * + * These days nothing but NULL_Q_MQ is actually supported, but we keep it the + * enum for error reporting. + */ +enum { + NULL_Q_BIO = 0, + NULL_Q_RQ = 1, + NULL_Q_MQ = 2, +}; + static int g_queue_mode = NULL_Q_MQ; static int null_param_store_val(const char *str, int *val, int min, int max) @@ -165,8 +177,8 @@ static bool g_blocking; module_param_named(blocking, g_blocking, bool, 0444); MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device"); -static bool shared_tags; -module_param(shared_tags, bool, 0444); +static bool g_shared_tags; +module_param_named(shared_tags, g_shared_tags, bool, 0444); MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq"); static bool g_shared_tag_bitmap; @@ -426,6 +438,7 @@ NULLB_DEVICE_ATTR(zone_max_open, uint, NULL); NULLB_DEVICE_ATTR(zone_max_active, uint, NULL); NULLB_DEVICE_ATTR(virt_boundary, bool, NULL); NULLB_DEVICE_ATTR(no_sched, bool, NULL); +NULLB_DEVICE_ATTR(shared_tags, bool, NULL); NULLB_DEVICE_ATTR(shared_tag_bitmap, bool, NULL); static ssize_t nullb_device_power_show(struct config_item *item, char *page) @@ -571,6 +584,7 @@ static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_zone_offline, &nullb_device_attr_virt_boundary, &nullb_device_attr_no_sched, + &nullb_device_attr_shared_tags, &nullb_device_attr_shared_tag_bitmap, NULL, }; @@ -653,10 +667,11 @@ static ssize_t memb_group_features_show(struct config_item *item, char *page) "badblocks,blocking,blocksize,cache_size," "completion_nsec,discard,home_node,hw_queue_depth," "irqmode,max_sectors,mbps,memory_backed,no_sched," - "poll_queues,power,queue_mode,shared_tag_bitmap,size," - "submit_queues,use_per_node_hctx,virt_boundary,zoned," - "zone_capacity,zone_max_active,zone_max_open," - "zone_nr_conv,zone_offline,zone_readonly,zone_size\n"); + "poll_queues,power,queue_mode,shared_tag_bitmap," + "shared_tags,size,submit_queues,use_per_node_hctx," + "virt_boundary,zoned,zone_capacity,zone_max_active," + "zone_max_open,zone_nr_conv,zone_offline,zone_readonly," + "zone_size\n"); } CONFIGFS_ATTR_RO(memb_group_, features); @@ -738,6 +753,7 @@ static struct nullb_device *null_alloc_dev(void) dev->zone_max_active = g_zone_max_active; dev->virt_boundary = g_virt_boundary; dev->no_sched = g_no_sched; + dev->shared_tags = g_shared_tags; dev->shared_tag_bitmap = g_shared_tag_bitmap; return dev; } @@ -752,98 +768,11 @@ static void null_free_dev(struct nullb_device *dev) kfree(dev); } -static void put_tag(struct nullb_queue *nq, unsigned int tag) -{ - clear_bit_unlock(tag, nq->tag_map); - - if (waitqueue_active(&nq->wait)) - wake_up(&nq->wait); -} - -static unsigned int get_tag(struct nullb_queue *nq) -{ - unsigned int tag; - - do { - tag = find_first_zero_bit(nq->tag_map, nq->queue_depth); - if (tag >= nq->queue_depth) - return -1U; - } while (test_and_set_bit_lock(tag, nq->tag_map)); - - return tag; -} - -static void free_cmd(struct nullb_cmd *cmd) -{ - put_tag(cmd->nq, cmd->tag); -} - -static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer); - -static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq) -{ - struct nullb_cmd *cmd; - unsigned int tag; - - tag = get_tag(nq); - if (tag != -1U) { - cmd = &nq->cmds[tag]; - cmd->tag = tag; - cmd->error = BLK_STS_OK; - cmd->nq = nq; - if (nq->dev->irqmode == NULL_IRQ_TIMER) { - hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL); - cmd->timer.function = null_cmd_timer_expired; - } - return cmd; - } - - return NULL; -} - -static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, struct bio *bio) -{ - struct nullb_cmd *cmd; - DEFINE_WAIT(wait); - - do { - /* - * This avoids multiple return statements, multiple calls to - * __alloc_cmd() and a fast path call to prepare_to_wait(). - */ - cmd = __alloc_cmd(nq); - if (cmd) { - cmd->bio = bio; - return cmd; - } - prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE); - io_schedule(); - finish_wait(&nq->wait, &wait); - } while (1); -} - -static void end_cmd(struct nullb_cmd *cmd) -{ - int queue_mode = cmd->nq->dev->queue_mode; - - switch (queue_mode) { - case NULL_Q_MQ: - blk_mq_end_request(cmd->rq, cmd->error); - return; - case NULL_Q_BIO: - cmd->bio->bi_status = cmd->error; - bio_endio(cmd->bio); - break; - } - - free_cmd(cmd); -} - static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer) { - end_cmd(container_of(timer, struct nullb_cmd, timer)); + struct nullb_cmd *cmd = container_of(timer, struct nullb_cmd, timer); + blk_mq_end_request(blk_mq_rq_from_pdu(cmd), cmd->error); return HRTIMER_NORESTART; } @@ -856,7 +785,9 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd) static void null_complete_rq(struct request *rq) { - end_cmd(blk_mq_rq_to_pdu(rq)); + struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq); + + blk_mq_end_request(rq, cmd->error); } static struct nullb_page *null_alloc_page(void) @@ -1273,7 +1204,7 @@ static int null_transfer(struct nullb *nullb, struct page *page, static int null_handle_rq(struct nullb_cmd *cmd) { - struct request *rq = cmd->rq; + struct request *rq = blk_mq_rq_from_pdu(cmd); struct nullb *nullb = cmd->nq->dev->nullb; int err; unsigned int len; @@ -1298,63 +1229,21 @@ static int null_handle_rq(struct nullb_cmd *cmd) return 0; } -static int null_handle_bio(struct nullb_cmd *cmd) -{ - struct bio *bio = cmd->bio; - struct nullb *nullb = cmd->nq->dev->nullb; - int err; - unsigned int len; - sector_t sector = bio->bi_iter.bi_sector; - struct bio_vec bvec; - struct bvec_iter iter; - - spin_lock_irq(&nullb->lock); - bio_for_each_segment(bvec, bio, iter) { - len = bvec.bv_len; - err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset, - op_is_write(bio_op(bio)), sector, - bio->bi_opf & REQ_FUA); - if (err) { - spin_unlock_irq(&nullb->lock); - return err; - } - sector += len >> SECTOR_SHIFT; - } - spin_unlock_irq(&nullb->lock); - return 0; -} - -static void null_stop_queue(struct nullb *nullb) -{ - struct request_queue *q = nullb->q; - - if (nullb->dev->queue_mode == NULL_Q_MQ) - blk_mq_stop_hw_queues(q); -} - -static void null_restart_queue_async(struct nullb *nullb) -{ - struct request_queue *q = nullb->q; - - if (nullb->dev->queue_mode == NULL_Q_MQ) - blk_mq_start_stopped_hw_queues(q, true); -} - static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd) { struct nullb_device *dev = cmd->nq->dev; struct nullb *nullb = dev->nullb; blk_status_t sts = BLK_STS_OK; - struct request *rq = cmd->rq; + struct request *rq = blk_mq_rq_from_pdu(cmd); if (!hrtimer_active(&nullb->bw_timer)) hrtimer_restart(&nullb->bw_timer); if (atomic_long_sub_return(blk_rq_bytes(rq), &nullb->cur_bytes) < 0) { - null_stop_queue(nullb); + blk_mq_stop_hw_queues(nullb->q); /* race with timer */ if (atomic_long_read(&nullb->cur_bytes) > 0) - null_restart_queue_async(nullb); + blk_mq_start_stopped_hw_queues(nullb->q, true); /* requeue request */ sts = BLK_STS_DEV_RESOURCE; } @@ -1381,37 +1270,29 @@ static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, sector_t nr_sectors) { struct nullb_device *dev = cmd->nq->dev; - int err; if (op == REQ_OP_DISCARD) return null_handle_discard(dev, sector, nr_sectors); + return errno_to_blk_status(null_handle_rq(cmd)); - if (dev->queue_mode == NULL_Q_BIO) - err = null_handle_bio(cmd); - else - err = null_handle_rq(cmd); - - return errno_to_blk_status(err); } static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd) { + struct request *rq = blk_mq_rq_from_pdu(cmd); struct nullb_device *dev = cmd->nq->dev; struct bio *bio; - if (dev->memory_backed) - return; - - if (dev->queue_mode == NULL_Q_BIO && bio_op(cmd->bio) == REQ_OP_READ) { - zero_fill_bio(cmd->bio); - } else if (req_op(cmd->rq) == REQ_OP_READ) { - __rq_for_each_bio(bio, cmd->rq) + if (!dev->memory_backed && req_op(rq) == REQ_OP_READ) { + __rq_for_each_bio(bio, rq) zero_fill_bio(bio); } } static inline void nullb_complete_cmd(struct nullb_cmd *cmd) { + struct request *rq = blk_mq_rq_from_pdu(cmd); + /* * Since root privileges are required to configure the null_blk * driver, it is fine that this driver does not initialize the @@ -1425,20 +1306,10 @@ static inline void nullb_complete_cmd(struct nullb_cmd *cmd) /* Complete IO by inline, softirq or timer */ switch (cmd->nq->dev->irqmode) { case NULL_IRQ_SOFTIRQ: - switch (cmd->nq->dev->queue_mode) { - case NULL_Q_MQ: - blk_mq_complete_request(cmd->rq); - break; - case NULL_Q_BIO: - /* - * XXX: no proper submitting cpu information available. - */ - end_cmd(cmd); - break; - } + blk_mq_complete_request(rq); break; case NULL_IRQ_NONE: - end_cmd(cmd); + blk_mq_end_request(rq, cmd->error); break; case NULL_IRQ_TIMER: null_cmd_end_timer(cmd); @@ -1499,7 +1370,7 @@ static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer) return HRTIMER_NORESTART; atomic_long_set(&nullb->cur_bytes, mb_per_tick(mbps)); - null_restart_queue_async(nullb); + blk_mq_start_stopped_hw_queues(nullb->q, true); hrtimer_forward_now(&nullb->bw_timer, timer_interval); @@ -1516,26 +1387,6 @@ static void nullb_setup_bwtimer(struct nullb *nullb) hrtimer_start(&nullb->bw_timer, timer_interval, HRTIMER_MODE_REL); } -static struct nullb_queue *nullb_to_queue(struct nullb *nullb) -{ - int index = 0; - - if (nullb->nr_queues != 1) - index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues); - - return &nullb->queues[index]; -} - -static void null_submit_bio(struct bio *bio) -{ - sector_t sector = bio->bi_iter.bi_sector; - sector_t nr_sectors = bio_sectors(bio); - struct nullb *nullb = bio->bi_bdev->bd_disk->private_data; - struct nullb_queue *nq = nullb_to_queue(nullb); - - null_handle_cmd(alloc_cmd(nq, bio), sector, nr_sectors, bio_op(bio)); -} - #ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION static bool should_timeout_request(struct request *rq) @@ -1655,7 +1506,7 @@ static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) blk_rq_sectors(req)); if (!blk_mq_add_to_batch(req, iob, (__force int) cmd->error, blk_mq_end_request_batch)) - end_cmd(cmd); + blk_mq_end_request(req, cmd->error); nr++; } @@ -1711,7 +1562,6 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); cmd->timer.function = null_cmd_timer_expired; } - cmd->rq = rq; cmd->error = BLK_STS_OK; cmd->nq = nq; cmd->fake_timeout = should_timeout_request(rq) || @@ -1770,34 +1620,8 @@ static void null_queue_rqs(struct request **rqlist) *rqlist = requeue_list; } -static void cleanup_queue(struct nullb_queue *nq) -{ - bitmap_free(nq->tag_map); - kfree(nq->cmds); -} - -static void cleanup_queues(struct nullb *nullb) -{ - int i; - - for (i = 0; i < nullb->nr_queues; i++) - cleanup_queue(&nullb->queues[i]); - - kfree(nullb->queues); -} - -static void null_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) -{ - struct nullb_queue *nq = hctx->driver_data; - struct nullb *nullb = nq->dev->nullb; - - nullb->nr_queues--; -} - static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq) { - init_waitqueue_head(&nq->wait); - nq->queue_depth = nullb->queue_depth; nq->dev = nullb->dev; INIT_LIST_HEAD(&nq->poll_list); spin_lock_init(&nq->poll_lock); @@ -1815,7 +1639,6 @@ static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data, nq = &nullb->queues[hctx_idx]; hctx->driver_data = nq; null_init_queue(nullb, nq); - nullb->nr_queues++; return 0; } @@ -1828,7 +1651,6 @@ static const struct blk_mq_ops null_mq_ops = { .poll = null_poll, .map_queues = null_map_queues, .init_hctx = null_init_hctx, - .exit_hctx = null_exit_hctx, }; static void null_del_dev(struct nullb *nullb) @@ -1849,21 +1671,20 @@ static void null_del_dev(struct nullb *nullb) if (test_bit(NULLB_DEV_FL_THROTTLED, &nullb->dev->flags)) { hrtimer_cancel(&nullb->bw_timer); atomic_long_set(&nullb->cur_bytes, LONG_MAX); - null_restart_queue_async(nullb); + blk_mq_start_stopped_hw_queues(nullb->q, true); } put_disk(nullb->disk); - if (dev->queue_mode == NULL_Q_MQ && - nullb->tag_set == &nullb->__tag_set) + if (nullb->tag_set == &nullb->__tag_set) blk_mq_free_tag_set(nullb->tag_set); - cleanup_queues(nullb); + kfree(nullb->queues); if (null_cache_active(nullb)) null_free_device_storage(nullb->dev, true); kfree(nullb); dev->nullb = NULL; } -static void null_config_discard(struct nullb *nullb) +static void null_config_discard(struct nullb *nullb, struct queue_limits *lim) { if (nullb->dev->discard == false) return; @@ -1880,43 +1701,14 @@ static void null_config_discard(struct nullb *nullb) return; } - blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9); + lim->max_hw_discard_sectors = UINT_MAX >> 9; } -static const struct block_device_operations null_bio_ops = { +static const struct block_device_operations null_ops = { .owner = THIS_MODULE, - .submit_bio = null_submit_bio, .report_zones = null_report_zones, }; -static const struct block_device_operations null_rq_ops = { - .owner = THIS_MODULE, - .report_zones = null_report_zones, -}; - -static int setup_commands(struct nullb_queue *nq) -{ - struct nullb_cmd *cmd; - int i; - - nq->cmds = kcalloc(nq->queue_depth, sizeof(*cmd), GFP_KERNEL); - if (!nq->cmds) - return -ENOMEM; - - nq->tag_map = bitmap_zalloc(nq->queue_depth, GFP_KERNEL); - if (!nq->tag_map) { - kfree(nq->cmds); - return -ENOMEM; - } - - for (i = 0; i < nq->queue_depth; i++) { - cmd = &nq->cmds[i]; - cmd->tag = -1U; - } - - return 0; -} - static int setup_queues(struct nullb *nullb) { int nqueues = nr_cpu_ids; @@ -1929,101 +1721,66 @@ static int setup_queues(struct nullb *nullb) if (!nullb->queues) return -ENOMEM; - nullb->queue_depth = nullb->dev->hw_queue_depth; return 0; } -static int init_driver_queues(struct nullb *nullb) +static int null_init_tag_set(struct blk_mq_tag_set *set, int poll_queues) { - struct nullb_queue *nq; - int i, ret = 0; - - for (i = 0; i < nullb->dev->submit_queues; i++) { - nq = &nullb->queues[i]; - - null_init_queue(nullb, nq); - - ret = setup_commands(nq); - if (ret) - return ret; - nullb->nr_queues++; + set->ops = &null_mq_ops; + set->cmd_size = sizeof(struct nullb_cmd); + set->timeout = 5 * HZ; + set->nr_maps = 1; + if (poll_queues) { + set->nr_hw_queues += poll_queues; + set->nr_maps += 2; } - return 0; + return blk_mq_alloc_tag_set(set); } -static int null_gendisk_register(struct nullb *nullb) +static int null_init_global_tag_set(void) { - sector_t size = ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT; - struct gendisk *disk = nullb->disk; - - set_capacity(disk, size); + int error; - disk->major = null_major; - disk->first_minor = nullb->index; - disk->minors = 1; - if (queue_is_mq(nullb->q)) - disk->fops = &null_rq_ops; - else - disk->fops = &null_bio_ops; - disk->private_data = nullb; - strscpy_pad(disk->disk_name, nullb->disk_name, DISK_NAME_LEN); - - if (nullb->dev->zoned) { - int ret = null_register_zoned_dev(nullb); + if (tag_set.ops) + return 0; - if (ret) - return ret; - } + tag_set.nr_hw_queues = g_submit_queues; + tag_set.queue_depth = g_hw_queue_depth; + tag_set.numa_node = g_home_node; + tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + if (g_no_sched) + tag_set.flags |= BLK_MQ_F_NO_SCHED; + if (g_shared_tag_bitmap) + tag_set.flags |= BLK_MQ_F_TAG_HCTX_SHARED; + if (g_blocking) + tag_set.flags |= BLK_MQ_F_BLOCKING; - return add_disk(disk); + error = null_init_tag_set(&tag_set, g_poll_queues); + if (error) + tag_set.ops = NULL; + return error; } -static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set) +static int null_setup_tagset(struct nullb *nullb) { - unsigned int flags = BLK_MQ_F_SHOULD_MERGE; - int hw_queues, numa_node; - unsigned int queue_depth; - int poll_queues; - - if (nullb) { - hw_queues = nullb->dev->submit_queues; - poll_queues = nullb->dev->poll_queues; - queue_depth = nullb->dev->hw_queue_depth; - numa_node = nullb->dev->home_node; - if (nullb->dev->no_sched) - flags |= BLK_MQ_F_NO_SCHED; - if (nullb->dev->shared_tag_bitmap) - flags |= BLK_MQ_F_TAG_HCTX_SHARED; - if (nullb->dev->blocking) - flags |= BLK_MQ_F_BLOCKING; - } else { - hw_queues = g_submit_queues; - poll_queues = g_poll_queues; - queue_depth = g_hw_queue_depth; - numa_node = g_home_node; - if (g_no_sched) - flags |= BLK_MQ_F_NO_SCHED; - if (g_shared_tag_bitmap) - flags |= BLK_MQ_F_TAG_HCTX_SHARED; - if (g_blocking) - flags |= BLK_MQ_F_BLOCKING; - } - - set->ops = &null_mq_ops; - set->cmd_size = sizeof(struct nullb_cmd); - set->flags = flags; - set->driver_data = nullb; - set->nr_hw_queues = hw_queues; - set->queue_depth = queue_depth; - set->numa_node = numa_node; - if (poll_queues) { - set->nr_hw_queues += poll_queues; - set->nr_maps = 3; - } else { - set->nr_maps = 1; + if (nullb->dev->shared_tags) { + nullb->tag_set = &tag_set; + return null_init_global_tag_set(); } - return blk_mq_alloc_tag_set(set); + nullb->tag_set = &nullb->__tag_set; + nullb->tag_set->driver_data = nullb; + nullb->tag_set->nr_hw_queues = nullb->dev->submit_queues; + nullb->tag_set->queue_depth = nullb->dev->hw_queue_depth; + nullb->tag_set->numa_node = nullb->dev->home_node; + nullb->tag_set->flags = BLK_MQ_F_SHOULD_MERGE; + if (nullb->dev->no_sched) + nullb->tag_set->flags |= BLK_MQ_F_NO_SCHED; + if (nullb->dev->shared_tag_bitmap) + nullb->tag_set->flags |= BLK_MQ_F_TAG_HCTX_SHARED; + if (nullb->dev->blocking) + nullb->tag_set->flags |= BLK_MQ_F_BLOCKING; + return null_init_tag_set(nullb->tag_set, nullb->dev->poll_queues); } static int null_validate_conf(struct nullb_device *dev) @@ -2032,11 +1789,15 @@ static int null_validate_conf(struct nullb_device *dev) pr_err("legacy IO path is no longer available\n"); return -EINVAL; } + if (dev->queue_mode == NULL_Q_BIO) { + pr_err("BIO-based IO path is no longer available, using blk-mq instead.\n"); + dev->queue_mode = NULL_Q_MQ; + } dev->blocksize = round_down(dev->blocksize, 512); dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096); - if (dev->queue_mode == NULL_Q_MQ && dev->use_per_node_hctx) { + if (dev->use_per_node_hctx) { if (dev->submit_queues != nr_online_nodes) dev->submit_queues = nr_online_nodes; } else if (dev->submit_queues > nr_cpu_ids) @@ -2048,8 +1809,6 @@ static int null_validate_conf(struct nullb_device *dev) if (dev->poll_queues > g_poll_queues) dev->poll_queues = g_poll_queues; dev->prev_poll_queues = dev->poll_queues; - - dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ); dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER); /* Do memory allocation, so set blocking */ @@ -2060,9 +1819,6 @@ static int null_validate_conf(struct nullb_device *dev) dev->cache_size = min_t(unsigned long, ULONG_MAX / 1024 / 1024, dev->cache_size); dev->mbps = min_t(unsigned int, 1024 * 40, dev->mbps); - /* can not stop a queue */ - if (dev->queue_mode == NULL_Q_BIO) - dev->mbps = 0; if (dev->zoned && (!dev->zone_size || !is_power_of_2(dev->zone_size))) { @@ -2102,6 +1858,12 @@ static bool null_setup_fault(void) static int null_add_dev(struct nullb_device *dev) { + struct queue_limits lim = { + .logical_block_size = dev->blocksize, + .physical_block_size = dev->blocksize, + .max_hw_sectors = dev->max_sectors, + }; + struct nullb *nullb; int rv; @@ -2123,36 +1885,25 @@ static int null_add_dev(struct nullb_device *dev) if (rv) goto out_free_nullb; - if (dev->queue_mode == NULL_Q_MQ) { - if (shared_tags) { - nullb->tag_set = &tag_set; - rv = 0; - } else { - nullb->tag_set = &nullb->__tag_set; - rv = null_init_tag_set(nullb, nullb->tag_set); - } + rv = null_setup_tagset(nullb); + if (rv) + goto out_cleanup_queues; + if (dev->virt_boundary) + lim.virt_boundary_mask = PAGE_SIZE - 1; + null_config_discard(nullb, &lim); + if (dev->zoned) { + rv = null_init_zoned_dev(dev, &lim); if (rv) - goto out_cleanup_queues; - - nullb->tag_set->timeout = 5 * HZ; - nullb->disk = blk_mq_alloc_disk(nullb->tag_set, nullb); - if (IS_ERR(nullb->disk)) { - rv = PTR_ERR(nullb->disk); goto out_cleanup_tags; - } - nullb->q = nullb->disk->queue; - } else if (dev->queue_mode == NULL_Q_BIO) { - rv = -ENOMEM; - nullb->disk = blk_alloc_disk(nullb->dev->home_node); - if (!nullb->disk) - goto out_cleanup_queues; + } - nullb->q = nullb->disk->queue; - rv = init_driver_queues(nullb); - if (rv) - goto out_cleanup_disk; + nullb->disk = blk_mq_alloc_disk(nullb->tag_set, &lim, nullb); + if (IS_ERR(nullb->disk)) { + rv = PTR_ERR(nullb->disk); + goto out_cleanup_zone; } + nullb->q = nullb->disk->queue; if (dev->mbps) { set_bit(NULLB_DEV_FL_THROTTLED, &dev->flags); @@ -2164,12 +1915,6 @@ static int null_add_dev(struct nullb_device *dev) blk_queue_write_cache(nullb->q, true, true); } - if (dev->zoned) { - rv = null_init_zoned_dev(dev, nullb->q); - if (rv) - goto out_cleanup_disk; - } - nullb->q->queuedata = nullb; blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q); @@ -2177,22 +1922,12 @@ static int null_add_dev(struct nullb_device *dev) rv = ida_alloc(&nullb_indexes, GFP_KERNEL); if (rv < 0) { mutex_unlock(&lock); - goto out_cleanup_zone; + goto out_cleanup_disk; } nullb->index = rv; dev->index = rv; mutex_unlock(&lock); - blk_queue_logical_block_size(nullb->q, dev->blocksize); - blk_queue_physical_block_size(nullb->q, dev->blocksize); - if (dev->max_sectors) - blk_queue_max_hw_sectors(nullb->q, dev->max_sectors); - - if (dev->virt_boundary) - blk_queue_virt_boundary(nullb->q, PAGE_SIZE - 1); - - null_config_discard(nullb); - if (config_item_name(&dev->group.cg_item)) { /* Use configfs dir name as the device name */ snprintf(nullb->disk_name, sizeof(nullb->disk_name), @@ -2201,7 +1936,22 @@ static int null_add_dev(struct nullb_device *dev) sprintf(nullb->disk_name, "nullb%d", nullb->index); } - rv = null_gendisk_register(nullb); + set_capacity(nullb->disk, + ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT); + nullb->disk->major = null_major; + nullb->disk->first_minor = nullb->index; + nullb->disk->minors = 1; + nullb->disk->fops = &null_ops; + nullb->disk->private_data = nullb; + strscpy_pad(nullb->disk->disk_name, nullb->disk_name, DISK_NAME_LEN); + + if (nullb->dev->zoned) { + rv = null_register_zoned_dev(nullb); + if (rv) + goto out_ida_free; + } + + rv = add_disk(nullb->disk); if (rv) goto out_ida_free; @@ -2215,15 +1965,15 @@ static int null_add_dev(struct nullb_device *dev) out_ida_free: ida_free(&nullb_indexes, nullb->index); -out_cleanup_zone: - null_free_zoned_dev(dev); out_cleanup_disk: put_disk(nullb->disk); +out_cleanup_zone: + null_free_zoned_dev(dev); out_cleanup_tags: - if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set) + if (nullb->tag_set == &nullb->__tag_set) blk_mq_free_tag_set(nullb->tag_set); out_cleanup_queues: - cleanup_queues(nullb); + kfree(nullb->queues); out_free_nullb: kfree(nullb); dev->nullb = NULL; @@ -2299,7 +2049,7 @@ static int __init null_init(void) return -EINVAL; } - if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) { + if (g_use_per_node_hctx) { if (g_submit_queues != nr_online_nodes) { pr_warn("submit_queues param is set to %u.\n", nr_online_nodes); @@ -2311,18 +2061,12 @@ static int __init null_init(void) g_submit_queues = 1; } - if (g_queue_mode == NULL_Q_MQ && shared_tags) { - ret = null_init_tag_set(NULL, &tag_set); - if (ret) - return ret; - } - config_group_init(&nullb_subsys.su_group); mutex_init(&nullb_subsys.su_mutex); ret = configfs_register_subsystem(&nullb_subsys); if (ret) - goto err_tagset; + return ret; mutex_init(&lock); @@ -2349,9 +2093,6 @@ err_dev: unregister_blkdev(null_major, "nullb"); err_conf: configfs_unregister_subsystem(&nullb_subsys); -err_tagset: - if (g_queue_mode == NULL_Q_MQ && shared_tags) - blk_mq_free_tag_set(&tag_set); return ret; } @@ -2370,10 +2111,8 @@ static void __exit null_exit(void) } mutex_unlock(&lock); - if (g_queue_mode == NULL_Q_MQ && shared_tags) + if (tag_set.ops) blk_mq_free_tag_set(&tag_set); - - mutex_destroy(&lock); } module_init(null_init); diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h index 929f659dd..477b97746 100644 --- a/drivers/block/null_blk/null_blk.h +++ b/drivers/block/null_blk/null_blk.h @@ -16,11 +16,6 @@ #include struct nullb_cmd { - union { - struct request *rq; - struct bio *bio; - }; - unsigned int tag; blk_status_t error; bool fake_timeout; struct nullb_queue *nq; @@ -28,16 +23,11 @@ struct nullb_cmd { }; struct nullb_queue { - unsigned long *tag_map; - wait_queue_head_t wait; - unsigned int queue_depth; struct nullb_device *dev; unsigned int requeue_selection; struct list_head poll_list; spinlock_t poll_lock; - - struct nullb_cmd *cmds; }; struct nullb_zone { @@ -60,13 +50,6 @@ struct nullb_zone { unsigned int capacity; }; -/* Queue modes */ -enum { - NULL_Q_BIO = 0, - NULL_Q_RQ = 1, - NULL_Q_MQ = 2, -}; - struct nullb_device { struct nullb *nullb; struct config_group group; @@ -119,6 +102,7 @@ struct nullb_device { bool zoned; /* if device is zoned */ bool virt_boundary; /* virtual boundary on/off for the device */ bool no_sched; /* no IO scheduler for the device */ + bool shared_tags; /* share tag set between devices for blk-mq */ bool shared_tag_bitmap; /* use hostwide shared tags */ }; @@ -130,14 +114,12 @@ struct nullb { struct gendisk *disk; struct blk_mq_tag_set *tag_set; struct blk_mq_tag_set __tag_set; - unsigned int queue_depth; atomic_long_t cur_bytes; struct hrtimer bw_timer; unsigned long cache_flush_pos; spinlock_t lock; struct nullb_queue *queues; - unsigned int nr_queues; char disk_name[DISK_NAME_LEN]; }; @@ -147,7 +129,7 @@ blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op, sector_t sector, unsigned int nr_sectors); #ifdef CONFIG_BLK_DEV_ZONED -int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q); +int null_init_zoned_dev(struct nullb_device *dev, struct queue_limits *lim); int null_register_zoned_dev(struct nullb *nullb); void null_free_zoned_dev(struct nullb_device *dev); int null_report_zones(struct gendisk *disk, sector_t sector, @@ -160,7 +142,7 @@ ssize_t zone_cond_store(struct nullb_device *dev, const char *page, size_t count, enum blk_zone_cond cond); #else static inline int null_init_zoned_dev(struct nullb_device *dev, - struct request_queue *q) + struct queue_limits *lim) { pr_err("CONFIG_BLK_DEV_ZONED not enabled\n"); return -EINVAL; diff --git a/drivers/block/null_blk/trace.h b/drivers/block/null_blk/trace.h index 6b2b370e7..ef2d05d5f 100644 --- a/drivers/block/null_blk/trace.h +++ b/drivers/block/null_blk/trace.h @@ -41,10 +41,11 @@ TRACE_EVENT(nullb_zone_op, __field(unsigned int, zone_cond) ), TP_fast_assign( - __entry->op = req_op(cmd->rq); + __entry->op = req_op(blk_mq_rq_from_pdu(cmd)); __entry->zone_no = zone_no; __entry->zone_cond = zone_cond; - __assign_disk_name(__entry->disk, cmd->rq->q->disk); + __assign_disk_name(__entry->disk, + blk_mq_rq_from_pdu(cmd)->q->disk); ), TP_printk("%s req=%-15s zone_no=%u zone_cond=%-10s", __print_disk_name(__entry->disk), diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index 6f5e09948..1689e2584 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -58,7 +58,8 @@ static inline void null_unlock_zone(struct nullb_device *dev, mutex_unlock(&zone->mutex); } -int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) +int null_init_zoned_dev(struct nullb_device *dev, + struct queue_limits *lim) { sector_t dev_capacity_sects, zone_capacity_sects; struct nullb_zone *zone; @@ -151,27 +152,22 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) sector += dev->zone_size_sects; } + lim->zoned = true; + lim->chunk_sectors = dev->zone_size_sects; + lim->max_zone_append_sectors = dev->zone_size_sects; + lim->max_open_zones = dev->zone_max_open; + lim->max_active_zones = dev->zone_max_active; return 0; } int null_register_zoned_dev(struct nullb *nullb) { - struct nullb_device *dev = nullb->dev; struct request_queue *q = nullb->q; - disk_set_zoned(nullb->disk); blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE); - blk_queue_chunk_sectors(q, dev->zone_size_sects); nullb->disk->nr_zones = bdev_nr_zones(nullb->disk->part0); - blk_queue_max_zone_append_sectors(q, dev->zone_size_sects); - disk_set_max_open_zones(nullb->disk, dev->zone_max_open); - disk_set_max_active_zones(nullb->disk, dev->zone_max_active); - - if (queue_is_mq(q)) - return blk_revalidate_disk_zones(nullb->disk, NULL); - - return 0; + return blk_revalidate_disk_zones(nullb->disk, NULL); } void null_free_zoned_dev(struct nullb_device *dev) @@ -394,10 +390,7 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, */ if (append) { sector = zone->wp; - if (dev->queue_mode == NULL_Q_MQ) - cmd->rq->__sector = sector; - else - cmd->bio->bi_iter.bi_sector = sector; + blk_mq_rq_from_pdu(cmd)->__sector = sector; } else if (sector != zone->wp) { ret = BLK_STS_IOERR; goto unlock; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index d56d972aa..21728e9ea 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -340,8 +340,8 @@ static ssize_t device_map_show(const struct class *c, const struct class_attribu n += sysfs_emit_at(data, n, "%s %u:%u %u:%u\n", pd->disk->disk_name, MAJOR(pd->pkt_dev), MINOR(pd->pkt_dev), - MAJOR(pd->bdev_handle->bdev->bd_dev), - MINOR(pd->bdev_handle->bdev->bd_dev)); + MAJOR(file_bdev(pd->bdev_file)->bd_dev), + MINOR(file_bdev(pd->bdev_file)->bd_dev)); } mutex_unlock(&ctl_mutex); return n; @@ -438,7 +438,7 @@ static int pkt_seq_show(struct seq_file *m, void *p) int states[PACKET_NUM_STATES]; seq_printf(m, "Writer %s mapped to %pg:\n", pd->disk->disk_name, - pd->bdev_handle->bdev); + file_bdev(pd->bdev_file)); seq_printf(m, "\nSettings:\n"); seq_printf(m, "\tpacket size:\t\t%dkB\n", pd->settings.size / 2); @@ -715,7 +715,7 @@ static void pkt_rbtree_insert(struct pktcdvd_device *pd, struct pkt_rb_node *nod */ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *cgc) { - struct request_queue *q = bdev_get_queue(pd->bdev_handle->bdev); + struct request_queue *q = bdev_get_queue(file_bdev(pd->bdev_file)); struct scsi_cmnd *scmd; struct request *rq; int ret = 0; @@ -828,6 +828,12 @@ static noinline_for_stack int pkt_set_speed(struct pktcdvd_device *pd, */ static void pkt_queue_bio(struct pktcdvd_device *pd, struct bio *bio) { + /* + * Some CDRW drives can not handle writes larger than one packet, + * even if the size is a multiple of the packet size. + */ + bio->bi_opf |= REQ_NOMERGE; + spin_lock(&pd->iosched.lock); if (bio_data_dir(bio) == READ) bio_list_add(&pd->iosched.read_queue, bio); @@ -1048,7 +1054,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt) continue; bio = pkt->r_bios[f]; - bio_init(bio, pd->bdev_handle->bdev, bio->bi_inline_vecs, 1, + bio_init(bio, file_bdev(pd->bdev_file), bio->bi_inline_vecs, 1, REQ_OP_READ); bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9); bio->bi_end_io = pkt_end_io_read; @@ -1264,7 +1270,7 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) struct device *ddev = disk_to_dev(pd->disk); int f; - bio_init(pkt->w_bio, pd->bdev_handle->bdev, pkt->w_bio->bi_inline_vecs, + bio_init(pkt->w_bio, file_bdev(pd->bdev_file), pkt->w_bio->bi_inline_vecs, pkt->frames, REQ_OP_WRITE); pkt->w_bio->bi_iter.bi_sector = pkt->sector; pkt->w_bio->bi_end_io = pkt_end_io_packet_write; @@ -2162,20 +2168,20 @@ static int pkt_open_dev(struct pktcdvd_device *pd, bool write) int ret; long lba; struct request_queue *q; - struct bdev_handle *bdev_handle; + struct file *bdev_file; /* * We need to re-open the cdrom device without O_NONBLOCK to be able * to read/write from/to it. It is already opened in O_NONBLOCK mode * so open should not fail. */ - bdev_handle = bdev_open_by_dev(pd->bdev_handle->bdev->bd_dev, + bdev_file = bdev_file_open_by_dev(file_bdev(pd->bdev_file)->bd_dev, BLK_OPEN_READ, pd, NULL); - if (IS_ERR(bdev_handle)) { - ret = PTR_ERR(bdev_handle); + if (IS_ERR(bdev_file)) { + ret = PTR_ERR(bdev_file); goto out; } - pd->open_bdev_handle = bdev_handle; + pd->f_open_bdev = bdev_file; ret = pkt_get_last_written(pd, &lba); if (ret) { @@ -2184,18 +2190,13 @@ static int pkt_open_dev(struct pktcdvd_device *pd, bool write) } set_capacity(pd->disk, lba << 2); - set_capacity_and_notify(pd->bdev_handle->bdev->bd_disk, lba << 2); + set_capacity_and_notify(file_bdev(pd->bdev_file)->bd_disk, lba << 2); - q = bdev_get_queue(pd->bdev_handle->bdev); + q = bdev_get_queue(file_bdev(pd->bdev_file)); if (write) { ret = pkt_open_write(pd); if (ret) goto out_putdev; - /* - * Some CDRW drives can not handle writes larger than one packet, - * even if the size is a multiple of the packet size. - */ - blk_queue_max_hw_sectors(q, pd->settings.size); set_bit(PACKET_WRITABLE, &pd->flags); } else { pkt_set_speed(pd, MAX_SPEED, MAX_SPEED); @@ -2218,7 +2219,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, bool write) return 0; out_putdev: - bdev_release(bdev_handle); + fput(bdev_file); out: return ret; } @@ -2237,8 +2238,8 @@ static void pkt_release_dev(struct pktcdvd_device *pd, int flush) pkt_lock_door(pd, 0); pkt_set_speed(pd, MAX_SPEED, MAX_SPEED); - bdev_release(pd->open_bdev_handle); - pd->open_bdev_handle = NULL; + fput(pd->f_open_bdev); + pd->f_open_bdev = NULL; pkt_shrink_pktlist(pd); } @@ -2326,7 +2327,7 @@ static void pkt_end_io_read_cloned(struct bio *bio) static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio) { - struct bio *cloned_bio = bio_alloc_clone(pd->bdev_handle->bdev, bio, + struct bio *cloned_bio = bio_alloc_clone(file_bdev(pd->bdev_file), bio, GFP_NOIO, &pkt_bio_set); struct packet_stacked_data *psd = mempool_alloc(&psd_pool, GFP_NOIO); @@ -2338,9 +2339,9 @@ static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio) pkt_queue_bio(pd, cloned_bio); } -static void pkt_make_request_write(struct request_queue *q, struct bio *bio) +static void pkt_make_request_write(struct bio *bio) { - struct pktcdvd_device *pd = q->queuedata; + struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->private_data; sector_t zone; struct packet_data *pkt; int was_empty, blocked_bio; @@ -2432,7 +2433,7 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio) static void pkt_submit_bio(struct bio *bio) { - struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->queue->queuedata; + struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->private_data; struct device *ddev = disk_to_dev(pd->disk); struct bio *split; @@ -2476,7 +2477,7 @@ static void pkt_submit_bio(struct bio *bio) split = bio; } - pkt_make_request_write(bio->bi_bdev->bd_disk->queue, split); + pkt_make_request_write(split); } while (split != bio); return; @@ -2484,20 +2485,11 @@ end_io: bio_io_error(bio); } -static void pkt_init_queue(struct pktcdvd_device *pd) -{ - struct request_queue *q = pd->disk->queue; - - blk_queue_logical_block_size(q, CD_FRAMESIZE); - blk_queue_max_hw_sectors(q, PACKET_MAX_SECTORS); - q->queuedata = pd; -} - static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) { struct device *ddev = disk_to_dev(pd->disk); int i; - struct bdev_handle *bdev_handle; + struct file *bdev_file; struct scsi_device *sdev; if (pd->pkt_dev == dev) { @@ -2508,9 +2500,9 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) struct pktcdvd_device *pd2 = pkt_devs[i]; if (!pd2) continue; - if (pd2->bdev_handle->bdev->bd_dev == dev) { + if (file_bdev(pd2->bdev_file)->bd_dev == dev) { dev_err(ddev, "%pg already setup\n", - pd2->bdev_handle->bdev); + file_bdev(pd2->bdev_file)); return -EBUSY; } if (pd2->pkt_dev == dev) { @@ -2519,13 +2511,13 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) } } - bdev_handle = bdev_open_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_NDELAY, + bdev_file = bdev_file_open_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_NDELAY, NULL, NULL); - if (IS_ERR(bdev_handle)) - return PTR_ERR(bdev_handle); - sdev = scsi_device_from_queue(bdev_handle->bdev->bd_disk->queue); + if (IS_ERR(bdev_file)) + return PTR_ERR(bdev_file); + sdev = scsi_device_from_queue(file_bdev(bdev_file)->bd_disk->queue); if (!sdev) { - bdev_release(bdev_handle); + fput(bdev_file); return -EINVAL; } put_device(&sdev->sdev_gendev); @@ -2533,10 +2525,8 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) /* This is safe, since we have a reference from open(). */ __module_get(THIS_MODULE); - pd->bdev_handle = bdev_handle; - set_blocksize(bdev_handle->bdev, CD_FRAMESIZE); - - pkt_init_queue(pd); + pd->bdev_file = bdev_file; + set_blocksize(file_bdev(bdev_file), CD_FRAMESIZE); atomic_set(&pd->cdrw.pending_bios, 0); pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->disk->disk_name); @@ -2546,11 +2536,11 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) } proc_create_single_data(pd->disk->disk_name, 0, pkt_proc, pkt_seq_show, pd); - dev_notice(ddev, "writer mapped to %pg\n", bdev_handle->bdev); + dev_notice(ddev, "writer mapped to %pg\n", file_bdev(bdev_file)); return 0; out_mem: - bdev_release(bdev_handle); + fput(bdev_file); /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); return -ENOMEM; @@ -2605,9 +2595,9 @@ static unsigned int pkt_check_events(struct gendisk *disk, if (!pd) return 0; - if (!pd->bdev_handle) + if (!pd->bdev_file) return 0; - attached_disk = pd->bdev_handle->bdev->bd_disk; + attached_disk = file_bdev(pd->bdev_file)->bd_disk; if (!attached_disk || !attached_disk->fops->check_events) return 0; return attached_disk->fops->check_events(attached_disk, clearing); @@ -2634,6 +2624,10 @@ static const struct block_device_operations pktcdvd_ops = { */ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) { + struct queue_limits lim = { + .max_hw_sectors = PACKET_MAX_SECTORS, + .logical_block_size = CD_FRAMESIZE, + }; int idx; int ret = -ENOMEM; struct pktcdvd_device *pd; @@ -2673,10 +2667,11 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) pd->write_congestion_on = write_congestion_on; pd->write_congestion_off = write_congestion_off; - ret = -ENOMEM; - disk = blk_alloc_disk(NUMA_NO_NODE); - if (!disk) + disk = blk_alloc_disk(&lim, NUMA_NO_NODE); + if (IS_ERR(disk)) { + ret = PTR_ERR(disk); goto out_mem; + } pd->disk = disk; disk->major = pktdev_major; disk->first_minor = idx; @@ -2692,7 +2687,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) goto out_mem2; /* inherit events of the host device */ - disk->events = pd->bdev_handle->bdev->bd_disk->events; + disk->events = file_bdev(pd->bdev_file)->bd_disk->events; ret = add_disk(disk); if (ret) @@ -2757,7 +2752,7 @@ static int pkt_remove_dev(dev_t pkt_dev) pkt_debugfs_dev_remove(pd); pkt_sysfs_dev_remove(pd); - bdev_release(pd->bdev_handle); + fput(pd->bdev_file); remove_proc_entry(pd->disk->disk_name, pkt_proc); dev_notice(ddev, "writer unmapped\n"); @@ -2784,7 +2779,7 @@ static void pkt_get_status(struct pkt_ctrl_command *ctrl_cmd) pd = pkt_find_dev_from_minor(ctrl_cmd->dev_index); if (pd) { - ctrl_cmd->dev = new_encode_dev(pd->bdev_handle->bdev->bd_dev); + ctrl_cmd->dev = new_encode_dev(file_bdev(pd->bdev_file)->bd_dev); ctrl_cmd->pkt_dev = new_encode_dev(pd->pkt_dev); } else { ctrl_cmd->dev = 0; diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 36d7b36c6..b810ac0a5 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -382,6 +382,14 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) struct ps3disk_private *priv; int error; unsigned int devidx; + struct queue_limits lim = { + .logical_block_size = dev->blk_size, + .max_hw_sectors = dev->bounce_size >> 9, + .max_segments = -1, + .max_segment_size = dev->bounce_size, + .dma_alignment = dev->blk_size - 1, + }; + struct request_queue *queue; struct gendisk *gendisk; @@ -431,7 +439,7 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) if (error) goto fail_teardown; - gendisk = blk_mq_alloc_disk(&priv->tag_set, dev); + gendisk = blk_mq_alloc_disk(&priv->tag_set, &lim, dev); if (IS_ERR(gendisk)) { dev_err(&dev->sbd.core, "%s:%u: blk_mq_alloc_disk failed\n", __func__, __LINE__); @@ -441,15 +449,8 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) queue = gendisk->queue; - blk_queue_max_hw_sectors(queue, dev->bounce_size >> 9); - blk_queue_dma_alignment(queue, dev->blk_size-1); - blk_queue_logical_block_size(queue, dev->blk_size); - blk_queue_write_cache(queue, true, false); - blk_queue_max_segments(queue, -1); - blk_queue_max_segment_size(queue, dev->bounce_size); - priv->gendisk = gendisk; gendisk->major = ps3disk_major; gendisk->first_minor = devidx * PS3DISK_MINORS; diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index 38d42af01..bdcf083b4 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -730,10 +730,10 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev) ps3vram_proc_init(dev); - gendisk = blk_alloc_disk(NUMA_NO_NODE); - if (!gendisk) { + gendisk = blk_alloc_disk(NULL, NUMA_NO_NODE); + if (IS_ERR(gendisk)) { dev_err(&dev->core, "blk_alloc_disk failed\n"); - error = -ENOMEM; + error = PTR_ERR(gendisk); goto out_cache_cleanup; } diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 12b5d53ec..26ff5cd2b 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -575,7 +575,7 @@ static const struct attribute_group rbd_bus_group = { }; __ATTRIBUTE_GROUPS(rbd_bus); -static struct bus_type rbd_bus_type = { +static const struct bus_type rbd_bus_type = { .name = "rbd", .bus_groups = rbd_bus_groups, }; @@ -4952,6 +4952,14 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) struct request_queue *q; unsigned int objset_bytes = rbd_dev->layout.object_size * rbd_dev->layout.stripe_count; + struct queue_limits lim = { + .max_hw_sectors = objset_bytes >> SECTOR_SHIFT, + .max_user_sectors = objset_bytes >> SECTOR_SHIFT, + .io_min = rbd_dev->opts->alloc_size, + .io_opt = rbd_dev->opts->alloc_size, + .max_segments = USHRT_MAX, + .max_segment_size = UINT_MAX, + }; int err; memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set)); @@ -4966,7 +4974,13 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) if (err) return err; - disk = blk_mq_alloc_disk(&rbd_dev->tag_set, rbd_dev); + if (rbd_dev->opts->trim) { + lim.discard_granularity = rbd_dev->opts->alloc_size; + lim.max_hw_discard_sectors = objset_bytes >> SECTOR_SHIFT; + lim.max_write_zeroes_sectors = objset_bytes >> SECTOR_SHIFT; + } + + disk = blk_mq_alloc_disk(&rbd_dev->tag_set, &lim, rbd_dev); if (IS_ERR(disk)) { err = PTR_ERR(disk); goto out_tag_set; @@ -4987,19 +5001,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) blk_queue_flag_set(QUEUE_FLAG_NONROT, q); /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */ - blk_queue_max_hw_sectors(q, objset_bytes >> SECTOR_SHIFT); - q->limits.max_sectors = queue_max_hw_sectors(q); - blk_queue_max_segments(q, USHRT_MAX); - blk_queue_max_segment_size(q, UINT_MAX); - blk_queue_io_min(q, rbd_dev->opts->alloc_size); - blk_queue_io_opt(q, rbd_dev->opts->alloc_size); - - if (rbd_dev->opts->trim) { - q->limits.discard_granularity = rbd_dev->opts->alloc_size; - blk_queue_max_discard_sectors(q, objset_bytes >> SECTOR_SHIFT); - blk_queue_max_write_zeroes_sectors(q, objset_bytes >> SECTOR_SHIFT); - } - if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q); diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 4044c369d..b7ffe03c6 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1329,43 +1329,6 @@ static void rnbd_init_mq_hw_queues(struct rnbd_clt_dev *dev) } } -static void setup_request_queue(struct rnbd_clt_dev *dev, - struct rnbd_msg_open_rsp *rsp) -{ - blk_queue_logical_block_size(dev->queue, - le16_to_cpu(rsp->logical_block_size)); - blk_queue_physical_block_size(dev->queue, - le16_to_cpu(rsp->physical_block_size)); - blk_queue_max_hw_sectors(dev->queue, - dev->sess->max_io_size / SECTOR_SIZE); - - /* - * we don't support discards to "discontiguous" segments - * in on request - */ - blk_queue_max_discard_segments(dev->queue, 1); - - blk_queue_max_discard_sectors(dev->queue, - le32_to_cpu(rsp->max_discard_sectors)); - dev->queue->limits.discard_granularity = - le32_to_cpu(rsp->discard_granularity); - dev->queue->limits.discard_alignment = - le32_to_cpu(rsp->discard_alignment); - if (le16_to_cpu(rsp->secure_discard)) - blk_queue_max_secure_erase_sectors(dev->queue, - le32_to_cpu(rsp->max_discard_sectors)); - blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, dev->queue); - blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, dev->queue); - blk_queue_max_segments(dev->queue, dev->sess->max_segments); - blk_queue_io_opt(dev->queue, dev->sess->max_io_size); - blk_queue_virt_boundary(dev->queue, SZ_4K - 1); - blk_queue_write_cache(dev->queue, - !!(rsp->cache_policy & RNBD_WRITEBACK), - !!(rsp->cache_policy & RNBD_FUA)); - blk_queue_max_write_zeroes_sectors(dev->queue, - le32_to_cpu(rsp->max_write_zeroes_sectors)); -} - static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, struct rnbd_msg_open_rsp *rsp, int idx) { @@ -1403,18 +1366,41 @@ static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, static int rnbd_client_setup_device(struct rnbd_clt_dev *dev, struct rnbd_msg_open_rsp *rsp) { + struct queue_limits lim = { + .logical_block_size = le16_to_cpu(rsp->logical_block_size), + .physical_block_size = le16_to_cpu(rsp->physical_block_size), + .io_opt = dev->sess->max_io_size, + .max_hw_sectors = dev->sess->max_io_size / SECTOR_SIZE, + .max_hw_discard_sectors = le32_to_cpu(rsp->max_discard_sectors), + .discard_granularity = le32_to_cpu(rsp->discard_granularity), + .discard_alignment = le32_to_cpu(rsp->discard_alignment), + .max_segments = dev->sess->max_segments, + .virt_boundary_mask = SZ_4K - 1, + .max_write_zeroes_sectors = + le32_to_cpu(rsp->max_write_zeroes_sectors), + }; int idx = dev->clt_device_id; dev->size = le64_to_cpu(rsp->nsectors) * le16_to_cpu(rsp->logical_block_size); - dev->gd = blk_mq_alloc_disk(&dev->sess->tag_set, dev); + if (rsp->secure_discard) { + lim.max_secure_erase_sectors = + le32_to_cpu(rsp->max_discard_sectors); + } + + dev->gd = blk_mq_alloc_disk(&dev->sess->tag_set, &lim, dev); if (IS_ERR(dev->gd)) return PTR_ERR(dev->gd); dev->queue = dev->gd->queue; rnbd_init_mq_hw_queues(dev); - setup_request_queue(dev, rsp); + blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, dev->queue); + blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, dev->queue); + blk_queue_write_cache(dev->queue, + !!(rsp->cache_policy & RNBD_WRITEBACK), + !!(rsp->cache_policy & RNBD_FUA)); + return rnbd_clt_setup_gen_disk(dev, rsp, idx); } diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index 3a0d5dcec..f6e3a3c4b 100644 --- a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -145,7 +145,7 @@ static int process_rdma(struct rnbd_srv_session *srv_sess, priv->sess_dev = sess_dev; priv->id = id; - bio = bio_alloc(sess_dev->bdev_handle->bdev, 1, + bio = bio_alloc(file_bdev(sess_dev->bdev_file), 1, rnbd_to_bio_flags(le32_to_cpu(msg->rw)), GFP_KERNEL); if (bio_add_page(bio, virt_to_page(data), datalen, offset_in_page(data)) != datalen) { @@ -219,7 +219,7 @@ void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev, bool keep_id) rnbd_put_sess_dev(sess_dev); wait_for_completion(&dc); /* wait for inflights to drop to zero */ - bdev_release(sess_dev->bdev_handle); + fput(sess_dev->bdev_file); mutex_lock(&sess_dev->dev->lock); list_del(&sess_dev->dev_list); if (!sess_dev->readonly) @@ -534,7 +534,7 @@ rnbd_srv_get_or_create_srv_dev(struct block_device *bdev, static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp, struct rnbd_srv_sess_dev *sess_dev) { - struct block_device *bdev = sess_dev->bdev_handle->bdev; + struct block_device *bdev = file_bdev(sess_dev->bdev_file); rsp->hdr.type = cpu_to_le16(RNBD_MSG_OPEN_RSP); rsp->device_id = cpu_to_le32(sess_dev->device_id); @@ -560,7 +560,7 @@ static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp, static struct rnbd_srv_sess_dev * rnbd_srv_create_set_sess_dev(struct rnbd_srv_session *srv_sess, const struct rnbd_msg_open *open_msg, - struct bdev_handle *handle, bool readonly, + struct file *bdev_file, bool readonly, struct rnbd_srv_dev *srv_dev) { struct rnbd_srv_sess_dev *sdev = rnbd_sess_dev_alloc(srv_sess); @@ -572,7 +572,7 @@ rnbd_srv_create_set_sess_dev(struct rnbd_srv_session *srv_sess, strscpy(sdev->pathname, open_msg->dev_name, sizeof(sdev->pathname)); - sdev->bdev_handle = handle; + sdev->bdev_file = bdev_file; sdev->sess = srv_sess; sdev->dev = srv_dev; sdev->readonly = readonly; @@ -678,7 +678,7 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess, struct rnbd_srv_dev *srv_dev; struct rnbd_srv_sess_dev *srv_sess_dev; const struct rnbd_msg_open *open_msg = msg; - struct bdev_handle *bdev_handle; + struct file *bdev_file; blk_mode_t open_flags = BLK_OPEN_READ; char *full_path; struct rnbd_msg_open_rsp *rsp = data; @@ -716,15 +716,15 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess, goto reject; } - bdev_handle = bdev_open_by_path(full_path, open_flags, NULL, NULL); - if (IS_ERR(bdev_handle)) { - ret = PTR_ERR(bdev_handle); + bdev_file = bdev_file_open_by_path(full_path, open_flags, NULL, NULL); + if (IS_ERR(bdev_file)) { + ret = PTR_ERR(bdev_file); pr_err("Opening device '%s' on session %s failed, failed to open the block device, err: %pe\n", - full_path, srv_sess->sessname, bdev_handle); + full_path, srv_sess->sessname, bdev_file); goto free_path; } - srv_dev = rnbd_srv_get_or_create_srv_dev(bdev_handle->bdev, srv_sess, + srv_dev = rnbd_srv_get_or_create_srv_dev(file_bdev(bdev_file), srv_sess, open_msg->access_mode); if (IS_ERR(srv_dev)) { pr_err("Opening device '%s' on session %s failed, creating srv_dev failed, err: %pe\n", @@ -734,7 +734,7 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess, } srv_sess_dev = rnbd_srv_create_set_sess_dev(srv_sess, open_msg, - bdev_handle, + bdev_file, open_msg->access_mode == RNBD_ACCESS_RO, srv_dev); if (IS_ERR(srv_sess_dev)) { @@ -750,7 +750,7 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess, */ mutex_lock(&srv_dev->lock); if (!srv_dev->dev_kobj.state_in_sysfs) { - ret = rnbd_srv_create_dev_sysfs(srv_dev, bdev_handle->bdev); + ret = rnbd_srv_create_dev_sysfs(srv_dev, file_bdev(bdev_file)); if (ret) { mutex_unlock(&srv_dev->lock); rnbd_srv_err(srv_sess_dev, @@ -793,7 +793,7 @@ srv_dev_put: } rnbd_put_srv_dev(srv_dev); blkdev_put: - bdev_release(bdev_handle); + fput(bdev_file); free_path: kfree(full_path); reject: diff --git a/drivers/block/rnbd/rnbd-srv.h b/drivers/block/rnbd/rnbd-srv.h index 343cc682b..18d873808 100644 --- a/drivers/block/rnbd/rnbd-srv.h +++ b/drivers/block/rnbd/rnbd-srv.h @@ -46,7 +46,7 @@ struct rnbd_srv_dev { struct rnbd_srv_sess_dev { /* Entry inside rnbd_srv_dev struct */ struct list_head dev_list; - struct bdev_handle *bdev_handle; + struct file *bdev_file; struct rnbd_srv_session *sess; struct rnbd_srv_dev *dev; struct kobject kobj; diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 7bf4b48e2..5286cb8e0 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -28,7 +28,7 @@ static char version[] = DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; -MODULE_AUTHOR("David S. Miller (davem@davemloft.net)"); +MODULE_AUTHOR("David S. Miller "); MODULE_DESCRIPTION("Sun LDOM virtual disk client driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_MODULE_VERSION); @@ -784,6 +784,14 @@ static const struct blk_mq_ops vdc_mq_ops = { static int probe_disk(struct vdc_port *port) { + struct queue_limits lim = { + .physical_block_size = port->vdisk_phys_blksz, + .max_hw_sectors = port->max_xfer_size, + /* Each segment in a request is up to an aligned page in size. */ + .seg_boundary_mask = PAGE_SIZE - 1, + .max_segment_size = PAGE_SIZE, + .max_segments = port->ring_cookies, + }; struct request_queue *q; struct gendisk *g; int err; @@ -824,7 +832,7 @@ static int probe_disk(struct vdc_port *port) if (err) return err; - g = blk_mq_alloc_disk(&port->tag_set, port); + g = blk_mq_alloc_disk(&port->tag_set, &lim, port); if (IS_ERR(g)) { printk(KERN_ERR PFX "%s: Could not allocate gendisk.\n", port->vio.name); @@ -835,12 +843,6 @@ static int probe_disk(struct vdc_port *port) port->disk = g; q = g->queue; - /* Each segment in a request is up to an aligned page in size. */ - blk_queue_segment_boundary(q, PAGE_SIZE - 1); - blk_queue_max_segment_size(q, PAGE_SIZE); - - blk_queue_max_segments(q, port->ring_cookies); - blk_queue_max_hw_sectors(q, port->max_xfer_size); g->major = vdc_major; g->first_minor = port->vio.vdev->dev_no << PARTITION_SHIFT; g->minors = 1 << PARTITION_SHIFT; @@ -872,8 +874,6 @@ static int probe_disk(struct vdc_port *port) } } - blk_queue_physical_block_size(q, port->vdisk_phys_blksz); - pr_info(PFX "%s: %u sectors (%u MB) protocol %d.%d\n", g->disk_name, port->vdisk_size, (port->vdisk_size >> (20 - 9)), diff --git a/drivers/block/swim.c b/drivers/block/swim.c index f85b6af41..6731678f3 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -820,7 +820,7 @@ static int swim_floppy_init(struct swim_priv *swd) goto exit_put_disks; swd->unit[drive].disk = - blk_mq_alloc_disk(&swd->unit[drive].tag_set, + blk_mq_alloc_disk(&swd->unit[drive].tag_set, NULL, &swd->unit[drive]); if (IS_ERR(swd->unit[drive].disk)) { blk_mq_free_tag_set(&swd->unit[drive].tag_set); @@ -916,7 +916,7 @@ out: return ret; } -static int swim_remove(struct platform_device *dev) +static void swim_remove(struct platform_device *dev) { struct swim_priv *swd = platform_get_drvdata(dev); int drive; @@ -937,13 +937,11 @@ static int swim_remove(struct platform_device *dev) release_mem_region(res->start, resource_size(res)); kfree(swd); - - return 0; } static struct platform_driver swim_driver = { .probe = swim_probe, - .remove = swim_remove, + .remove_new = swim_remove, .driver = { .name = CARDNAME, }, diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index c2bc85826..a04756ac7 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -1210,7 +1210,7 @@ static int swim3_attach(struct macio_dev *mdev, if (rc) goto out_unregister; - disk = blk_mq_alloc_disk(&fs->tag_set, fs); + disk = blk_mq_alloc_disk(&fs->tag_set, NULL, fs); if (IS_ERR(disk)) { rc = PTR_ERR(disk); goto out_free_tag_set; diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 1dfb2e778..374e4efa8 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -246,21 +246,12 @@ static int ublk_dev_param_zoned_validate(const struct ublk_device *ub) return 0; } -static int ublk_dev_param_zoned_apply(struct ublk_device *ub) +static void ublk_dev_param_zoned_apply(struct ublk_device *ub) { - const struct ublk_param_zoned *p = &ub->params.zoned; - - disk_set_zoned(ub->ub_disk); blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ub->ub_disk->queue); blk_queue_required_elevator_features(ub->ub_disk->queue, ELEVATOR_F_ZBD_SEQ_WRITE); - disk_set_max_active_zones(ub->ub_disk, p->max_active_zones); - disk_set_max_open_zones(ub->ub_disk, p->max_open_zones); - blk_queue_max_zone_append_sectors(ub->ub_disk->queue, p->max_zone_append_sectors); - ub->ub_disk->nr_zones = ublk_get_nr_zones(ub); - - return 0; } /* Based on virtblk_alloc_report_buffer */ @@ -432,9 +423,8 @@ static int ublk_dev_param_zoned_validate(const struct ublk_device *ub) return -EOPNOTSUPP; } -static int ublk_dev_param_zoned_apply(struct ublk_device *ub) +static void ublk_dev_param_zoned_apply(struct ublk_device *ub) { - return -EOPNOTSUPP; } static int ublk_revalidate_disk_zones(struct ublk_device *ub) @@ -498,11 +488,6 @@ static void ublk_dev_param_basic_apply(struct ublk_device *ub) struct request_queue *q = ub->ub_disk->queue; const struct ublk_param_basic *p = &ub->params.basic; - blk_queue_logical_block_size(q, 1 << p->logical_bs_shift); - blk_queue_physical_block_size(q, 1 << p->physical_bs_shift); - blk_queue_io_min(q, 1 << p->io_min_shift); - blk_queue_io_opt(q, 1 << p->io_opt_shift); - blk_queue_write_cache(q, p->attrs & UBLK_ATTR_VOLATILE_CACHE, p->attrs & UBLK_ATTR_FUA); if (p->attrs & UBLK_ATTR_ROTATIONAL) @@ -510,29 +495,12 @@ static void ublk_dev_param_basic_apply(struct ublk_device *ub) else blk_queue_flag_set(QUEUE_FLAG_NONROT, q); - blk_queue_max_hw_sectors(q, p->max_sectors); - blk_queue_chunk_sectors(q, p->chunk_sectors); - blk_queue_virt_boundary(q, p->virt_boundary_mask); - if (p->attrs & UBLK_ATTR_READ_ONLY) set_disk_ro(ub->ub_disk, true); set_capacity(ub->ub_disk, p->dev_sectors); } -static void ublk_dev_param_discard_apply(struct ublk_device *ub) -{ - struct request_queue *q = ub->ub_disk->queue; - const struct ublk_param_discard *p = &ub->params.discard; - - q->limits.discard_alignment = p->discard_alignment; - q->limits.discard_granularity = p->discard_granularity; - blk_queue_max_discard_sectors(q, p->max_discard_sectors); - blk_queue_max_write_zeroes_sectors(q, - p->max_write_zeroes_sectors); - blk_queue_max_discard_segments(q, p->max_discard_segments); -} - static int ublk_validate_params(const struct ublk_device *ub) { /* basic param is the only one which must be set */ @@ -576,20 +544,12 @@ static int ublk_validate_params(const struct ublk_device *ub) return 0; } -static int ublk_apply_params(struct ublk_device *ub) +static void ublk_apply_params(struct ublk_device *ub) { - if (!(ub->params.types & UBLK_PARAM_TYPE_BASIC)) - return -EINVAL; - ublk_dev_param_basic_apply(ub); - if (ub->params.types & UBLK_PARAM_TYPE_DISCARD) - ublk_dev_param_discard_apply(ub); - if (ub->params.types & UBLK_PARAM_TYPE_ZONED) - return ublk_dev_param_zoned_apply(ub); - - return 0; + ublk_dev_param_zoned_apply(ub); } static inline bool ublk_support_user_copy(const struct ublk_queue *ubq) @@ -645,14 +605,16 @@ static inline bool ublk_need_get_data(const struct ublk_queue *ubq) return ubq->flags & UBLK_F_NEED_GET_DATA; } -static struct ublk_device *ublk_get_device(struct ublk_device *ub) +/* Called in slow path only, keep it noinline for trace purpose */ +static noinline struct ublk_device *ublk_get_device(struct ublk_device *ub) { if (kobject_get_unless_zero(&ub->cdev_dev.kobj)) return ub; return NULL; } -static void ublk_put_device(struct ublk_device *ub) +/* Called in slow path only, keep it noinline for trace purpose */ +static noinline void ublk_put_device(struct ublk_device *ub) { put_device(&ub->cdev_dev); } @@ -711,7 +673,7 @@ static void ublk_free_disk(struct gendisk *disk) struct ublk_device *ub = disk->private_data; clear_bit(UB_STATE_USED, &ub->state); - put_device(&ub->cdev_dev); + ublk_put_device(ub); } static void ublk_store_owner_uid_gid(unsigned int *owner_uid, @@ -2182,7 +2144,7 @@ static void ublk_remove(struct ublk_device *ub) cancel_work_sync(&ub->stop_work); cancel_work_sync(&ub->quiesce_work); cdev_device_del(&ub->cdev, &ub->cdev_dev); - put_device(&ub->cdev_dev); + ublk_put_device(ub); ublks_added--; } @@ -2205,12 +2167,48 @@ static struct ublk_device *ublk_get_device_from_id(int idx) static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) { const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); + const struct ublk_param_basic *p = &ub->params.basic; int ublksrv_pid = (int)header->data[0]; + struct queue_limits lim = { + .logical_block_size = 1 << p->logical_bs_shift, + .physical_block_size = 1 << p->physical_bs_shift, + .io_min = 1 << p->io_min_shift, + .io_opt = 1 << p->io_opt_shift, + .max_hw_sectors = p->max_sectors, + .chunk_sectors = p->chunk_sectors, + .virt_boundary_mask = p->virt_boundary_mask, + .max_segments = USHRT_MAX, + .max_segment_size = UINT_MAX, + }; struct gendisk *disk; int ret = -EINVAL; if (ublksrv_pid <= 0) return -EINVAL; + if (!(ub->params.types & UBLK_PARAM_TYPE_BASIC)) + return -EINVAL; + + if (ub->params.types & UBLK_PARAM_TYPE_DISCARD) { + const struct ublk_param_discard *pd = &ub->params.discard; + + lim.discard_alignment = pd->discard_alignment; + lim.discard_granularity = pd->discard_granularity; + lim.max_hw_discard_sectors = pd->max_discard_sectors; + lim.max_write_zeroes_sectors = pd->max_write_zeroes_sectors; + lim.max_discard_segments = pd->max_discard_segments; + } + + if (ub->params.types & UBLK_PARAM_TYPE_ZONED) { + const struct ublk_param_zoned *p = &ub->params.zoned; + + if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) + return -EOPNOTSUPP; + + lim.zoned = true; + lim.max_active_zones = p->max_active_zones; + lim.max_open_zones = p->max_open_zones; + lim.max_zone_append_sectors = p->max_zone_append_sectors; + } if (wait_for_completion_interruptible(&ub->completion) != 0) return -EINTR; @@ -2222,7 +2220,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) goto out_unlock; } - disk = blk_mq_alloc_disk(&ub->tag_set, NULL); + disk = blk_mq_alloc_disk(&ub->tag_set, &lim, NULL); if (IS_ERR(disk)) { ret = PTR_ERR(disk); goto out_unlock; @@ -2234,15 +2232,13 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) ub->dev_info.ublksrv_pid = ublksrv_pid; ub->ub_disk = disk; - ret = ublk_apply_params(ub); - if (ret) - goto out_put_disk; + ublk_apply_params(ub); /* don't probe partitions if any one ubq daemon is un-trusted */ if (ub->nr_privileged_daemon != ub->nr_queues_ready) set_bit(GD_SUPPRESS_PART_SCAN, &disk->state); - get_device(&ub->cdev_dev); + ublk_get_device(ub); ub->dev_info.state = UBLK_S_DEV_LIVE; if (ublk_dev_is_zoned(ub)) { @@ -2262,7 +2258,6 @@ out_put_cdev: ub->dev_info.state = UBLK_S_DEV_DEAD; ublk_put_device(ub); } -out_put_disk: if (ret) put_disk(disk); out_unlock: @@ -2474,7 +2469,7 @@ static inline bool ublk_idr_freed(int id) return ptr == NULL; } -static int ublk_ctrl_del_dev(struct ublk_device **p_ub) +static int ublk_ctrl_del_dev(struct ublk_device **p_ub, bool wait) { struct ublk_device *ub = *p_ub; int idx = ub->ub_number; @@ -2508,7 +2503,7 @@ static int ublk_ctrl_del_dev(struct ublk_device **p_ub) * - the device number is freed already, we will not find this * device via ublk_get_device_from_id() */ - if (wait_event_interruptible(ublk_idr_wq, ublk_idr_freed(idx))) + if (wait && wait_event_interruptible(ublk_idr_wq, ublk_idr_freed(idx))) return -EINTR; return 0; } @@ -2907,7 +2902,10 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, ret = ublk_ctrl_add_dev(cmd); break; case UBLK_CMD_DEL_DEV: - ret = ublk_ctrl_del_dev(&ub); + ret = ublk_ctrl_del_dev(&ub, true); + break; + case UBLK_U_CMD_DEL_DEV_ASYNC: + ret = ublk_ctrl_del_dev(&ub, false); break; case UBLK_CMD_GET_QUEUE_AFFINITY: ret = ublk_ctrl_get_queue_affinity(ub, cmd); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 2bf14a0e2..42dea7601 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -720,25 +720,24 @@ fail_report: return ret; } -static int virtblk_probe_zoned_device(struct virtio_device *vdev, - struct virtio_blk *vblk, - struct request_queue *q) +static int virtblk_read_zoned_limits(struct virtio_blk *vblk, + struct queue_limits *lim) { + struct virtio_device *vdev = vblk->vdev; u32 v, wg; dev_dbg(&vdev->dev, "probing host-managed zoned device\n"); - disk_set_zoned(vblk->disk); - blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); + lim->zoned = true; virtio_cread(vdev, struct virtio_blk_config, zoned.max_open_zones, &v); - disk_set_max_open_zones(vblk->disk, v); + lim->max_open_zones = v; dev_dbg(&vdev->dev, "max open zones = %u\n", v); virtio_cread(vdev, struct virtio_blk_config, zoned.max_active_zones, &v); - disk_set_max_active_zones(vblk->disk, v); + lim->max_active_zones = v; dev_dbg(&vdev->dev, "max active zones = %u\n", v); virtio_cread(vdev, struct virtio_blk_config, @@ -747,8 +746,8 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev, dev_warn(&vdev->dev, "zero write granularity reported\n"); return -ENODEV; } - blk_queue_physical_block_size(q, wg); - blk_queue_io_min(q, wg); + lim->physical_block_size = wg; + lim->io_min = wg; dev_dbg(&vdev->dev, "write granularity = %u\n", wg); @@ -764,13 +763,13 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev, vblk->zone_sectors); return -ENODEV; } - blk_queue_chunk_sectors(q, vblk->zone_sectors); + lim->chunk_sectors = vblk->zone_sectors; dev_dbg(&vdev->dev, "zone sectors = %u\n", vblk->zone_sectors); if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) { dev_warn(&vblk->vdev->dev, "ignoring negotiated F_DISCARD for zoned device\n"); - blk_queue_max_discard_sectors(q, 0); + lim->max_hw_discard_sectors = 0; } virtio_cread(vdev, struct virtio_blk_config, @@ -785,25 +784,21 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev, wg, v); return -ENODEV; } - blk_queue_max_zone_append_sectors(q, v); + lim->max_zone_append_sectors = v; dev_dbg(&vdev->dev, "max append sectors = %u\n", v); - return blk_revalidate_disk_zones(vblk->disk, NULL); + return 0; } - #else - /* - * Zoned block device support is not configured in this kernel. - * Host-managed zoned devices can't be supported, but others are - * good to go as regular block devices. + * Zoned block device support is not configured in this kernel, host-managed + * zoned devices can't be supported. */ #define virtblk_report_zones NULL - -static inline int virtblk_probe_zoned_device(struct virtio_device *vdev, - struct virtio_blk *vblk, struct request_queue *q) +static inline int virtblk_read_zoned_limits(struct virtio_blk *vblk, + struct queue_limits *lim) { - dev_err(&vdev->dev, + dev_err(&vblk->vdev->dev, "virtio_blk: zoned devices are not supported"); return -EOPNOTSUPP; } @@ -1248,31 +1243,17 @@ static const struct blk_mq_ops virtio_mq_ops = { static unsigned int virtblk_queue_depth; module_param_named(queue_depth, virtblk_queue_depth, uint, 0444); -static int virtblk_probe(struct virtio_device *vdev) +static int virtblk_read_limits(struct virtio_blk *vblk, + struct queue_limits *lim) { - struct virtio_blk *vblk; - struct request_queue *q; - int err, index; - + struct virtio_device *vdev = vblk->vdev; u32 v, blk_size, max_size, sg_elems, opt_io_size; u32 max_discard_segs = 0; u32 discard_granularity = 0; u16 min_io_size; u8 physical_block_exp, alignment_offset; - unsigned int queue_depth; size_t max_dma_size; - - if (!vdev->config->get) { - dev_err(&vdev->dev, "%s failure: config access disabled\n", - __func__); - return -EINVAL; - } - - err = ida_alloc_range(&vd_index_ida, 0, - minor_to_index(1 << MINORBITS) - 1, GFP_KERNEL); - if (err < 0) - goto out; - index = err; + int err; /* We need to know how many segments before we allocate. */ err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SEG_MAX, @@ -1286,78 +1267,11 @@ static int virtblk_probe(struct virtio_device *vdev) /* Prevent integer overflows and honor max vq size */ sg_elems = min_t(u32, sg_elems, VIRTIO_BLK_MAX_SG_ELEMS - 2); - vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL); - if (!vblk) { - err = -ENOMEM; - goto out_free_index; - } - - mutex_init(&vblk->vdev_mutex); - - vblk->vdev = vdev; - - INIT_WORK(&vblk->config_work, virtblk_config_changed_work); - - err = init_vq(vblk); - if (err) - goto out_free_vblk; - - /* Default queue sizing is to fill the ring. */ - if (!virtblk_queue_depth) { - queue_depth = vblk->vqs[0].vq->num_free; - /* ... but without indirect descs, we use 2 descs per req */ - if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) - queue_depth /= 2; - } else { - queue_depth = virtblk_queue_depth; - } - - memset(&vblk->tag_set, 0, sizeof(vblk->tag_set)); - vblk->tag_set.ops = &virtio_mq_ops; - vblk->tag_set.queue_depth = queue_depth; - vblk->tag_set.numa_node = NUMA_NO_NODE; - vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; - vblk->tag_set.cmd_size = - sizeof(struct virtblk_req) + - sizeof(struct scatterlist) * VIRTIO_BLK_INLINE_SG_CNT; - vblk->tag_set.driver_data = vblk; - vblk->tag_set.nr_hw_queues = vblk->num_vqs; - vblk->tag_set.nr_maps = 1; - if (vblk->io_queues[HCTX_TYPE_POLL]) - vblk->tag_set.nr_maps = 3; - - err = blk_mq_alloc_tag_set(&vblk->tag_set); - if (err) - goto out_free_vq; - - vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, vblk); - if (IS_ERR(vblk->disk)) { - err = PTR_ERR(vblk->disk); - goto out_free_tags; - } - q = vblk->disk->queue; - - virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); - - vblk->disk->major = major; - vblk->disk->first_minor = index_to_minor(index); - vblk->disk->minors = 1 << PART_BITS; - vblk->disk->private_data = vblk; - vblk->disk->fops = &virtblk_fops; - vblk->index = index; - - /* configure queue flush support */ - virtblk_update_cache_mode(vdev); - - /* If disk is read-only in the host, the guest should obey */ - if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) - set_disk_ro(vblk->disk, 1); - /* We can handle whatever the host told us to handle. */ - blk_queue_max_segments(q, sg_elems); + lim->max_segments = sg_elems; /* No real sector limit. */ - blk_queue_max_hw_sectors(q, UINT_MAX); + lim->max_hw_sectors = UINT_MAX; max_dma_size = virtio_max_dma_size(vdev); max_size = max_dma_size > U32_MAX ? U32_MAX : max_dma_size; @@ -1369,7 +1283,7 @@ static int virtblk_probe(struct virtio_device *vdev) if (!err) max_size = min(max_size, v); - blk_queue_max_segment_size(q, max_size); + lim->max_segment_size = max_size; /* Host can optionally specify the block size of the device */ err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE, @@ -1381,38 +1295,37 @@ static int virtblk_probe(struct virtio_device *vdev) dev_err(&vdev->dev, "virtio_blk: invalid block size: 0x%x\n", blk_size); - goto out_cleanup_disk; + return err; } - blk_queue_logical_block_size(q, blk_size); + lim->logical_block_size = blk_size; } else - blk_size = queue_logical_block_size(q); + blk_size = lim->logical_block_size; /* Use topology information if available */ err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, struct virtio_blk_config, physical_block_exp, &physical_block_exp); if (!err && physical_block_exp) - blk_queue_physical_block_size(q, - blk_size * (1 << physical_block_exp)); + lim->physical_block_size = blk_size * (1 << physical_block_exp); err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, struct virtio_blk_config, alignment_offset, &alignment_offset); if (!err && alignment_offset) - blk_queue_alignment_offset(q, blk_size * alignment_offset); + lim->alignment_offset = blk_size * alignment_offset; err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, struct virtio_blk_config, min_io_size, &min_io_size); if (!err && min_io_size) - blk_queue_io_min(q, blk_size * min_io_size); + lim->io_min = blk_size * min_io_size; err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, struct virtio_blk_config, opt_io_size, &opt_io_size); if (!err && opt_io_size) - blk_queue_io_opt(q, blk_size * opt_io_size); + lim->io_opt = blk_size * opt_io_size; if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) { virtio_cread(vdev, struct virtio_blk_config, @@ -1420,7 +1333,7 @@ static int virtblk_probe(struct virtio_device *vdev) virtio_cread(vdev, struct virtio_blk_config, max_discard_sectors, &v); - blk_queue_max_discard_sectors(q, v ? v : UINT_MAX); + lim->max_hw_discard_sectors = v ? v : UINT_MAX; virtio_cread(vdev, struct virtio_blk_config, max_discard_seg, &max_discard_segs); @@ -1429,7 +1342,7 @@ static int virtblk_probe(struct virtio_device *vdev) if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) { virtio_cread(vdev, struct virtio_blk_config, max_write_zeroes_sectors, &v); - blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX); + lim->max_write_zeroes_sectors = v ? v : UINT_MAX; } /* The discard and secure erase limits are combined since the Linux @@ -1455,8 +1368,7 @@ static int virtblk_probe(struct virtio_device *vdev) if (!v) { dev_err(&vdev->dev, "virtio_blk: secure_erase_sector_alignment can't be 0\n"); - err = -EINVAL; - goto out_cleanup_disk; + return -EINVAL; } discard_granularity = min_not_zero(discard_granularity, v); @@ -1470,11 +1382,10 @@ static int virtblk_probe(struct virtio_device *vdev) if (!v) { dev_err(&vdev->dev, "virtio_blk: max_secure_erase_sectors can't be 0\n"); - err = -EINVAL; - goto out_cleanup_disk; + return -EINVAL; } - blk_queue_max_secure_erase_sectors(q, v); + lim->max_secure_erase_sectors = v; virtio_cread(vdev, struct virtio_blk_config, max_secure_erase_seg, &v); @@ -1485,8 +1396,7 @@ static int virtblk_probe(struct virtio_device *vdev) if (!v) { dev_err(&vdev->dev, "virtio_blk: max_secure_erase_seg can't be 0\n"); - err = -EINVAL; - goto out_cleanup_disk; + return -EINVAL; } max_discard_segs = min_not_zero(max_discard_segs, v); @@ -1502,45 +1412,142 @@ static int virtblk_probe(struct virtio_device *vdev) if (!max_discard_segs) max_discard_segs = sg_elems; - blk_queue_max_discard_segments(q, - min(max_discard_segs, MAX_DISCARD_SEGMENTS)); + lim->max_discard_segments = + min(max_discard_segs, MAX_DISCARD_SEGMENTS); if (discard_granularity) - q->limits.discard_granularity = discard_granularity << SECTOR_SHIFT; + lim->discard_granularity = + discard_granularity << SECTOR_SHIFT; else - q->limits.discard_granularity = blk_size; + lim->discard_granularity = blk_size; } - virtblk_update_capacity(vblk, false); - virtio_device_ready(vdev); - - /* - * All steps that follow use the VQs therefore they need to be - * placed after the virtio_device_ready() call above. - */ if (virtio_has_feature(vdev, VIRTIO_BLK_F_ZONED)) { u8 model; - virtio_cread(vdev, struct virtio_blk_config, zoned.model, - &model); + virtio_cread(vdev, struct virtio_blk_config, zoned.model, &model); switch (model) { case VIRTIO_BLK_Z_NONE: case VIRTIO_BLK_Z_HA: - /* Present the host-aware device as non-zoned */ - break; + /* treat host-aware devices as non-zoned */ + return 0; case VIRTIO_BLK_Z_HM: - err = virtblk_probe_zoned_device(vdev, vblk, q); + err = virtblk_read_zoned_limits(vblk, lim); if (err) - goto out_cleanup_disk; + return err; break; default: - dev_err(&vdev->dev, "unsupported zone model %d\n", - model); - err = -EINVAL; - goto out_cleanup_disk; + dev_err(&vdev->dev, "unsupported zone model %d\n", model); + return -EINVAL; } } + return 0; +} + +static int virtblk_probe(struct virtio_device *vdev) +{ + struct virtio_blk *vblk; + struct queue_limits lim = { }; + int err, index; + unsigned int queue_depth; + + if (!vdev->config->get) { + dev_err(&vdev->dev, "%s failure: config access disabled\n", + __func__); + return -EINVAL; + } + + err = ida_alloc_range(&vd_index_ida, 0, + minor_to_index(1 << MINORBITS) - 1, GFP_KERNEL); + if (err < 0) + goto out; + index = err; + + vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL); + if (!vblk) { + err = -ENOMEM; + goto out_free_index; + } + + mutex_init(&vblk->vdev_mutex); + + vblk->vdev = vdev; + + INIT_WORK(&vblk->config_work, virtblk_config_changed_work); + + err = init_vq(vblk); + if (err) + goto out_free_vblk; + + /* Default queue sizing is to fill the ring. */ + if (!virtblk_queue_depth) { + queue_depth = vblk->vqs[0].vq->num_free; + /* ... but without indirect descs, we use 2 descs per req */ + if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) + queue_depth /= 2; + } else { + queue_depth = virtblk_queue_depth; + } + + memset(&vblk->tag_set, 0, sizeof(vblk->tag_set)); + vblk->tag_set.ops = &virtio_mq_ops; + vblk->tag_set.queue_depth = queue_depth; + vblk->tag_set.numa_node = NUMA_NO_NODE; + vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + vblk->tag_set.cmd_size = + sizeof(struct virtblk_req) + + sizeof(struct scatterlist) * VIRTIO_BLK_INLINE_SG_CNT; + vblk->tag_set.driver_data = vblk; + vblk->tag_set.nr_hw_queues = vblk->num_vqs; + vblk->tag_set.nr_maps = 1; + if (vblk->io_queues[HCTX_TYPE_POLL]) + vblk->tag_set.nr_maps = 3; + + err = blk_mq_alloc_tag_set(&vblk->tag_set); + if (err) + goto out_free_vq; + + err = virtblk_read_limits(vblk, &lim); + if (err) + goto out_free_tags; + + vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, &lim, vblk); + if (IS_ERR(vblk->disk)) { + err = PTR_ERR(vblk->disk); + goto out_free_tags; + } + + virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); + + vblk->disk->major = major; + vblk->disk->first_minor = index_to_minor(index); + vblk->disk->minors = 1 << PART_BITS; + vblk->disk->private_data = vblk; + vblk->disk->fops = &virtblk_fops; + vblk->index = index; + + /* configure queue flush support */ + virtblk_update_cache_mode(vdev); + + /* If disk is read-only in the host, the guest should obey */ + if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) + set_disk_ro(vblk->disk, 1); + + virtblk_update_capacity(vblk, false); + virtio_device_ready(vdev); + + /* + * All steps that follow use the VQs therefore they need to be + * placed after the virtio_device_ready() call above. + */ + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && lim.zoned) { + blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, vblk->disk->queue); + err = blk_revalidate_disk_zones(vblk->disk, NULL); + if (err) + goto out_cleanup_disk; + } + err = device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); if (err) goto out_cleanup_disk; diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 4defd7f38..944576d58 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -465,7 +465,7 @@ static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif, } req->dev = vbd->pdevice; - req->bdev = vbd->bdev_handle->bdev; + req->bdev = file_bdev(vbd->bdev_file); rc = 0; out: @@ -969,7 +969,7 @@ static int dispatch_discard_io(struct xen_blkif_ring *ring, int err = 0; int status = BLKIF_RSP_OKAY; struct xen_blkif *blkif = ring->blkif; - struct block_device *bdev = blkif->vbd.bdev_handle->bdev; + struct block_device *bdev = file_bdev(blkif->vbd.bdev_file); struct phys_req preq; xen_blkif_get(blkif); diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 1432c8318..b427d54bc 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -221,7 +221,7 @@ struct xen_vbd { unsigned char type; /* phys device that this vbd maps to. */ u32 pdevice; - struct bdev_handle *bdev_handle; + struct file *bdev_file; /* Cached size parameter. */ sector_t size; unsigned int flush_support:1; @@ -360,7 +360,7 @@ struct pending_req { }; -#define vbd_sz(_v) bdev_nr_sectors((_v)->bdev_handle->bdev) +#define vbd_sz(_v) bdev_nr_sectors(file_bdev((_v)->bdev_file)) #define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt)) #define xen_blkif_put(_b) \ diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index e34219ea2..062187894 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -81,7 +81,7 @@ static void xen_update_blkif_status(struct xen_blkif *blkif) int i; /* Not ready to connect? */ - if (!blkif->rings || !blkif->rings[0].irq || !blkif->vbd.bdev_handle) + if (!blkif->rings || !blkif->rings[0].irq || !blkif->vbd.bdev_file) return; /* Already connected? */ @@ -99,13 +99,12 @@ static void xen_update_blkif_status(struct xen_blkif *blkif) return; } - err = sync_blockdev(blkif->vbd.bdev_handle->bdev); + err = sync_blockdev(file_bdev(blkif->vbd.bdev_file)); if (err) { xenbus_dev_error(blkif->be->dev, err, "block flush"); return; } - invalidate_inode_pages2( - blkif->vbd.bdev_handle->bdev->bd_inode->i_mapping); + invalidate_inode_pages2(blkif->vbd.bdev_file->f_mapping); for (i = 0; i < blkif->nr_rings; i++) { ring = &blkif->rings[i]; @@ -473,9 +472,9 @@ static void xenvbd_sysfs_delif(struct xenbus_device *dev) static void xen_vbd_free(struct xen_vbd *vbd) { - if (vbd->bdev_handle) - bdev_release(vbd->bdev_handle); - vbd->bdev_handle = NULL; + if (vbd->bdev_file) + fput(vbd->bdev_file); + vbd->bdev_file = NULL; } static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, @@ -483,7 +482,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, int cdrom) { struct xen_vbd *vbd; - struct bdev_handle *bdev_handle; + struct file *bdev_file; vbd = &blkif->vbd; vbd->handle = handle; @@ -492,17 +491,17 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, vbd->pdevice = MKDEV(major, minor); - bdev_handle = bdev_open_by_dev(vbd->pdevice, vbd->readonly ? + bdev_file = bdev_file_open_by_dev(vbd->pdevice, vbd->readonly ? BLK_OPEN_READ : BLK_OPEN_WRITE, NULL, NULL); - if (IS_ERR(bdev_handle)) { + if (IS_ERR(bdev_file)) { pr_warn("xen_vbd_create: device %08x could not be opened\n", vbd->pdevice); return -ENOENT; } - vbd->bdev_handle = bdev_handle; - if (vbd->bdev_handle->bdev->bd_disk == NULL) { + vbd->bdev_file = bdev_file; + if (file_bdev(vbd->bdev_file)->bd_disk == NULL) { pr_warn("xen_vbd_create: device %08x doesn't exist\n", vbd->pdevice); xen_vbd_free(vbd); @@ -510,14 +509,14 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, } vbd->size = vbd_sz(vbd); - if (cdrom || disk_to_cdi(vbd->bdev_handle->bdev->bd_disk)) + if (cdrom || disk_to_cdi(file_bdev(vbd->bdev_file)->bd_disk)) vbd->type |= VDISK_CDROM; - if (vbd->bdev_handle->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) + if (file_bdev(vbd->bdev_file)->bd_disk->flags & GENHD_FL_REMOVABLE) vbd->type |= VDISK_REMOVABLE; - if (bdev_write_cache(bdev_handle->bdev)) + if (bdev_write_cache(file_bdev(bdev_file))) vbd->flush_support = true; - if (bdev_max_secure_erase_sectors(bdev_handle->bdev)) + if (bdev_max_secure_erase_sectors(file_bdev(bdev_file))) vbd->discard_secure = true; pr_debug("Successful creation of handle=%04x (dom=%u)\n", @@ -570,7 +569,7 @@ static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info struct xen_blkif *blkif = be->blkif; int err; int state = 0; - struct block_device *bdev = be->blkif->vbd.bdev_handle->bdev; + struct block_device *bdev = file_bdev(be->blkif->vbd.bdev_file); if (!xenbus_read_unsigned(dev->nodename, "discard-enable", 1)) return; @@ -932,7 +931,7 @@ again: } err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu", (unsigned long)bdev_logical_block_size( - be->blkif->vbd.bdev_handle->bdev)); + file_bdev(be->blkif->vbd.bdev_file))); if (err) { xenbus_dev_fatal(dev, err, "writing %s/sector-size", dev->nodename); @@ -940,7 +939,7 @@ again: } err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u", bdev_physical_block_size( - be->blkif->vbd.bdev_handle->bdev)); + file_bdev(be->blkif->vbd.bdev_file))); if (err) xenbus_dev_error(dev, err, "writing %s/physical-sector-size", dev->nodename); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 434fab306..fd7c0ff21 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -941,39 +941,35 @@ static const struct blk_mq_ops blkfront_mq_ops = { .complete = blkif_complete_rq, }; -static void blkif_set_queue_limits(struct blkfront_info *info) +static void blkif_set_queue_limits(const struct blkfront_info *info, + struct queue_limits *lim) { - struct request_queue *rq = info->rq; - struct gendisk *gd = info->gd; unsigned int segments = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST; - blk_queue_flag_set(QUEUE_FLAG_VIRT, rq); - if (info->feature_discard) { - blk_queue_max_discard_sectors(rq, get_capacity(gd)); - rq->limits.discard_granularity = info->discard_granularity ?: - info->physical_sector_size; - rq->limits.discard_alignment = info->discard_alignment; + lim->max_hw_discard_sectors = UINT_MAX; + if (info->discard_granularity) + lim->discard_granularity = info->discard_granularity; + lim->discard_alignment = info->discard_alignment; if (info->feature_secdiscard) - blk_queue_max_secure_erase_sectors(rq, - get_capacity(gd)); + lim->max_secure_erase_sectors = UINT_MAX; } /* Hard sector size and max sectors impersonate the equiv. hardware. */ - blk_queue_logical_block_size(rq, info->sector_size); - blk_queue_physical_block_size(rq, info->physical_sector_size); - blk_queue_max_hw_sectors(rq, (segments * XEN_PAGE_SIZE) / 512); + lim->logical_block_size = info->sector_size; + lim->physical_block_size = info->physical_sector_size; + lim->max_hw_sectors = (segments * XEN_PAGE_SIZE) / 512; /* Each segment in a request is up to an aligned page in size. */ - blk_queue_segment_boundary(rq, PAGE_SIZE - 1); - blk_queue_max_segment_size(rq, PAGE_SIZE); + lim->seg_boundary_mask = PAGE_SIZE - 1; + lim->max_segment_size = PAGE_SIZE; /* Ensure a merged request will fit in a single I/O ring slot. */ - blk_queue_max_segments(rq, segments / GRANTS_PER_PSEG); + lim->max_segments = segments / GRANTS_PER_PSEG; /* Make sure buffer addresses are sector-aligned. */ - blk_queue_dma_alignment(rq, 511); + lim->dma_alignment = 511; } static const char *flush_info(struct blkfront_info *info) @@ -1070,6 +1066,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, struct blkfront_info *info, u16 sector_size, unsigned int physical_sector_size) { + struct queue_limits lim = {}; struct gendisk *gd; int nr_minors = 1; int err; @@ -1136,11 +1133,13 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, if (err) goto out_release_minors; - gd = blk_mq_alloc_disk(&info->tag_set, info); + blkif_set_queue_limits(info, &lim); + gd = blk_mq_alloc_disk(&info->tag_set, &lim, info); if (IS_ERR(gd)) { err = PTR_ERR(gd); goto out_free_tag_set; } + blk_queue_flag_set(QUEUE_FLAG_VIRT, gd->queue); strcpy(gd->disk_name, DEV_NAME); ptr = encode_disk_name(gd->disk_name + sizeof(DEV_NAME) - 1, offset); @@ -1162,7 +1161,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, info->gd = gd; info->sector_size = sector_size; info->physical_sector_size = physical_sector_size; - blkif_set_queue_limits(info); xlvbd_flush(info); @@ -2006,18 +2004,19 @@ static int blkfront_probe(struct xenbus_device *dev, static int blkif_recover(struct blkfront_info *info) { + struct queue_limits lim; unsigned int r_index; struct request *req, *n; int rc; struct bio *bio; - unsigned int segs; struct blkfront_ring_info *rinfo; + lim = queue_limits_start_update(info->rq); blkfront_gather_backend_features(info); - /* Reset limits changed by blk_mq_update_nr_hw_queues(). */ - blkif_set_queue_limits(info); - segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST; - blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG); + blkif_set_queue_limits(info, &lim); + rc = queue_limits_commit_update(info->rq, &lim); + if (rc) + return rc; for_each_rinfo(info, rinfo, r_index) { rc = blkfront_setup_indirect(rinfo); @@ -2037,7 +2036,9 @@ static int blkif_recover(struct blkfront_info *info) list_for_each_entry_safe(req, n, &info->requests, queuelist) { /* Requeue pending requests (flush or discard) */ list_del_init(&req->queuelist); - BUG_ON(req->nr_phys_segments > segs); + BUG_ON(req->nr_phys_segments > + (info->max_indirect_segments ? : + BLKIF_MAX_SEGMENTS_PER_REQUEST)); blk_mq_requeue_request(req, false); } blk_mq_start_stopped_hw_queues(info->rq, true); diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index 11493167b..7c5f4e4d9 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c @@ -318,7 +318,7 @@ static int z2ram_register_disk(int minor) struct gendisk *disk; int err; - disk = blk_mq_alloc_disk(&tag_set, NULL); + disk = blk_mq_alloc_disk(&tag_set, NULL, NULL); if (IS_ERR(disk)) return PTR_ERR(disk); diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index 55af4efd7..8237b08c4 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "zcomp.h" @@ -37,7 +38,7 @@ static void zcomp_strm_free(struct zcomp_strm *zstrm) { if (!IS_ERR_OR_NULL(zstrm->tfm)) crypto_free_comp(zstrm->tfm); - free_pages((unsigned long)zstrm->buffer, 1); + vfree(zstrm->buffer); zstrm->tfm = NULL; zstrm->buffer = NULL; } @@ -53,7 +54,7 @@ static int zcomp_strm_init(struct zcomp_strm *zstrm, struct zcomp *comp) * allocate 2 pages. 1 for compressed data, plus 1 extra for the * case when compressed size is larger than the original one */ - zstrm->buffer = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); + zstrm->buffer = vzalloc(2 * PAGE_SIZE); if (IS_ERR_OR_NULL(zstrm->tfm) || !zstrm->buffer) { zcomp_strm_free(zstrm); return -ENOMEM; diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h index cdefdef93..e9fe63da0 100644 --- a/drivers/block/zram/zcomp.h +++ b/drivers/block/zram/zcomp.h @@ -39,5 +39,4 @@ int zcomp_compress(struct zcomp_strm *zstrm, int zcomp_decompress(struct zcomp_strm *zstrm, const void *src, unsigned int src_len, void *dst); -bool zcomp_set_max_streams(struct zcomp *comp, int num_strm); #endif /* _ZCOMP_H_ */ diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 6772e0c65..f0639df6c 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -426,11 +426,11 @@ static void reset_bdev(struct zram *zram) if (!zram->backing_dev) return; - bdev_release(zram->bdev_handle); + fput(zram->bdev_file); /* hope filp_close flush all of IO */ filp_close(zram->backing_dev, NULL); zram->backing_dev = NULL; - zram->bdev_handle = NULL; + zram->bdev_file = NULL; zram->disk->fops = &zram_devops; kvfree(zram->bitmap); zram->bitmap = NULL; @@ -476,7 +476,7 @@ static ssize_t backing_dev_store(struct device *dev, struct address_space *mapping; unsigned int bitmap_sz; unsigned long nr_pages, *bitmap = NULL; - struct bdev_handle *bdev_handle = NULL; + struct file *bdev_file = NULL; int err; struct zram *zram = dev_to_zram(dev); @@ -513,11 +513,11 @@ static ssize_t backing_dev_store(struct device *dev, goto out; } - bdev_handle = bdev_open_by_dev(inode->i_rdev, + bdev_file = bdev_file_open_by_dev(inode->i_rdev, BLK_OPEN_READ | BLK_OPEN_WRITE, zram, NULL); - if (IS_ERR(bdev_handle)) { - err = PTR_ERR(bdev_handle); - bdev_handle = NULL; + if (IS_ERR(bdev_file)) { + err = PTR_ERR(bdev_file); + bdev_file = NULL; goto out; } @@ -531,7 +531,7 @@ static ssize_t backing_dev_store(struct device *dev, reset_bdev(zram); - zram->bdev_handle = bdev_handle; + zram->bdev_file = bdev_file; zram->backing_dev = backing_dev; zram->bitmap = bitmap; zram->nr_pages = nr_pages; @@ -544,8 +544,8 @@ static ssize_t backing_dev_store(struct device *dev, out: kvfree(bitmap); - if (bdev_handle) - bdev_release(bdev_handle); + if (bdev_file) + fput(bdev_file); if (backing_dev) filp_close(backing_dev, NULL); @@ -587,7 +587,7 @@ static void read_from_bdev_async(struct zram *zram, struct page *page, { struct bio *bio; - bio = bio_alloc(zram->bdev_handle->bdev, 1, parent->bi_opf, GFP_NOIO); + bio = bio_alloc(file_bdev(zram->bdev_file), 1, parent->bi_opf, GFP_NOIO); bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); __bio_add_page(bio, page, PAGE_SIZE, 0); bio_chain(bio, parent); @@ -703,7 +703,7 @@ static ssize_t writeback_store(struct device *dev, continue; } - bio_init(&bio, zram->bdev_handle->bdev, &bio_vec, 1, + bio_init(&bio, file_bdev(zram->bdev_file), &bio_vec, 1, REQ_OP_WRITE | REQ_SYNC); bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9); __bio_add_page(&bio, page, PAGE_SIZE, 0); @@ -785,7 +785,7 @@ static void zram_sync_read(struct work_struct *work) struct bio_vec bv; struct bio bio; - bio_init(&bio, zw->zram->bdev_handle->bdev, &bv, 1, REQ_OP_READ); + bio_init(&bio, file_bdev(zw->zram->bdev_file), &bv, 1, REQ_OP_READ); bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9); __bio_add_page(&bio, zw->page, PAGE_SIZE, 0); zw->error = submit_bio_wait(&bio); @@ -1337,7 +1337,7 @@ static int zram_read_from_zspool(struct zram *zram, struct page *page, src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); if (size == PAGE_SIZE) { dst = kmap_local_page(page); - memcpy(dst, src, PAGE_SIZE); + copy_page(dst, src); kunmap_local(dst); ret = 0; } else { @@ -2177,6 +2177,28 @@ ATTRIBUTE_GROUPS(zram_disk); */ static int zram_add(void) { + struct queue_limits lim = { + .logical_block_size = ZRAM_LOGICAL_BLOCK_SIZE, + /* + * To ensure that we always get PAGE_SIZE aligned and + * n*PAGE_SIZED sized I/O requests. + */ + .physical_block_size = PAGE_SIZE, + .io_min = PAGE_SIZE, + .io_opt = PAGE_SIZE, + .max_hw_discard_sectors = UINT_MAX, + /* + * zram_bio_discard() will clear all logical blocks if logical + * block size is identical with physical block size(PAGE_SIZE). + * But if it is different, we will skip discarding some parts of + * logical blocks in the part of the request range which isn't + * aligned to physical block size. So we can't ensure that all + * discarded logical blocks are zeroed. + */ +#if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE + .max_write_zeroes_sectors = UINT_MAX, +#endif + }; struct zram *zram; int ret, device_id; @@ -2195,11 +2217,11 @@ static int zram_add(void) #endif /* gendisk structure */ - zram->disk = blk_alloc_disk(NUMA_NO_NODE); - if (!zram->disk) { + zram->disk = blk_alloc_disk(&lim, NUMA_NO_NODE); + if (IS_ERR(zram->disk)) { pr_err("Error allocating disk structure for device %d\n", device_id); - ret = -ENOMEM; + ret = PTR_ERR(zram->disk); goto out_free_idr; } @@ -2216,29 +2238,6 @@ static int zram_add(void) /* zram devices sort of resembles non-rotational disks */ blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue); blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, zram->disk->queue); - - /* - * To ensure that we always get PAGE_SIZE aligned - * and n*PAGE_SIZED sized I/O requests. - */ - blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE); - blk_queue_logical_block_size(zram->disk->queue, - ZRAM_LOGICAL_BLOCK_SIZE); - blk_queue_io_min(zram->disk->queue, PAGE_SIZE); - blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); - blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); - - /* - * zram_bio_discard() will clear all logical blocks if logical block - * size is identical with physical block size(PAGE_SIZE). But if it is - * different, we will skip discarding some parts of logical blocks in - * the part of the request range which isn't aligned to physical block - * size. So we can't ensure that all discarded logical blocks are - * zeroed. - */ - if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) - blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); - blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue); ret = device_add_disk(NULL, zram->disk, zram_disk_groups); if (ret) diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 3b94d12f4..37bf29f34 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -132,7 +132,7 @@ struct zram { spinlock_t wb_limit_lock; bool wb_limit_enable; u64 bd_wb_limit; - struct bdev_handle *bdev_handle; + struct file *bdev_file; unsigned long *bitmap; unsigned long nr_pages; #endif -- cgit v1.2.3