author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-18 17:35:05 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-18 17:39:31 +0000
commit	85c675d0d09a45a135bddd15d7b385f8758c32fb (patch)
tree	76267dbc9b9a130337be3640948fe397b04ac629 /drivers/block
parent	Adding upstream version 6.6.15. (diff)
download	linux-85c675d0d09a45a135bddd15d7b385f8758c32fb.tar.xz
	linux-85c675d0d09a45a135bddd15d7b385f8758c32fb.zip
Adding upstream version 6.7.7. (upstream/6.7.7)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/block')
-rw-r--r--	drivers/block/aoe/aoeblk.c	5
-rw-r--r--	drivers/block/aoe/aoenet.c	3
-rw-r--r--	drivers/block/ataflop.c	4
-rw-r--r--	drivers/block/drbd/drbd_int.h	4
-rw-r--r--	drivers/block/drbd/drbd_nl.c	65
-rw-r--r--	drivers/block/floppy.c	4
-rw-r--r--	drivers/block/null_blk/main.c	47
-rw-r--r--	drivers/block/pktcdvd.c	76
-rw-r--r--	drivers/block/rnbd/rnbd-srv.c	46
-rw-r--r--	drivers/block/rnbd/rnbd-srv.h	2
-rw-r--r--	drivers/block/ublk_drv.c	342
-rw-r--r--	drivers/block/virtio_blk.c	9
-rw-r--r--	drivers/block/xen-blkback/blkback.c	4
-rw-r--r--	drivers/block/xen-blkback/common.h	4
-rw-r--r--	drivers/block/xen-blkback/xenbus.c	40
-rw-r--r--	drivers/block/zram/zram_drv.c	31
-rw-r--r--	drivers/block/zram/zram_drv.h	2
17 files changed, 405 insertions, 283 deletions
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index cf68837561..37eff1c974 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -333,6 +333,7 @@ aoeblk_gdalloc(void *vp)
 	struct gendisk *gd;
 	mempool_t *mp;
 	struct blk_mq_tag_set *set;
+	sector_t ssize;
 	ulong flags;
 	int late = 0;
 	int err;
@@ -395,7 +396,7 @@ aoeblk_gdalloc(void *vp)
 	gd->minors = AOE_PARTITIONS;
 	gd->fops = &aoe_bdops;
 	gd->private_data = d;
-	set_capacity(gd, d->ssize);
+	ssize = d->ssize;
 	snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d",
 		d->aoemajor, d->aoeminor);
@@ -404,6 +405,8 @@ aoeblk_gdalloc(void *vp)

 	spin_unlock_irqrestore(&d->lock, flags);

+	set_capacity(gd, ssize);
+
 	err = device_add_disk(NULL, gd, aoe_attr_groups);
 	if (err)
 		goto out_disk_cleanup;
diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c
index 63773a9058..c51ea95bc2 100644
--- a/drivers/block/aoe/aoenet.c
+++ b/drivers/block/aoe/aoenet.c
@@ -39,8 +39,7 @@ static struct ktstate kts;
 #ifndef MODULE
 static int __init aoe_iflist_setup(char *str)
 {
-	strncpy(aoe_iflist, str, IFLISTSZ);
-	aoe_iflist[IFLISTSZ - 1] = '\0';
+	strscpy(aoe_iflist, str, IFLISTSZ);
 	return 1;
 }
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index cd738cab72..5094920779 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -1760,8 +1760,10 @@ static int fd_locked_ioctl(struct block_device *bdev, blk_mode_t mode,
 		/* invalidate the buffer track to force a reread */
 		BufferDrive = -1;
 		set_bit(drive, &fake_change);
-		if (disk_check_media_change(disk))
+		if (disk_check_media_change(disk)) {
+			bdev_mark_dead(disk->part0, true);
 			floppy_revalidate(disk);
+		}
 		return 0;
 	default:
 		return -EINVAL;
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index a30a5ed811..c21e373275 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -524,7 +524,9 @@ struct drbd_md {

 struct drbd_backing_dev {
 	struct block_device *backing_bdev;
+	struct bdev_handle *backing_bdev_handle;
 	struct block_device *md_bdev;
+	struct bdev_handle *md_bdev_handle;
 	struct drbd_md md;
 	struct disk_conf *disk_conf; /* RCU, for updates: resource->conf_update */
 	sector_t known_size; /* last known size of that backing device */
@@ -553,7 +555,7 @@ struct fifo_buffer {
 	unsigned int head_index;
 	unsigned int size;
 	int total; /* sum of all values */
-	int values[];
+	int values[] __counted_by(size);
 };
 extern struct fifo_buffer *fifo_alloc(unsigned int fifo_size);
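A note on the drbd_int.h hunk above: __counted_by() ties a flexible array member to the struct field that holds its element count, so FORTIFY_SOURCE/UBSAN bounds checking can police accesses to the array. A minimal sketch of the allocation pattern such an annotation expects; example_fifo_alloc() is a hypothetical stand-in, not DRBD's actual fifo_alloc():

#include <linux/overflow.h>
#include <linux/slab.h>

struct fifo_buffer {
        unsigned int head_index;
        unsigned int size;
        int total;
        int values[] __counted_by(size);
};

/* Hypothetical allocator showing the expected pattern. */
static struct fifo_buffer *example_fifo_alloc(unsigned int fifo_size)
{
        struct fifo_buffer *fb;

        /* struct_size() computes sizeof(*fb) + fifo_size * sizeof(int) safely */
        fb = kzalloc(struct_size(fb, values, fifo_size), GFP_KERNEL);
        if (!fb)
                return NULL;

        /* the counted-by field must be set before the array is indexed */
        fb->size = fifo_size;
        return fb;
}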
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index d3538bd83f..43747a1aae 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -82,7 +82,7 @@ static atomic_t notify_genl_seq = ATOMIC_INIT(2); /* two. */

 DEFINE_MUTEX(notification_mutex);

-/* used blkdev_get_by_path, to claim our meta data device(s) */
+/* used bdev_open_by_path, to claim our meta data device(s) */
 static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";

 static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
@@ -1635,43 +1635,45 @@ success:
 	return 0;
 }

-static struct block_device *open_backing_dev(struct drbd_device *device,
+static struct bdev_handle *open_backing_dev(struct drbd_device *device,
 		const char *bdev_path, void *claim_ptr, bool do_bd_link)
 {
-	struct block_device *bdev;
+	struct bdev_handle *handle;
 	int err = 0;

-	bdev = blkdev_get_by_path(bdev_path, BLK_OPEN_READ | BLK_OPEN_WRITE,
-				  claim_ptr, NULL);
-	if (IS_ERR(bdev)) {
+	handle = bdev_open_by_path(bdev_path, BLK_OPEN_READ | BLK_OPEN_WRITE,
+				   claim_ptr, NULL);
+	if (IS_ERR(handle)) {
 		drbd_err(device, "open(\"%s\") failed with %ld\n",
-			 bdev_path, PTR_ERR(bdev));
-		return bdev;
+			 bdev_path, PTR_ERR(handle));
+		return handle;
 	}
 	if (!do_bd_link)
-		return bdev;
+		return handle;

-	err = bd_link_disk_holder(bdev, device->vdisk);
+	err = bd_link_disk_holder(handle->bdev, device->vdisk);
 	if (err) {
-		blkdev_put(bdev, claim_ptr);
+		bdev_release(handle);
 		drbd_err(device, "bd_link_disk_holder(\"%s\", ...) failed with %d\n",
 			 bdev_path, err);
-		bdev = ERR_PTR(err);
+		handle = ERR_PTR(err);
 	}
-	return bdev;
+	return handle;
 }

 static int open_backing_devices(struct drbd_device *device,
 		struct disk_conf *new_disk_conf,
 		struct drbd_backing_dev *nbc)
 {
-	struct block_device *bdev;
+	struct bdev_handle *handle;

-	bdev = open_backing_dev(device, new_disk_conf->backing_dev, device, true);
-	if (IS_ERR(bdev))
+	handle = open_backing_dev(device, new_disk_conf->backing_dev, device,
+				  true);
+	if (IS_ERR(handle))
 		return ERR_OPEN_DISK;
-	nbc->backing_bdev = bdev;
+	nbc->backing_bdev = handle->bdev;
+	nbc->backing_bdev_handle = handle;

 	/*
 	 * meta_dev_idx >= 0: external fixed size, possibly multiple
@@ -1681,7 +1683,7 @@ static int open_backing_devices(struct drbd_device *device,
 	 * should check it for you already; but if you don't, or
 	 * someone fooled it, we need to double check here)
 	 */
-	bdev = open_backing_dev(device, new_disk_conf->meta_dev,
+	handle = open_backing_dev(device, new_disk_conf->meta_dev,
 		/* claim ptr: device, if claimed exclusively; shared drbd_m_holder,
 		 * if potentially shared with other drbd minors */
 			(new_disk_conf->meta_dev_idx < 0) ? (void*)device : (void*)drbd_m_holder,
@@ -1689,20 +1691,21 @@ static int open_backing_devices(struct drbd_device *device,
 		 * as would happen with internal metadata. */
 			(new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_FLEX_INT &&
 			 new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_INTERNAL));
-	if (IS_ERR(bdev))
+	if (IS_ERR(handle))
 		return ERR_OPEN_MD_DISK;
-	nbc->md_bdev = bdev;
+	nbc->md_bdev = handle->bdev;
+	nbc->md_bdev_handle = handle;
 	return NO_ERROR;
 }

-static void close_backing_dev(struct drbd_device *device, struct block_device *bdev,
-	void *claim_ptr, bool do_bd_unlink)
+static void close_backing_dev(struct drbd_device *device,
+		struct bdev_handle *handle, bool do_bd_unlink)
 {
-	if (!bdev)
+	if (!handle)
 		return;
 	if (do_bd_unlink)
-		bd_unlink_disk_holder(bdev, device->vdisk);
-	blkdev_put(bdev, claim_ptr);
+		bd_unlink_disk_holder(handle->bdev, device->vdisk);
+	bdev_release(handle);
 }

 void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev)
@@ -1710,11 +1713,9 @@ void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev)
 	if (ldev == NULL)
 		return;

-	close_backing_dev(device, ldev->md_bdev,
-			  ldev->md.meta_dev_idx < 0 ?
-				(void *)device : (void *)drbd_m_holder,
+	close_backing_dev(device, ldev->md_bdev_handle,
 			  ldev->md_bdev != ldev->backing_bdev);
-	close_backing_dev(device, ldev->backing_bdev, device, true);
+	close_backing_dev(device, ldev->backing_bdev_handle, true);

 	kfree(ldev->disk_conf);
 	kfree(ldev);
@@ -2130,11 +2131,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 fail:
 	conn_reconfig_done(connection);
 	if (nbc) {
-		close_backing_dev(device, nbc->md_bdev,
-			  nbc->disk_conf->meta_dev_idx < 0 ?
-				(void *)device : (void *)drbd_m_holder,
+		close_backing_dev(device, nbc->md_bdev_handle,
 			  nbc->md_bdev != nbc->backing_bdev);
-		close_backing_dev(device, nbc->backing_bdev, device, true);
+		close_backing_dev(device, nbc->backing_bdev_handle, true);
 		kfree(nbc);
 	}
 	kfree(new_disk_conf);
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index ea4eb88a2e..11114a5d9e 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -3215,8 +3215,10 @@ static int invalidate_drive(struct gendisk *disk)
 	/* invalidate the buffer track to force a reread */
 	set_bit((long)disk->private_data, &fake_change);
 	process_fd_request();
-	if (disk_check_media_change(disk))
+	if (disk_check_media_change(disk)) {
+		bdev_mark_dead(disk->part0, true);
 		floppy_revalidate(disk);
+	}
 	return 0;
 }
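The drbd_nl.c conversion above is one instance of the 6.7-wide migration from blkdev_get_by_path()/blkdev_put() to the bdev_handle API; the same pattern recurs below in pktcdvd, rnbd, xen-blkback and zram. bdev_open_by_path() returns a struct bdev_handle that remembers the holder, the raw device is reached via handle->bdev, and bdev_release(handle) closes it without needing the claim pointer again. A minimal sketch of the pairing, with hypothetical names (my_holder, my_open_backing):

#include <linux/blkdev.h>
#include <linux/err.h>

/* Hypothetical claim token; any stable address works as a holder. */
static char my_holder[] = "example holder";

/* Open a backing device by path and hand back the handle (sketch). */
static struct bdev_handle *my_open_backing(const char *path)
{
        struct bdev_handle *handle;

        handle = bdev_open_by_path(path, BLK_OPEN_READ | BLK_OPEN_WRITE,
                                   my_holder, NULL);
        if (IS_ERR(handle))
                return handle;  /* caller checks with IS_ERR()/PTR_ERR() */

        /* The raw block_device is reached through the handle. */
        pr_info("opened %pg\n", handle->bdev);
        return handle;
}

static void my_close_backing(struct bdev_handle *handle)
{
        /* No holder argument needed: the handle remembers it. */
        bdev_release(handle);
}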
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 9544746de1..13ed446b5e 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1464,19 +1464,13 @@ blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op,
 	return BLK_STS_OK;
 }

-static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
-				    sector_t nr_sectors, enum req_op op)
+static void null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
+			    sector_t nr_sectors, enum req_op op)
 {
 	struct nullb_device *dev = cmd->nq->dev;
 	struct nullb *nullb = dev->nullb;
 	blk_status_t sts;

-	if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) {
-		sts = null_handle_throttled(cmd);
-		if (sts != BLK_STS_OK)
-			return sts;
-	}
-
 	if (op == REQ_OP_FLUSH) {
 		cmd->error = errno_to_blk_status(null_handle_flush(nullb));
 		goto out;
@@ -1493,7 +1487,6 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,

 out:
 	nullb_complete_cmd(cmd);
-	return BLK_STS_OK;
 }

 static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer)
@@ -1724,8 +1717,6 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
 	cmd->fake_timeout = should_timeout_request(rq) ||
 		blk_should_fake_timeout(rq->q);

-	blk_mq_start_request(rq);
-
 	if (should_requeue_request(rq)) {
 		/*
 		 * Alternate between hitting the core BUSY path, and the
@@ -1738,6 +1729,15 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
 		return BLK_STS_OK;
 	}

+	if (test_bit(NULLB_DEV_FL_THROTTLED, &nq->dev->flags)) {
+		blk_status_t sts = null_handle_throttled(cmd);
+
+		if (sts != BLK_STS_OK)
+			return sts;
+	}
+
+	blk_mq_start_request(rq);
+
 	if (is_poll) {
 		spin_lock(&nq->poll_lock);
 		list_add_tail(&rq->queuelist, &nq->poll_list);
@@ -1747,7 +1747,27 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
 	if (cmd->fake_timeout)
 		return BLK_STS_OK;

-	return null_handle_cmd(cmd, sector, nr_sectors, req_op(rq));
+	null_handle_cmd(cmd, sector, nr_sectors, req_op(rq));
+	return BLK_STS_OK;
+}
+
+static void null_queue_rqs(struct request **rqlist)
+{
+	struct request *requeue_list = NULL;
+	struct request **requeue_lastp = &requeue_list;
+	struct blk_mq_queue_data bd = { };
+	blk_status_t ret;
+
+	do {
+		struct request *rq = rq_list_pop(rqlist);
+
+		bd.rq = rq;
+		ret = null_queue_rq(rq->mq_hctx, &bd);
+		if (ret != BLK_STS_OK)
+			rq_list_add_tail(&requeue_lastp, rq);
+	} while (!rq_list_empty(*rqlist));
+
+	*rqlist = requeue_list;
 }

 static void cleanup_queue(struct nullb_queue *nq)
@@ -1802,6 +1822,7 @@ static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,

 static const struct blk_mq_ops null_mq_ops = {
 	.queue_rq	= null_queue_rq,
+	.queue_rqs	= null_queue_rqs,
 	.complete	= null_complete_rq,
 	.timeout	= null_timeout_rq,
 	.poll		= null_poll,
@@ -1946,7 +1967,7 @@ static int null_gendisk_register(struct nullb *nullb)
 	else
 		disk->fops = &null_bio_ops;
 	disk->private_data = nullb;
-	strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
+	strscpy_pad(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);

 	if (nullb->dev->zoned) {
 		int ret = null_register_zoned_dev(nullb);
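For context on the null_blk hunks above: a driver that implements .queue_rqs receives the whole plugged request list in one call instead of one .queue_rq call per request, and must hand back anything it could not dispatch. A hedged restatement of the loop with hypothetical my_queue_rq/my_queue_rqs names:

#include <linux/blk-mq.h>

/* Hypothetical per-request dispatch, standing in for a real ->queue_rq. */
static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
                                const struct blk_mq_queue_data *bd)
{
        return BLK_STS_OK;      /* pretend the request was dispatched */
}

/*
 * Batched dispatch: blk-mq hands over the whole plugged list. Requests
 * that could not be dispatched must be left on *rqlist for requeueing.
 */
static void my_queue_rqs(struct request **rqlist)
{
        struct request *requeue_list = NULL;
        struct request **requeue_lastp = &requeue_list;
        struct blk_mq_queue_data bd = { };

        do {
                struct request *rq = rq_list_pop(rqlist);

                bd.rq = rq;
                if (my_queue_rq(rq->mq_hctx, &bd) != BLK_STS_OK)
                        rq_list_add_tail(&requeue_lastp, rq);
        } while (!rq_list_empty(*rqlist));

        *rqlist = requeue_list; /* blk-mq requeues what is left */
}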
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index a1428538bd..d56d972aad 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -340,8 +340,8 @@ static ssize_t device_map_show(const struct class *c, const struct class_attribute *attr,
 			n += sysfs_emit_at(data, n, "%s %u:%u %u:%u\n",
 				pd->disk->disk_name,
 				MAJOR(pd->pkt_dev), MINOR(pd->pkt_dev),
-				MAJOR(pd->bdev->bd_dev),
-				MINOR(pd->bdev->bd_dev));
+				MAJOR(pd->bdev_handle->bdev->bd_dev),
+				MINOR(pd->bdev_handle->bdev->bd_dev));
 	}
 	mutex_unlock(&ctl_mutex);
 	return n;
@@ -437,7 +437,8 @@ static int pkt_seq_show(struct seq_file *m, void *p)
 	char *msg;
 	int states[PACKET_NUM_STATES];

-	seq_printf(m, "Writer %s mapped to %pg:\n", pd->disk->disk_name, pd->bdev);
+	seq_printf(m, "Writer %s mapped to %pg:\n", pd->disk->disk_name,
+		   pd->bdev_handle->bdev);

 	seq_printf(m, "\nSettings:\n");
 	seq_printf(m, "\tpacket size:\t\t%dkB\n", pd->settings.size / 2);
@@ -714,7 +715,7 @@ static void pkt_rbtree_insert(struct pktcdvd_device *pd, struct pkt_rb_node *node)
  */
 static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *cgc)
 {
-	struct request_queue *q = bdev_get_queue(pd->bdev);
+	struct request_queue *q = bdev_get_queue(pd->bdev_handle->bdev);
 	struct scsi_cmnd *scmd;
 	struct request *rq;
 	int ret = 0;
@@ -1047,7 +1048,8 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
 			continue;

 		bio = pkt->r_bios[f];
-		bio_init(bio, pd->bdev, bio->bi_inline_vecs, 1, REQ_OP_READ);
+		bio_init(bio, pd->bdev_handle->bdev, bio->bi_inline_vecs, 1,
+			 REQ_OP_READ);
 		bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
 		bio->bi_end_io = pkt_end_io_read;
 		bio->bi_private = pkt;
@@ -1262,8 +1264,8 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
 	struct device *ddev = disk_to_dev(pd->disk);
 	int f;

-	bio_init(pkt->w_bio, pd->bdev, pkt->w_bio->bi_inline_vecs, pkt->frames,
-		 REQ_OP_WRITE);
+	bio_init(pkt->w_bio, pd->bdev_handle->bdev, pkt->w_bio->bi_inline_vecs,
+		 pkt->frames, REQ_OP_WRITE);
 	pkt->w_bio->bi_iter.bi_sector = pkt->sector;
 	pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
 	pkt->w_bio->bi_private = pkt;
@@ -2160,18 +2162,20 @@ static int pkt_open_dev(struct pktcdvd_device *pd, bool write)
 	int ret;
 	long lba;
 	struct request_queue *q;
-	struct block_device *bdev;
+	struct bdev_handle *bdev_handle;

 	/*
 	 * We need to re-open the cdrom device without O_NONBLOCK to be able
 	 * to read/write from/to it. It is already opened in O_NONBLOCK mode
 	 * so open should not fail.
 	 */
-	bdev = blkdev_get_by_dev(pd->bdev->bd_dev, BLK_OPEN_READ, pd, NULL);
-	if (IS_ERR(bdev)) {
-		ret = PTR_ERR(bdev);
+	bdev_handle = bdev_open_by_dev(pd->bdev_handle->bdev->bd_dev,
+				       BLK_OPEN_READ, pd, NULL);
+	if (IS_ERR(bdev_handle)) {
+		ret = PTR_ERR(bdev_handle);
 		goto out;
 	}
+	pd->open_bdev_handle = bdev_handle;

 	ret = pkt_get_last_written(pd, &lba);
 	if (ret) {
@@ -2180,9 +2184,9 @@ static int pkt_open_dev(struct pktcdvd_device *pd, bool write)
 	}

 	set_capacity(pd->disk, lba << 2);
-	set_capacity_and_notify(pd->bdev->bd_disk, lba << 2);
+	set_capacity_and_notify(pd->bdev_handle->bdev->bd_disk, lba << 2);

-	q = bdev_get_queue(pd->bdev);
+	q = bdev_get_queue(pd->bdev_handle->bdev);
 	if (write) {
 		ret = pkt_open_write(pd);
 		if (ret)
@@ -2214,7 +2218,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, bool write)
 	return 0;

out_putdev:
-	blkdev_put(bdev, pd);
+	bdev_release(bdev_handle);
out:
 	return ret;
 }
@@ -2233,7 +2237,8 @@ static void pkt_release_dev(struct pktcdvd_device *pd, int flush)
 	pkt_lock_door(pd, 0);
 	pkt_set_speed(pd, MAX_SPEED, MAX_SPEED);

-	blkdev_put(pd->bdev, pd);
+	bdev_release(pd->open_bdev_handle);
+	pd->open_bdev_handle = NULL;

 	pkt_shrink_pktlist(pd);
 }
@@ -2321,8 +2326,8 @@ static void pkt_end_io_read_cloned(struct bio *bio)

 static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
 {
-	struct bio *cloned_bio =
-		bio_alloc_clone(pd->bdev, bio, GFP_NOIO, &pkt_bio_set);
+	struct bio *cloned_bio = bio_alloc_clone(pd->bdev_handle->bdev, bio,
+						 GFP_NOIO, &pkt_bio_set);
 	struct packet_stacked_data *psd = mempool_alloc(&psd_pool, GFP_NOIO);

 	psd->pd = pd;
@@ -2492,7 +2497,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
 {
 	struct device *ddev = disk_to_dev(pd->disk);
 	int i;
-	struct block_device *bdev;
+	struct bdev_handle *bdev_handle;
 	struct scsi_device *sdev;

 	if (pd->pkt_dev == dev) {
@@ -2503,8 +2508,9 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
 		struct pktcdvd_device *pd2 = pkt_devs[i];
 		if (!pd2)
 			continue;
-		if (pd2->bdev->bd_dev == dev) {
-			dev_err(ddev, "%pg already setup\n", pd2->bdev);
+		if (pd2->bdev_handle->bdev->bd_dev == dev) {
+			dev_err(ddev, "%pg already setup\n",
+				pd2->bdev_handle->bdev);
 			return -EBUSY;
 		}
 		if (pd2->pkt_dev == dev) {
@@ -2513,13 +2519,13 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
 		}
 	}

-	bdev = blkdev_get_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_NDELAY, NULL,
-				 NULL);
-	if (IS_ERR(bdev))
-		return PTR_ERR(bdev);
-	sdev = scsi_device_from_queue(bdev->bd_disk->queue);
+	bdev_handle = bdev_open_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_NDELAY,
+				       NULL, NULL);
+	if (IS_ERR(bdev_handle))
+		return PTR_ERR(bdev_handle);
+	sdev = scsi_device_from_queue(bdev_handle->bdev->bd_disk->queue);
 	if (!sdev) {
-		blkdev_put(bdev, NULL);
+		bdev_release(bdev_handle);
 		return -EINVAL;
 	}
 	put_device(&sdev->sdev_gendev);
@@ -2527,8 +2533,8 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
 	/* This is safe, since we have a reference from open(). */
 	__module_get(THIS_MODULE);

-	pd->bdev = bdev;
-	set_blocksize(bdev, CD_FRAMESIZE);
+	pd->bdev_handle = bdev_handle;
+	set_blocksize(bdev_handle->bdev, CD_FRAMESIZE);

 	pkt_init_queue(pd);

@@ -2540,11 +2546,11 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
 	}

 	proc_create_single_data(pd->disk->disk_name, 0, pkt_proc, pkt_seq_show, pd);
-	dev_notice(ddev, "writer mapped to %pg\n", bdev);
+	dev_notice(ddev, "writer mapped to %pg\n", bdev_handle->bdev);
 	return 0;

out_mem:
-	blkdev_put(bdev, NULL);
+	bdev_release(bdev_handle);
 	/* This is safe: open() is still holding a reference. */
 	module_put(THIS_MODULE);
 	return -ENOMEM;
@@ -2599,9 +2605,9 @@ static unsigned int pkt_check_events(struct gendisk *disk,

 	if (!pd)
 		return 0;
-	if (!pd->bdev)
+	if (!pd->bdev_handle)
 		return 0;
-	attached_disk = pd->bdev->bd_disk;
+	attached_disk = pd->bdev_handle->bdev->bd_disk;
 	if (!attached_disk || !attached_disk->fops->check_events)
 		return 0;
 	return attached_disk->fops->check_events(attached_disk, clearing);
@@ -2686,7 +2692,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
 		goto out_mem2;

 	/* inherit events of the host device */
-	disk->events = pd->bdev->bd_disk->events;
+	disk->events = pd->bdev_handle->bdev->bd_disk->events;

 	ret = add_disk(disk);
 	if (ret)
@@ -2751,7 +2757,7 @@ static int pkt_remove_dev(dev_t pkt_dev)
 	pkt_debugfs_dev_remove(pd);
 	pkt_sysfs_dev_remove(pd);

-	blkdev_put(pd->bdev, NULL);
+	bdev_release(pd->bdev_handle);

 	remove_proc_entry(pd->disk->disk_name, pkt_proc);
 	dev_notice(ddev, "writer unmapped\n");
@@ -2778,7 +2784,7 @@ static void pkt_get_status(struct pkt_ctrl_command *ctrl_cmd)

 	pd = pkt_find_dev_from_minor(ctrl_cmd->dev_index);
 	if (pd) {
-		ctrl_cmd->dev = new_encode_dev(pd->bdev->bd_dev);
+		ctrl_cmd->dev = new_encode_dev(pd->bdev_handle->bdev->bd_dev);
 		ctrl_cmd->pkt_dev = new_encode_dev(pd->pkt_dev);
 	} else {
 		ctrl_cmd->dev = 0;
diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c
index c186df0ec6..ab78eab97d 100644
--- a/drivers/block/rnbd/rnbd-srv.c
+++ b/drivers/block/rnbd/rnbd-srv.c
@@ -145,7 +145,7 @@ static int process_rdma(struct rnbd_srv_session *srv_sess,
 	priv->sess_dev = sess_dev;
 	priv->id = id;

-	bio = bio_alloc(sess_dev->bdev, 1,
+	bio = bio_alloc(sess_dev->bdev_handle->bdev, 1,
 			rnbd_to_bio_flags(le32_to_cpu(msg->rw)), GFP_KERNEL);
 	if (bio_add_page(bio, virt_to_page(data), datalen,
			offset_in_page(data)) != datalen) {
@@ -219,7 +219,7 @@ void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev, bool keep_id)
 	rnbd_put_sess_dev(sess_dev);
 	wait_for_completion(&dc); /* wait for inflights to drop to zero */

-	blkdev_put(sess_dev->bdev, NULL);
+	bdev_release(sess_dev->bdev_handle);
 	mutex_lock(&sess_dev->dev->lock);
 	list_del(&sess_dev->dev_list);
 	if (!sess_dev->readonly)
@@ -534,7 +534,7 @@ rnbd_srv_get_or_create_srv_dev(struct block_device *bdev,
 static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp,
					struct rnbd_srv_sess_dev *sess_dev)
 {
-	struct block_device *bdev = sess_dev->bdev;
+	struct block_device *bdev = sess_dev->bdev_handle->bdev;

 	rsp->hdr.type = cpu_to_le16(RNBD_MSG_OPEN_RSP);
 	rsp->device_id = cpu_to_le32(sess_dev->device_id);
@@ -559,7 +559,7 @@ static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp,
 static struct rnbd_srv_sess_dev *
 rnbd_srv_create_set_sess_dev(struct rnbd_srv_session *srv_sess,
			     const struct rnbd_msg_open *open_msg,
-			     struct block_device *bdev, bool readonly,
+			     struct bdev_handle *handle, bool readonly,
			     struct rnbd_srv_dev *srv_dev)
 {
 	struct rnbd_srv_sess_dev *sdev = rnbd_sess_dev_alloc(srv_sess);
@@ -571,7 +571,7 @@ rnbd_srv_create_set_sess_dev(struct rnbd_srv_session *srv_sess,

 	strscpy(sdev->pathname, open_msg->dev_name, sizeof(sdev->pathname));

-	sdev->bdev = bdev;
+	sdev->bdev_handle = handle;
 	sdev->sess = srv_sess;
 	sdev->dev = srv_dev;
 	sdev->readonly = readonly;
@@ -585,6 +585,7 @@ static char *rnbd_srv_get_full_path(struct rnbd_srv_session *srv_sess,
 {
 	char *full_path;
 	char *a, *b;
+	int len;

 	full_path = kmalloc(PATH_MAX, GFP_KERNEL);
 	if (!full_path)
@@ -596,19 +597,19 @@ static char *rnbd_srv_get_full_path(struct rnbd_srv_session *srv_sess,
 	 */
 	a = strnstr(dev_search_path, "%SESSNAME%", sizeof(dev_search_path));
 	if (a) {
-		int len = a - dev_search_path;
+		len = a - dev_search_path;

 		len = snprintf(full_path, PATH_MAX, "%.*s/%s/%s", len,
			       dev_search_path, srv_sess->sessname, dev_name);
-		if (len >= PATH_MAX) {
-			pr_err("Too long path: %s, %s, %s\n",
-			       dev_search_path, srv_sess->sessname, dev_name);
-			kfree(full_path);
-			return ERR_PTR(-EINVAL);
-		}
 	} else {
-		snprintf(full_path, PATH_MAX, "%s/%s",
-			 dev_search_path, dev_name);
+		len = snprintf(full_path, PATH_MAX, "%s/%s",
+			       dev_search_path, dev_name);
+	}
+	if (len >= PATH_MAX) {
+		pr_err("Too long path: %s, %s, %s\n",
+		       dev_search_path, srv_sess->sessname, dev_name);
+		kfree(full_path);
+		return ERR_PTR(-EINVAL);
 	}

 	/* eliminitate duplicated slashes */
@@ -676,7 +677,7 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
 	struct rnbd_srv_dev *srv_dev;
 	struct rnbd_srv_sess_dev *srv_sess_dev;
 	const struct rnbd_msg_open *open_msg = msg;
-	struct block_device *bdev;
+	struct bdev_handle *bdev_handle;
 	blk_mode_t open_flags = BLK_OPEN_READ;
 	char *full_path;
 	struct rnbd_msg_open_rsp *rsp = data;
@@ -714,15 +715,15 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
 		goto reject;
 	}

-	bdev = blkdev_get_by_path(full_path, open_flags, NULL, NULL);
-	if (IS_ERR(bdev)) {
-		ret = PTR_ERR(bdev);
+	bdev_handle = bdev_open_by_path(full_path, open_flags, NULL, NULL);
+	if (IS_ERR(bdev_handle)) {
+		ret = PTR_ERR(bdev_handle);
 		pr_err("Opening device '%s' on session %s failed, failed to open the block device, err: %d\n",
		       full_path, srv_sess->sessname, ret);
 		goto free_path;
 	}

-	srv_dev = rnbd_srv_get_or_create_srv_dev(bdev, srv_sess,
+	srv_dev = rnbd_srv_get_or_create_srv_dev(bdev_handle->bdev, srv_sess,
						 open_msg->access_mode);
 	if (IS_ERR(srv_dev)) {
 		pr_err("Opening device '%s' on session %s failed, creating srv_dev failed, err: %ld\n",
@@ -731,7 +732,8 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
 		goto blkdev_put;
 	}

-	srv_sess_dev = rnbd_srv_create_set_sess_dev(srv_sess, open_msg, bdev,
+	srv_sess_dev = rnbd_srv_create_set_sess_dev(srv_sess, open_msg,
				bdev_handle,
				open_msg->access_mode == RNBD_ACCESS_RO,
				srv_dev);
 	if (IS_ERR(srv_sess_dev)) {
@@ -747,7 +749,7 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
 	 */
 	mutex_lock(&srv_dev->lock);
 	if (!srv_dev->dev_kobj.state_in_sysfs) {
-		ret = rnbd_srv_create_dev_sysfs(srv_dev, bdev);
+		ret = rnbd_srv_create_dev_sysfs(srv_dev, bdev_handle->bdev);
 		if (ret) {
 			mutex_unlock(&srv_dev->lock);
 			rnbd_srv_err(srv_sess_dev,
@@ -790,7 +792,7 @@ srv_dev_put:
 	}
 	rnbd_put_srv_dev(srv_dev);
blkdev_put:
-	blkdev_put(bdev, NULL);
+	bdev_release(bdev_handle);
free_path:
 	kfree(full_path);
reject:
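The rnbd_srv_get_full_path() rework above hoists the truncation check so that both snprintf() branches are covered. The underlying idiom: snprintf() returns the length the output would have had, so a return value >= the buffer size means the string was truncated. A standalone sketch; build_path() is hypothetical:

#include <linux/err.h>
#include <linux/limits.h>
#include <linux/slab.h>

/* Hypothetical helper showing the truncation-check idiom. */
static char *build_path(const char *dir, const char *name)
{
        char *path = kmalloc(PATH_MAX, GFP_KERNEL);
        int len;

        if (!path)
                return ERR_PTR(-ENOMEM);

        /* snprintf() reports the untruncated length, not bytes written */
        len = snprintf(path, PATH_MAX, "%s/%s", dir, name);
        if (len >= PATH_MAX) {          /* would not fit: reject, don't use */
                kfree(path);
                return ERR_PTR(-ENAMETOOLONG);
        }
        return path;
}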
diff --git a/drivers/block/rnbd/rnbd-srv.h b/drivers/block/rnbd/rnbd-srv.h
index 1027656ded..343cc682b6 100644
--- a/drivers/block/rnbd/rnbd-srv.h
+++ b/drivers/block/rnbd/rnbd-srv.h
@@ -46,7 +46,7 @@ struct rnbd_srv_dev {
 struct rnbd_srv_sess_dev {
 	/* Entry inside rnbd_srv_dev struct */
 	struct list_head		dev_list;
-	struct block_device		*bdev;
+	struct bdev_handle		*bdev_handle;
 	struct rnbd_srv_session		*sess;
 	struct rnbd_srv_dev		*dev;
 	struct kobject			kobj;
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 630ddfe665..83600b45e1 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -75,6 +75,7 @@ struct ublk_rq_data {

 struct ublk_uring_cmd_pdu {
 	struct ublk_queue *ubq;
+	u16 tag;
 };

 /*
@@ -115,6 +116,9 @@ struct ublk_uring_cmd_pdu {
  */
 #define UBLK_IO_FLAG_NEED_GET_DATA 0x08

+/* atomic RW with ubq->cancel_lock */
+#define UBLK_IO_FLAG_CANCELED	0x80000000
+
 struct ublk_io {
 	/* userspace buffer address from io cmd */
 	__u64	addr;
@@ -138,13 +142,13 @@ struct ublk_queue {
 	unsigned int max_io_sz;
 	bool force_abort;
 	bool timeout;
+	bool canceling;
 	unsigned short nr_io_ready;	/* how many ios setup */
+	spinlock_t		cancel_lock;
 	struct ublk_device *dev;
 	struct ublk_io ios[];
 };

-#define UBLK_DAEMON_MONITOR_PERIOD	(5 * HZ)
-
 struct ublk_device {
 	struct gendisk		*ub_disk;
@@ -166,7 +170,7 @@ struct ublk_device {

 	struct mutex		mutex;

-	spinlock_t		mm_lock;
+	spinlock_t		lock;
 	struct mm_struct	*mm;

 	struct ublk_params	params;
@@ -175,11 +179,6 @@ struct ublk_device {
 	unsigned int		nr_queues_ready;
 	unsigned int		nr_privileged_daemon;

-	/*
-	 * Our ubq->daemon may be killed without any notification, so
-	 * monitor each queue's daemon periodically
-	 */
-	struct delayed_work	monitor_work;
 	struct work_struct	quiesce_work;
 	struct work_struct	stop_work;
 };
@@ -190,10 +189,11 @@ struct ublk_params_header {
 	__u32	types;
 };

+static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq);
+
 static inline unsigned int ublk_req_build_flags(struct request *req);
 static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq,
						   int tag);
-
 static inline bool ublk_dev_is_user_copy(const struct ublk_device *ub)
 {
 	return ub->dev_info.flags & UBLK_F_USER_COPY;
@@ -470,6 +470,7 @@ static DEFINE_MUTEX(ublk_ctl_mutex);
  * It can be extended to one per-user limit in future or even controlled
  * by cgroup.
  */
+#define UBLK_MAX_UBLKS	UBLK_MINORS
 static unsigned int ublks_max = 64;
 static unsigned int ublks_added;	/* protected by ublk_ctl_mutex */
@@ -1083,13 +1084,10 @@ static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io,
 {
 	WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE);

-	if (!(io->flags & UBLK_IO_FLAG_ABORTED)) {
-		io->flags |= UBLK_IO_FLAG_ABORTED;
-		if (ublk_queue_can_use_recovery_reissue(ubq))
-			blk_mq_requeue_request(req, false);
-		else
-			ublk_put_req_ref(ubq, req);
-	}
+	if (ublk_queue_can_use_recovery_reissue(ubq))
+		blk_mq_requeue_request(req, false);
+	else
+		ublk_put_req_ref(ubq, req);
 }

 static void ubq_complete_io_cmd(struct ublk_io *io, int res,
@@ -1118,8 +1116,6 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
 		blk_mq_requeue_request(rq, false);
 	else
 		blk_mq_end_request(rq, BLK_STS_IOERR);
-
-	mod_delayed_work(system_wq, &ubq->dev->monitor_work, 0);
 }

 static inline void __ublk_rq_task_work(struct request *req,
@@ -1212,15 +1208,6 @@ static inline void ublk_forward_io_cmds(struct ublk_queue *ubq,
 		__ublk_rq_task_work(blk_mq_rq_from_pdu(data), issue_flags);
 }

-static inline void ublk_abort_io_cmds(struct ublk_queue *ubq)
-{
-	struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
-	struct ublk_rq_data *data, *tmp;
-
-	llist_for_each_entry_safe(data, tmp, io_cmds, node)
-		__ublk_abort_rq(ubq, blk_mq_rq_from_pdu(data));
-}
-
 static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, unsigned issue_flags)
 {
 	struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
@@ -1232,38 +1219,19 @@ static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, unsigned issue_flags)
 static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
 {
 	struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
-	struct ublk_io *io;

-	if (!llist_add(&data->node, &ubq->io_cmds))
-		return;
+	if (llist_add(&data->node, &ubq->io_cmds)) {
+		struct ublk_io *io = &ubq->ios[rq->tag];

-	io = &ubq->ios[rq->tag];
-	/*
-	 * If the check pass, we know that this is a re-issued request aborted
-	 * previously in monitor_work because the ubq_daemon(cmd's task) is
-	 * PF_EXITING. We cannot call io_uring_cmd_complete_in_task() anymore
-	 * because this ioucmd's io_uring context may be freed now if no inflight
-	 * ioucmd exists. Otherwise we may cause null-deref in ctx->fallback_work.
-	 *
-	 * Note: monitor_work sets UBLK_IO_FLAG_ABORTED and ends this request(releasing
-	 * the tag). Then the request is re-started(allocating the tag) and we are here.
-	 * Since releasing/allocating a tag implies smp_mb(), finding UBLK_IO_FLAG_ABORTED
-	 * guarantees that here is a re-issued request aborted previously.
-	 */
-	if (unlikely(io->flags & UBLK_IO_FLAG_ABORTED)) {
-		ublk_abort_io_cmds(ubq);
-	} else {
-		struct io_uring_cmd *cmd = io->cmd;
-		struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
-
-		pdu->ubq = ubq;
-		io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
+		io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb);
 	}
 }

 static enum blk_eh_timer_return ublk_timeout(struct request *rq)
 {
 	struct ublk_queue *ubq = rq->mq_hctx->driver_data;
+	unsigned int nr_inflight = 0;
+	int i;

 	if (ubq->flags & UBLK_F_UNPRIVILEGED_DEV) {
 		if (!ubq->timeout) {
@@ -1274,6 +1242,29 @@ static enum blk_eh_timer_return ublk_timeout(struct request *rq)
 		return BLK_EH_DONE;
 	}

+	if (!ubq_daemon_is_dying(ubq))
+		return BLK_EH_RESET_TIMER;
+
+	for (i = 0; i < ubq->q_depth; i++) {
+		struct ublk_io *io = &ubq->ios[i];
+
+		if (!(io->flags & UBLK_IO_FLAG_ACTIVE))
+			nr_inflight++;
+	}
+
+	/* cancelable uring_cmd can't help us if all commands are in-flight */
+	if (nr_inflight == ubq->q_depth) {
+		struct ublk_device *ub = ubq->dev;
+
+		if (ublk_abort_requests(ub, ubq)) {
+			if (ublk_can_use_recovery(ub))
+				schedule_work(&ub->quiesce_work);
+			else
+				schedule_work(&ub->stop_work);
+		}
+		return BLK_EH_DONE;
+	}
+
 	return BLK_EH_RESET_TIMER;
 }
@@ -1301,13 +1292,12 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
 	if (ublk_queue_can_use_recovery(ubq) && unlikely(ubq->force_abort))
 		return BLK_STS_IOERR;

-	blk_mq_start_request(bd->rq);
-
-	if (unlikely(ubq_daemon_is_dying(ubq))) {
+	if (unlikely(ubq->canceling)) {
 		__ublk_abort_rq(ubq, rq);
 		return BLK_STS_OK;
 	}

+	blk_mq_start_request(bd->rq);
 	ublk_queue_cmd(ubq, rq);

 	return BLK_STS_OK;
@@ -1357,12 +1347,12 @@ static int ublk_ch_mmap(struct file *filp, struct vm_area_struct *vma)
 	unsigned long pfn, end, phys_off = vma->vm_pgoff << PAGE_SHIFT;
 	int q_id, ret = 0;

-	spin_lock(&ub->mm_lock);
+	spin_lock(&ub->lock);
 	if (!ub->mm)
 		ub->mm = current->mm;
 	if (current->mm != ub->mm)
 		ret = -EINVAL;
-	spin_unlock(&ub->mm_lock);
+	spin_unlock(&ub->lock);

 	if (ret)
 		return ret;
@@ -1411,17 +1401,14 @@ static void ublk_commit_completion(struct ublk_device *ub,
 }

 /*
- * When ->ubq_daemon is exiting, either new request is ended immediately,
- * or any queued io command is drained, so it is safe to abort queue
- * lockless
+ * Called from ubq_daemon context via cancel fn, meantime quiesce ublk
+ * blk-mq queue, so we are called exclusively with blk-mq and ubq_daemon
+ * context, so everything is serialized.
  */
 static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
 {
 	int i;

-	if (!ublk_get_device(ub))
-		return;
-
 	for (i = 0; i < ubq->q_depth; i++) {
 		struct ublk_io *io = &ubq->ios[i];
@@ -1433,72 +1420,114 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
			 * will do it
			 */
			rq = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], i);
-			if (rq)
+			if (rq && blk_mq_request_started(rq)) {
+				io->flags |= UBLK_IO_FLAG_ABORTED;
				__ublk_fail_req(ubq, io, rq);
+			}
		}
 	}
-	ublk_put_device(ub);
 }

-static void ublk_daemon_monitor_work(struct work_struct *work)
+static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq)
 {
-	struct ublk_device *ub =
-		container_of(work, struct ublk_device, monitor_work.work);
-	int i;
+	struct gendisk *disk;

-	for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
-		struct ublk_queue *ubq = ublk_get_queue(ub, i);
+	spin_lock(&ubq->cancel_lock);
+	if (ubq->canceling) {
+		spin_unlock(&ubq->cancel_lock);
+		return false;
+	}
+	ubq->canceling = true;
+	spin_unlock(&ubq->cancel_lock);

-		if (ubq_daemon_is_dying(ubq)) {
-			if (ublk_queue_can_use_recovery(ubq))
-				schedule_work(&ub->quiesce_work);
-			else
-				schedule_work(&ub->stop_work);
+	spin_lock(&ub->lock);
+	disk = ub->ub_disk;
+	if (disk)
+		get_device(disk_to_dev(disk));
+	spin_unlock(&ub->lock);

-			/* abort queue is for making forward progress */
-			ublk_abort_queue(ub, ubq);
-		}
-	}
+	/* Our disk has been dead */
+	if (!disk)
+		return false;

-	/*
-	 * We can't schedule monitor work after ub's state is not UBLK_S_DEV_LIVE.
-	 * after ublk_remove() or __ublk_quiesce_dev() is started.
-	 *
-	 * No need ub->mutex, monitor work are canceled after state is marked
-	 * as not LIVE, so new state is observed reliably.
-	 */
-	if (ub->dev_info.state == UBLK_S_DEV_LIVE)
-		schedule_delayed_work(&ub->monitor_work,
-				UBLK_DAEMON_MONITOR_PERIOD);
-}
+	/* Now we are serialized with ublk_queue_rq() */
+	blk_mq_quiesce_queue(disk->queue);
+	/* abort queue is for making forward progress */
+	ublk_abort_queue(ub, ubq);
+	blk_mq_unquiesce_queue(disk->queue);
+	put_device(disk_to_dev(disk));

-static inline bool ublk_queue_ready(struct ublk_queue *ubq)
-{
-	return ubq->nr_io_ready == ubq->q_depth;
+	return true;
 }

-static void ublk_cmd_cancel_cb(struct io_uring_cmd *cmd, unsigned issue_flags)
+static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io,
+		unsigned int issue_flags)
 {
-	io_uring_cmd_done(cmd, UBLK_IO_RES_ABORT, 0, issue_flags);
+	bool done;
+
+	if (!(io->flags & UBLK_IO_FLAG_ACTIVE))
+		return;
+
+	spin_lock(&ubq->cancel_lock);
+	done = !!(io->flags & UBLK_IO_FLAG_CANCELED);
+	if (!done)
+		io->flags |= UBLK_IO_FLAG_CANCELED;
+	spin_unlock(&ubq->cancel_lock);
+
+	if (!done)
+		io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, 0, issue_flags);
 }

-static void ublk_cancel_queue(struct ublk_queue *ubq)
+/*
+ * The ublk char device won't be closed when calling cancel fn, so both
+ * ublk device and queue are guaranteed to be live
+ */
+static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd,
+		unsigned int issue_flags)
 {
-	int i;
+	struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+	struct ublk_queue *ubq = pdu->ubq;
+	struct task_struct *task;
+	struct ublk_device *ub;
+	bool need_schedule;
+	struct ublk_io *io;

-	if (!ublk_queue_ready(ubq))
+	if (WARN_ON_ONCE(!ubq))
 		return;

-	for (i = 0; i < ubq->q_depth; i++) {
-		struct ublk_io *io = &ubq->ios[i];
+	if (WARN_ON_ONCE(pdu->tag >= ubq->q_depth))
+		return;
+
+	task = io_uring_cmd_get_task(cmd);
+	if (WARN_ON_ONCE(task && task != ubq->ubq_daemon))
+		return;
+
+	ub = ubq->dev;
+	need_schedule = ublk_abort_requests(ub, ubq);

-		if (io->flags & UBLK_IO_FLAG_ACTIVE)
-			io_uring_cmd_complete_in_task(io->cmd,
-						      ublk_cmd_cancel_cb);
+	io = &ubq->ios[pdu->tag];
+	WARN_ON_ONCE(io->cmd != cmd);
+	ublk_cancel_cmd(ubq, io, issue_flags);
+
+	if (need_schedule) {
+		if (ublk_can_use_recovery(ub))
+			schedule_work(&ub->quiesce_work);
+		else
+			schedule_work(&ub->stop_work);
 	}
+}

-	/* all io commands are canceled */
-	ubq->nr_io_ready = 0;
+static inline bool ublk_queue_ready(struct ublk_queue *ubq)
+{
+	return ubq->nr_io_ready == ubq->q_depth;
+}
+
+static void ublk_cancel_queue(struct ublk_queue *ubq)
+{
+	int i;
+
+	for (i = 0; i < ubq->q_depth; i++)
+		ublk_cancel_cmd(ubq, &ubq->ios[i], IO_URING_F_UNLOCKED);
 }

 /* Cancel all pending commands, must be called after del_gendisk() returns */
@@ -1545,16 +1574,6 @@ static void __ublk_quiesce_dev(struct ublk_device *ub)
 	blk_mq_quiesce_queue(ub->ub_disk->queue);
 	ublk_wait_tagset_rqs_idle(ub);
 	ub->dev_info.state = UBLK_S_DEV_QUIESCED;
-	ublk_cancel_dev(ub);
-	/* we are going to release task_struct of ubq_daemon and resets
-	 * ->ubq_daemon to NULL. So in monitor_work, check on ubq_daemon causes UAF.
-	 * Besides, monitor_work is not necessary in QUIESCED state since we have
-	 * already scheduled quiesce_work and quiesced all ubqs.
-	 *
-	 * Do not let monitor_work schedule itself if state it QUIESCED. And we cancel
-	 * it here and re-schedule it in END_USER_RECOVERY to avoid UAF.
-	 */
-	cancel_delayed_work_sync(&ub->monitor_work);
 }

 static void ublk_quiesce_work_fn(struct work_struct *work)
@@ -1568,6 +1587,7 @@ static void ublk_quiesce_work_fn(struct work_struct *work)
 		__ublk_quiesce_dev(ub);
 unlock:
 	mutex_unlock(&ub->mutex);
+	ublk_cancel_dev(ub);
 }

 static void ublk_unquiesce_dev(struct ublk_device *ub)
@@ -1593,6 +1613,8 @@ static void ublk_unquiesce_dev(struct ublk_device *ub)

 static void ublk_stop_dev(struct ublk_device *ub)
 {
+	struct gendisk *disk;
+
 	mutex_lock(&ub->mutex);
 	if (ub->dev_info.state == UBLK_S_DEV_DEAD)
 		goto unlock;
@@ -1602,14 +1624,18 @@ static void ublk_stop_dev(struct ublk_device *ub)
 		ublk_unquiesce_dev(ub);
 	}
 	del_gendisk(ub->ub_disk);
+
+	/* Sync with ublk_abort_queue() by holding the lock */
+	spin_lock(&ub->lock);
+	disk = ub->ub_disk;
 	ub->dev_info.state = UBLK_S_DEV_DEAD;
 	ub->dev_info.ublksrv_pid = -1;
-	put_disk(ub->ub_disk);
 	ub->ub_disk = NULL;
+	spin_unlock(&ub->lock);
+	put_disk(disk);
 unlock:
-	ublk_cancel_dev(ub);
 	mutex_unlock(&ub->mutex);
-	cancel_delayed_work_sync(&ub->monitor_work);
+	ublk_cancel_dev(ub);
 }

 /* device can only be started after all IOs are ready */
@@ -1660,6 +1686,21 @@ static inline void ublk_fill_io_cmd(struct ublk_io *io,
 	io->addr = buf_addr;
 }

+static inline void ublk_prep_cancel(struct io_uring_cmd *cmd,
+				    unsigned int issue_flags,
+				    struct ublk_queue *ubq, unsigned int tag)
+{
+	struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+
+	/*
+	 * Safe to refer to @ubq since ublk_queue won't be died until its
+	 * commands are completed
+	 */
+	pdu->ubq = ubq;
+	pdu->tag = tag;
+	io_uring_cmd_mark_cancelable(cmd, issue_flags);
+}
+
 static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
			       unsigned int issue_flags,
			       const struct ublksrv_io_cmd *ub_cmd)
@@ -1775,6 +1816,7 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
 	default:
 		goto out;
 	}
+	ublk_prep_cancel(cmd, issue_flags, ubq, tag);
 	return -EIOCBQUEUED;

 out:
@@ -1814,7 +1856,8 @@ fail_put:
 	return NULL;
 }

-static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
+static inline int ublk_ch_uring_cmd_local(struct io_uring_cmd *cmd,
+		unsigned int issue_flags)
 {
 	/*
 	 * Not necessary for async retry, but let's keep it simple and always
@@ -1828,9 +1871,33 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
 		.addr = READ_ONCE(ub_src->addr)
 	};

+	WARN_ON_ONCE(issue_flags & IO_URING_F_UNLOCKED);
+
 	return __ublk_ch_uring_cmd(cmd, issue_flags, &ub_cmd);
 }

+static void ublk_ch_uring_cmd_cb(struct io_uring_cmd *cmd,
+		unsigned int issue_flags)
+{
+	ublk_ch_uring_cmd_local(cmd, issue_flags);
+}
+
+static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+	if (unlikely(issue_flags & IO_URING_F_CANCEL)) {
+		ublk_uring_cmd_cancel_fn(cmd, issue_flags);
+		return 0;
+	}
+
+	/* well-implemented server won't run into unlocked */
+	if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
+		io_uring_cmd_complete_in_task(cmd, ublk_ch_uring_cmd_cb);
+		return -EIOCBQUEUED;
+	}
+
+	return ublk_ch_uring_cmd_local(cmd, issue_flags);
+}
+
 static inline bool ublk_check_ubuf_dir(const struct request *req,
 		int ubuf_dir)
 {
@@ -1962,6 +2029,7 @@ static int ublk_init_queue(struct ublk_device *ub, int q_id)
 	void *ptr;
 	int size;

+	spin_lock_init(&ubq->cancel_lock);
 	ubq->flags = ub->dev_info.flags;
 	ubq->q_id = q_id;
 	ubq->q_depth = ub->dev_info.queue_depth;
@@ -2026,7 +2094,8 @@ static int ublk_alloc_dev_number(struct ublk_device *ub, int idx)
 		if (err == -ENOSPC)
 			err = -EEXIST;
 	} else {
-		err = idr_alloc(&ublk_index_idr, ub, 0, 0, GFP_NOWAIT);
+		err = idr_alloc(&ublk_index_idr, ub, 0, UBLK_MAX_UBLKS,
+				GFP_NOWAIT);
 	}
 	spin_unlock(&ublk_idr_lock);

@@ -2151,8 +2220,6 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
 	if (wait_for_completion_interruptible(&ub->completion) != 0)
 		return -EINTR;

-	schedule_delayed_work(&ub->monitor_work, UBLK_DAEMON_MONITOR_PERIOD);
-
 	mutex_lock(&ub->mutex);
 	if (ub->dev_info.state == UBLK_S_DEV_LIVE ||
	    test_bit(UB_STATE_USED, &ub->state)) {
@@ -2305,6 +2372,12 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
 		return -EINVAL;
 	}

+	if (header->dev_id != U32_MAX && header->dev_id >= UBLK_MAX_UBLKS) {
+		pr_warn("%s: dev id is too large. Max supported is %d\n",
+			__func__, UBLK_MAX_UBLKS - 1);
+		return -EINVAL;
+	}
+
 	ublk_dump_dev_info(&info);

 	ret = mutex_lock_killable(&ublk_ctl_mutex);
@@ -2320,10 +2393,9 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
 	if (!ub)
 		goto out_unlock;
 	mutex_init(&ub->mutex);
-	spin_lock_init(&ub->mm_lock);
+	spin_lock_init(&ub->lock);
 	INIT_WORK(&ub->quiesce_work, ublk_quiesce_work_fn);
 	INIT_WORK(&ub->stop_work, ublk_stop_work_fn);
-	INIT_DELAYED_WORK(&ub->monitor_work, ublk_daemon_monitor_work);

 	ret = ublk_alloc_dev_number(ub, header->dev_id);
 	if (ret < 0)
@@ -2569,13 +2641,15 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
 	int i;

 	WARN_ON_ONCE(!(ubq->ubq_daemon && ubq_daemon_is_dying(ubq)));
+
 	/* All old ioucmds have to be completed */
-	WARN_ON_ONCE(ubq->nr_io_ready);
+	ubq->nr_io_ready = 0;
 	/* old daemon is PF_EXITING, put it now */
 	put_task_struct(ubq->ubq_daemon);
 	/* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */
 	ubq->ubq_daemon = NULL;
 	ubq->timeout = false;
+	ubq->canceling = false;

 	for (i = 0; i < ubq->q_depth; i++) {
 		struct ublk_io *io = &ubq->ios[i];
@@ -2661,7 +2735,6 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub,
			__func__, header->dev_id);
 	blk_mq_kick_requeue_list(ub->ub_disk->queue);
 	ub->dev_info.state = UBLK_S_DEV_LIVE;
-	schedule_delayed_work(&ub->monitor_work, UBLK_DAEMON_MONITOR_PERIOD);
 	ret = 0;
 out_unlock:
 	mutex_unlock(&ub->mutex);
@@ -2932,7 +3005,22 @@ static void __exit ublk_exit(void)
 module_init(ublk_init);
 module_exit(ublk_exit);

-module_param(ublks_max, int, 0444);
+static int ublk_set_max_ublks(const char *buf, const struct kernel_param *kp)
+{
+	return param_set_uint_minmax(buf, kp, 0, UBLK_MAX_UBLKS);
+}
+
+static int ublk_get_max_ublks(char *buf, const struct kernel_param *kp)
+{
+	return sysfs_emit(buf, "%u\n", ublks_max);
+}
+
+static const struct kernel_param_ops ublk_max_ublks_ops = {
+	.set = ublk_set_max_ublks,
+	.get = ublk_get_max_ublks,
+};
+
+module_param_cb(ublks_max, &ublk_max_ublks_ops, &ublks_max, 0644);
 MODULE_PARM_DESC(ublks_max, "max number of ublk devices allowed to add(default: 64)");

 MODULE_AUTHOR("Ming Lei <ming.lei@redhat.com>");
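The ublks_max change at the tail of the ublk diff replaces a read-only module_param() with module_param_cb() plus custom kernel_param_ops, so the value becomes runtime-writable (mode 0644) and writes are bounds-checked by param_set_uint_minmax(). The same pattern in isolation, with hypothetical names (my_limit, MY_LIMIT_MAX):

#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/sysfs.h>

#define MY_LIMIT_MAX 1024       /* hypothetical upper bound */

static unsigned int my_limit = 64;

static int my_limit_set(const char *buf, const struct kernel_param *kp)
{
        /* rejects values outside [0, MY_LIMIT_MAX] before storing */
        return param_set_uint_minmax(buf, kp, 0, MY_LIMIT_MAX);
}

static int my_limit_get(char *buf, const struct kernel_param *kp)
{
        return sysfs_emit(buf, "%u\n", my_limit);
}

static const struct kernel_param_ops my_limit_ops = {
        .set = my_limit_set,
        .get = my_limit_get,
};

/* 0644: readable by all, writable by root via /sys/module/.../parameters */
module_param_cb(my_limit, &my_limit_ops, &my_limit, 0644);
MODULE_PARM_DESC(my_limit, "bounded, runtime-tunable limit (example)");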
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 225c86c74d..2c846eed5a 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -470,8 +470,6 @@ static bool virtblk_prep_rq_batch(struct request *req)
 	struct virtio_blk *vblk = req->mq_hctx->queue->queuedata;
 	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);

-	req->mq_hctx->tags->rqs[req->tag] = req;
-
 	return virtblk_prep_rq(req->mq_hctx, vblk, req, vbr) == BLK_STS_OK;
 }

@@ -1629,14 +1627,15 @@ static int virtblk_freeze(struct virtio_device *vdev)
 {
 	struct virtio_blk *vblk = vdev->priv;

+	/* Ensure no requests in virtqueues before deleting vqs. */
+	blk_mq_freeze_queue(vblk->disk->queue);
+
 	/* Ensure we don't receive any more interrupts */
 	virtio_reset_device(vdev);

 	/* Make sure no work handler is accessing the device. */
 	flush_work(&vblk->config_work);

-	blk_mq_quiesce_queue(vblk->disk->queue);
-
 	vdev->config->del_vqs(vdev);
 	kfree(vblk->vqs);

@@ -1654,7 +1653,7 @@ static int virtblk_restore(struct virtio_device *vdev)

 	virtio_device_ready(vdev);

-	blk_mq_unquiesce_queue(vblk->disk->queue);
+	blk_mq_unfreeze_queue(vblk->disk->queue);
 	return 0;
 }
 #endif
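One point worth drawing out of the virtblk_freeze() hunk above: blk_mq_quiesce_queue() only stops new dispatch, while blk_mq_freeze_queue() also waits until every in-flight request has completed, which is the guarantee needed before del_vqs() tears down the virtqueues. A hedged sketch of the freeze/unfreeze pairing in a hypothetical driver (my_dev and the hardware hooks are placeholders):

#include <linux/blk-mq.h>

struct my_dev {
        struct request_queue *queue;
};

/* hypothetical hardware teardown/setup hooks */
static void my_teardown_hw(struct my_dev *dev) { }
static int my_setup_hw(struct my_dev *dev) { return 0; }

static int my_suspend(struct my_dev *dev)
{
        /*
         * Freeze blocks new I/O *and* drains requests already handed to
         * the hardware; quiesce alone would leave requests in flight.
         */
        blk_mq_freeze_queue(dev->queue);
        my_teardown_hw(dev);
        return 0;
}

static int my_resume(struct my_dev *dev)
{
        int ret = my_setup_hw(dev);

        if (ret)
                return ret;
        blk_mq_unfreeze_queue(dev->queue);      /* let I/O flow again */
        return 0;
}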
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index c362f4ad80..4defd7f387 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -465,7 +465,7 @@ static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif,
 	}

 	req->dev  = vbd->pdevice;
-	req->bdev = vbd->bdev;
+	req->bdev = vbd->bdev_handle->bdev;
 	rc = 0;

 out:
@@ -969,7 +969,7 @@ static int dispatch_discard_io(struct xen_blkif_ring *ring,
 	int err = 0;
 	int status = BLKIF_RSP_OKAY;
 	struct xen_blkif *blkif = ring->blkif;
-	struct block_device *bdev = blkif->vbd.bdev;
+	struct block_device *bdev = blkif->vbd.bdev_handle->bdev;
 	struct phys_req preq;

 	xen_blkif_get(blkif);
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 40f67bfc05..5ff50e76ce 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -221,7 +221,7 @@ struct xen_vbd {
 	unsigned char	type;
 	/* phys device that this vbd maps to. */
 	u32		pdevice;
-	struct block_device *bdev;
+	struct bdev_handle *bdev_handle;
 	/* Cached size parameter. */
 	sector_t	size;
 	unsigned int	flush_support:1;
@@ -360,7 +360,7 @@ struct pending_req {
 };

-#define vbd_sz(_v)	bdev_nr_sectors((_v)->bdev)
+#define vbd_sz(_v)	bdev_nr_sectors((_v)->bdev_handle->bdev)

 #define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt))
 #define xen_blkif_put(_b)				\
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index bb66178c43..e34219ea2b 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -81,7 +81,7 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
 	int i;

 	/* Not ready to connect? */
-	if (!blkif->rings || !blkif->rings[0].irq || !blkif->vbd.bdev)
+	if (!blkif->rings || !blkif->rings[0].irq || !blkif->vbd.bdev_handle)
 		return;

 	/* Already connected? */
@@ -99,12 +99,13 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
 		return;
 	}

-	err = sync_blockdev(blkif->vbd.bdev);
+	err = sync_blockdev(blkif->vbd.bdev_handle->bdev);
 	if (err) {
 		xenbus_dev_error(blkif->be->dev, err, "block flush");
 		return;
 	}
-	invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);
+	invalidate_inode_pages2(
+			blkif->vbd.bdev_handle->bdev->bd_inode->i_mapping);

 	for (i = 0; i < blkif->nr_rings; i++) {
 		ring = &blkif->rings[i];
@@ -472,9 +473,9 @@ static void xenvbd_sysfs_delif(struct xenbus_device *dev)

 static void xen_vbd_free(struct xen_vbd *vbd)
 {
-	if (vbd->bdev)
-		blkdev_put(vbd->bdev, NULL);
-	vbd->bdev = NULL;
+	if (vbd->bdev_handle)
+		bdev_release(vbd->bdev_handle);
+	vbd->bdev_handle = NULL;
 }

 static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
@@ -482,7 +483,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
			  int cdrom)
 {
 	struct xen_vbd *vbd;
-	struct block_device *bdev;
+	struct bdev_handle *bdev_handle;

 	vbd = &blkif->vbd;
 	vbd->handle   = handle;
@@ -491,17 +492,17 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,

 	vbd->pdevice  = MKDEV(major, minor);

-	bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ?
+	bdev_handle = bdev_open_by_dev(vbd->pdevice, vbd->readonly ?
				 BLK_OPEN_READ : BLK_OPEN_WRITE, NULL, NULL);

-	if (IS_ERR(bdev)) {
+	if (IS_ERR(bdev_handle)) {
 		pr_warn("xen_vbd_create: device %08x could not be opened\n",
			vbd->pdevice);
 		return -ENOENT;
 	}

-	vbd->bdev = bdev;
-	if (vbd->bdev->bd_disk == NULL) {
+	vbd->bdev_handle = bdev_handle;
+	if (vbd->bdev_handle->bdev->bd_disk == NULL) {
 		pr_warn("xen_vbd_create: device %08x doesn't exist\n",
			vbd->pdevice);
 		xen_vbd_free(vbd);
@@ -509,14 +510,14 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
 	}
 	vbd->size = vbd_sz(vbd);

-	if (cdrom || disk_to_cdi(vbd->bdev->bd_disk))
+	if (cdrom || disk_to_cdi(vbd->bdev_handle->bdev->bd_disk))
 		vbd->type |= VDISK_CDROM;
-	if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
+	if (vbd->bdev_handle->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
 		vbd->type |= VDISK_REMOVABLE;

-	if (bdev_write_cache(bdev))
+	if (bdev_write_cache(bdev_handle->bdev))
 		vbd->flush_support = true;
-	if (bdev_max_secure_erase_sectors(bdev))
+	if (bdev_max_secure_erase_sectors(bdev_handle->bdev))
 		vbd->discard_secure = true;

 	pr_debug("Successful creation of handle=%04x (dom=%u)\n",
@@ -569,7 +570,7 @@ static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
 	struct xen_blkif *blkif = be->blkif;
 	int err;
 	int state = 0;
-	struct block_device *bdev = be->blkif->vbd.bdev;
+	struct block_device *bdev = be->blkif->vbd.bdev_handle->bdev;

 	if (!xenbus_read_unsigned(dev->nodename, "discard-enable", 1))
 		return;
@@ -930,15 +931,16 @@ again:
 		goto abort;
 	}
 	err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu",
-			    (unsigned long)
-			    bdev_logical_block_size(be->blkif->vbd.bdev));
+			    (unsigned long)bdev_logical_block_size(
+					be->blkif->vbd.bdev_handle->bdev));
 	if (err) {
 		xenbus_dev_fatal(dev, err, "writing %s/sector-size",
				 dev->nodename);
 		goto abort;
 	}
 	err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u",
-			    bdev_physical_block_size(be->blkif->vbd.bdev));
+			    bdev_physical_block_size(
+					be->blkif->vbd.bdev_handle->bdev));
 	if (err)
 		xenbus_dev_error(dev, err, "writing %s/physical-sector-size",
				 dev->nodename);
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 06673c6ca2..d77d3664ca 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -414,17 +414,14 @@ static ssize_t writeback_limit_show(struct device *dev,

 static void reset_bdev(struct zram *zram)
 {
-	struct block_device *bdev;
-
 	if (!zram->backing_dev)
 		return;

-	bdev = zram->bdev;
-	blkdev_put(bdev, zram);
+	bdev_release(zram->bdev_handle);
 	/* hope filp_close flush all of IO */
 	filp_close(zram->backing_dev, NULL);
 	zram->backing_dev = NULL;
-	zram->bdev = NULL;
+	zram->bdev_handle = NULL;
 	zram->disk->fops = &zram_devops;
 	kvfree(zram->bitmap);
 	zram->bitmap = NULL;
@@ -470,7 +467,7 @@ static ssize_t backing_dev_store(struct device *dev,
 	struct address_space *mapping;
 	unsigned int bitmap_sz;
 	unsigned long nr_pages, *bitmap = NULL;
-	struct block_device *bdev = NULL;
+	struct bdev_handle *bdev_handle = NULL;
 	int err;
 	struct zram *zram = dev_to_zram(dev);

@@ -507,11 +504,11 @@ static ssize_t backing_dev_store(struct device *dev,
 		goto out;
 	}

-	bdev = blkdev_get_by_dev(inode->i_rdev, BLK_OPEN_READ | BLK_OPEN_WRITE,
-				 zram, NULL);
-	if (IS_ERR(bdev)) {
-		err = PTR_ERR(bdev);
-		bdev = NULL;
+	bdev_handle = bdev_open_by_dev(inode->i_rdev,
+				BLK_OPEN_READ | BLK_OPEN_WRITE, zram, NULL);
+	if (IS_ERR(bdev_handle)) {
+		err = PTR_ERR(bdev_handle);
+		bdev_handle = NULL;
 		goto out;
 	}

@@ -525,7 +522,7 @@ static ssize_t backing_dev_store(struct device *dev,
 	reset_bdev(zram);

-	zram->bdev = bdev;
+	zram->bdev_handle = bdev_handle;
 	zram->backing_dev = backing_dev;
 	zram->bitmap = bitmap;
 	zram->nr_pages = nr_pages;
@@ -538,8 +535,8 @@ static ssize_t backing_dev_store(struct device *dev,
 out:
 	kvfree(bitmap);

-	if (bdev)
-		blkdev_put(bdev, zram);
+	if (bdev_handle)
+		bdev_release(bdev_handle);

 	if (backing_dev)
 		filp_close(backing_dev, NULL);
@@ -581,7 +578,7 @@ static void read_from_bdev_async(struct zram *zram, struct page *page,
 {
 	struct bio *bio;

-	bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO);
+	bio = bio_alloc(zram->bdev_handle->bdev, 1, parent->bi_opf, GFP_NOIO);
 	bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
 	__bio_add_page(bio, page, PAGE_SIZE, 0);
 	bio_chain(bio, parent);
@@ -697,7 +694,7 @@ static ssize_t writeback_store(struct device *dev,
 			continue;
 		}

-		bio_init(&bio, zram->bdev, &bio_vec, 1,
+		bio_init(&bio, zram->bdev_handle->bdev, &bio_vec, 1,
			 REQ_OP_WRITE | REQ_SYNC);
 		bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
 		__bio_add_page(&bio, page, PAGE_SIZE, 0);
@@ -779,7 +776,7 @@ static void zram_sync_read(struct work_struct *work)
 	struct bio_vec bv;
 	struct bio bio;

-	bio_init(&bio, zw->zram->bdev, &bv, 1, REQ_OP_READ);
+	bio_init(&bio, zw->zram->bdev_handle->bdev, &bv, 1, REQ_OP_READ);
 	bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9);
 	__bio_add_page(&bio, zw->page, PAGE_SIZE, 0);
 	zw->error = submit_bio_wait(&bio);
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index ca7a15bd48..d090753f97 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -132,7 +132,7 @@ struct zram {
 	spinlock_t wb_limit_lock;
 	bool wb_limit_enable;
 	u64 bd_wb_limit;
-	struct block_device *bdev;
+	struct bdev_handle *bdev_handle;
 	unsigned long *bitmap;
 	unsigned long nr_pages;
 #endif