Diffstat (limited to 'drivers/block')
-rw-r--r--  drivers/block/aoe/aoeblk.c           |   5
-rw-r--r--  drivers/block/aoe/aoenet.c           |   3
-rw-r--r--  drivers/block/ataflop.c              |   4
-rw-r--r--  drivers/block/drbd/drbd_int.h        |   4
-rw-r--r--  drivers/block/drbd/drbd_nl.c         |  65
-rw-r--r--  drivers/block/floppy.c               |   4
-rw-r--r--  drivers/block/null_blk/main.c        |  47
-rw-r--r--  drivers/block/pktcdvd.c              |  76
-rw-r--r--  drivers/block/rnbd/rnbd-srv.c        |  46
-rw-r--r--  drivers/block/rnbd/rnbd-srv.h        |   2
-rw-r--r--  drivers/block/ublk_drv.c             | 342
-rw-r--r--  drivers/block/virtio_blk.c           |   9
-rw-r--r--  drivers/block/xen-blkback/blkback.c  |   4
-rw-r--r--  drivers/block/xen-blkback/common.h   |   4
-rw-r--r--  drivers/block/xen-blkback/xenbus.c   |  40
-rw-r--r--  drivers/block/zram/zram_drv.c        |  31
-rw-r--r--  drivers/block/zram/zram_drv.h        |   2
17 files changed, 405 insertions, 283 deletions
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index cf68837561..37eff1c974 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -333,6 +333,7 @@ aoeblk_gdalloc(void *vp)
struct gendisk *gd;
mempool_t *mp;
struct blk_mq_tag_set *set;
+ sector_t ssize;
ulong flags;
int late = 0;
int err;
@@ -395,7 +396,7 @@ aoeblk_gdalloc(void *vp)
gd->minors = AOE_PARTITIONS;
gd->fops = &aoe_bdops;
gd->private_data = d;
- set_capacity(gd, d->ssize);
+ ssize = d->ssize;
snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d",
d->aoemajor, d->aoeminor);
@@ -404,6 +405,8 @@ aoeblk_gdalloc(void *vp)
spin_unlock_irqrestore(&d->lock, flags);
+ set_capacity(gd, ssize);
+
err = device_add_disk(NULL, gd, aoe_attr_groups);
if (err)
goto out_disk_cleanup;
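
Note: the two hunks above move set_capacity() out of the d->lock critical
section. A minimal sketch of the pattern, assuming an aoedev-like structure
(the helper name is hypothetical): snapshot the size while holding the
irq-disabled spinlock, then update the gendisk after unlocking, since
set_capacity() takes other locks internally and must not nest under d->lock.

static void aoe_update_capacity_sketch(struct aoedev *d, struct gendisk *gd)
{
        sector_t ssize;
        ulong flags;

        spin_lock_irqsave(&d->lock, flags);
        ssize = d->ssize;               /* copy what we need under the lock */
        spin_unlock_irqrestore(&d->lock, flags);

        set_capacity(gd, ssize);        /* safe: no spinlock held anymore */
}
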
diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c
index 63773a9058..c51ea95bc2 100644
--- a/drivers/block/aoe/aoenet.c
+++ b/drivers/block/aoe/aoenet.c
@@ -39,8 +39,7 @@ static struct ktstate kts;
#ifndef MODULE
static int __init aoe_iflist_setup(char *str)
{
- strncpy(aoe_iflist, str, IFLISTSZ);
- aoe_iflist[IFLISTSZ - 1] = '\0';
+ strscpy(aoe_iflist, str, IFLISTSZ);
return 1;
}
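
A sketch of why the manual terminator write becomes unnecessary: unlike
strncpy(), strscpy() always NUL-terminates and reports truncation, so the
caller can act on it (the buffer name, its size, and user_str below are
illustrative):

        char ifbuf[16];                 /* illustrative size */
        ssize_t n = strscpy(ifbuf, user_str, sizeof(ifbuf));

        if (n == -E2BIG)
                pr_warn("input truncated to %zu bytes\n", sizeof(ifbuf) - 1);
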
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index cd738cab72..5094920779 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -1760,8 +1760,10 @@ static int fd_locked_ioctl(struct block_device *bdev, blk_mode_t mode,
/* invalidate the buffer track to force a reread */
BufferDrive = -1;
set_bit(drive, &fake_change);
- if (disk_check_media_change(disk))
+ if (disk_check_media_change(disk)) {
+ bdev_mark_dead(disk->part0, true);
floppy_revalidate(disk);
+ }
return 0;
default:
return -EINVAL;
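
The same two-step flow is applied to floppy.c further below. A sketch of the
idea, assuming a driver-specific revalidate helper: once a media change is
detected, the old medium's caches and holders are invalidated via
bdev_mark_dead() before the new medium is read.

        if (disk_check_media_change(disk)) {
                /* surprise-removal semantics: flush and invalidate old media */
                bdev_mark_dead(disk->part0, true);
                my_revalidate(disk);    /* hypothetical reread of new media */
        }
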
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index a30a5ed811..c21e373275 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -524,7 +524,9 @@ struct drbd_md {
struct drbd_backing_dev {
struct block_device *backing_bdev;
+ struct bdev_handle *backing_bdev_handle;
struct block_device *md_bdev;
+ struct bdev_handle *md_bdev_handle;
struct drbd_md md;
struct disk_conf *disk_conf; /* RCU, for updates: resource->conf_update */
sector_t known_size; /* last known size of that backing device */
@@ -553,7 +555,7 @@ struct fifo_buffer {
unsigned int head_index;
unsigned int size;
int total; /* sum of all values */
- int values[];
+ int values[] __counted_by(size);
};
extern struct fifo_buffer *fifo_alloc(unsigned int fifo_size);
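
A sketch of what __counted_by(size) buys, assuming a fifo_alloc()-style
allocator (the function name below is illustrative, not drbd's): the
annotation lets FORTIFY_SOURCE and UBSAN bounds-check values[] against the
runtime size field, so the counter must be set before the array is indexed.

struct fifo_buffer *fifo_alloc_sketch(unsigned int fifo_size)
{
        struct fifo_buffer *fb;

        fb = kzalloc(struct_size(fb, values, fifo_size), GFP_KERNEL);
        if (!fb)
                return NULL;
        fb->size = fifo_size;   /* set the counter before touching values[] */
        return fb;
}
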
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index d3538bd83f..43747a1aae 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -82,7 +82,7 @@ static atomic_t notify_genl_seq = ATOMIC_INIT(2); /* two. */
DEFINE_MUTEX(notification_mutex);
-/* used blkdev_get_by_path, to claim our meta data device(s) */
+/* used bdev_open_by_path, to claim our meta data device(s) */
static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
@@ -1635,43 +1635,45 @@ success:
return 0;
}
-static struct block_device *open_backing_dev(struct drbd_device *device,
+static struct bdev_handle *open_backing_dev(struct drbd_device *device,
const char *bdev_path, void *claim_ptr, bool do_bd_link)
{
- struct block_device *bdev;
+ struct bdev_handle *handle;
int err = 0;
- bdev = blkdev_get_by_path(bdev_path, BLK_OPEN_READ | BLK_OPEN_WRITE,
- claim_ptr, NULL);
- if (IS_ERR(bdev)) {
+ handle = bdev_open_by_path(bdev_path, BLK_OPEN_READ | BLK_OPEN_WRITE,
+ claim_ptr, NULL);
+ if (IS_ERR(handle)) {
drbd_err(device, "open(\"%s\") failed with %ld\n",
- bdev_path, PTR_ERR(bdev));
- return bdev;
+ bdev_path, PTR_ERR(handle));
+ return handle;
}
if (!do_bd_link)
- return bdev;
+ return handle;
- err = bd_link_disk_holder(bdev, device->vdisk);
+ err = bd_link_disk_holder(handle->bdev, device->vdisk);
if (err) {
- blkdev_put(bdev, claim_ptr);
+ bdev_release(handle);
drbd_err(device, "bd_link_disk_holder(\"%s\", ...) failed with %d\n",
bdev_path, err);
- bdev = ERR_PTR(err);
+ handle = ERR_PTR(err);
}
- return bdev;
+ return handle;
}
static int open_backing_devices(struct drbd_device *device,
struct disk_conf *new_disk_conf,
struct drbd_backing_dev *nbc)
{
- struct block_device *bdev;
+ struct bdev_handle *handle;
- bdev = open_backing_dev(device, new_disk_conf->backing_dev, device, true);
- if (IS_ERR(bdev))
+ handle = open_backing_dev(device, new_disk_conf->backing_dev, device,
+ true);
+ if (IS_ERR(handle))
return ERR_OPEN_DISK;
- nbc->backing_bdev = bdev;
+ nbc->backing_bdev = handle->bdev;
+ nbc->backing_bdev_handle = handle;
/*
* meta_dev_idx >= 0: external fixed size, possibly multiple
@@ -1681,7 +1683,7 @@ static int open_backing_devices(struct drbd_device *device,
* should check it for you already; but if you don't, or
* someone fooled it, we need to double check here)
*/
- bdev = open_backing_dev(device, new_disk_conf->meta_dev,
+ handle = open_backing_dev(device, new_disk_conf->meta_dev,
/* claim ptr: device, if claimed exclusively; shared drbd_m_holder,
* if potentially shared with other drbd minors */
(new_disk_conf->meta_dev_idx < 0) ? (void*)device : (void*)drbd_m_holder,
@@ -1689,20 +1691,21 @@ static int open_backing_devices(struct drbd_device *device,
* as would happen with internal metadata. */
(new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_FLEX_INT &&
new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_INTERNAL));
- if (IS_ERR(bdev))
+ if (IS_ERR(handle))
return ERR_OPEN_MD_DISK;
- nbc->md_bdev = bdev;
+ nbc->md_bdev = handle->bdev;
+ nbc->md_bdev_handle = handle;
return NO_ERROR;
}
-static void close_backing_dev(struct drbd_device *device, struct block_device *bdev,
- void *claim_ptr, bool do_bd_unlink)
+static void close_backing_dev(struct drbd_device *device,
+ struct bdev_handle *handle, bool do_bd_unlink)
{
- if (!bdev)
+ if (!handle)
return;
if (do_bd_unlink)
- bd_unlink_disk_holder(bdev, device->vdisk);
- blkdev_put(bdev, claim_ptr);
+ bd_unlink_disk_holder(handle->bdev, device->vdisk);
+ bdev_release(handle);
}
void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev)
@@ -1710,11 +1713,9 @@ void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *
if (ldev == NULL)
return;
- close_backing_dev(device, ldev->md_bdev,
- ldev->md.meta_dev_idx < 0 ?
- (void *)device : (void *)drbd_m_holder,
+ close_backing_dev(device, ldev->md_bdev_handle,
ldev->md_bdev != ldev->backing_bdev);
- close_backing_dev(device, ldev->backing_bdev, device, true);
+ close_backing_dev(device, ldev->backing_bdev_handle, true);
kfree(ldev->disk_conf);
kfree(ldev);
@@ -2130,11 +2131,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
fail:
conn_reconfig_done(connection);
if (nbc) {
- close_backing_dev(device, nbc->md_bdev,
- nbc->disk_conf->meta_dev_idx < 0 ?
- (void *)device : (void *)drbd_m_holder,
+ close_backing_dev(device, nbc->md_bdev_handle,
nbc->md_bdev != nbc->backing_bdev);
- close_backing_dev(device, nbc->backing_bdev, device, true);
+ close_backing_dev(device, nbc->backing_bdev_handle, true);
kfree(nbc);
}
kfree(new_disk_conf);
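
The conversion above follows the 6.7-era bdev_handle API: the handle returned
by bdev_open_by_path() remembers its holder and open mode, so releasing it no
longer needs the claim pointer that blkdev_put() took. A minimal sketch (the
path, holder, and do_io() below are placeholders):

        struct bdev_handle *h;

        h = bdev_open_by_path("/dev/example", BLK_OPEN_READ | BLK_OPEN_WRITE,
                              holder, NULL);
        if (IS_ERR(h))
                return PTR_ERR(h);

        do_io(h->bdev);         /* I/O goes through the embedded block_device */
        bdev_release(h);        /* drops the claim recorded in the handle */
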
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index ea4eb88a2e..11114a5d9e 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -3215,8 +3215,10 @@ static int invalidate_drive(struct gendisk *disk)
/* invalidate the buffer track to force a reread */
set_bit((long)disk->private_data, &fake_change);
process_fd_request();
- if (disk_check_media_change(disk))
+ if (disk_check_media_change(disk)) {
+ bdev_mark_dead(disk->part0, true);
floppy_revalidate(disk);
+ }
return 0;
}
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 9544746de1..13ed446b5e 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1464,19 +1464,13 @@ blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op,
return BLK_STS_OK;
}
-static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
- sector_t nr_sectors, enum req_op op)
+static void null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
+ sector_t nr_sectors, enum req_op op)
{
struct nullb_device *dev = cmd->nq->dev;
struct nullb *nullb = dev->nullb;
blk_status_t sts;
- if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) {
- sts = null_handle_throttled(cmd);
- if (sts != BLK_STS_OK)
- return sts;
- }
-
if (op == REQ_OP_FLUSH) {
cmd->error = errno_to_blk_status(null_handle_flush(nullb));
goto out;
@@ -1493,7 +1487,6 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
out:
nullb_complete_cmd(cmd);
- return BLK_STS_OK;
}
static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer)
@@ -1724,8 +1717,6 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
cmd->fake_timeout = should_timeout_request(rq) ||
blk_should_fake_timeout(rq->q);
- blk_mq_start_request(rq);
-
if (should_requeue_request(rq)) {
/*
* Alternate between hitting the core BUSY path, and the
@@ -1738,6 +1729,15 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
return BLK_STS_OK;
}
+ if (test_bit(NULLB_DEV_FL_THROTTLED, &nq->dev->flags)) {
+ blk_status_t sts = null_handle_throttled(cmd);
+
+ if (sts != BLK_STS_OK)
+ return sts;
+ }
+
+ blk_mq_start_request(rq);
+
if (is_poll) {
spin_lock(&nq->poll_lock);
list_add_tail(&rq->queuelist, &nq->poll_list);
@@ -1747,7 +1747,27 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
if (cmd->fake_timeout)
return BLK_STS_OK;
- return null_handle_cmd(cmd, sector, nr_sectors, req_op(rq));
+ null_handle_cmd(cmd, sector, nr_sectors, req_op(rq));
+ return BLK_STS_OK;
+}
+
+static void null_queue_rqs(struct request **rqlist)
+{
+ struct request *requeue_list = NULL;
+ struct request **requeue_lastp = &requeue_list;
+ struct blk_mq_queue_data bd = { };
+ blk_status_t ret;
+
+ do {
+ struct request *rq = rq_list_pop(rqlist);
+
+ bd.rq = rq;
+ ret = null_queue_rq(rq->mq_hctx, &bd);
+ if (ret != BLK_STS_OK)
+ rq_list_add_tail(&requeue_lastp, rq);
+ } while (!rq_list_empty(*rqlist));
+
+ *rqlist = requeue_list;
}
static void cleanup_queue(struct nullb_queue *nq)
@@ -1802,6 +1822,7 @@ static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
static const struct blk_mq_ops null_mq_ops = {
.queue_rq = null_queue_rq,
+ .queue_rqs = null_queue_rqs,
.complete = null_complete_rq,
.timeout = null_timeout_rq,
.poll = null_poll,
@@ -1946,7 +1967,7 @@ static int null_gendisk_register(struct nullb *nullb)
else
disk->fops = &null_bio_ops;
disk->private_data = nullb;
- strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
+ strscpy_pad(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
if (nullb->dev->zoned) {
int ret = null_register_zoned_dev(nullb);
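
null_queue_rq() above also gains a stricter ordering rule, required for the
->queue_rqs() path: anything that may requeue or push back a request must
happen before blk_mq_start_request(). A sketch of that shape (my_* names are
hypothetical):

static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
                                const struct blk_mq_queue_data *bd)
{
        struct request *rq = bd->rq;

        if (my_should_throttle(rq))             /* hypothetical back-pressure check */
                return BLK_STS_DEV_RESOURCE;    /* core retries; rq never started */

        blk_mq_start_request(rq);               /* point of no return */
        my_submit(rq);                          /* hypothetical submission */
        return BLK_STS_OK;
}
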
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index a1428538bd..d56d972aad 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -340,8 +340,8 @@ static ssize_t device_map_show(const struct class *c, const struct class_attribu
n += sysfs_emit_at(data, n, "%s %u:%u %u:%u\n",
pd->disk->disk_name,
MAJOR(pd->pkt_dev), MINOR(pd->pkt_dev),
- MAJOR(pd->bdev->bd_dev),
- MINOR(pd->bdev->bd_dev));
+ MAJOR(pd->bdev_handle->bdev->bd_dev),
+ MINOR(pd->bdev_handle->bdev->bd_dev));
}
mutex_unlock(&ctl_mutex);
return n;
@@ -437,7 +437,8 @@ static int pkt_seq_show(struct seq_file *m, void *p)
char *msg;
int states[PACKET_NUM_STATES];
- seq_printf(m, "Writer %s mapped to %pg:\n", pd->disk->disk_name, pd->bdev);
+ seq_printf(m, "Writer %s mapped to %pg:\n", pd->disk->disk_name,
+ pd->bdev_handle->bdev);
seq_printf(m, "\nSettings:\n");
seq_printf(m, "\tpacket size:\t\t%dkB\n", pd->settings.size / 2);
@@ -714,7 +715,7 @@ static void pkt_rbtree_insert(struct pktcdvd_device *pd, struct pkt_rb_node *nod
*/
static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *cgc)
{
- struct request_queue *q = bdev_get_queue(pd->bdev);
+ struct request_queue *q = bdev_get_queue(pd->bdev_handle->bdev);
struct scsi_cmnd *scmd;
struct request *rq;
int ret = 0;
@@ -1047,7 +1048,8 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
continue;
bio = pkt->r_bios[f];
- bio_init(bio, pd->bdev, bio->bi_inline_vecs, 1, REQ_OP_READ);
+ bio_init(bio, pd->bdev_handle->bdev, bio->bi_inline_vecs, 1,
+ REQ_OP_READ);
bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
bio->bi_end_io = pkt_end_io_read;
bio->bi_private = pkt;
@@ -1262,8 +1264,8 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
struct device *ddev = disk_to_dev(pd->disk);
int f;
- bio_init(pkt->w_bio, pd->bdev, pkt->w_bio->bi_inline_vecs, pkt->frames,
- REQ_OP_WRITE);
+ bio_init(pkt->w_bio, pd->bdev_handle->bdev, pkt->w_bio->bi_inline_vecs,
+ pkt->frames, REQ_OP_WRITE);
pkt->w_bio->bi_iter.bi_sector = pkt->sector;
pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
pkt->w_bio->bi_private = pkt;
@@ -2160,18 +2162,20 @@ static int pkt_open_dev(struct pktcdvd_device *pd, bool write)
int ret;
long lba;
struct request_queue *q;
- struct block_device *bdev;
+ struct bdev_handle *bdev_handle;
/*
* We need to re-open the cdrom device without O_NONBLOCK to be able
* to read/write from/to it. It is already opened in O_NONBLOCK mode
* so open should not fail.
*/
- bdev = blkdev_get_by_dev(pd->bdev->bd_dev, BLK_OPEN_READ, pd, NULL);
- if (IS_ERR(bdev)) {
- ret = PTR_ERR(bdev);
+ bdev_handle = bdev_open_by_dev(pd->bdev_handle->bdev->bd_dev,
+ BLK_OPEN_READ, pd, NULL);
+ if (IS_ERR(bdev_handle)) {
+ ret = PTR_ERR(bdev_handle);
goto out;
}
+ pd->open_bdev_handle = bdev_handle;
ret = pkt_get_last_written(pd, &lba);
if (ret) {
@@ -2180,9 +2184,9 @@ static int pkt_open_dev(struct pktcdvd_device *pd, bool write)
}
set_capacity(pd->disk, lba << 2);
- set_capacity_and_notify(pd->bdev->bd_disk, lba << 2);
+ set_capacity_and_notify(pd->bdev_handle->bdev->bd_disk, lba << 2);
- q = bdev_get_queue(pd->bdev);
+ q = bdev_get_queue(pd->bdev_handle->bdev);
if (write) {
ret = pkt_open_write(pd);
if (ret)
@@ -2214,7 +2218,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, bool write)
return 0;
out_putdev:
- blkdev_put(bdev, pd);
+ bdev_release(bdev_handle);
out:
return ret;
}
@@ -2233,7 +2237,8 @@ static void pkt_release_dev(struct pktcdvd_device *pd, int flush)
pkt_lock_door(pd, 0);
pkt_set_speed(pd, MAX_SPEED, MAX_SPEED);
- blkdev_put(pd->bdev, pd);
+ bdev_release(pd->open_bdev_handle);
+ pd->open_bdev_handle = NULL;
pkt_shrink_pktlist(pd);
}
@@ -2321,8 +2326,8 @@ static void pkt_end_io_read_cloned(struct bio *bio)
static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
{
- struct bio *cloned_bio =
- bio_alloc_clone(pd->bdev, bio, GFP_NOIO, &pkt_bio_set);
+ struct bio *cloned_bio = bio_alloc_clone(pd->bdev_handle->bdev, bio,
+ GFP_NOIO, &pkt_bio_set);
struct packet_stacked_data *psd = mempool_alloc(&psd_pool, GFP_NOIO);
psd->pd = pd;
@@ -2492,7 +2497,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
{
struct device *ddev = disk_to_dev(pd->disk);
int i;
- struct block_device *bdev;
+ struct bdev_handle *bdev_handle;
struct scsi_device *sdev;
if (pd->pkt_dev == dev) {
@@ -2503,8 +2508,9 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
struct pktcdvd_device *pd2 = pkt_devs[i];
if (!pd2)
continue;
- if (pd2->bdev->bd_dev == dev) {
- dev_err(ddev, "%pg already setup\n", pd2->bdev);
+ if (pd2->bdev_handle->bdev->bd_dev == dev) {
+ dev_err(ddev, "%pg already setup\n",
+ pd2->bdev_handle->bdev);
return -EBUSY;
}
if (pd2->pkt_dev == dev) {
@@ -2513,13 +2519,13 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
}
}
- bdev = blkdev_get_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_NDELAY, NULL,
- NULL);
- if (IS_ERR(bdev))
- return PTR_ERR(bdev);
- sdev = scsi_device_from_queue(bdev->bd_disk->queue);
+ bdev_handle = bdev_open_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_NDELAY,
+ NULL, NULL);
+ if (IS_ERR(bdev_handle))
+ return PTR_ERR(bdev_handle);
+ sdev = scsi_device_from_queue(bdev_handle->bdev->bd_disk->queue);
if (!sdev) {
- blkdev_put(bdev, NULL);
+ bdev_release(bdev_handle);
return -EINVAL;
}
put_device(&sdev->sdev_gendev);
@@ -2527,8 +2533,8 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
/* This is safe, since we have a reference from open(). */
__module_get(THIS_MODULE);
- pd->bdev = bdev;
- set_blocksize(bdev, CD_FRAMESIZE);
+ pd->bdev_handle = bdev_handle;
+ set_blocksize(bdev_handle->bdev, CD_FRAMESIZE);
pkt_init_queue(pd);
@@ -2540,11 +2546,11 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
}
proc_create_single_data(pd->disk->disk_name, 0, pkt_proc, pkt_seq_show, pd);
- dev_notice(ddev, "writer mapped to %pg\n", bdev);
+ dev_notice(ddev, "writer mapped to %pg\n", bdev_handle->bdev);
return 0;
out_mem:
- blkdev_put(bdev, NULL);
+ bdev_release(bdev_handle);
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
return -ENOMEM;
@@ -2599,9 +2605,9 @@ static unsigned int pkt_check_events(struct gendisk *disk,
if (!pd)
return 0;
- if (!pd->bdev)
+ if (!pd->bdev_handle)
return 0;
- attached_disk = pd->bdev->bd_disk;
+ attached_disk = pd->bdev_handle->bdev->bd_disk;
if (!attached_disk || !attached_disk->fops->check_events)
return 0;
return attached_disk->fops->check_events(attached_disk, clearing);
@@ -2686,7 +2692,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
goto out_mem2;
/* inherit events of the host device */
- disk->events = pd->bdev->bd_disk->events;
+ disk->events = pd->bdev_handle->bdev->bd_disk->events;
ret = add_disk(disk);
if (ret)
@@ -2751,7 +2757,7 @@ static int pkt_remove_dev(dev_t pkt_dev)
pkt_debugfs_dev_remove(pd);
pkt_sysfs_dev_remove(pd);
- blkdev_put(pd->bdev, NULL);
+ bdev_release(pd->bdev_handle);
remove_proc_entry(pd->disk->disk_name, pkt_proc);
dev_notice(ddev, "writer unmapped\n");
@@ -2778,7 +2784,7 @@ static void pkt_get_status(struct pkt_ctrl_command *ctrl_cmd)
pd = pkt_find_dev_from_minor(ctrl_cmd->dev_index);
if (pd) {
- ctrl_cmd->dev = new_encode_dev(pd->bdev->bd_dev);
+ ctrl_cmd->dev = new_encode_dev(pd->bdev_handle->bdev->bd_dev);
ctrl_cmd->pkt_dev = new_encode_dev(pd->pkt_dev);
} else {
ctrl_cmd->dev = 0;
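
pktcdvd uses the dev_t flavour of the same open API shown in the drbd hunks
above; a short sketch, with the NDELAY flag mirroring the driver's
O_NONBLOCK-style probe open:

        struct bdev_handle *h;

        h = bdev_open_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_NDELAY, NULL, NULL);
        if (IS_ERR(h))
                return PTR_ERR(h);
        /* use h->bdev for I/O; pair with bdev_release(h) when done */
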
diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c
index c186df0ec6..ab78eab97d 100644
--- a/drivers/block/rnbd/rnbd-srv.c
+++ b/drivers/block/rnbd/rnbd-srv.c
@@ -145,7 +145,7 @@ static int process_rdma(struct rnbd_srv_session *srv_sess,
priv->sess_dev = sess_dev;
priv->id = id;
- bio = bio_alloc(sess_dev->bdev, 1,
+ bio = bio_alloc(sess_dev->bdev_handle->bdev, 1,
rnbd_to_bio_flags(le32_to_cpu(msg->rw)), GFP_KERNEL);
if (bio_add_page(bio, virt_to_page(data), datalen,
offset_in_page(data)) != datalen) {
@@ -219,7 +219,7 @@ void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev, bool keep_id)
rnbd_put_sess_dev(sess_dev);
wait_for_completion(&dc); /* wait for inflights to drop to zero */
- blkdev_put(sess_dev->bdev, NULL);
+ bdev_release(sess_dev->bdev_handle);
mutex_lock(&sess_dev->dev->lock);
list_del(&sess_dev->dev_list);
if (!sess_dev->readonly)
@@ -534,7 +534,7 @@ rnbd_srv_get_or_create_srv_dev(struct block_device *bdev,
static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp,
struct rnbd_srv_sess_dev *sess_dev)
{
- struct block_device *bdev = sess_dev->bdev;
+ struct block_device *bdev = sess_dev->bdev_handle->bdev;
rsp->hdr.type = cpu_to_le16(RNBD_MSG_OPEN_RSP);
rsp->device_id = cpu_to_le32(sess_dev->device_id);
@@ -559,7 +559,7 @@ static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp,
static struct rnbd_srv_sess_dev *
rnbd_srv_create_set_sess_dev(struct rnbd_srv_session *srv_sess,
const struct rnbd_msg_open *open_msg,
- struct block_device *bdev, bool readonly,
+ struct bdev_handle *handle, bool readonly,
struct rnbd_srv_dev *srv_dev)
{
struct rnbd_srv_sess_dev *sdev = rnbd_sess_dev_alloc(srv_sess);
@@ -571,7 +571,7 @@ rnbd_srv_create_set_sess_dev(struct rnbd_srv_session *srv_sess,
strscpy(sdev->pathname, open_msg->dev_name, sizeof(sdev->pathname));
- sdev->bdev = bdev;
+ sdev->bdev_handle = handle;
sdev->sess = srv_sess;
sdev->dev = srv_dev;
sdev->readonly = readonly;
@@ -585,6 +585,7 @@ static char *rnbd_srv_get_full_path(struct rnbd_srv_session *srv_sess,
{
char *full_path;
char *a, *b;
+ int len;
full_path = kmalloc(PATH_MAX, GFP_KERNEL);
if (!full_path)
@@ -596,19 +597,19 @@ static char *rnbd_srv_get_full_path(struct rnbd_srv_session *srv_sess,
*/
a = strnstr(dev_search_path, "%SESSNAME%", sizeof(dev_search_path));
if (a) {
- int len = a - dev_search_path;
+ len = a - dev_search_path;
len = snprintf(full_path, PATH_MAX, "%.*s/%s/%s", len,
dev_search_path, srv_sess->sessname, dev_name);
- if (len >= PATH_MAX) {
- pr_err("Too long path: %s, %s, %s\n",
- dev_search_path, srv_sess->sessname, dev_name);
- kfree(full_path);
- return ERR_PTR(-EINVAL);
- }
} else {
- snprintf(full_path, PATH_MAX, "%s/%s",
- dev_search_path, dev_name);
+ len = snprintf(full_path, PATH_MAX, "%s/%s",
+ dev_search_path, dev_name);
+ }
+ if (len >= PATH_MAX) {
+ pr_err("Too long path: %s, %s, %s\n",
+ dev_search_path, srv_sess->sessname, dev_name);
+ kfree(full_path);
+ return ERR_PTR(-EINVAL);
}
/* eliminitate duplicated slashes */
@@ -676,7 +677,7 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
struct rnbd_srv_dev *srv_dev;
struct rnbd_srv_sess_dev *srv_sess_dev;
const struct rnbd_msg_open *open_msg = msg;
- struct block_device *bdev;
+ struct bdev_handle *bdev_handle;
blk_mode_t open_flags = BLK_OPEN_READ;
char *full_path;
struct rnbd_msg_open_rsp *rsp = data;
@@ -714,15 +715,15 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
goto reject;
}
- bdev = blkdev_get_by_path(full_path, open_flags, NULL, NULL);
- if (IS_ERR(bdev)) {
- ret = PTR_ERR(bdev);
+ bdev_handle = bdev_open_by_path(full_path, open_flags, NULL, NULL);
+ if (IS_ERR(bdev_handle)) {
+ ret = PTR_ERR(bdev_handle);
pr_err("Opening device '%s' on session %s failed, failed to open the block device, err: %d\n",
full_path, srv_sess->sessname, ret);
goto free_path;
}
- srv_dev = rnbd_srv_get_or_create_srv_dev(bdev, srv_sess,
+ srv_dev = rnbd_srv_get_or_create_srv_dev(bdev_handle->bdev, srv_sess,
open_msg->access_mode);
if (IS_ERR(srv_dev)) {
pr_err("Opening device '%s' on session %s failed, creating srv_dev failed, err: %ld\n",
@@ -731,7 +732,8 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
goto blkdev_put;
}
- srv_sess_dev = rnbd_srv_create_set_sess_dev(srv_sess, open_msg, bdev,
+ srv_sess_dev = rnbd_srv_create_set_sess_dev(srv_sess, open_msg,
+ bdev_handle,
open_msg->access_mode == RNBD_ACCESS_RO,
srv_dev);
if (IS_ERR(srv_sess_dev)) {
@@ -747,7 +749,7 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
*/
mutex_lock(&srv_dev->lock);
if (!srv_dev->dev_kobj.state_in_sysfs) {
- ret = rnbd_srv_create_dev_sysfs(srv_dev, bdev);
+ ret = rnbd_srv_create_dev_sysfs(srv_dev, bdev_handle->bdev);
if (ret) {
mutex_unlock(&srv_dev->lock);
rnbd_srv_err(srv_sess_dev,
@@ -790,7 +792,7 @@ srv_dev_put:
}
rnbd_put_srv_dev(srv_dev);
blkdev_put:
- blkdev_put(bdev, NULL);
+ bdev_release(bdev_handle);
free_path:
kfree(full_path);
reject:
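
The rnbd_srv_get_full_path() rework above relies on snprintf() returning the
length it would have written; a single post-branch comparison then catches
truncation for both formats. Sketch (base and name are placeholders):

        int len = snprintf(full_path, PATH_MAX, "%s/%s", base, name);

        if (len >= PATH_MAX) {          /* output did not fit: truncated */
                kfree(full_path);
                return ERR_PTR(-EINVAL);
        }
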
diff --git a/drivers/block/rnbd/rnbd-srv.h b/drivers/block/rnbd/rnbd-srv.h
index 1027656ded..343cc682b6 100644
--- a/drivers/block/rnbd/rnbd-srv.h
+++ b/drivers/block/rnbd/rnbd-srv.h
@@ -46,7 +46,7 @@ struct rnbd_srv_dev {
struct rnbd_srv_sess_dev {
/* Entry inside rnbd_srv_dev struct */
struct list_head dev_list;
- struct block_device *bdev;
+ struct bdev_handle *bdev_handle;
struct rnbd_srv_session *sess;
struct rnbd_srv_dev *dev;
struct kobject kobj;
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 630ddfe665..83600b45e1 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -75,6 +75,7 @@ struct ublk_rq_data {
struct ublk_uring_cmd_pdu {
struct ublk_queue *ubq;
+ u16 tag;
};
/*
@@ -115,6 +116,9 @@ struct ublk_uring_cmd_pdu {
*/
#define UBLK_IO_FLAG_NEED_GET_DATA 0x08
+/* atomic RW with ubq->cancel_lock */
+#define UBLK_IO_FLAG_CANCELED 0x80000000
+
struct ublk_io {
/* userspace buffer address from io cmd */
__u64 addr;
@@ -138,13 +142,13 @@ struct ublk_queue {
unsigned int max_io_sz;
bool force_abort;
bool timeout;
+ bool canceling;
unsigned short nr_io_ready; /* how many ios setup */
+ spinlock_t cancel_lock;
struct ublk_device *dev;
struct ublk_io ios[];
};
-#define UBLK_DAEMON_MONITOR_PERIOD (5 * HZ)
-
struct ublk_device {
struct gendisk *ub_disk;
@@ -166,7 +170,7 @@ struct ublk_device {
struct mutex mutex;
- spinlock_t mm_lock;
+ spinlock_t lock;
struct mm_struct *mm;
struct ublk_params params;
@@ -175,11 +179,6 @@ struct ublk_device {
unsigned int nr_queues_ready;
unsigned int nr_privileged_daemon;
- /*
- * Our ubq->daemon may be killed without any notification, so
- * monitor each queue's daemon periodically
- */
- struct delayed_work monitor_work;
struct work_struct quiesce_work;
struct work_struct stop_work;
};
@@ -190,10 +189,11 @@ struct ublk_params_header {
__u32 types;
};
+static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq);
+
static inline unsigned int ublk_req_build_flags(struct request *req);
static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq,
int tag);
-
static inline bool ublk_dev_is_user_copy(const struct ublk_device *ub)
{
return ub->dev_info.flags & UBLK_F_USER_COPY;
@@ -470,6 +470,7 @@ static DEFINE_MUTEX(ublk_ctl_mutex);
* It can be extended to one per-user limit in future or even controlled
* by cgroup.
*/
+#define UBLK_MAX_UBLKS UBLK_MINORS
static unsigned int ublks_max = 64;
static unsigned int ublks_added; /* protected by ublk_ctl_mutex */
@@ -1083,13 +1084,10 @@ static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io,
{
WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE);
- if (!(io->flags & UBLK_IO_FLAG_ABORTED)) {
- io->flags |= UBLK_IO_FLAG_ABORTED;
- if (ublk_queue_can_use_recovery_reissue(ubq))
- blk_mq_requeue_request(req, false);
- else
- ublk_put_req_ref(ubq, req);
- }
+ if (ublk_queue_can_use_recovery_reissue(ubq))
+ blk_mq_requeue_request(req, false);
+ else
+ ublk_put_req_ref(ubq, req);
}
static void ubq_complete_io_cmd(struct ublk_io *io, int res,
@@ -1118,8 +1116,6 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
blk_mq_requeue_request(rq, false);
else
blk_mq_end_request(rq, BLK_STS_IOERR);
-
- mod_delayed_work(system_wq, &ubq->dev->monitor_work, 0);
}
static inline void __ublk_rq_task_work(struct request *req,
@@ -1212,15 +1208,6 @@ static inline void ublk_forward_io_cmds(struct ublk_queue *ubq,
__ublk_rq_task_work(blk_mq_rq_from_pdu(data), issue_flags);
}
-static inline void ublk_abort_io_cmds(struct ublk_queue *ubq)
-{
- struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
- struct ublk_rq_data *data, *tmp;
-
- llist_for_each_entry_safe(data, tmp, io_cmds, node)
- __ublk_abort_rq(ubq, blk_mq_rq_from_pdu(data));
-}
-
static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, unsigned issue_flags)
{
struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
@@ -1232,38 +1219,19 @@ static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, unsigned issue_flags)
static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
{
struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
- struct ublk_io *io;
- if (!llist_add(&data->node, &ubq->io_cmds))
- return;
+ if (llist_add(&data->node, &ubq->io_cmds)) {
+ struct ublk_io *io = &ubq->ios[rq->tag];
- io = &ubq->ios[rq->tag];
- /*
- * If the check pass, we know that this is a re-issued request aborted
- * previously in monitor_work because the ubq_daemon(cmd's task) is
- * PF_EXITING. We cannot call io_uring_cmd_complete_in_task() anymore
- * because this ioucmd's io_uring context may be freed now if no inflight
- * ioucmd exists. Otherwise we may cause null-deref in ctx->fallback_work.
- *
- * Note: monitor_work sets UBLK_IO_FLAG_ABORTED and ends this request(releasing
- * the tag). Then the request is re-started(allocating the tag) and we are here.
- * Since releasing/allocating a tag implies smp_mb(), finding UBLK_IO_FLAG_ABORTED
- * guarantees that here is a re-issued request aborted previously.
- */
- if (unlikely(io->flags & UBLK_IO_FLAG_ABORTED)) {
- ublk_abort_io_cmds(ubq);
- } else {
- struct io_uring_cmd *cmd = io->cmd;
- struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
-
- pdu->ubq = ubq;
- io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
+ io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb);
}
}
static enum blk_eh_timer_return ublk_timeout(struct request *rq)
{
struct ublk_queue *ubq = rq->mq_hctx->driver_data;
+ unsigned int nr_inflight = 0;
+ int i;
if (ubq->flags & UBLK_F_UNPRIVILEGED_DEV) {
if (!ubq->timeout) {
@@ -1274,6 +1242,29 @@ static enum blk_eh_timer_return ublk_timeout(struct request *rq)
return BLK_EH_DONE;
}
+ if (!ubq_daemon_is_dying(ubq))
+ return BLK_EH_RESET_TIMER;
+
+ for (i = 0; i < ubq->q_depth; i++) {
+ struct ublk_io *io = &ubq->ios[i];
+
+ if (!(io->flags & UBLK_IO_FLAG_ACTIVE))
+ nr_inflight++;
+ }
+
+ /* cancelable uring_cmd can't help us if all commands are in-flight */
+ if (nr_inflight == ubq->q_depth) {
+ struct ublk_device *ub = ubq->dev;
+
+ if (ublk_abort_requests(ub, ubq)) {
+ if (ublk_can_use_recovery(ub))
+ schedule_work(&ub->quiesce_work);
+ else
+ schedule_work(&ub->stop_work);
+ }
+ return BLK_EH_DONE;
+ }
+
return BLK_EH_RESET_TIMER;
}
@@ -1301,13 +1292,12 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
if (ublk_queue_can_use_recovery(ubq) && unlikely(ubq->force_abort))
return BLK_STS_IOERR;
- blk_mq_start_request(bd->rq);
-
- if (unlikely(ubq_daemon_is_dying(ubq))) {
+ if (unlikely(ubq->canceling)) {
__ublk_abort_rq(ubq, rq);
return BLK_STS_OK;
}
+ blk_mq_start_request(bd->rq);
ublk_queue_cmd(ubq, rq);
return BLK_STS_OK;
@@ -1357,12 +1347,12 @@ static int ublk_ch_mmap(struct file *filp, struct vm_area_struct *vma)
unsigned long pfn, end, phys_off = vma->vm_pgoff << PAGE_SHIFT;
int q_id, ret = 0;
- spin_lock(&ub->mm_lock);
+ spin_lock(&ub->lock);
if (!ub->mm)
ub->mm = current->mm;
if (current->mm != ub->mm)
ret = -EINVAL;
- spin_unlock(&ub->mm_lock);
+ spin_unlock(&ub->lock);
if (ret)
return ret;
@@ -1411,17 +1401,14 @@ static void ublk_commit_completion(struct ublk_device *ub,
}
/*
- * When ->ubq_daemon is exiting, either new request is ended immediately,
- * or any queued io command is drained, so it is safe to abort queue
- * lockless
+ * Called from ubq_daemon context via cancel fn, meantime quiesce ublk
+ * blk-mq queue, so we are called exclusively with blk-mq and ubq_daemon
+ * context, so everything is serialized.
*/
static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
{
int i;
- if (!ublk_get_device(ub))
- return;
-
for (i = 0; i < ubq->q_depth; i++) {
struct ublk_io *io = &ubq->ios[i];
@@ -1433,72 +1420,114 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
* will do it
*/
rq = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], i);
- if (rq)
+ if (rq && blk_mq_request_started(rq)) {
+ io->flags |= UBLK_IO_FLAG_ABORTED;
__ublk_fail_req(ubq, io, rq);
+ }
}
}
- ublk_put_device(ub);
}
-static void ublk_daemon_monitor_work(struct work_struct *work)
+static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq)
{
- struct ublk_device *ub =
- container_of(work, struct ublk_device, monitor_work.work);
- int i;
+ struct gendisk *disk;
- for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
- struct ublk_queue *ubq = ublk_get_queue(ub, i);
+ spin_lock(&ubq->cancel_lock);
+ if (ubq->canceling) {
+ spin_unlock(&ubq->cancel_lock);
+ return false;
+ }
+ ubq->canceling = true;
+ spin_unlock(&ubq->cancel_lock);
- if (ubq_daemon_is_dying(ubq)) {
- if (ublk_queue_can_use_recovery(ubq))
- schedule_work(&ub->quiesce_work);
- else
- schedule_work(&ub->stop_work);
+ spin_lock(&ub->lock);
+ disk = ub->ub_disk;
+ if (disk)
+ get_device(disk_to_dev(disk));
+ spin_unlock(&ub->lock);
- /* abort queue is for making forward progress */
- ublk_abort_queue(ub, ubq);
- }
- }
+ /* Our disk has been dead */
+ if (!disk)
+ return false;
- /*
- * We can't schedule monitor work after ub's state is not UBLK_S_DEV_LIVE.
- * after ublk_remove() or __ublk_quiesce_dev() is started.
- *
- * No need ub->mutex, monitor work are canceled after state is marked
- * as not LIVE, so new state is observed reliably.
- */
- if (ub->dev_info.state == UBLK_S_DEV_LIVE)
- schedule_delayed_work(&ub->monitor_work,
- UBLK_DAEMON_MONITOR_PERIOD);
-}
+ /* Now we are serialized with ublk_queue_rq() */
+ blk_mq_quiesce_queue(disk->queue);
+ /* abort queue is for making forward progress */
+ ublk_abort_queue(ub, ubq);
+ blk_mq_unquiesce_queue(disk->queue);
+ put_device(disk_to_dev(disk));
-static inline bool ublk_queue_ready(struct ublk_queue *ubq)
-{
- return ubq->nr_io_ready == ubq->q_depth;
+ return true;
}
-static void ublk_cmd_cancel_cb(struct io_uring_cmd *cmd, unsigned issue_flags)
+static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io,
+ unsigned int issue_flags)
{
- io_uring_cmd_done(cmd, UBLK_IO_RES_ABORT, 0, issue_flags);
+ bool done;
+
+ if (!(io->flags & UBLK_IO_FLAG_ACTIVE))
+ return;
+
+ spin_lock(&ubq->cancel_lock);
+ done = !!(io->flags & UBLK_IO_FLAG_CANCELED);
+ if (!done)
+ io->flags |= UBLK_IO_FLAG_CANCELED;
+ spin_unlock(&ubq->cancel_lock);
+
+ if (!done)
+ io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, 0, issue_flags);
}
-static void ublk_cancel_queue(struct ublk_queue *ubq)
+/*
+ * The ublk char device won't be closed when calling cancel fn, so both
+ * ublk device and queue are guaranteed to be live
+ */
+static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd,
+ unsigned int issue_flags)
{
- int i;
+ struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+ struct ublk_queue *ubq = pdu->ubq;
+ struct task_struct *task;
+ struct ublk_device *ub;
+ bool need_schedule;
+ struct ublk_io *io;
- if (!ublk_queue_ready(ubq))
+ if (WARN_ON_ONCE(!ubq))
return;
- for (i = 0; i < ubq->q_depth; i++) {
- struct ublk_io *io = &ubq->ios[i];
+ if (WARN_ON_ONCE(pdu->tag >= ubq->q_depth))
+ return;
+
+ task = io_uring_cmd_get_task(cmd);
+ if (WARN_ON_ONCE(task && task != ubq->ubq_daemon))
+ return;
+
+ ub = ubq->dev;
+ need_schedule = ublk_abort_requests(ub, ubq);
+
+ io = &ubq->ios[pdu->tag];
+ WARN_ON_ONCE(io->cmd != cmd);
+ ublk_cancel_cmd(ubq, io, issue_flags);
- if (io->flags & UBLK_IO_FLAG_ACTIVE)
- io_uring_cmd_complete_in_task(io->cmd,
- ublk_cmd_cancel_cb);
+ if (need_schedule) {
+ if (ublk_can_use_recovery(ub))
+ schedule_work(&ub->quiesce_work);
+ else
+ schedule_work(&ub->stop_work);
}
+}
- /* all io commands are canceled */
- ubq->nr_io_ready = 0;
+static inline bool ublk_queue_ready(struct ublk_queue *ubq)
+{
+ return ubq->nr_io_ready == ubq->q_depth;
+}
+
+static void ublk_cancel_queue(struct ublk_queue *ubq)
+{
+ int i;
+
+ for (i = 0; i < ubq->q_depth; i++)
+ ublk_cancel_cmd(ubq, &ubq->ios[i], IO_URING_F_UNLOCKED);
}
/* Cancel all pending commands, must be called after del_gendisk() returns */
@@ -1545,16 +1574,6 @@ static void __ublk_quiesce_dev(struct ublk_device *ub)
blk_mq_quiesce_queue(ub->ub_disk->queue);
ublk_wait_tagset_rqs_idle(ub);
ub->dev_info.state = UBLK_S_DEV_QUIESCED;
- ublk_cancel_dev(ub);
- /* we are going to release task_struct of ubq_daemon and resets
- * ->ubq_daemon to NULL. So in monitor_work, check on ubq_daemon causes UAF.
- * Besides, monitor_work is not necessary in QUIESCED state since we have
- * already scheduled quiesce_work and quiesced all ubqs.
- *
- * Do not let monitor_work schedule itself if state it QUIESCED. And we cancel
- * it here and re-schedule it in END_USER_RECOVERY to avoid UAF.
- */
- cancel_delayed_work_sync(&ub->monitor_work);
}
static void ublk_quiesce_work_fn(struct work_struct *work)
@@ -1568,6 +1587,7 @@ static void ublk_quiesce_work_fn(struct work_struct *work)
__ublk_quiesce_dev(ub);
unlock:
mutex_unlock(&ub->mutex);
+ ublk_cancel_dev(ub);
}
static void ublk_unquiesce_dev(struct ublk_device *ub)
@@ -1593,6 +1613,8 @@ static void ublk_unquiesce_dev(struct ublk_device *ub)
static void ublk_stop_dev(struct ublk_device *ub)
{
+ struct gendisk *disk;
+
mutex_lock(&ub->mutex);
if (ub->dev_info.state == UBLK_S_DEV_DEAD)
goto unlock;
@@ -1602,14 +1624,18 @@ static void ublk_stop_dev(struct ublk_device *ub)
ublk_unquiesce_dev(ub);
}
del_gendisk(ub->ub_disk);
+
+ /* Sync with ublk_abort_queue() by holding the lock */
+ spin_lock(&ub->lock);
+ disk = ub->ub_disk;
ub->dev_info.state = UBLK_S_DEV_DEAD;
ub->dev_info.ublksrv_pid = -1;
- put_disk(ub->ub_disk);
ub->ub_disk = NULL;
+ spin_unlock(&ub->lock);
+ put_disk(disk);
unlock:
- ublk_cancel_dev(ub);
mutex_unlock(&ub->mutex);
- cancel_delayed_work_sync(&ub->monitor_work);
+ ublk_cancel_dev(ub);
}
/* device can only be started after all IOs are ready */
@@ -1660,6 +1686,21 @@ static inline void ublk_fill_io_cmd(struct ublk_io *io,
io->addr = buf_addr;
}
+static inline void ublk_prep_cancel(struct io_uring_cmd *cmd,
+ unsigned int issue_flags,
+ struct ublk_queue *ubq, unsigned int tag)
+{
+ struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+
+ /*
+ * Safe to refer to @ubq since ublk_queue won't be died until its
+ * commands are completed
+ */
+ pdu->ubq = ubq;
+ pdu->tag = tag;
+ io_uring_cmd_mark_cancelable(cmd, issue_flags);
+}
+
static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
unsigned int issue_flags,
const struct ublksrv_io_cmd *ub_cmd)
@@ -1775,6 +1816,7 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
default:
goto out;
}
+ ublk_prep_cancel(cmd, issue_flags, ubq, tag);
return -EIOCBQUEUED;
out:
@@ -1814,7 +1856,8 @@ fail_put:
return NULL;
}
-static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
+static inline int ublk_ch_uring_cmd_local(struct io_uring_cmd *cmd,
+ unsigned int issue_flags)
{
/*
* Not necessary for async retry, but let's keep it simple and always
@@ -1828,9 +1871,33 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
.addr = READ_ONCE(ub_src->addr)
};
+ WARN_ON_ONCE(issue_flags & IO_URING_F_UNLOCKED);
+
return __ublk_ch_uring_cmd(cmd, issue_flags, &ub_cmd);
}
+static void ublk_ch_uring_cmd_cb(struct io_uring_cmd *cmd,
+ unsigned int issue_flags)
+{
+ ublk_ch_uring_cmd_local(cmd, issue_flags);
+}
+
+static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+ if (unlikely(issue_flags & IO_URING_F_CANCEL)) {
+ ublk_uring_cmd_cancel_fn(cmd, issue_flags);
+ return 0;
+ }
+
+ /* well-implemented server won't run into unlocked */
+ if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
+ io_uring_cmd_complete_in_task(cmd, ublk_ch_uring_cmd_cb);
+ return -EIOCBQUEUED;
+ }
+
+ return ublk_ch_uring_cmd_local(cmd, issue_flags);
+}
+
static inline bool ublk_check_ubuf_dir(const struct request *req,
int ubuf_dir)
{
@@ -1962,6 +2029,7 @@ static int ublk_init_queue(struct ublk_device *ub, int q_id)
void *ptr;
int size;
+ spin_lock_init(&ubq->cancel_lock);
ubq->flags = ub->dev_info.flags;
ubq->q_id = q_id;
ubq->q_depth = ub->dev_info.queue_depth;
@@ -2026,7 +2094,8 @@ static int ublk_alloc_dev_number(struct ublk_device *ub, int idx)
if (err == -ENOSPC)
err = -EEXIST;
} else {
- err = idr_alloc(&ublk_index_idr, ub, 0, 0, GFP_NOWAIT);
+ err = idr_alloc(&ublk_index_idr, ub, 0, UBLK_MAX_UBLKS,
+ GFP_NOWAIT);
}
spin_unlock(&ublk_idr_lock);
@@ -2151,8 +2220,6 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
if (wait_for_completion_interruptible(&ub->completion) != 0)
return -EINTR;
- schedule_delayed_work(&ub->monitor_work, UBLK_DAEMON_MONITOR_PERIOD);
-
mutex_lock(&ub->mutex);
if (ub->dev_info.state == UBLK_S_DEV_LIVE ||
test_bit(UB_STATE_USED, &ub->state)) {
@@ -2305,6 +2372,12 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
return -EINVAL;
}
+ if (header->dev_id != U32_MAX && header->dev_id >= UBLK_MAX_UBLKS) {
+ pr_warn("%s: dev id is too large. Max supported is %d\n",
+ __func__, UBLK_MAX_UBLKS - 1);
+ return -EINVAL;
+ }
+
ublk_dump_dev_info(&info);
ret = mutex_lock_killable(&ublk_ctl_mutex);
@@ -2320,10 +2393,9 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
if (!ub)
goto out_unlock;
mutex_init(&ub->mutex);
- spin_lock_init(&ub->mm_lock);
+ spin_lock_init(&ub->lock);
INIT_WORK(&ub->quiesce_work, ublk_quiesce_work_fn);
INIT_WORK(&ub->stop_work, ublk_stop_work_fn);
- INIT_DELAYED_WORK(&ub->monitor_work, ublk_daemon_monitor_work);
ret = ublk_alloc_dev_number(ub, header->dev_id);
if (ret < 0)
@@ -2569,13 +2641,15 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
int i;
WARN_ON_ONCE(!(ubq->ubq_daemon && ubq_daemon_is_dying(ubq)));
+
/* All old ioucmds have to be completed */
- WARN_ON_ONCE(ubq->nr_io_ready);
+ ubq->nr_io_ready = 0;
/* old daemon is PF_EXITING, put it now */
put_task_struct(ubq->ubq_daemon);
/* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */
ubq->ubq_daemon = NULL;
ubq->timeout = false;
+ ubq->canceling = false;
for (i = 0; i < ubq->q_depth; i++) {
struct ublk_io *io = &ubq->ios[i];
@@ -2661,7 +2735,6 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub,
__func__, header->dev_id);
blk_mq_kick_requeue_list(ub->ub_disk->queue);
ub->dev_info.state = UBLK_S_DEV_LIVE;
- schedule_delayed_work(&ub->monitor_work, UBLK_DAEMON_MONITOR_PERIOD);
ret = 0;
out_unlock:
mutex_unlock(&ub->mutex);
@@ -2932,7 +3005,22 @@ static void __exit ublk_exit(void)
module_init(ublk_init);
module_exit(ublk_exit);
-module_param(ublks_max, int, 0444);
+static int ublk_set_max_ublks(const char *buf, const struct kernel_param *kp)
+{
+ return param_set_uint_minmax(buf, kp, 0, UBLK_MAX_UBLKS);
+}
+
+static int ublk_get_max_ublks(char *buf, const struct kernel_param *kp)
+{
+ return sysfs_emit(buf, "%u\n", ublks_max);
+}
+
+static const struct kernel_param_ops ublk_max_ublks_ops = {
+ .set = ublk_set_max_ublks,
+ .get = ublk_get_max_ublks,
+};
+
+module_param_cb(ublks_max, &ublk_max_ublks_ops, &ublks_max, 0644);
MODULE_PARM_DESC(ublks_max, "max number of ublk devices allowed to add(default: 64)");
MODULE_AUTHOR("Ming Lei <ming.lei@redhat.com>");
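
Two reusable patterns from the ublk rework, sketched with hypothetical my_*
names. First, the cancelable uring_cmd flow that replaces the monitor work: a
queued command opts in with io_uring_cmd_mark_cancelable(), and io_uring later
re-invokes ->uring_cmd() with IO_URING_F_CANCEL set:

static int my_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
        if (unlikely(issue_flags & IO_URING_F_CANCEL)) {
                my_cancel_fn(cmd, issue_flags); /* abort queue, complete cmd */
                return 0;
        }

        io_uring_cmd_mark_cancelable(cmd, issue_flags);
        return -EIOCBQUEUED;                    /* command stays pending */
}

Second, the range-validated module parameter replacing the plain
module_param() (bounds below are illustrative):

static unsigned int my_limit = 64;

static int my_limit_set(const char *buf, const struct kernel_param *kp)
{
        return param_set_uint_minmax(buf, kp, 0, 1024);
}

static const struct kernel_param_ops my_limit_ops = {
        .set = my_limit_set,
        .get = param_get_uint,
};
module_param_cb(my_limit, &my_limit_ops, &my_limit, 0644);
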
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 225c86c74d..2c846eed5a 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -470,8 +470,6 @@ static bool virtblk_prep_rq_batch(struct request *req)
struct virtio_blk *vblk = req->mq_hctx->queue->queuedata;
struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
- req->mq_hctx->tags->rqs[req->tag] = req;
-
return virtblk_prep_rq(req->mq_hctx, vblk, req, vbr) == BLK_STS_OK;
}
@@ -1629,14 +1627,15 @@ static int virtblk_freeze(struct virtio_device *vdev)
{
struct virtio_blk *vblk = vdev->priv;
+ /* Ensure no requests in virtqueues before deleting vqs. */
+ blk_mq_freeze_queue(vblk->disk->queue);
+
/* Ensure we don't receive any more interrupts */
virtio_reset_device(vdev);
/* Make sure no work handler is accessing the device. */
flush_work(&vblk->config_work);
- blk_mq_quiesce_queue(vblk->disk->queue);
-
vdev->config->del_vqs(vdev);
kfree(vblk->vqs);
@@ -1654,7 +1653,7 @@ static int virtblk_restore(struct virtio_device *vdev)
virtio_device_ready(vdev);
- blk_mq_unquiesce_queue(vblk->disk->queue);
+ blk_mq_unfreeze_queue(vblk->disk->queue);
return 0;
}
#endif
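
The virtio_blk change swaps quiesce for freeze on suspend: quiescing only
stops new dispatch, while freezing also waits for requests already pushed into
the virtqueues, which must be drained before del_vqs(). A sketch of the
resulting ordering (flush of the config work elided):

static int my_freeze(struct virtio_device *vdev)
{
        struct virtio_blk *vblk = vdev->priv;

        blk_mq_freeze_queue(vblk->disk->queue); /* wait out in-flight I/O */
        virtio_reset_device(vdev);              /* no more interrupts */
        vdev->config->del_vqs(vdev);            /* safe: vqs are now empty */
        return 0;
}
/* the restore side pairs this with blk_mq_unfreeze_queue(vblk->disk->queue) */
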
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index c362f4ad80..4defd7f387 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -465,7 +465,7 @@ static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif,
}
req->dev = vbd->pdevice;
- req->bdev = vbd->bdev;
+ req->bdev = vbd->bdev_handle->bdev;
rc = 0;
out:
@@ -969,7 +969,7 @@ static int dispatch_discard_io(struct xen_blkif_ring *ring,
int err = 0;
int status = BLKIF_RSP_OKAY;
struct xen_blkif *blkif = ring->blkif;
- struct block_device *bdev = blkif->vbd.bdev;
+ struct block_device *bdev = blkif->vbd.bdev_handle->bdev;
struct phys_req preq;
xen_blkif_get(blkif);
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 40f67bfc05..5ff50e76ce 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -221,7 +221,7 @@ struct xen_vbd {
unsigned char type;
/* phys device that this vbd maps to. */
u32 pdevice;
- struct block_device *bdev;
+ struct bdev_handle *bdev_handle;
/* Cached size parameter. */
sector_t size;
unsigned int flush_support:1;
@@ -360,7 +360,7 @@ struct pending_req {
};
-#define vbd_sz(_v) bdev_nr_sectors((_v)->bdev)
+#define vbd_sz(_v) bdev_nr_sectors((_v)->bdev_handle->bdev)
#define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt))
#define xen_blkif_put(_b) \
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index bb66178c43..e34219ea2b 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -81,7 +81,7 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
int i;
/* Not ready to connect? */
- if (!blkif->rings || !blkif->rings[0].irq || !blkif->vbd.bdev)
+ if (!blkif->rings || !blkif->rings[0].irq || !blkif->vbd.bdev_handle)
return;
/* Already connected? */
@@ -99,12 +99,13 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
return;
}
- err = sync_blockdev(blkif->vbd.bdev);
+ err = sync_blockdev(blkif->vbd.bdev_handle->bdev);
if (err) {
xenbus_dev_error(blkif->be->dev, err, "block flush");
return;
}
- invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);
+ invalidate_inode_pages2(
+ blkif->vbd.bdev_handle->bdev->bd_inode->i_mapping);
for (i = 0; i < blkif->nr_rings; i++) {
ring = &blkif->rings[i];
@@ -472,9 +473,9 @@ static void xenvbd_sysfs_delif(struct xenbus_device *dev)
static void xen_vbd_free(struct xen_vbd *vbd)
{
- if (vbd->bdev)
- blkdev_put(vbd->bdev, NULL);
- vbd->bdev = NULL;
+ if (vbd->bdev_handle)
+ bdev_release(vbd->bdev_handle);
+ vbd->bdev_handle = NULL;
}
static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
@@ -482,7 +483,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
int cdrom)
{
struct xen_vbd *vbd;
- struct block_device *bdev;
+ struct bdev_handle *bdev_handle;
vbd = &blkif->vbd;
vbd->handle = handle;
@@ -491,17 +492,17 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
vbd->pdevice = MKDEV(major, minor);
- bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ?
+ bdev_handle = bdev_open_by_dev(vbd->pdevice, vbd->readonly ?
BLK_OPEN_READ : BLK_OPEN_WRITE, NULL, NULL);
- if (IS_ERR(bdev)) {
+ if (IS_ERR(bdev_handle)) {
pr_warn("xen_vbd_create: device %08x could not be opened\n",
vbd->pdevice);
return -ENOENT;
}
- vbd->bdev = bdev;
- if (vbd->bdev->bd_disk == NULL) {
+ vbd->bdev_handle = bdev_handle;
+ if (vbd->bdev_handle->bdev->bd_disk == NULL) {
pr_warn("xen_vbd_create: device %08x doesn't exist\n",
vbd->pdevice);
xen_vbd_free(vbd);
@@ -509,14 +510,14 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
}
vbd->size = vbd_sz(vbd);
- if (cdrom || disk_to_cdi(vbd->bdev->bd_disk))
+ if (cdrom || disk_to_cdi(vbd->bdev_handle->bdev->bd_disk))
vbd->type |= VDISK_CDROM;
- if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
+ if (vbd->bdev_handle->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
vbd->type |= VDISK_REMOVABLE;
- if (bdev_write_cache(bdev))
+ if (bdev_write_cache(bdev_handle->bdev))
vbd->flush_support = true;
- if (bdev_max_secure_erase_sectors(bdev))
+ if (bdev_max_secure_erase_sectors(bdev_handle->bdev))
vbd->discard_secure = true;
pr_debug("Successful creation of handle=%04x (dom=%u)\n",
@@ -569,7 +570,7 @@ static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info
struct xen_blkif *blkif = be->blkif;
int err;
int state = 0;
- struct block_device *bdev = be->blkif->vbd.bdev;
+ struct block_device *bdev = be->blkif->vbd.bdev_handle->bdev;
if (!xenbus_read_unsigned(dev->nodename, "discard-enable", 1))
return;
@@ -930,15 +931,16 @@ again:
goto abort;
}
err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu",
- (unsigned long)
- bdev_logical_block_size(be->blkif->vbd.bdev));
+ (unsigned long)bdev_logical_block_size(
+ be->blkif->vbd.bdev_handle->bdev));
if (err) {
xenbus_dev_fatal(dev, err, "writing %s/sector-size",
dev->nodename);
goto abort;
}
err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u",
- bdev_physical_block_size(be->blkif->vbd.bdev));
+ bdev_physical_block_size(
+ be->blkif->vbd.bdev_handle->bdev));
if (err)
xenbus_dev_error(dev, err, "writing %s/physical-sector-size",
dev->nodename);
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 06673c6ca2..d77d3664ca 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -414,17 +414,14 @@ static ssize_t writeback_limit_show(struct device *dev,
static void reset_bdev(struct zram *zram)
{
- struct block_device *bdev;
-
if (!zram->backing_dev)
return;
- bdev = zram->bdev;
- blkdev_put(bdev, zram);
+ bdev_release(zram->bdev_handle);
/* hope filp_close flush all of IO */
filp_close(zram->backing_dev, NULL);
zram->backing_dev = NULL;
- zram->bdev = NULL;
+ zram->bdev_handle = NULL;
zram->disk->fops = &zram_devops;
kvfree(zram->bitmap);
zram->bitmap = NULL;
@@ -470,7 +467,7 @@ static ssize_t backing_dev_store(struct device *dev,
struct address_space *mapping;
unsigned int bitmap_sz;
unsigned long nr_pages, *bitmap = NULL;
- struct block_device *bdev = NULL;
+ struct bdev_handle *bdev_handle = NULL;
int err;
struct zram *zram = dev_to_zram(dev);
@@ -507,11 +504,11 @@ static ssize_t backing_dev_store(struct device *dev,
goto out;
}
- bdev = blkdev_get_by_dev(inode->i_rdev, BLK_OPEN_READ | BLK_OPEN_WRITE,
- zram, NULL);
- if (IS_ERR(bdev)) {
- err = PTR_ERR(bdev);
- bdev = NULL;
+ bdev_handle = bdev_open_by_dev(inode->i_rdev,
+ BLK_OPEN_READ | BLK_OPEN_WRITE, zram, NULL);
+ if (IS_ERR(bdev_handle)) {
+ err = PTR_ERR(bdev_handle);
+ bdev_handle = NULL;
goto out;
}
@@ -525,7 +522,7 @@ static ssize_t backing_dev_store(struct device *dev,
reset_bdev(zram);
- zram->bdev = bdev;
+ zram->bdev_handle = bdev_handle;
zram->backing_dev = backing_dev;
zram->bitmap = bitmap;
zram->nr_pages = nr_pages;
@@ -538,8 +535,8 @@ static ssize_t backing_dev_store(struct device *dev,
out:
kvfree(bitmap);
- if (bdev)
- blkdev_put(bdev, zram);
+ if (bdev_handle)
+ bdev_release(bdev_handle);
if (backing_dev)
filp_close(backing_dev, NULL);
@@ -581,7 +578,7 @@ static void read_from_bdev_async(struct zram *zram, struct page *page,
{
struct bio *bio;
- bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO);
+ bio = bio_alloc(zram->bdev_handle->bdev, 1, parent->bi_opf, GFP_NOIO);
bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
__bio_add_page(bio, page, PAGE_SIZE, 0);
bio_chain(bio, parent);
@@ -697,7 +694,7 @@ static ssize_t writeback_store(struct device *dev,
continue;
}
- bio_init(&bio, zram->bdev, &bio_vec, 1,
+ bio_init(&bio, zram->bdev_handle->bdev, &bio_vec, 1,
REQ_OP_WRITE | REQ_SYNC);
bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
__bio_add_page(&bio, page, PAGE_SIZE, 0);
@@ -779,7 +776,7 @@ static void zram_sync_read(struct work_struct *work)
struct bio_vec bv;
struct bio bio;
- bio_init(&bio, zw->zram->bdev, &bv, 1, REQ_OP_READ);
+ bio_init(&bio, zw->zram->bdev_handle->bdev, &bv, 1, REQ_OP_READ);
bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9);
__bio_add_page(&bio, zw->page, PAGE_SIZE, 0);
zw->error = submit_bio_wait(&bio);
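
zram's writeback and sync-read paths above use the on-stack bio idiom; a
self-contained sketch (the helper name is hypothetical):

static int read_page_sync(struct block_device *bdev, struct page *page,
                          unsigned long entry)
{
        struct bio_vec bv;
        struct bio bio;

        bio_init(&bio, bdev, &bv, 1, REQ_OP_READ);
        bio.bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
        __bio_add_page(&bio, page, PAGE_SIZE, 0);
        return submit_bio_wait(&bio);   /* synchronous; returns 0 or -errno */
}
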
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index ca7a15bd48..d090753f97 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -132,7 +132,7 @@ struct zram {
spinlock_t wb_limit_lock;
bool wb_limit_enable;
u64 bd_wb_limit;
- struct block_device *bdev;
+ struct bdev_handle *bdev_handle;
unsigned long *bitmap;
unsigned long nr_pages;
#endif