Diffstat (limited to 'drivers/block')
 drivers/block/brd.c                  |  66
 drivers/block/loop.c                 |   4
 drivers/block/nbd.c                  |  58
 drivers/block/null_blk/main.c        |  42
 drivers/block/null_blk/null_blk.h    |   2
 drivers/block/null_blk/trace.h       |   7
 drivers/block/null_blk/zoned.c       | 358
 drivers/block/pktcdvd.c              |   7
 drivers/block/rbd.c                  |  35
 drivers/block/rnbd/rnbd-srv-trace.h  |  12
 drivers/block/ublk_drv.c             |  11
 drivers/block/virtio_blk.c           |   3
 drivers/block/xen-blkfront.c         |  16
 drivers/block/zram/zram_drv.c        |  60
 drivers/block/zram/zram_drv.h        |   2
 15 files changed, 364 insertions(+), 319 deletions(-)
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index e322cef659..558d8e6705 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -29,10 +29,7 @@
/*
* Each block ramdisk device has a xarray brd_pages of pages that stores
- * the pages containing the block device's contents. A brd page's ->index is
- * its offset in PAGE_SIZE units. This is similar to, but in no way connected
- * with, the kernel's pagecache or buffer cache (which sit above our block
- * device).
+ * the pages containing the block device's contents.
*/
struct brd_device {
int brd_number;
@@ -51,15 +48,7 @@ struct brd_device {
*/
static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
{
- pgoff_t idx;
- struct page *page;
-
- idx = sector >> PAGE_SECTORS_SHIFT; /* sector to page index */
- page = xa_load(&brd->brd_pages, idx);
-
- BUG_ON(page && page->index != idx);
-
- return page;
+ return xa_load(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT);
}
/*
@@ -67,8 +56,8 @@ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
*/
static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp)
{
- pgoff_t idx;
- struct page *page, *cur;
+ pgoff_t idx = sector >> PAGE_SECTORS_SHIFT;
+ struct page *page;
int ret = 0;
page = brd_lookup_page(brd, sector);
@@ -80,23 +69,16 @@ static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp)
return -ENOMEM;
xa_lock(&brd->brd_pages);
-
- idx = sector >> PAGE_SECTORS_SHIFT;
- page->index = idx;
-
- cur = __xa_cmpxchg(&brd->brd_pages, idx, NULL, page, gfp);
-
- if (unlikely(cur)) {
- __free_page(page);
- ret = xa_err(cur);
- if (!ret && (cur->index != idx))
- ret = -EIO;
- } else {
+ ret = __xa_insert(&brd->brd_pages, idx, page, gfp);
+ if (!ret)
brd->brd_nr_pages++;
- }
-
xa_unlock(&brd->brd_pages);
+ if (ret < 0) {
+ __free_page(page);
+ if (ret == -EBUSY)
+ ret = 0;
+ }
return ret;
}
@@ -240,6 +222,23 @@ out:
return err;
}
+static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
+{
+ sector_t aligned_sector = round_up(sector, PAGE_SECTORS);
+ struct page *page;
+
+ size -= (aligned_sector - sector) * SECTOR_SIZE;
+ xa_lock(&brd->brd_pages);
+ while (size >= PAGE_SIZE && aligned_sector < rd_size * 2) {
+ page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT);
+ if (page)
+ __free_page(page);
+ aligned_sector += PAGE_SECTORS;
+ size -= PAGE_SIZE;
+ }
+ xa_unlock(&brd->brd_pages);
+}
+
static void brd_submit_bio(struct bio *bio)
{
struct brd_device *brd = bio->bi_bdev->bd_disk->private_data;
@@ -247,6 +246,12 @@ static void brd_submit_bio(struct bio *bio)
struct bio_vec bvec;
struct bvec_iter iter;
+ if (unlikely(op_is_discard(bio->bi_opf))) {
+ brd_do_discard(brd, sector, bio->bi_iter.bi_size);
+ bio_endio(bio);
+ return;
+ }
+
bio_for_each_segment(bvec, bio, iter) {
unsigned int len = bvec.bv_len;
int err;
@@ -327,6 +332,9 @@ static int brd_alloc(int i)
* is harmless)
*/
.physical_block_size = PAGE_SIZE,
+ .max_hw_discard_sectors = UINT_MAX,
+ .max_discard_segments = 1,
+ .discard_granularity = PAGE_SIZE,
};
list_for_each_entry(brd, &brd_devices, brd_list)
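A worked example of the new discard path (a sketch, not part of the patch; assumes 4 KiB pages, so PAGE_SECTORS == 8 and PAGE_SIZE == 4096): brd_do_discard() rounds the start up to a page boundary and frees only pages the range covers completely, leaving partial head and tail pages intact.

	/* Discard starting at sector 5 for 16 sectors (8192 bytes). */
	sector_t aligned_sector = round_up(5, PAGE_SECTORS);	/* -> 8 */
	/* size becomes 8192 - (8 - 5) * SECTOR_SIZE = 6656 bytes    */
	/* iteration 1: erase the page backing sectors [8, 16)       */
	/* remaining size 2560 < PAGE_SIZE, so the loop stops and    */
	/* the partial ranges [5, 8) and [16, 21) are left untouched */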
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 95a468eaa7..1153721bc7 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -468,9 +468,9 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
if (rw == ITER_SOURCE)
- ret = call_write_iter(file, &cmd->iocb, &iter);
+ ret = file->f_op->write_iter(&cmd->iocb, &iter);
else
- ret = call_read_iter(file, &cmd->iocb, &iter);
+ ret = file->f_op->read_iter(&cmd->iocb, &iter);
lo_rw_aio_do_completion(cmd);
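For reference, the call_read_iter()/call_write_iter() wrappers being dropped are thin indirections in include/linux/fs.h (quoted from memory, so worth verifying against the tree), which is why invoking the file_operations methods directly is behaviorally identical:

	static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio,
					     struct iov_iter *iter)
	{
		return file->f_op->read_iter(kio, iter);
	}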
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 1ddd3e5497..b87aa80a46 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -222,7 +222,7 @@ static ssize_t pid_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct gendisk *disk = dev_to_disk(dev);
- struct nbd_device *nbd = (struct nbd_device *)disk->private_data;
+ struct nbd_device *nbd = disk->private_data;
return sprintf(buf, "%d\n", nbd->pid);
}
@@ -236,7 +236,7 @@ static ssize_t backend_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct gendisk *disk = dev_to_disk(dev);
- struct nbd_device *nbd = (struct nbd_device *)disk->private_data;
+ struct nbd_device *nbd = disk->private_data;
return sprintf(buf, "%s\n", nbd->backend ?: "");
}
@@ -589,10 +589,11 @@ static inline int was_interrupted(int result)
}
/*
- * Returns BLK_STS_RESOURCE if the caller should retry after a delay. Returns
- * -EAGAIN if the caller should requeue @cmd. Returns -EIO if sending failed.
+ * Returns BLK_STS_RESOURCE if the caller should retry after a delay.
+ * Returns BLK_STS_IOERR if sending failed.
*/
-static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
+static blk_status_t nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd,
+ int index)
{
struct request *req = blk_mq_rq_from_pdu(cmd);
struct nbd_config *config = nbd->config;
@@ -601,7 +602,6 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)};
struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)};
struct iov_iter from;
- unsigned long size = blk_rq_bytes(req);
struct bio *bio;
u64 handle;
u32 type;
@@ -615,13 +615,13 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
type = req_to_nbd_cmd_type(req);
if (type == U32_MAX)
- return -EIO;
+ return BLK_STS_IOERR;
if (rq_data_dir(req) == WRITE &&
(config->flags & NBD_FLAG_READ_ONLY)) {
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Write on read-only\n");
- return -EIO;
+ return BLK_STS_IOERR;
}
if (req->cmd_flags & REQ_FUA)
@@ -650,7 +650,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
request.type = htonl(type | nbd_cmd_flags);
if (type != NBD_CMD_FLUSH) {
request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
- request.len = htonl(size);
+ request.len = htonl(blk_rq_bytes(req));
}
handle = nbd_cmd_handle(cmd);
request.cookie = cpu_to_be64(handle);
@@ -675,11 +675,11 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
nsock->sent = sent;
}
set_bit(NBD_CMD_REQUEUED, &cmd->flags);
- return (__force int)BLK_STS_RESOURCE;
+ return BLK_STS_RESOURCE;
}
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Send control failed (result %d)\n", result);
- return -EAGAIN;
+ goto requeue;
}
send_pages:
if (type != NBD_CMD_WRITE)
@@ -716,12 +716,12 @@ send_pages:
nsock->pending = req;
nsock->sent = sent;
set_bit(NBD_CMD_REQUEUED, &cmd->flags);
- return (__force int)BLK_STS_RESOURCE;
+ return BLK_STS_RESOURCE;
}
dev_err(disk_to_dev(nbd->disk),
"Send data failed (result %d)\n",
result);
- return -EAGAIN;
+ goto requeue;
}
/*
* The completion might already have come in,
@@ -738,7 +738,16 @@ out:
trace_nbd_payload_sent(req, handle);
nsock->pending = NULL;
nsock->sent = 0;
- return 0;
+ __set_bit(NBD_CMD_INFLIGHT, &cmd->flags);
+ return BLK_STS_OK;
+
+requeue:
+ /* retry on a different socket */
+ dev_err_ratelimited(disk_to_dev(nbd->disk),
+ "Request send failed, requeueing\n");
+ nbd_mark_nsock_dead(nbd, nsock, 1);
+ nbd_requeue_cmd(cmd);
+ return BLK_STS_OK;
}
static int nbd_read_reply(struct nbd_device *nbd, struct socket *sock,
@@ -1019,7 +1028,7 @@ static blk_status_t nbd_handle_cmd(struct nbd_cmd *cmd, int index)
struct nbd_device *nbd = cmd->nbd;
struct nbd_config *config;
struct nbd_sock *nsock;
- int ret;
+ blk_status_t ret;
lockdep_assert_held(&cmd->lock);
@@ -1073,28 +1082,11 @@ again:
ret = BLK_STS_OK;
goto out;
}
- /*
- * Some failures are related to the link going down, so anything that
- * returns EAGAIN can be retried on a different socket.
- */
ret = nbd_send_cmd(nbd, cmd, index);
- /*
- * Access to this flag is protected by cmd->lock, thus it's safe to set
- * the flag after nbd_send_cmd() succeed to send request to server.
- */
- if (!ret)
- __set_bit(NBD_CMD_INFLIGHT, &cmd->flags);
- else if (ret == -EAGAIN) {
- dev_err_ratelimited(disk_to_dev(nbd->disk),
- "Request send failed, requeueing\n");
- nbd_mark_nsock_dead(nbd, nsock, 1);
- nbd_requeue_cmd(cmd);
- ret = BLK_STS_OK;
- }
out:
mutex_unlock(&nsock->tx_lock);
nbd_config_put(nbd);
- return ret < 0 ? BLK_STS_IOERR : (__force blk_status_t)ret;
+ return ret;
}
static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
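The net effect of the hunks above is a single return convention, condensed here as a sketch (not new code): nbd_handle_cmd() can now hand the value straight back to the block layer instead of translating negative errnos.

	/*
	 * ret = nbd_send_cmd(nbd, cmd, index);
	 *
	 * BLK_STS_OK       - request sent (NBD_CMD_INFLIGHT set), or requeued
	 *                    internally after marking the socket dead
	 * BLK_STS_RESOURCE - partial send of a stalled request; retried after
	 *                    a delay
	 * BLK_STS_IOERR    - invalid command type, or a write to a read-only
	 *                    export
	 */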
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 26e2c22a87..f940580193 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -225,6 +225,10 @@ static unsigned long g_cache_size;
module_param_named(cache_size, g_cache_size, ulong, 0444);
MODULE_PARM_DESC(cache_size, "Cache size in MiB for memory-backed device. Default: 0 (none)");
+static bool g_fua = true;
+module_param_named(fua, g_fua, bool, 0444);
+MODULE_PARM_DESC(fua, "Enable/disable FUA support when cache_size is used. Default: true");
+
static unsigned int g_mbps;
module_param_named(mbps, g_mbps, uint, 0444);
MODULE_PARM_DESC(mbps, "Limit maximum bandwidth (in MiB/s). Default: 0 (no limit)");
@@ -253,6 +257,11 @@ static unsigned int g_zone_max_active;
module_param_named(zone_max_active, g_zone_max_active, uint, 0444);
MODULE_PARM_DESC(zone_max_active, "Maximum number of active zones when block device is zoned. Default: 0 (no limit)");
+static int g_zone_append_max_sectors = INT_MAX;
+module_param_named(zone_append_max_sectors, g_zone_append_max_sectors, int, 0444);
+MODULE_PARM_DESC(zone_append_max_sectors,
+ "Maximum size of a zone append command (in 512B sectors). Specify 0 for zone append emulation");
+
static struct nullb_device *null_alloc_dev(void);
static void null_free_dev(struct nullb_device *dev);
static void null_del_dev(struct nullb *nullb);
@@ -448,10 +457,12 @@ NULLB_DEVICE_ATTR(zone_capacity, ulong, NULL);
NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
NULLB_DEVICE_ATTR(zone_max_open, uint, NULL);
NULLB_DEVICE_ATTR(zone_max_active, uint, NULL);
+NULLB_DEVICE_ATTR(zone_append_max_sectors, uint, NULL);
NULLB_DEVICE_ATTR(virt_boundary, bool, NULL);
NULLB_DEVICE_ATTR(no_sched, bool, NULL);
NULLB_DEVICE_ATTR(shared_tags, bool, NULL);
NULLB_DEVICE_ATTR(shared_tag_bitmap, bool, NULL);
+NULLB_DEVICE_ATTR(fua, bool, NULL);
static ssize_t nullb_device_power_show(struct config_item *item, char *page)
{
@@ -596,12 +607,14 @@ static struct configfs_attribute *nullb_device_attrs[] = {
&nullb_device_attr_zone_nr_conv,
&nullb_device_attr_zone_max_open,
&nullb_device_attr_zone_max_active,
+ &nullb_device_attr_zone_append_max_sectors,
&nullb_device_attr_zone_readonly,
&nullb_device_attr_zone_offline,
&nullb_device_attr_virt_boundary,
&nullb_device_attr_no_sched,
&nullb_device_attr_shared_tags,
&nullb_device_attr_shared_tag_bitmap,
+ &nullb_device_attr_fua,
NULL,
};
@@ -680,14 +693,14 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item)
static ssize_t memb_group_features_show(struct config_item *item, char *page)
{
return snprintf(page, PAGE_SIZE,
- "badblocks,blocking,blocksize,cache_size,"
+ "badblocks,blocking,blocksize,cache_size,fua,"
"completion_nsec,discard,home_node,hw_queue_depth,"
"irqmode,max_sectors,mbps,memory_backed,no_sched,"
"poll_queues,power,queue_mode,shared_tag_bitmap,"
"shared_tags,size,submit_queues,use_per_node_hctx,"
"virt_boundary,zoned,zone_capacity,zone_max_active,"
"zone_max_open,zone_nr_conv,zone_offline,zone_readonly,"
- "zone_size\n");
+ "zone_size,zone_append_max_sectors\n");
}
CONFIGFS_ATTR_RO(memb_group_, features);
@@ -767,10 +780,13 @@ static struct nullb_device *null_alloc_dev(void)
dev->zone_nr_conv = g_zone_nr_conv;
dev->zone_max_open = g_zone_max_open;
dev->zone_max_active = g_zone_max_active;
+ dev->zone_append_max_sectors = g_zone_append_max_sectors;
dev->virt_boundary = g_virt_boundary;
dev->no_sched = g_no_sched;
dev->shared_tags = g_shared_tags;
dev->shared_tag_bitmap = g_shared_tag_bitmap;
+ dev->fua = g_fua;
+
return dev;
}
@@ -1167,7 +1183,7 @@ blk_status_t null_handle_discard(struct nullb_device *dev,
return BLK_STS_OK;
}
-static int null_handle_flush(struct nullb *nullb)
+static blk_status_t null_handle_flush(struct nullb *nullb)
{
int err;
@@ -1184,7 +1200,7 @@ static int null_handle_flush(struct nullb *nullb)
WARN_ON(!radix_tree_empty(&nullb->dev->cache));
spin_unlock_irq(&nullb->lock);
- return err;
+ return errno_to_blk_status(err);
}
static int null_transfer(struct nullb *nullb, struct page *page,
@@ -1218,11 +1234,11 @@ static int null_transfer(struct nullb *nullb, struct page *page,
return err;
}
-static int null_handle_rq(struct nullb_cmd *cmd)
+static blk_status_t null_handle_rq(struct nullb_cmd *cmd)
{
struct request *rq = blk_mq_rq_from_pdu(cmd);
struct nullb *nullb = cmd->nq->dev->nullb;
- int err;
+ int err = 0;
unsigned int len;
sector_t sector = blk_rq_pos(rq);
struct req_iterator iter;
@@ -1234,15 +1250,13 @@ static int null_handle_rq(struct nullb_cmd *cmd)
err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
op_is_write(req_op(rq)), sector,
rq->cmd_flags & REQ_FUA);
- if (err) {
- spin_unlock_irq(&nullb->lock);
- return err;
- }
+ if (err)
+ break;
sector += len >> SECTOR_SHIFT;
}
spin_unlock_irq(&nullb->lock);
- return 0;
+ return errno_to_blk_status(err);
}
static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd)
@@ -1289,8 +1303,8 @@ static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd,
if (op == REQ_OP_DISCARD)
return null_handle_discard(dev, sector, nr_sectors);
- return errno_to_blk_status(null_handle_rq(cmd));
+ return null_handle_rq(cmd);
}
static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd)
@@ -1359,7 +1373,7 @@ static void null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
blk_status_t sts;
if (op == REQ_OP_FLUSH) {
- cmd->error = errno_to_blk_status(null_handle_flush(nullb));
+ cmd->error = null_handle_flush(nullb);
goto out;
}
@@ -1928,7 +1942,7 @@ static int null_add_dev(struct nullb_device *dev)
if (dev->cache_size > 0) {
set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
- blk_queue_write_cache(nullb->q, true, true);
+ blk_queue_write_cache(nullb->q, true, dev->fua);
}
nullb->q->queuedata = nullb;
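As a usage note (hypothetical invocations built only from the parameters added above): modprobe null_blk memory_backed=1 cache_size=64 fua=0 creates a memory-backed device with a write cache but without advertising FUA, while zoned=1 zone_append_max_sectors=0 requests zone-append emulation rather than native support. Both knobs are also exposed per device through configfs as fua and zone_append_max_sectors.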
diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
index 477b977468..3234e6c85e 100644
--- a/drivers/block/null_blk/null_blk.h
+++ b/drivers/block/null_blk/null_blk.h
@@ -82,6 +82,7 @@ struct nullb_device {
unsigned int zone_nr_conv; /* number of conventional zones */
unsigned int zone_max_open; /* max number of open zones */
unsigned int zone_max_active; /* max number of active zones */
+ unsigned int zone_append_max_sectors; /* Max sectors per zone append command */
unsigned int submit_queues; /* number of submission queues */
unsigned int prev_submit_queues; /* number of submission queues before change */
unsigned int poll_queues; /* number of IOPOLL submission queues */
@@ -104,6 +105,7 @@ struct nullb_device {
bool no_sched; /* no IO scheduler for the device */
bool shared_tags; /* share tag set between devices for blk-mq */
bool shared_tag_bitmap; /* use hostwide shared tags */
+ bool fua; /* Support FUA */
};
struct nullb {
diff --git a/drivers/block/null_blk/trace.h b/drivers/block/null_blk/trace.h
index ef2d05d5f0..82b8f6a5e5 100644
--- a/drivers/block/null_blk/trace.h
+++ b/drivers/block/null_blk/trace.h
@@ -36,7 +36,12 @@ TRACE_EVENT(nullb_zone_op,
TP_ARGS(cmd, zone_no, zone_cond),
TP_STRUCT__entry(
__array(char, disk, DISK_NAME_LEN)
- __field(enum req_op, op)
+ /*
+ * __field() uses is_signed_type(). is_signed_type() does not
+ * support bitwise types. Use __field_struct() instead because
+ * it does not use is_signed_type().
+ */
+ __field_struct(enum req_op, op)
__field(unsigned int, zone_no)
__field(unsigned int, zone_cond)
),
diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
index 74d0418dda..f118d304f3 100644
--- a/drivers/block/null_blk/zoned.c
+++ b/drivers/block/null_blk/zoned.c
@@ -9,6 +9,8 @@
#undef pr_fmt
#define pr_fmt(fmt) "null_blk: " fmt
+#define NULL_ZONE_INVALID_WP ((sector_t)-1)
+
static inline sector_t mb_to_sects(unsigned long mb)
{
return ((sector_t)mb * SZ_1M) >> SECTOR_SHIFT;
@@ -19,18 +21,6 @@ static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect)
return sect >> ilog2(dev->zone_size_sects);
}
-static inline void null_lock_zone_res(struct nullb_device *dev)
-{
- if (dev->need_zone_res_mgmt)
- spin_lock_irq(&dev->zone_res_lock);
-}
-
-static inline void null_unlock_zone_res(struct nullb_device *dev)
-{
- if (dev->need_zone_res_mgmt)
- spin_unlock_irq(&dev->zone_res_lock);
-}
-
static inline void null_init_zone_lock(struct nullb_device *dev,
struct nullb_zone *zone)
{
@@ -114,6 +104,11 @@ int null_init_zoned_dev(struct nullb_device *dev,
dev->zone_nr_conv);
}
+ dev->zone_append_max_sectors =
+ min(ALIGN_DOWN(dev->zone_append_max_sectors,
+ dev->blocksize >> SECTOR_SHIFT),
+ zone_capacity_sects);
+
/* Max active zones has to be < nbr of seq zones in order to be enforceable */
if (dev->zone_max_active >= dev->nr_zones - dev->zone_nr_conv) {
dev->zone_max_active = 0;
@@ -165,7 +160,7 @@ int null_init_zoned_dev(struct nullb_device *dev,
lim->zoned = true;
lim->chunk_sectors = dev->zone_size_sects;
- lim->max_zone_append_sectors = dev->zone_size_sects;
+ lim->max_zone_append_sectors = dev->zone_append_max_sectors;
lim->max_open_zones = dev->zone_max_open;
lim->max_active_zones = dev->zone_max_active;
return 0;
@@ -174,11 +169,16 @@ int null_init_zoned_dev(struct nullb_device *dev,
int null_register_zoned_dev(struct nullb *nullb)
{
struct request_queue *q = nullb->q;
+ struct gendisk *disk = nullb->disk;
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
- blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
- nullb->disk->nr_zones = bdev_nr_zones(nullb->disk->part0);
- return blk_revalidate_disk_zones(nullb->disk, NULL);
+ disk->nr_zones = bdev_nr_zones(disk->part0);
+
+ pr_info("%s: using %s zone append\n",
+ disk->disk_name,
+ queue_emulates_zone_append(q) ? "emulated" : "native");
+
+ return blk_revalidate_disk_zones(disk);
}
void null_free_zoned_dev(struct nullb_device *dev)
@@ -252,35 +252,6 @@ size_t null_zone_valid_read_len(struct nullb *nullb,
return (zone->wp - sector) << SECTOR_SHIFT;
}
-static blk_status_t __null_close_zone(struct nullb_device *dev,
- struct nullb_zone *zone)
-{
- switch (zone->cond) {
- case BLK_ZONE_COND_CLOSED:
- /* close operation on closed is not an error */
- return BLK_STS_OK;
- case BLK_ZONE_COND_IMP_OPEN:
- dev->nr_zones_imp_open--;
- break;
- case BLK_ZONE_COND_EXP_OPEN:
- dev->nr_zones_exp_open--;
- break;
- case BLK_ZONE_COND_EMPTY:
- case BLK_ZONE_COND_FULL:
- default:
- return BLK_STS_IOERR;
- }
-
- if (zone->wp == zone->start) {
- zone->cond = BLK_ZONE_COND_EMPTY;
- } else {
- zone->cond = BLK_ZONE_COND_CLOSED;
- dev->nr_zones_closed++;
- }
-
- return BLK_STS_OK;
-}
-
static void null_close_imp_open_zone(struct nullb_device *dev)
{
struct nullb_zone *zone;
@@ -297,7 +268,13 @@ static void null_close_imp_open_zone(struct nullb_device *dev)
zno = dev->zone_nr_conv;
if (zone->cond == BLK_ZONE_COND_IMP_OPEN) {
- __null_close_zone(dev, zone);
+ dev->nr_zones_imp_open--;
+ if (zone->wp == zone->start) {
+ zone->cond = BLK_ZONE_COND_EMPTY;
+ } else {
+ zone->cond = BLK_ZONE_COND_CLOSED;
+ dev->nr_zones_closed++;
+ }
dev->imp_close_zone_no = zno;
return;
}
@@ -385,73 +362,73 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
null_lock_zone(dev, zone);
- if (zone->cond == BLK_ZONE_COND_FULL ||
- zone->cond == BLK_ZONE_COND_READONLY ||
- zone->cond == BLK_ZONE_COND_OFFLINE) {
- /* Cannot write to the zone */
- ret = BLK_STS_IOERR;
- goto unlock;
- }
-
/*
- * Regular writes must be at the write pointer position.
- * Zone append writes are automatically issued at the write
- * pointer and the position returned using the request or BIO
- * sector.
+ * Regular writes must be at the write pointer position. Zone append
+ * writes are automatically issued at the write pointer and the position
+ * returned using the request sector. Note that we do not check the zone
+ * condition because for FULL, READONLY and OFFLINE zones, the sector
+ * check against the zone write pointer will always result in failing
+ * the command.
*/
if (append) {
+ if (WARN_ON_ONCE(!dev->zone_append_max_sectors) ||
+ zone->wp == NULL_ZONE_INVALID_WP) {
+ ret = BLK_STS_IOERR;
+ goto unlock_zone;
+ }
sector = zone->wp;
blk_mq_rq_from_pdu(cmd)->__sector = sector;
- } else if (sector != zone->wp) {
- ret = BLK_STS_IOERR;
- goto unlock;
}
- if (zone->wp + nr_sectors > zone->start + zone->capacity) {
+ if (sector != zone->wp ||
+ zone->wp + nr_sectors > zone->start + zone->capacity) {
ret = BLK_STS_IOERR;
- goto unlock;
+ goto unlock_zone;
}
if (zone->cond == BLK_ZONE_COND_CLOSED ||
zone->cond == BLK_ZONE_COND_EMPTY) {
- null_lock_zone_res(dev);
+ if (dev->need_zone_res_mgmt) {
+ spin_lock(&dev->zone_res_lock);
- ret = null_check_zone_resources(dev, zone);
- if (ret != BLK_STS_OK) {
- null_unlock_zone_res(dev);
- goto unlock;
- }
- if (zone->cond == BLK_ZONE_COND_CLOSED) {
- dev->nr_zones_closed--;
- dev->nr_zones_imp_open++;
- } else if (zone->cond == BLK_ZONE_COND_EMPTY) {
- dev->nr_zones_imp_open++;
- }
+ ret = null_check_zone_resources(dev, zone);
+ if (ret != BLK_STS_OK) {
+ spin_unlock(&dev->zone_res_lock);
+ goto unlock_zone;
+ }
+ if (zone->cond == BLK_ZONE_COND_CLOSED) {
+ dev->nr_zones_closed--;
+ dev->nr_zones_imp_open++;
+ } else if (zone->cond == BLK_ZONE_COND_EMPTY) {
+ dev->nr_zones_imp_open++;
+ }
- if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
- zone->cond = BLK_ZONE_COND_IMP_OPEN;
+ spin_unlock(&dev->zone_res_lock);
+ }
- null_unlock_zone_res(dev);
+ zone->cond = BLK_ZONE_COND_IMP_OPEN;
}
ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
if (ret != BLK_STS_OK)
- goto unlock;
+ goto unlock_zone;
zone->wp += nr_sectors;
if (zone->wp == zone->start + zone->capacity) {
- null_lock_zone_res(dev);
- if (zone->cond == BLK_ZONE_COND_EXP_OPEN)
- dev->nr_zones_exp_open--;
- else if (zone->cond == BLK_ZONE_COND_IMP_OPEN)
- dev->nr_zones_imp_open--;
+ if (dev->need_zone_res_mgmt) {
+ spin_lock(&dev->zone_res_lock);
+ if (zone->cond == BLK_ZONE_COND_EXP_OPEN)
+ dev->nr_zones_exp_open--;
+ else if (zone->cond == BLK_ZONE_COND_IMP_OPEN)
+ dev->nr_zones_imp_open--;
+ spin_unlock(&dev->zone_res_lock);
+ }
zone->cond = BLK_ZONE_COND_FULL;
- null_unlock_zone_res(dev);
}
ret = BLK_STS_OK;
-unlock:
+unlock_zone:
null_unlock_zone(dev, zone);
return ret;
@@ -465,54 +442,100 @@ static blk_status_t null_open_zone(struct nullb_device *dev,
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
return BLK_STS_IOERR;
- null_lock_zone_res(dev);
-
switch (zone->cond) {
case BLK_ZONE_COND_EXP_OPEN:
- /* open operation on exp open is not an error */
- goto unlock;
+ /* Open operation on exp open is not an error */
+ return BLK_STS_OK;
case BLK_ZONE_COND_EMPTY:
- ret = null_check_zone_resources(dev, zone);
- if (ret != BLK_STS_OK)
- goto unlock;
- break;
case BLK_ZONE_COND_IMP_OPEN:
- dev->nr_zones_imp_open--;
- break;
case BLK_ZONE_COND_CLOSED:
- ret = null_check_zone_resources(dev, zone);
- if (ret != BLK_STS_OK)
- goto unlock;
- dev->nr_zones_closed--;
break;
case BLK_ZONE_COND_FULL:
default:
- ret = BLK_STS_IOERR;
- goto unlock;
+ return BLK_STS_IOERR;
}
- zone->cond = BLK_ZONE_COND_EXP_OPEN;
- dev->nr_zones_exp_open++;
+ if (dev->need_zone_res_mgmt) {
+ spin_lock(&dev->zone_res_lock);
-unlock:
- null_unlock_zone_res(dev);
+ switch (zone->cond) {
+ case BLK_ZONE_COND_EMPTY:
+ ret = null_check_zone_resources(dev, zone);
+ if (ret != BLK_STS_OK) {
+ spin_unlock(&dev->zone_res_lock);
+ return ret;
+ }
+ break;
+ case BLK_ZONE_COND_IMP_OPEN:
+ dev->nr_zones_imp_open--;
+ break;
+ case BLK_ZONE_COND_CLOSED:
+ ret = null_check_zone_resources(dev, zone);
+ if (ret != BLK_STS_OK) {
+ spin_unlock(&dev->zone_res_lock);
+ return ret;
+ }
+ dev->nr_zones_closed--;
+ break;
+ default:
+ break;
+ }
- return ret;
+ dev->nr_zones_exp_open++;
+
+ spin_unlock(&dev->zone_res_lock);
+ }
+
+ zone->cond = BLK_ZONE_COND_EXP_OPEN;
+
+ return BLK_STS_OK;
}
static blk_status_t null_close_zone(struct nullb_device *dev,
struct nullb_zone *zone)
{
- blk_status_t ret;
-
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
return BLK_STS_IOERR;
- null_lock_zone_res(dev);
- ret = __null_close_zone(dev, zone);
- null_unlock_zone_res(dev);
+ switch (zone->cond) {
+ case BLK_ZONE_COND_CLOSED:
+ /* close operation on closed is not an error */
+ return BLK_STS_OK;
+ case BLK_ZONE_COND_IMP_OPEN:
+ case BLK_ZONE_COND_EXP_OPEN:
+ break;
+ case BLK_ZONE_COND_EMPTY:
+ case BLK_ZONE_COND_FULL:
+ default:
+ return BLK_STS_IOERR;
+ }
+
+ if (dev->need_zone_res_mgmt) {
+ spin_lock(&dev->zone_res_lock);
- return ret;
+ switch (zone->cond) {
+ case BLK_ZONE_COND_IMP_OPEN:
+ dev->nr_zones_imp_open--;
+ break;
+ case BLK_ZONE_COND_EXP_OPEN:
+ dev->nr_zones_exp_open--;
+ break;
+ default:
+ break;
+ }
+
+ if (zone->wp > zone->start)
+ dev->nr_zones_closed++;
+
+ spin_unlock(&dev->zone_res_lock);
+ }
+
+ if (zone->wp == zone->start)
+ zone->cond = BLK_ZONE_COND_EMPTY;
+ else
+ zone->cond = BLK_ZONE_COND_CLOSED;
+
+ return BLK_STS_OK;
}
static blk_status_t null_finish_zone(struct nullb_device *dev,
@@ -523,41 +546,47 @@ static blk_status_t null_finish_zone(struct nullb_device *dev,
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
return BLK_STS_IOERR;
- null_lock_zone_res(dev);
+ if (dev->need_zone_res_mgmt) {
+ spin_lock(&dev->zone_res_lock);
- switch (zone->cond) {
- case BLK_ZONE_COND_FULL:
- /* finish operation on full is not an error */
- goto unlock;
- case BLK_ZONE_COND_EMPTY:
- ret = null_check_zone_resources(dev, zone);
- if (ret != BLK_STS_OK)
- goto unlock;
- break;
- case BLK_ZONE_COND_IMP_OPEN:
- dev->nr_zones_imp_open--;
- break;
- case BLK_ZONE_COND_EXP_OPEN:
- dev->nr_zones_exp_open--;
- break;
- case BLK_ZONE_COND_CLOSED:
- ret = null_check_zone_resources(dev, zone);
- if (ret != BLK_STS_OK)
- goto unlock;
- dev->nr_zones_closed--;
- break;
- default:
- ret = BLK_STS_IOERR;
- goto unlock;
+ switch (zone->cond) {
+ case BLK_ZONE_COND_FULL:
+ /* Finish operation on full is not an error */
+ spin_unlock(&dev->zone_res_lock);
+ return BLK_STS_OK;
+ case BLK_ZONE_COND_EMPTY:
+ ret = null_check_zone_resources(dev, zone);
+ if (ret != BLK_STS_OK) {
+ spin_unlock(&dev->zone_res_lock);
+ return ret;
+ }
+ break;
+ case BLK_ZONE_COND_IMP_OPEN:
+ dev->nr_zones_imp_open--;
+ break;
+ case BLK_ZONE_COND_EXP_OPEN:
+ dev->nr_zones_exp_open--;
+ break;
+ case BLK_ZONE_COND_CLOSED:
+ ret = null_check_zone_resources(dev, zone);
+ if (ret != BLK_STS_OK) {
+ spin_unlock(&dev->zone_res_lock);
+ return ret;
+ }
+ dev->nr_zones_closed--;
+ break;
+ default:
+ spin_unlock(&dev->zone_res_lock);
+ return BLK_STS_IOERR;
+ }
+
+ spin_unlock(&dev->zone_res_lock);
}
zone->cond = BLK_ZONE_COND_FULL;
zone->wp = zone->start + zone->len;
-unlock:
- null_unlock_zone_res(dev);
-
- return ret;
+ return BLK_STS_OK;
}
static blk_status_t null_reset_zone(struct nullb_device *dev,
@@ -566,34 +595,33 @@ static blk_status_t null_reset_zone(struct nullb_device *dev,
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
return BLK_STS_IOERR;
- null_lock_zone_res(dev);
+ if (dev->need_zone_res_mgmt) {
+ spin_lock(&dev->zone_res_lock);
- switch (zone->cond) {
- case BLK_ZONE_COND_EMPTY:
- /* reset operation on empty is not an error */
- null_unlock_zone_res(dev);
- return BLK_STS_OK;
- case BLK_ZONE_COND_IMP_OPEN:
- dev->nr_zones_imp_open--;
- break;
- case BLK_ZONE_COND_EXP_OPEN:
- dev->nr_zones_exp_open--;
- break;
- case BLK_ZONE_COND_CLOSED:
- dev->nr_zones_closed--;
- break;
- case BLK_ZONE_COND_FULL:
- break;
- default:
- null_unlock_zone_res(dev);
- return BLK_STS_IOERR;
+ switch (zone->cond) {
+ case BLK_ZONE_COND_IMP_OPEN:
+ dev->nr_zones_imp_open--;
+ break;
+ case BLK_ZONE_COND_EXP_OPEN:
+ dev->nr_zones_exp_open--;
+ break;
+ case BLK_ZONE_COND_CLOSED:
+ dev->nr_zones_closed--;
+ break;
+ case BLK_ZONE_COND_EMPTY:
+ case BLK_ZONE_COND_FULL:
+ break;
+ default:
+ spin_unlock(&dev->zone_res_lock);
+ return BLK_STS_IOERR;
+ }
+
+ spin_unlock(&dev->zone_res_lock);
}
zone->cond = BLK_ZONE_COND_EMPTY;
zone->wp = zone->start;
- null_unlock_zone_res(dev);
-
if (dev->memory_backed)
return null_handle_discard(dev, zone->start, zone->len);
@@ -722,7 +750,7 @@ static void null_set_zone_cond(struct nullb_device *dev,
zone->cond != BLK_ZONE_COND_OFFLINE)
null_finish_zone(dev, zone);
zone->cond = cond;
- zone->wp = (sector_t)-1;
+ zone->wp = NULL_ZONE_INVALID_WP;
}
null_unlock_zone(dev, zone);
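All of the zone operations above now follow the same open-coded pattern in place of the removed null_lock_zone_res()/null_unlock_zone_res() helpers; the structure, condensed as a sketch (not literal patch code), is:

	if (dev->need_zone_res_mgmt) {
		spin_lock(&dev->zone_res_lock);
		/* adjust nr_zones_imp_open / nr_zones_exp_open /
		 * nr_zones_closed, bailing out early if
		 * null_check_zone_resources() reports exhaustion */
		spin_unlock(&dev->zone_res_lock);
	}
	zone->cond = new_cond;	/* condition update stays outside the lock */

Devices without open/active zone limits (need_zone_res_mgmt == false) thus skip the lock entirely and only update the zone condition.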
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 21728e9ea5..8a2ce80700 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2215,6 +2215,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, bool write)
}
dev_info(ddev, "%lukB available on disc\n", lba << 1);
}
+ set_blocksize(bdev_file, CD_FRAMESIZE);
return 0;
@@ -2278,11 +2279,6 @@ static int pkt_open(struct gendisk *disk, blk_mode_t mode)
ret = pkt_open_dev(pd, mode & BLK_OPEN_WRITE);
if (ret)
goto out_dec;
- /*
- * needed here as well, since ext2 (among others) may change
- * the blocksize at mount time
- */
- set_blocksize(disk->part0, CD_FRAMESIZE);
}
mutex_unlock(&ctl_mutex);
mutex_unlock(&pktcdvd_mutex);
@@ -2526,7 +2522,6 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
__module_get(THIS_MODULE);
pd->bdev_file = bdev_file;
- set_blocksize(file_bdev(bdev_file), CD_FRAMESIZE);
atomic_set(&pd->cdrw.pending_bios, 0);
pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->disk->disk_name);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 26ff5cd2bf..da22ce38c0 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -362,7 +362,7 @@ enum rbd_watch_state {
enum rbd_lock_state {
RBD_LOCK_STATE_UNLOCKED,
RBD_LOCK_STATE_LOCKED,
- RBD_LOCK_STATE_RELEASING,
+ RBD_LOCK_STATE_QUIESCING,
};
/* WatchNotify::ClientId */
@@ -422,7 +422,7 @@ struct rbd_device {
struct list_head running_list;
struct completion acquire_wait;
int acquire_err;
- struct completion releasing_wait;
+ struct completion quiescing_wait;
spinlock_t object_map_lock;
u8 *object_map;
@@ -525,7 +525,7 @@ static bool __rbd_is_lock_owner(struct rbd_device *rbd_dev)
lockdep_assert_held(&rbd_dev->lock_rwsem);
return rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED ||
- rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING;
+ rbd_dev->lock_state == RBD_LOCK_STATE_QUIESCING;
}
static bool rbd_is_lock_owner(struct rbd_device *rbd_dev)
@@ -3457,13 +3457,14 @@ static void rbd_lock_del_request(struct rbd_img_request *img_req)
lockdep_assert_held(&rbd_dev->lock_rwsem);
spin_lock(&rbd_dev->lock_lists_lock);
if (!list_empty(&img_req->lock_item)) {
+ rbd_assert(!list_empty(&rbd_dev->running_list));
list_del_init(&img_req->lock_item);
- need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING &&
+ need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_QUIESCING &&
list_empty(&rbd_dev->running_list));
}
spin_unlock(&rbd_dev->lock_lists_lock);
if (need_wakeup)
- complete(&rbd_dev->releasing_wait);
+ complete(&rbd_dev->quiescing_wait);
}
static int rbd_img_exclusive_lock(struct rbd_img_request *img_req)
@@ -3476,11 +3477,6 @@ static int rbd_img_exclusive_lock(struct rbd_img_request *img_req)
if (rbd_lock_add_request(img_req))
return 1;
- if (rbd_dev->opts->exclusive) {
- WARN_ON(1); /* lock got released? */
- return -EROFS;
- }
-
/*
* Note the use of mod_delayed_work() in rbd_acquire_lock()
* and cancel_delayed_work() in wake_lock_waiters().
@@ -4181,16 +4177,16 @@ static bool rbd_quiesce_lock(struct rbd_device *rbd_dev)
/*
* Ensure that all in-flight IO is flushed.
*/
- rbd_dev->lock_state = RBD_LOCK_STATE_RELEASING;
- rbd_assert(!completion_done(&rbd_dev->releasing_wait));
+ rbd_dev->lock_state = RBD_LOCK_STATE_QUIESCING;
+ rbd_assert(!completion_done(&rbd_dev->quiescing_wait));
if (list_empty(&rbd_dev->running_list))
return true;
up_write(&rbd_dev->lock_rwsem);
- wait_for_completion(&rbd_dev->releasing_wait);
+ wait_for_completion(&rbd_dev->quiescing_wait);
down_write(&rbd_dev->lock_rwsem);
- if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING)
+ if (rbd_dev->lock_state != RBD_LOCK_STATE_QUIESCING)
return false;
rbd_assert(list_empty(&rbd_dev->running_list));
@@ -4601,6 +4597,10 @@ static void rbd_reacquire_lock(struct rbd_device *rbd_dev)
rbd_warn(rbd_dev, "failed to update lock cookie: %d",
ret);
+ if (rbd_dev->opts->exclusive)
+ rbd_warn(rbd_dev,
+ "temporarily releasing lock on exclusive mapping");
+
/*
* Lock cookie cannot be updated on older OSDs, so do
* a manual release and queue an acquire.
@@ -5383,7 +5383,7 @@ static struct rbd_device *__rbd_dev_create(struct rbd_spec *spec)
INIT_LIST_HEAD(&rbd_dev->acquiring_list);
INIT_LIST_HEAD(&rbd_dev->running_list);
init_completion(&rbd_dev->acquire_wait);
- init_completion(&rbd_dev->releasing_wait);
+ init_completion(&rbd_dev->quiescing_wait);
spin_lock_init(&rbd_dev->object_map_lock);
@@ -6589,11 +6589,6 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
if (ret)
return ret;
- /*
- * The lock may have been released by now, unless automatic lock
- * transitions are disabled.
- */
- rbd_assert(!rbd_dev->opts->exclusive || rbd_is_lock_owner(rbd_dev));
return 0;
}
diff --git a/drivers/block/rnbd/rnbd-srv-trace.h b/drivers/block/rnbd/rnbd-srv-trace.h
index 8dedf73bdd..89d0bcb171 100644
--- a/drivers/block/rnbd/rnbd-srv-trace.h
+++ b/drivers/block/rnbd/rnbd-srv-trace.h
@@ -27,7 +27,7 @@ DECLARE_EVENT_CLASS(rnbd_srv_link_class,
TP_fast_assign(
__entry->qdepth = srv->queue_depth;
- __assign_str(sessname, srv->sessname);
+ __assign_str(sessname);
),
TP_printk("sessname: %s qdepth: %d",
@@ -85,7 +85,7 @@ TRACE_EVENT(process_rdma,
),
TP_fast_assign(
- __assign_str(sessname, srv->sessname);
+ __assign_str(sessname);
__entry->dir = id->dir;
__entry->ver = srv->ver;
__entry->device_id = le32_to_cpu(msg->device_id);
@@ -130,7 +130,7 @@ TRACE_EVENT(process_msg_sess_info,
__entry->proto_ver = srv->ver;
__entry->clt_ver = msg->ver;
__entry->srv_ver = RNBD_PROTO_VER_MAJOR;
- __assign_str(sessname, srv->sessname);
+ __assign_str(sessname);
),
TP_printk("Session %s using proto-ver %d (clt-ver: %d, srv-ver: %d)",
@@ -165,8 +165,8 @@ TRACE_EVENT(process_msg_open,
TP_fast_assign(
__entry->access_mode = msg->access_mode;
- __assign_str(sessname, srv->sessname);
- __assign_str(dev_name, msg->dev_name);
+ __assign_str(sessname);
+ __assign_str(dev_name);
),
TP_printk("Open message received: session='%s' path='%s' access_mode=%s",
@@ -189,7 +189,7 @@ TRACE_EVENT(process_msg_close,
TP_fast_assign(
__entry->device_id = le32_to_cpu(msg->device_id);
- __assign_str(sessname, srv->sessname);
+ __assign_str(sessname);
),
TP_printk("Close message received: session='%s' device id='%d'",
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 374e4efa87..3b58839321 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -48,6 +48,9 @@
#define UBLK_MINORS (1U << MINORBITS)
+/* private ioctl command mirror */
+#define UBLK_CMD_DEL_DEV_ASYNC _IOC_NR(UBLK_U_CMD_DEL_DEV_ASYNC)
+
/* All UBLK_F_* have to be included into UBLK_F_ALL */
#define UBLK_F_ALL (UBLK_F_SUPPORT_ZERO_COPY \
| UBLK_F_URING_CMD_COMP_IN_TASK \
@@ -221,7 +224,7 @@ static int ublk_get_nr_zones(const struct ublk_device *ub)
static int ublk_revalidate_disk_zones(struct ublk_device *ub)
{
- return blk_revalidate_disk_zones(ub->ub_disk, NULL);
+ return blk_revalidate_disk_zones(ub->ub_disk);
}
static int ublk_dev_param_zoned_validate(const struct ublk_device *ub)
@@ -249,8 +252,7 @@ static int ublk_dev_param_zoned_validate(const struct ublk_device *ub)
static void ublk_dev_param_zoned_apply(struct ublk_device *ub)
{
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ub->ub_disk->queue);
- blk_queue_required_elevator_features(ub->ub_disk->queue,
- ELEVATOR_F_ZBD_SEQ_WRITE);
+
ub->ub_disk->nr_zones = ublk_get_nr_zones(ub);
}
@@ -2179,6 +2181,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
.virt_boundary_mask = p->virt_boundary_mask,
.max_segments = USHRT_MAX,
.max_segment_size = UINT_MAX,
+ .dma_alignment = 3,
};
struct gendisk *disk;
int ret = -EINVAL;
@@ -2904,7 +2907,7 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
case UBLK_CMD_DEL_DEV:
ret = ublk_ctrl_del_dev(&ub, true);
break;
- case UBLK_U_CMD_DEL_DEV_ASYNC:
+ case UBLK_CMD_DEL_DEV_ASYNC:
ret = ublk_ctrl_del_dev(&ub, false);
break;
case UBLK_CMD_GET_QUEUE_AFFINITY:
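The new define matters because this dispatch switch operates on _IOC_NR(cmd->cmd_op) rather than the full ioctl value, so a case label of UBLK_U_CMD_DEL_DEV_ASYNC could never match; mirroring its command number as UBLK_CMD_DEL_DEV_ASYNC (the define added above) makes the case reachable.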
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 42dea7601d..2351f411fa 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -1543,7 +1543,7 @@ static int virtblk_probe(struct virtio_device *vdev)
*/
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && lim.zoned) {
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, vblk->disk->queue);
- err = blk_revalidate_disk_zones(vblk->disk, NULL);
+ err = blk_revalidate_disk_zones(vblk->disk);
if (err)
goto out_cleanup_disk;
}
@@ -1658,7 +1658,6 @@ static struct virtio_driver virtio_blk = {
.feature_table_legacy = features_legacy,
.feature_table_size_legacy = ARRAY_SIZE(features_legacy),
.driver.name = KBUILD_MODNAME,
- .driver.owner = THIS_MODULE,
.id_table = id_table,
.probe = virtblk_probe,
.remove = virtblk_remove,
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index fd7c0ff213..67aa63dabc 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1063,8 +1063,7 @@ static char *encode_disk_name(char *ptr, unsigned int n)
}
static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
- struct blkfront_info *info, u16 sector_size,
- unsigned int physical_sector_size)
+ struct blkfront_info *info)
{
struct queue_limits lim = {};
struct gendisk *gd;
@@ -1159,8 +1158,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
info->rq = gd->queue;
info->gd = gd;
- info->sector_size = sector_size;
- info->physical_sector_size = physical_sector_size;
xlvbd_flush(info);
@@ -2315,8 +2312,6 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
static void blkfront_connect(struct blkfront_info *info)
{
unsigned long long sectors;
- unsigned long sector_size;
- unsigned int physical_sector_size;
int err, i;
struct blkfront_ring_info *rinfo;
@@ -2355,7 +2350,7 @@ static void blkfront_connect(struct blkfront_info *info)
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
"sectors", "%llu", &sectors,
"info", "%u", &info->vdisk_info,
- "sector-size", "%lu", &sector_size,
+ "sector-size", "%lu", &info->sector_size,
NULL);
if (err) {
xenbus_dev_fatal(info->xbdev, err,
@@ -2369,9 +2364,9 @@ static void blkfront_connect(struct blkfront_info *info)
* provide this. Assume physical sector size to be the same as
* sector_size in that case.
*/
- physical_sector_size = xenbus_read_unsigned(info->xbdev->otherend,
+ info->physical_sector_size = xenbus_read_unsigned(info->xbdev->otherend,
"physical-sector-size",
- sector_size);
+ info->sector_size);
blkfront_gather_backend_features(info);
for_each_rinfo(info, rinfo, i) {
err = blkfront_setup_indirect(rinfo);
@@ -2383,8 +2378,7 @@ static void blkfront_connect(struct blkfront_info *info)
}
}
- err = xlvbd_alloc_gendisk(sectors, info, sector_size,
- physical_sector_size);
+ err = xlvbd_alloc_gendisk(sectors, info);
if (err) {
xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
info->xbdev->otherend);
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index f0639df6cd..3acd7006ad 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -426,11 +426,10 @@ static void reset_bdev(struct zram *zram)
if (!zram->backing_dev)
return;
- fput(zram->bdev_file);
/* hope filp_close flush all of IO */
filp_close(zram->backing_dev, NULL);
zram->backing_dev = NULL;
- zram->bdev_file = NULL;
+ zram->bdev = NULL;
zram->disk->fops = &zram_devops;
kvfree(zram->bitmap);
zram->bitmap = NULL;
@@ -473,10 +472,8 @@ static ssize_t backing_dev_store(struct device *dev,
size_t sz;
struct file *backing_dev = NULL;
struct inode *inode;
- struct address_space *mapping;
unsigned int bitmap_sz;
unsigned long nr_pages, *bitmap = NULL;
- struct file *bdev_file = NULL;
int err;
struct zram *zram = dev_to_zram(dev);
@@ -497,15 +494,14 @@ static ssize_t backing_dev_store(struct device *dev,
if (sz > 0 && file_name[sz - 1] == '\n')
file_name[sz - 1] = 0x00;
- backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
+ backing_dev = filp_open(file_name, O_RDWR | O_LARGEFILE | O_EXCL, 0);
if (IS_ERR(backing_dev)) {
err = PTR_ERR(backing_dev);
backing_dev = NULL;
goto out;
}
- mapping = backing_dev->f_mapping;
- inode = mapping->host;
+ inode = backing_dev->f_mapping->host;
/* Support only block devices at this moment */
if (!S_ISBLK(inode->i_mode)) {
@@ -513,14 +509,6 @@ static ssize_t backing_dev_store(struct device *dev,
goto out;
}
- bdev_file = bdev_file_open_by_dev(inode->i_rdev,
- BLK_OPEN_READ | BLK_OPEN_WRITE, zram, NULL);
- if (IS_ERR(bdev_file)) {
- err = PTR_ERR(bdev_file);
- bdev_file = NULL;
- goto out;
- }
-
nr_pages = i_size_read(inode) >> PAGE_SHIFT;
bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
@@ -531,7 +519,7 @@ static ssize_t backing_dev_store(struct device *dev,
reset_bdev(zram);
- zram->bdev_file = bdev_file;
+ zram->bdev = I_BDEV(inode);
zram->backing_dev = backing_dev;
zram->bitmap = bitmap;
zram->nr_pages = nr_pages;
@@ -544,9 +532,6 @@ static ssize_t backing_dev_store(struct device *dev,
out:
kvfree(bitmap);
- if (bdev_file)
- fput(bdev_file);
-
if (backing_dev)
filp_close(backing_dev, NULL);
@@ -587,7 +572,7 @@ static void read_from_bdev_async(struct zram *zram, struct page *page,
{
struct bio *bio;
- bio = bio_alloc(file_bdev(zram->bdev_file), 1, parent->bi_opf, GFP_NOIO);
+ bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO);
bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
__bio_add_page(bio, page, PAGE_SIZE, 0);
bio_chain(bio, parent);
@@ -703,7 +688,7 @@ static ssize_t writeback_store(struct device *dev,
continue;
}
- bio_init(&bio, file_bdev(zram->bdev_file), &bio_vec, 1,
+ bio_init(&bio, zram->bdev, &bio_vec, 1,
REQ_OP_WRITE | REQ_SYNC);
bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
__bio_add_page(&bio, page, PAGE_SIZE, 0);
@@ -785,7 +770,7 @@ static void zram_sync_read(struct work_struct *work)
struct bio_vec bv;
struct bio bio;
- bio_init(&bio, file_bdev(zw->zram->bdev_file), &bv, 1, REQ_OP_READ);
+ bio_init(&bio, zw->zram->bdev, &bv, 1, REQ_OP_READ);
bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9);
__bio_add_page(&bio, zw->page, PAGE_SIZE, 0);
zw->error = submit_bio_wait(&bio);
@@ -1568,7 +1553,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
* Corresponding ZRAM slot should be locked.
*/
static int zram_recompress(struct zram *zram, u32 index, struct page *page,
- u32 threshold, u32 prio, u32 prio_max)
+ u64 *num_recomp_pages, u32 threshold, u32 prio,
+ u32 prio_max)
{
struct zcomp_strm *zstrm = NULL;
unsigned long handle_old;
@@ -1645,6 +1631,15 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page,
if (!zstrm)
return 0;
+ /*
+ * Decrement the limit (if set) on pages we can recompress, even
+ * when current recompression was unsuccessful or did not compress
+ * the page below the threshold, because we still spent resources
+ * on it.
+ */
+ if (*num_recomp_pages)
+ *num_recomp_pages -= 1;
+
if (class_index_new >= class_index_old) {
/*
* Secondary algorithms failed to re-compress the page
@@ -1710,6 +1705,7 @@ static ssize_t recompress_store(struct device *dev,
struct zram *zram = dev_to_zram(dev);
unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
char *args, *param, *val, *algo = NULL;
+ u64 num_recomp_pages = ULLONG_MAX;
u32 mode = 0, threshold = 0;
unsigned long index;
struct page *page;
@@ -1732,6 +1728,17 @@ static ssize_t recompress_store(struct device *dev,
continue;
}
+ if (!strcmp(param, "max_pages")) {
+ /*
+ * Limit the number of entries (pages) we attempt to
+ * recompress.
+ */
+ ret = kstrtoull(val, 10, &num_recomp_pages);
+ if (ret)
+ return ret;
+ continue;
+ }
+
if (!strcmp(param, "threshold")) {
/*
* We will re-compress only idle objects equal or
@@ -1788,6 +1795,9 @@ static ssize_t recompress_store(struct device *dev,
for (index = 0; index < nr_pages; index++) {
int err = 0;
+ if (!num_recomp_pages)
+ break;
+
zram_slot_lock(zram, index);
if (!zram_allocated(zram, index))
@@ -1807,8 +1817,8 @@ static ssize_t recompress_store(struct device *dev,
zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
goto next;
- err = zram_recompress(zram, index, page, threshold,
- prio, prio_max);
+ err = zram_recompress(zram, index, page, &num_recomp_pages,
+ threshold, prio, prio_max);
next:
zram_slot_unlock(zram, index);
if (err) {
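With the new max_pages parameter the cost of a recompression pass can be bounded; a hypothetical invocation such as echo "type=idle threshold=3000 max_pages=1000" > /sys/block/zram0/recompress stops after 1000 recompression attempts. Each examined slot counts against the budget whether or not it ended up smaller, since the limit is decremented even for unsuccessful recompressions.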
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 37bf29f34d..35e3221446 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -132,7 +132,7 @@ struct zram {
spinlock_t wb_limit_lock;
bool wb_limit_enable;
u64 bd_wb_limit;
- struct file *bdev_file;
+ struct block_device *bdev;
unsigned long *bitmap;
unsigned long nr_pages;
#endif