diff options
Diffstat (limited to 'block/blk-zoned.c')
-rw-r--r-- | block/blk-zoned.c | 388 |
1 files changed, 388 insertions, 0 deletions
diff --git a/block/blk-zoned.c b/block/blk-zoned.c new file mode 100644 index 000000000..d33e89d28 --- /dev/null +++ b/block/blk-zoned.c @@ -0,0 +1,388 @@ +/* + * Zoned block device handling + * + * Copyright (c) 2015, Hannes Reinecke + * Copyright (c) 2015, SUSE Linux GmbH + * + * Copyright (c) 2016, Damien Le Moal + * Copyright (c) 2016, Western Digital + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/rbtree.h> +#include <linux/blkdev.h> + +static inline sector_t blk_zone_start(struct request_queue *q, + sector_t sector) +{ + sector_t zone_mask = blk_queue_zone_sectors(q) - 1; + + return sector & ~zone_mask; +} + +/* + * Return true if a request is a write requests that needs zone write locking. + */ +bool blk_req_needs_zone_write_lock(struct request *rq) +{ + if (!rq->q->seq_zones_wlock) + return false; + + if (blk_rq_is_passthrough(rq)) + return false; + + switch (req_op(rq)) { + case REQ_OP_WRITE_ZEROES: + case REQ_OP_WRITE_SAME: + case REQ_OP_WRITE: + return blk_rq_zone_is_seq(rq); + default: + return false; + } +} +EXPORT_SYMBOL_GPL(blk_req_needs_zone_write_lock); + +void __blk_req_zone_write_lock(struct request *rq) +{ + if (WARN_ON_ONCE(test_and_set_bit(blk_rq_zone_no(rq), + rq->q->seq_zones_wlock))) + return; + + WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED); + rq->rq_flags |= RQF_ZONE_WRITE_LOCKED; +} +EXPORT_SYMBOL_GPL(__blk_req_zone_write_lock); + +void __blk_req_zone_write_unlock(struct request *rq) +{ + rq->rq_flags &= ~RQF_ZONE_WRITE_LOCKED; + if (rq->q->seq_zones_wlock) + WARN_ON_ONCE(!test_and_clear_bit(blk_rq_zone_no(rq), + rq->q->seq_zones_wlock)); +} +EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock); + +/* + * Check that a zone report belongs to the partition. + * If yes, fix its start sector and write pointer, copy it in the + * zone information array and return true. Return false otherwise. + */ +static bool blkdev_report_zone(struct block_device *bdev, + struct blk_zone *rep, + struct blk_zone *zone) +{ + sector_t offset = get_start_sect(bdev); + + if (rep->start < offset) + return false; + + rep->start -= offset; + if (rep->start + rep->len > bdev->bd_part->nr_sects) + return false; + + if (rep->type == BLK_ZONE_TYPE_CONVENTIONAL) + rep->wp = rep->start + rep->len; + else + rep->wp -= offset; + memcpy(zone, rep, sizeof(struct blk_zone)); + + return true; +} + +/** + * blkdev_report_zones - Get zones information + * @bdev: Target block device + * @sector: Sector from which to report zones + * @zones: Array of zone structures where to return the zones information + * @nr_zones: Number of zone structures in the zone array + * @gfp_mask: Memory allocation flags (for bio_alloc) + * + * Description: + * Get zone information starting from the zone containing @sector. + * The number of zone information reported may be less than the number + * requested by @nr_zones. The number of zones actually reported is + * returned in @nr_zones. + */ +int blkdev_report_zones(struct block_device *bdev, + sector_t sector, + struct blk_zone *zones, + unsigned int *nr_zones, + gfp_t gfp_mask) +{ + struct request_queue *q = bdev_get_queue(bdev); + struct blk_zone_report_hdr *hdr; + unsigned int nrz = *nr_zones; + struct page *page; + unsigned int nr_rep; + size_t rep_bytes; + unsigned int nr_pages; + struct bio *bio; + struct bio_vec *bv; + unsigned int i, n, nz; + unsigned int ofst; + void *addr; + int ret; + + if (!q) + return -ENXIO; + + if (!blk_queue_is_zoned(q)) + return -EOPNOTSUPP; + + if (!nrz) + return 0; + + if (sector > bdev->bd_part->nr_sects) { + *nr_zones = 0; + return 0; + } + + /* + * The zone report has a header. So make room for it in the + * payload. Also make sure that the report fits in a single BIO + * that will not be split down the stack. + */ + rep_bytes = sizeof(struct blk_zone_report_hdr) + + sizeof(struct blk_zone) * nrz; + rep_bytes = (rep_bytes + PAGE_SIZE - 1) & PAGE_MASK; + if (rep_bytes > (queue_max_sectors(q) << 9)) + rep_bytes = queue_max_sectors(q) << 9; + + nr_pages = min_t(unsigned int, BIO_MAX_PAGES, + rep_bytes >> PAGE_SHIFT); + nr_pages = min_t(unsigned int, nr_pages, + queue_max_segments(q)); + + bio = bio_alloc(gfp_mask, nr_pages); + if (!bio) + return -ENOMEM; + + bio_set_dev(bio, bdev); + bio->bi_iter.bi_sector = blk_zone_start(q, sector); + bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0); + + for (i = 0; i < nr_pages; i++) { + page = alloc_page(gfp_mask); + if (!page) { + ret = -ENOMEM; + goto out; + } + if (!bio_add_page(bio, page, PAGE_SIZE, 0)) { + __free_page(page); + break; + } + } + + if (i == 0) + ret = -ENOMEM; + else + ret = submit_bio_wait(bio); + if (ret) + goto out; + + /* + * Process the report result: skip the header and go through the + * reported zones to fixup and fixup the zone information for + * partitions. At the same time, return the zone information into + * the zone array. + */ + n = 0; + nz = 0; + nr_rep = 0; + bio_for_each_segment_all(bv, bio, i) { + + if (!bv->bv_page) + break; + + addr = kmap_atomic(bv->bv_page); + + /* Get header in the first page */ + ofst = 0; + if (!nr_rep) { + hdr = addr; + nr_rep = hdr->nr_zones; + ofst = sizeof(struct blk_zone_report_hdr); + } + + /* Fixup and report zones */ + while (ofst < bv->bv_len && + n < nr_rep && nz < nrz) { + if (blkdev_report_zone(bdev, addr + ofst, &zones[nz])) + nz++; + ofst += sizeof(struct blk_zone); + n++; + } + + kunmap_atomic(addr); + + if (n >= nr_rep || nz >= nrz) + break; + + } + + *nr_zones = nz; +out: + bio_for_each_segment_all(bv, bio, i) + __free_page(bv->bv_page); + bio_put(bio); + + return ret; +} +EXPORT_SYMBOL_GPL(blkdev_report_zones); + +/** + * blkdev_reset_zones - Reset zones write pointer + * @bdev: Target block device + * @sector: Start sector of the first zone to reset + * @nr_sectors: Number of sectors, at least the length of one zone + * @gfp_mask: Memory allocation flags (for bio_alloc) + * + * Description: + * Reset the write pointer of the zones contained in the range + * @sector..@sector+@nr_sectors. Specifying the entire disk sector range + * is valid, but the specified range should not contain conventional zones. + */ +int blkdev_reset_zones(struct block_device *bdev, + sector_t sector, sector_t nr_sectors, + gfp_t gfp_mask) +{ + struct request_queue *q = bdev_get_queue(bdev); + sector_t zone_sectors; + sector_t end_sector = sector + nr_sectors; + struct bio *bio; + int ret; + + if (!q) + return -ENXIO; + + if (!blk_queue_is_zoned(q)) + return -EOPNOTSUPP; + + if (end_sector > bdev->bd_part->nr_sects) + /* Out of range */ + return -EINVAL; + + /* Check alignment (handle eventual smaller last zone) */ + zone_sectors = blk_queue_zone_sectors(q); + if (sector & (zone_sectors - 1)) + return -EINVAL; + + if ((nr_sectors & (zone_sectors - 1)) && + end_sector != bdev->bd_part->nr_sects) + return -EINVAL; + + while (sector < end_sector) { + + bio = bio_alloc(gfp_mask, 0); + bio->bi_iter.bi_sector = sector; + bio_set_dev(bio, bdev); + bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0); + + ret = submit_bio_wait(bio); + bio_put(bio); + + if (ret) + return ret; + + sector += zone_sectors; + + /* This may take a while, so be nice to others */ + cond_resched(); + + } + + return 0; +} +EXPORT_SYMBOL_GPL(blkdev_reset_zones); + +/* + * BLKREPORTZONE ioctl processing. + * Called from blkdev_ioctl. + */ +int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + void __user *argp = (void __user *)arg; + struct request_queue *q; + struct blk_zone_report rep; + struct blk_zone *zones; + int ret; + + if (!argp) + return -EINVAL; + + q = bdev_get_queue(bdev); + if (!q) + return -ENXIO; + + if (!blk_queue_is_zoned(q)) + return -ENOTTY; + + if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report))) + return -EFAULT; + + if (!rep.nr_zones) + return -EINVAL; + + if (rep.nr_zones > INT_MAX / sizeof(struct blk_zone)) + return -ERANGE; + + zones = kvmalloc_array(rep.nr_zones, sizeof(struct blk_zone), + GFP_KERNEL | __GFP_ZERO); + if (!zones) + return -ENOMEM; + + ret = blkdev_report_zones(bdev, rep.sector, + zones, &rep.nr_zones, + GFP_KERNEL); + if (ret) + goto out; + + if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) { + ret = -EFAULT; + goto out; + } + + if (rep.nr_zones) { + if (copy_to_user(argp + sizeof(struct blk_zone_report), zones, + sizeof(struct blk_zone) * rep.nr_zones)) + ret = -EFAULT; + } + + out: + kvfree(zones); + + return ret; +} + +/* + * BLKRESETZONE ioctl processing. + * Called from blkdev_ioctl. + */ +int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + void __user *argp = (void __user *)arg; + struct request_queue *q; + struct blk_zone_range zrange; + + if (!argp) + return -EINVAL; + + q = bdev_get_queue(bdev); + if (!q) + return -ENXIO; + + if (!blk_queue_is_zoned(q)) + return -ENOTTY; + + if (!(mode & FMODE_WRITE)) + return -EBADF; + + if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range))) + return -EFAULT; + + return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors, + GFP_KERNEL); +} |