summaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/blk-cgroup.c87
-rw-r--r--block/blk-core.c9
-rw-r--r--block/blk-flush.c3
-rw-r--r--block/blk-merge.c2
-rw-r--r--block/blk-mq.c4
-rw-r--r--block/blk-settings.c2
-rw-r--r--block/blk.h1
-rw-r--r--block/fops.c2
-rw-r--r--block/genhd.c2
-rw-r--r--block/ioctl.c2
-rw-r--r--block/partitions/cmdline.c49
-rw-r--r--block/sed-opal.c2
12 files changed, 95 insertions, 70 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 059467086b..96af822499 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -323,6 +323,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
blkg->q = disk->queue;
INIT_LIST_HEAD(&blkg->q_node);
blkg->blkcg = blkcg;
+ blkg->iostat.blkg = blkg;
#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
spin_lock_init(&blkg->async_bio_lock);
bio_list_init(&blkg->async_bios);
@@ -619,12 +620,45 @@ restart:
spin_unlock_irq(&q->queue_lock);
}
+static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
+{
+ int i;
+
+ for (i = 0; i < BLKG_IOSTAT_NR; i++) {
+ dst->bytes[i] = src->bytes[i];
+ dst->ios[i] = src->ios[i];
+ }
+}
+
+static void __blkg_clear_stat(struct blkg_iostat_set *bis)
+{
+ struct blkg_iostat cur = {0};
+ unsigned long flags;
+
+ flags = u64_stats_update_begin_irqsave(&bis->sync);
+ blkg_iostat_set(&bis->cur, &cur);
+ blkg_iostat_set(&bis->last, &cur);
+ u64_stats_update_end_irqrestore(&bis->sync, flags);
+}
+
+static void blkg_clear_stat(struct blkcg_gq *blkg)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct blkg_iostat_set *s = per_cpu_ptr(blkg->iostat_cpu, cpu);
+
+ __blkg_clear_stat(s);
+ }
+ __blkg_clear_stat(&blkg->iostat);
+}
+
static int blkcg_reset_stats(struct cgroup_subsys_state *css,
struct cftype *cftype, u64 val)
{
struct blkcg *blkcg = css_to_blkcg(css);
struct blkcg_gq *blkg;
- int i, cpu;
+ int i;
mutex_lock(&blkcg_pol_mutex);
spin_lock_irq(&blkcg->lock);
@@ -635,18 +669,7 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
* anyway. If you get hit by a race, retry.
*/
hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
- for_each_possible_cpu(cpu) {
- struct blkg_iostat_set *bis =
- per_cpu_ptr(blkg->iostat_cpu, cpu);
- memset(bis, 0, sizeof(*bis));
-
- /* Re-initialize the cleared blkg_iostat_set */
- u64_stats_init(&bis->sync);
- bis->blkg = blkg;
- }
- memset(&blkg->iostat, 0, sizeof(blkg->iostat));
- u64_stats_init(&blkg->iostat.sync);
-
+ blkg_clear_stat(blkg);
for (i = 0; i < BLKCG_MAX_POLS; i++) {
struct blkcg_policy *pol = blkcg_policy[i];
@@ -949,16 +972,6 @@ void blkg_conf_exit(struct blkg_conf_ctx *ctx)
}
EXPORT_SYMBOL_GPL(blkg_conf_exit);
-static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
-{
- int i;
-
- for (i = 0; i < BLKG_IOSTAT_NR; i++) {
- dst->bytes[i] = src->bytes[i];
- dst->ios[i] = src->ios[i];
- }
-}
-
static void blkg_iostat_add(struct blkg_iostat *dst, struct blkg_iostat *src)
{
int i;
@@ -1024,7 +1037,19 @@ static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu)
struct blkg_iostat cur;
unsigned int seq;
+ /*
+ * Order assignment of `next_bisc` from `bisc->lnode.next` in
+ * llist_for_each_entry_safe and clearing `bisc->lqueued` for
+ * avoiding to assign `next_bisc` with new next pointer added
+ * in blk_cgroup_bio_start() in case of re-ordering.
+ *
+ * The pair barrier is implied in llist_add() in blk_cgroup_bio_start().
+ */
+ smp_mb();
+
WRITE_ONCE(bisc->lqueued, false);
+ if (bisc == &blkg->iostat)
+ goto propagate_up; /* propagate up to parent only */
/* fetch the current per-cpu values */
do {
@@ -1034,10 +1059,24 @@ static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu)
blkcg_iostat_update(blkg, &cur, &bisc->last);
+propagate_up:
/* propagate global delta to parent (unless that's root) */
- if (parent && parent->parent)
+ if (parent && parent->parent) {
blkcg_iostat_update(parent, &blkg->iostat.cur,
&blkg->iostat.last);
+ /*
+ * Queue parent->iostat to its blkcg's lockless
+ * list to propagate up to the grandparent if the
+ * iostat hasn't been queued yet.
+ */
+ if (!parent->iostat.lqueued) {
+ struct llist_head *plhead;
+
+ plhead = per_cpu_ptr(parent->blkcg->lhead, cpu);
+ llist_add(&parent->iostat.lnode, plhead);
+ parent->iostat.lqueued = true;
+ }
+ }
}
raw_spin_unlock_irqrestore(&blkg_stat_lock, flags);
out:
diff --git a/block/blk-core.c b/block/blk-core.c
index b795ac1772..6fcf8ed5fc 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -987,10 +987,11 @@ void update_io_ticks(struct block_device *part, unsigned long now, bool end)
unsigned long stamp;
again:
stamp = READ_ONCE(part->bd_stamp);
- if (unlikely(time_after(now, stamp))) {
- if (likely(try_cmpxchg(&part->bd_stamp, &stamp, now)))
- __part_stat_add(part, io_ticks, end ? now - stamp : 1);
- }
+ if (unlikely(time_after(now, stamp)) &&
+ likely(try_cmpxchg(&part->bd_stamp, &stamp, now)) &&
+ (end || part_in_flight(part)))
+ __part_stat_add(part, io_ticks, now - stamp);
+
if (part->bd_partno) {
part = bdev_whole(part);
goto again;
diff --git a/block/blk-flush.c b/block/blk-flush.c
index b0f314f4bc..9944414bf7 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -183,7 +183,7 @@ static void blk_flush_complete_seq(struct request *rq,
/* queue for flush */
if (list_empty(pending))
fq->flush_pending_since = jiffies;
- list_move_tail(&rq->queuelist, pending);
+ list_add_tail(&rq->queuelist, pending);
break;
case REQ_FSEQ_DATA:
@@ -261,6 +261,7 @@ static enum rq_end_io_ret flush_end_io(struct request *flush_rq,
unsigned int seq = blk_flush_cur_seq(rq);
BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH);
+ list_del_init(&rq->queuelist);
blk_flush_complete_seq(rq, fq, seq, error);
}
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 4e3483a16b..ae61a9c2fc 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -779,6 +779,8 @@ static void blk_account_io_merge_request(struct request *req)
if (blk_do_io_stat(req)) {
part_stat_lock();
part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
+ part_stat_local_dec(req->part,
+ in_flight[op_is_write(req_op(req))]);
part_stat_unlock();
}
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 32afb87efb..5ac2087b05 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -997,6 +997,8 @@ static inline void blk_account_io_done(struct request *req, u64 now)
update_io_ticks(req->part, jiffies, true);
part_stat_inc(req->part, ios[sgrp]);
part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns);
+ part_stat_local_dec(req->part,
+ in_flight[op_is_write(req_op(req))]);
part_stat_unlock();
}
}
@@ -1019,6 +1021,8 @@ static inline void blk_account_io_start(struct request *req)
part_stat_lock();
update_io_ticks(req->part, jiffies, false);
+ part_stat_local_inc(req->part,
+ in_flight[op_is_write(req_op(req))]);
part_stat_unlock();
}
}
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 9d6033e01f..15319b217b 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -751,6 +751,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
unsigned int top, bottom, alignment, ret = 0;
t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
+ t->max_user_sectors = min_not_zero(t->max_user_sectors,
+ b->max_user_sectors);
t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
t->max_dev_sectors = min_not_zero(t->max_dev_sectors, b->max_dev_sectors);
t->max_write_zeroes_sectors = min(t->max_write_zeroes_sectors,
diff --git a/block/blk.h b/block/blk.h
index d9f584984b..da5dbc6b13 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -357,6 +357,7 @@ static inline bool blk_do_io_stat(struct request *rq)
}
void update_io_ticks(struct block_device *part, unsigned long now, bool end);
+unsigned int part_in_flight(struct block_device *part);
static inline void req_set_nomerge(struct request_queue *q, struct request *req)
{
diff --git a/block/fops.c b/block/fops.c
index 679d9b752f..df2c68d3f1 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -390,7 +390,7 @@ static int blkdev_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
iomap->bdev = bdev;
iomap->offset = ALIGN_DOWN(offset, bdev_logical_block_size(bdev));
- if (iomap->offset >= isize)
+ if (offset >= isize)
return -EIO;
iomap->type = IOMAP_MAPPED;
iomap->addr = iomap->offset;
diff --git a/block/genhd.c b/block/genhd.c
index 52a4521df0..345d80ab97 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -118,7 +118,7 @@ static void part_stat_read_all(struct block_device *part,
}
}
-static unsigned int part_in_flight(struct block_device *part)
+unsigned int part_in_flight(struct block_device *part)
{
unsigned int inflight = 0;
int cpu;
diff --git a/block/ioctl.c b/block/ioctl.c
index f505f9c341..2639ce9df3 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -33,7 +33,7 @@ static int blkpg_do_ioctl(struct block_device *bdev,
if (op == BLKPG_DEL_PARTITION)
return bdev_del_partition(disk, p.pno);
- if (p.start < 0 || p.length <= 0 || p.start + p.length < 0)
+ if (p.start < 0 || p.length <= 0 || LLONG_MAX - p.length < p.start)
return -EINVAL;
/* Check that the partition is aligned to the block size */
if (!IS_ALIGNED(p.start | p.length, bdev_logical_block_size(bdev)))
diff --git a/block/partitions/cmdline.c b/block/partitions/cmdline.c
index c03bc105e5..152c85df92 100644
--- a/block/partitions/cmdline.c
+++ b/block/partitions/cmdline.c
@@ -70,8 +70,8 @@ static int parse_subpart(struct cmdline_subpart **subpart, char *partdef)
}
if (*partdef == '(') {
- int length;
- char *next = strchr(++partdef, ')');
+ partdef++;
+ char *next = strsep(&partdef, ")");
if (!next) {
pr_warn("cmdline partition format is invalid.");
@@ -79,11 +79,7 @@ static int parse_subpart(struct cmdline_subpart **subpart, char *partdef)
goto fail;
}
- length = min_t(int, next - partdef,
- sizeof(new_subpart->name) - 1);
- strscpy(new_subpart->name, partdef, length);
-
- partdef = ++next;
+ strscpy(new_subpart->name, next, sizeof(new_subpart->name));
} else
new_subpart->name[0] = '\0';
@@ -117,14 +113,12 @@ static void free_subpart(struct cmdline_parts *parts)
}
}
-static int parse_parts(struct cmdline_parts **parts, const char *bdevdef)
+static int parse_parts(struct cmdline_parts **parts, char *bdevdef)
{
int ret = -EINVAL;
char *next;
- int length;
struct cmdline_subpart **next_subpart;
struct cmdline_parts *newparts;
- char buf[BDEVNAME_SIZE + 32 + 4];
*parts = NULL;
@@ -132,28 +126,19 @@ static int parse_parts(struct cmdline_parts **parts, const char *bdevdef)
if (!newparts)
return -ENOMEM;
- next = strchr(bdevdef, ':');
+ next = strsep(&bdevdef, ":");
if (!next) {
pr_warn("cmdline partition has no block device.");
goto fail;
}
- length = min_t(int, next - bdevdef, sizeof(newparts->name) - 1);
- strscpy(newparts->name, bdevdef, length);
+ strscpy(newparts->name, next, sizeof(newparts->name));
newparts->nr_subparts = 0;
next_subpart = &newparts->subpart;
- while (next && *(++next)) {
- bdevdef = next;
- next = strchr(bdevdef, ',');
-
- length = (!next) ? (sizeof(buf) - 1) :
- min_t(int, next - bdevdef, sizeof(buf) - 1);
-
- strscpy(buf, bdevdef, length);
-
- ret = parse_subpart(next_subpart, buf);
+ while ((next = strsep(&bdevdef, ","))) {
+ ret = parse_subpart(next_subpart, next);
if (ret)
goto fail;
@@ -199,24 +184,17 @@ static int cmdline_parts_parse(struct cmdline_parts **parts,
*parts = NULL;
- next = pbuf = buf = kstrdup(cmdline, GFP_KERNEL);
+ pbuf = buf = kstrdup(cmdline, GFP_KERNEL);
if (!buf)
return -ENOMEM;
next_parts = parts;
- while (next && *pbuf) {
- next = strchr(pbuf, ';');
- if (next)
- *next = '\0';
-
- ret = parse_parts(next_parts, pbuf);
+ while ((next = strsep(&pbuf, ";"))) {
+ ret = parse_parts(next_parts, next);
if (ret)
goto fail;
- if (next)
- pbuf = ++next;
-
next_parts = &(*next_parts)->next_parts;
}
@@ -250,7 +228,6 @@ static struct cmdline_parts *bdev_parts;
static int add_part(int slot, struct cmdline_subpart *subpart,
struct parsed_partitions *state)
{
- int label_min;
struct partition_meta_info *info;
char tmp[sizeof(info->volname) + 4];
@@ -262,9 +239,7 @@ static int add_part(int slot, struct cmdline_subpart *subpart,
info = &state->parts[slot].info;
- label_min = min_t(int, sizeof(info->volname) - 1,
- sizeof(subpart->name));
- strscpy(info->volname, subpart->name, label_min);
+ strscpy(info->volname, subpart->name, sizeof(info->volname));
snprintf(tmp, sizeof(tmp), "(%s)", info->volname);
strlcat(state->pp_buf, tmp, PAGE_SIZE);
diff --git a/block/sed-opal.c b/block/sed-opal.c
index 14fe0fef81..598fd3e7fc 100644
--- a/block/sed-opal.c
+++ b/block/sed-opal.c
@@ -314,7 +314,7 @@ static int read_sed_opal_key(const char *key_name, u_char *buffer, int buflen)
&key_type_user, key_name, true);
if (IS_ERR(kref))
- ret = PTR_ERR(kref);
+ return PTR_ERR(kref);
key = key_ref_to_ptr(kref);
down_read(&key->sem);