diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/bcache/alloc.c | 35 | ||||
-rw-r--r-- | drivers/md/bcache/bcache.h | 5 | ||||
-rw-r--r-- | drivers/md/bcache/btree.c | 21 | ||||
-rw-r--r-- | drivers/md/bcache/super.c | 10 | ||||
-rw-r--r-- | drivers/md/bcache/sysfs.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-cache-policy-smq.c | 28 | ||||
-rw-r--r-- | drivers/md/dm-delay.c | 17 | ||||
-rw-r--r-- | drivers/md/dm-integrity.c | 15 | ||||
-rw-r--r-- | drivers/md/dm-io.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-ioctl.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-raid.c | 9 | ||||
-rw-r--r-- | drivers/md/dm-snap-persistent.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-table.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-verity-fec.c | 3 | ||||
-rw-r--r-- | drivers/md/dm-verity-target.c | 4 | ||||
-rw-r--r-- | drivers/md/dm-verity.h | 6 | ||||
-rw-r--r-- | drivers/md/md-bitmap.c | 17 | ||||
-rw-r--r-- | drivers/md/md.c | 23 | ||||
-rw-r--r-- | drivers/md/raid0.c | 62 | ||||
-rw-r--r-- | drivers/md/raid0.h | 1 | ||||
-rw-r--r-- | drivers/md/raid1.c | 3 | ||||
-rw-r--r-- | drivers/md/raid10.c | 24 | ||||
-rw-r--r-- | drivers/md/raid5.c | 2 |
23 files changed, 201 insertions, 94 deletions
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 46794cac1..5310e1f4a 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -49,7 +49,7 @@ * * bch_bucket_alloc() allocates a single bucket from a specific cache. * - * bch_bucket_alloc_set() allocates one or more buckets from different caches + * bch_bucket_alloc_set() allocates one bucket from different caches * out of a cache set. * * free_some_buckets() drives all the processes described above. It's called @@ -488,34 +488,29 @@ void bch_bucket_free(struct cache_set *c, struct bkey *k) } int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve, - struct bkey *k, int n, bool wait) + struct bkey *k, bool wait) { - int i; + struct cache *ca; + long b; /* No allocation if CACHE_SET_IO_DISABLE bit is set */ if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) return -1; lockdep_assert_held(&c->bucket_lock); - BUG_ON(!n || n > c->caches_loaded || n > 8); bkey_init(k); - /* sort by free space/prio of oldest data in caches */ - - for (i = 0; i < n; i++) { - struct cache *ca = c->cache_by_alloc[i]; - long b = bch_bucket_alloc(ca, reserve, wait); + ca = c->cache_by_alloc[0]; + b = bch_bucket_alloc(ca, reserve, wait); + if (b == -1) + goto err; - if (b == -1) - goto err; + k->ptr[0] = MAKE_PTR(ca->buckets[b].gen, + bucket_to_sector(c, b), + ca->sb.nr_this_dev); - k->ptr[i] = MAKE_PTR(ca->buckets[b].gen, - bucket_to_sector(c, b), - ca->sb.nr_this_dev); - - SET_KEY_PTRS(k, i + 1); - } + SET_KEY_PTRS(k, 1); return 0; err: @@ -525,12 +520,12 @@ err: } int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve, - struct bkey *k, int n, bool wait) + struct bkey *k, bool wait) { int ret; mutex_lock(&c->bucket_lock); - ret = __bch_bucket_alloc_set(c, reserve, k, n, wait); + ret = __bch_bucket_alloc_set(c, reserve, k, wait); mutex_unlock(&c->bucket_lock); return ret; } @@ -638,7 +633,7 @@ bool bch_alloc_sectors(struct cache_set *c, spin_unlock(&c->data_bucket_lock); - if (bch_bucket_alloc_set(c, watermark, &alloc.key, 1, wait)) + if (bch_bucket_alloc_set(c, watermark, &alloc.key, wait)) return false; spin_lock(&c->data_bucket_lock); diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 6a380ed49..d0311e306 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -265,6 +265,7 @@ struct bcache_device { #define BCACHE_DEV_WB_RUNNING 3 #define BCACHE_DEV_RATE_DW_RUNNING 4 int nr_stripes; +#define BCH_MIN_STRIPE_SZ ((4 << 20) >> SECTOR_SHIFT) unsigned int stripe_size; atomic_t *stripe_sectors_dirty; unsigned long *full_dirty_stripes; @@ -952,9 +953,9 @@ void bch_bucket_free(struct cache_set *c, struct bkey *k); long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait); int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve, - struct bkey *k, int n, bool wait); + struct bkey *k, bool wait); int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve, - struct bkey *k, int n, bool wait); + struct bkey *k, bool wait); bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned int sectors, unsigned int write_point, unsigned int write_prio, bool wait); diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index e388e7bb7..de1eb7961 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1008,6 +1008,9 @@ err: * * The btree node will have either a read or a write lock held, depending on * level and op->lock. + * + * Note: Only error code or btree pointer will be returned, it is unncessary + * for callers to check NULL pointer. */ struct btree *bch_btree_node_get(struct cache_set *c, struct btree_op *op, struct bkey *k, int level, bool write, @@ -1120,16 +1123,22 @@ retry: mutex_unlock(&b->c->bucket_lock); } +/* + * Only error code or btree pointer will be returned, it is unncessary for + * callers to check NULL pointer. + */ struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op, int level, bool wait, struct btree *parent) { BKEY_PADDED(key) k; - struct btree *b = ERR_PTR(-EAGAIN); + struct btree *b; mutex_lock(&c->bucket_lock); retry: - if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, wait)) + /* return ERR_PTR(-EAGAIN) when it fails */ + b = ERR_PTR(-EAGAIN); + if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, wait)) goto err; bkey_put(c, &k.key); @@ -1174,7 +1183,7 @@ static struct btree *btree_node_alloc_replacement(struct btree *b, { struct btree *n = bch_btree_node_alloc(b->c, op, b->level, b->parent); - if (!IS_ERR_OR_NULL(n)) { + if (!IS_ERR(n)) { mutex_lock(&n->write_lock); bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort); bkey_copy_key(&n->key, &b->key); @@ -1389,7 +1398,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, for (i = 0; i < nodes; i++) { new_nodes[i] = btree_node_alloc_replacement(r[i].b, NULL); - if (IS_ERR_OR_NULL(new_nodes[i])) + if (IS_ERR(new_nodes[i])) goto out_nocoalesce; } @@ -1541,6 +1550,8 @@ static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op, return 0; n = btree_node_alloc_replacement(replace, NULL); + if (IS_ERR(n)) + return 0; /* recheck reserve after allocating replacement node */ if (btree_check_reserve(b, NULL)) { @@ -1706,7 +1717,7 @@ static int bch_btree_gc_root(struct btree *b, struct btree_op *op, if (should_rewrite) { n = btree_node_alloc_replacement(b, NULL); - if (!IS_ERR_OR_NULL(n)) { + if (!IS_ERR(n)) { bch_btree_node_write_sync(n); bch_btree_set_root(n); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 2df75db52..70f0f3096 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -423,7 +423,7 @@ static int __uuid_write(struct cache_set *c) closure_init_stack(&cl); lockdep_assert_held(&bch_register_lock); - if (bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, true)) + if (bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, true)) return 1; SET_KEY_SIZE(&k.key, c->sb.bucket_size); @@ -807,6 +807,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, if (!d->stripe_size) d->stripe_size = 1 << 31; + else if (d->stripe_size < BCH_MIN_STRIPE_SZ) + d->stripe_size = roundup(BCH_MIN_STRIPE_SZ, d->stripe_size); d->nr_stripes = DIV_ROUND_UP_ULL(sectors, d->stripe_size); @@ -1576,7 +1578,7 @@ static void cache_set_flush(struct closure *cl) if (!IS_ERR_OR_NULL(c->gc_thread)) kthread_stop(c->gc_thread); - if (!IS_ERR_OR_NULL(c->root)) + if (!IS_ERR(c->root)) list_add(&c->root->list, &c->btree_cache); /* Should skip this if we're unregistering because of an error */ @@ -1844,7 +1846,7 @@ static int run_cache_set(struct cache_set *c) c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true, NULL); - if (IS_ERR_OR_NULL(c->root)) + if (IS_ERR(c->root)) goto err; list_del_init(&c->root->list); @@ -1921,7 +1923,7 @@ static int run_cache_set(struct cache_set *c) err = "cannot allocate new btree root"; c->root = __bch_btree_node_alloc(c, NULL, 0, true, NULL); - if (IS_ERR_OR_NULL(c->root)) + if (IS_ERR(c->root)) goto err; mutex_lock(&c->root->write_lock); diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 591d9c810..64a72222a 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -992,7 +992,7 @@ SHOW(__bch_cache) sum += INITIAL_PRIO - cached[i]; if (n) - do_div(sum, n); + sum = div64_u64(sum, n); for (i = 0; i < ARRAY_SIZE(q); i++) q[i] = INITIAL_PRIO - cached[n * (i + 1) / diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c index 1b5b9ad9e..6030193b2 100644 --- a/drivers/md/dm-cache-policy-smq.c +++ b/drivers/md/dm-cache-policy-smq.c @@ -854,7 +854,13 @@ struct smq_policy { struct background_tracker *bg_work; - bool migrations_allowed; + bool migrations_allowed:1; + + /* + * If this is set the policy will try and clean the whole cache + * even if the device is not idle. + */ + bool cleaner:1; }; /*----------------------------------------------------------------*/ @@ -1133,7 +1139,7 @@ static bool clean_target_met(struct smq_policy *mq, bool idle) * Cache entries may not be populated. So we cannot rely on the * size of the clean queue. */ - if (idle) { + if (idle || mq->cleaner) { /* * We'd like to clean everything. */ @@ -1716,11 +1722,9 @@ static void calc_hotspot_params(sector_t origin_size, *hotspot_block_size /= 2u; } -static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size, - sector_t origin_size, - sector_t cache_block_size, - bool mimic_mq, - bool migrations_allowed) +static struct dm_cache_policy * +__smq_create(dm_cblock_t cache_size, sector_t origin_size, sector_t cache_block_size, + bool mimic_mq, bool migrations_allowed, bool cleaner) { unsigned i; unsigned nr_sentinels_per_queue = 2u * NR_CACHE_LEVELS; @@ -1807,6 +1811,7 @@ static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size, goto bad_btracker; mq->migrations_allowed = migrations_allowed; + mq->cleaner = cleaner; return &mq->policy; @@ -1830,21 +1835,24 @@ static struct dm_cache_policy *smq_create(dm_cblock_t cache_size, sector_t origin_size, sector_t cache_block_size) { - return __smq_create(cache_size, origin_size, cache_block_size, false, true); + return __smq_create(cache_size, origin_size, cache_block_size, + false, true, false); } static struct dm_cache_policy *mq_create(dm_cblock_t cache_size, sector_t origin_size, sector_t cache_block_size) { - return __smq_create(cache_size, origin_size, cache_block_size, true, true); + return __smq_create(cache_size, origin_size, cache_block_size, + true, true, false); } static struct dm_cache_policy *cleaner_create(dm_cblock_t cache_size, sector_t origin_size, sector_t cache_block_size) { - return __smq_create(cache_size, origin_size, cache_block_size, false, false); + return __smq_create(cache_size, origin_size, cache_block_size, + false, false, true); } /*----------------------------------------------------------------*/ diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index f496213f8..7c0e7c662 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -30,7 +30,7 @@ struct delay_c { struct workqueue_struct *kdelayd_wq; struct work_struct flush_expired_bios; struct list_head delayed_bios; - atomic_t may_delay; + bool may_delay; struct delay_class read; struct delay_class write; @@ -191,7 +191,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) INIT_WORK(&dc->flush_expired_bios, flush_expired_bios); INIT_LIST_HEAD(&dc->delayed_bios); mutex_init(&dc->timer_lock); - atomic_set(&dc->may_delay, 1); + dc->may_delay = true; dc->argc = argc; ret = delay_class_ctr(ti, &dc->read, argv); @@ -245,7 +245,7 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio) struct dm_delay_info *delayed; unsigned long expires = 0; - if (!c->delay || !atomic_read(&dc->may_delay)) + if (!c->delay) return DM_MAPIO_REMAPPED; delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info)); @@ -254,6 +254,10 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio) delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay); mutex_lock(&delayed_bios_lock); + if (unlikely(!dc->may_delay)) { + mutex_unlock(&delayed_bios_lock); + return DM_MAPIO_REMAPPED; + } c->ops++; list_add_tail(&delayed->list, &dc->delayed_bios); mutex_unlock(&delayed_bios_lock); @@ -267,7 +271,10 @@ static void delay_presuspend(struct dm_target *ti) { struct delay_c *dc = ti->private; - atomic_set(&dc->may_delay, 0); + mutex_lock(&delayed_bios_lock); + dc->may_delay = false; + mutex_unlock(&delayed_bios_lock); + del_timer_sync(&dc->delay_timer); flush_bios(flush_delayed_bios(dc, 1)); } @@ -276,7 +283,7 @@ static void delay_resume(struct dm_target *ti) { struct delay_c *dc = ti->private; - atomic_set(&dc->may_delay, 1); + dc->may_delay = true; } static int delay_map(struct dm_target *ti, struct bio *bio) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 0a4e44094..a884fcf65 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -29,11 +29,11 @@ #define DEFAULT_BUFFER_SECTORS 128 #define DEFAULT_JOURNAL_WATERMARK 50 #define DEFAULT_SYNC_MSEC 10000 -#define DEFAULT_MAX_JOURNAL_SECTORS 131072 +#define DEFAULT_MAX_JOURNAL_SECTORS (IS_ENABLED(CONFIG_64BIT) ? 131072 : 8192) #define MIN_LOG2_INTERLEAVE_SECTORS 3 #define MAX_LOG2_INTERLEAVE_SECTORS 31 #define METADATA_WORKQUEUE_MAX_ACTIVE 16 -#define RECALC_SECTORS 8192 +#define RECALC_SECTORS (IS_ENABLED(CONFIG_64BIT) ? 32768 : 2048) #define RECALC_WRITE_SUPER 16 /* @@ -1379,11 +1379,12 @@ static void integrity_metadata(struct work_struct *w) checksums = checksums_onstack; __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) { + struct bio_vec bv_copy = bv; unsigned pos; char *mem, *checksums_ptr; again: - mem = (char *)kmap_atomic(bv.bv_page) + bv.bv_offset; + mem = (char *)kmap_atomic(bv_copy.bv_page) + bv_copy.bv_offset; pos = 0; checksums_ptr = checksums; do { @@ -1392,7 +1393,7 @@ again: sectors_to_process -= ic->sectors_per_block; pos += ic->sectors_per_block << SECTOR_SHIFT; sector += ic->sectors_per_block; - } while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack); + } while (pos < bv_copy.bv_len && sectors_to_process && checksums != checksums_onstack); kunmap_atomic(mem); r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset, @@ -1412,9 +1413,9 @@ again: if (!sectors_to_process) break; - if (unlikely(pos < bv.bv_len)) { - bv.bv_offset += pos; - bv.bv_len -= pos; + if (unlikely(pos < bv_copy.bv_len)) { + bv_copy.bv_offset += pos; + bv_copy.bv_len -= pos; goto again; } } diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 81ffc59d0..4312007d2 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -306,7 +306,7 @@ static void do_region(int op, int op_flags, unsigned region, struct request_queue *q = bdev_get_queue(where->bdev); unsigned short logical_block_size = queue_logical_block_size(q); sector_t num_sectors; - unsigned int uninitialized_var(special_cmd_max_sectors); + unsigned int special_cmd_max_sectors; /* * Reject unsupported discard and write same requests. diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index e1603c17e..88e89796c 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1822,7 +1822,7 @@ static int ctl_ioctl(struct file *file, uint command, struct dm_ioctl __user *us int ioctl_flags; int param_flags; unsigned int cmd; - struct dm_ioctl *uninitialized_var(param); + struct dm_ioctl *param; ioctl_fn fn = NULL; size_t input_param_size; struct dm_ioctl param_kernel; diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 5c45100f6..72aa5097b 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3289,15 +3289,19 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) /* Try to adjust the raid4/5/6 stripe cache size to the stripe size */ if (rs_is_raid456(rs)) { r = rs_set_raid456_stripe_cache(rs); - if (r) + if (r) { + mddev_unlock(&rs->md); goto bad_stripe_cache; + } } /* Now do an early reshape check */ if (test_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags)) { r = rs_check_reshape(rs); - if (r) + if (r) { + mddev_unlock(&rs->md); goto bad_check_reshape; + } /* Restore new, ctr requested layout to perform check */ rs_config_restore(rs, &rs_layout); @@ -3306,6 +3310,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) r = rs->md.pers->check_reshape(&rs->md); if (r) { ti->error = "Reshape check failed"; + mddev_unlock(&rs->md); goto bad_check_reshape; } } diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 963d3774c..247089c2b 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -613,7 +613,7 @@ static int persistent_read_metadata(struct dm_exception_store *store, chunk_t old, chunk_t new), void *callback_context) { - int r, uninitialized_var(new_snapshot); + int r, new_snapshot; struct pstore *ps = get_info(store); /* diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 71d3fdbce..3faaf21be 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -671,7 +671,7 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table, */ unsigned short remaining = 0; - struct dm_target *uninitialized_var(ti); + struct dm_target *ti; struct queue_limits ti_limits; unsigned i; diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index a433f5824..67b533c19 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -28,7 +28,8 @@ bool verity_fec_is_enabled(struct dm_verity *v) */ static inline struct dm_verity_fec_io *fec_io(struct dm_verity_io *io) { - return (struct dm_verity_fec_io *) verity_io_digest_end(io->v, io); + return (struct dm_verity_fec_io *) + ((char *)io + io->v->ti->per_io_data_size - sizeof(struct dm_verity_fec_io)); } /* diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index d116495a3..76d60c55d 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -579,7 +579,9 @@ static void verity_end_io(struct bio *bio) struct dm_verity_io *io = bio->bi_private; if (bio->bi_status && - (!verity_fec_is_enabled(io->v) || verity_is_system_shutting_down())) { + (!verity_fec_is_enabled(io->v) || + verity_is_system_shutting_down() || + (bio->bi_opf & REQ_RAHEAD))) { verity_finish_io(io, bio->bi_status); return; } diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index 3441c10b8..6e65ec0e6 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -109,12 +109,6 @@ static inline u8 *verity_io_want_digest(struct dm_verity *v, return (u8 *)(io + 1) + v->ahash_reqsize + v->digest_size; } -static inline u8 *verity_io_digest_end(struct dm_verity *v, - struct dm_verity_io *io) -{ - return verity_io_want_digest(v, io) + v->digest_size; -} - extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io, struct bvec_iter *iter, int (*process)(struct dm_verity *v, diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index 1c4c46278..7ca81e917 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -53,14 +53,7 @@ __acquires(bitmap->lock) { unsigned char *mappage; - if (page >= bitmap->pages) { - /* This can happen if bitmap_start_sync goes beyond - * End-of-device while looking for a whole page. - * It is harmless. - */ - return -EINVAL; - } - + WARN_ON_ONCE(page >= bitmap->pages); if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */ return 0; @@ -1368,6 +1361,14 @@ __acquires(bitmap->lock) sector_t csize; int err; + if (page >= bitmap->pages) { + /* + * This can happen if bitmap_start_sync goes beyond + * End-of-device while looking for a whole page or + * user set a huge number to sysfs bitmap_set_bits. + */ + return NULL; + } err = md_bitmap_checkpage(bitmap, page, create, 0); if (bitmap->bp[page].hijacked || diff --git a/drivers/md/md.c b/drivers/md/md.c index f8c111b36..6b074c220 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3671,8 +3671,9 @@ int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale) static ssize_t safe_delay_show(struct mddev *mddev, char *page) { - int msec = (mddev->safemode_delay*1000)/HZ; - return sprintf(page, "%d.%03d\n", msec/1000, msec%1000); + unsigned int msec = ((unsigned long)mddev->safemode_delay*1000)/HZ; + + return sprintf(page, "%u.%03u\n", msec/1000, msec%1000); } static ssize_t safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len) @@ -3684,7 +3685,7 @@ safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len) return -EINVAL; } - if (strict_strtoul_scaled(cbuf, &msec, 3) < 0) + if (strict_strtoul_scaled(cbuf, &msec, 3) < 0 || msec > UINT_MAX / HZ) return -EINVAL; if (msec == 0) mddev->safemode_delay = 0; @@ -4336,6 +4337,8 @@ max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len rv = kstrtouint(buf, 10, &n); if (rv < 0) return rv; + if (n > INT_MAX) + return -EINVAL; atomic_set(&mddev->max_corr_read_errors, n); return len; } @@ -4636,11 +4639,21 @@ action_store(struct mddev *mddev, const char *page, size_t len) return -EINVAL; err = mddev_lock(mddev); if (!err) { - if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) + if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { err = -EBUSY; - else { + } else if (mddev->reshape_position == MaxSector || + mddev->pers->check_reshape == NULL || + mddev->pers->check_reshape(mddev)) { clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); err = mddev->pers->start_reshape(mddev); + } else { + /* + * If reshape is still in progress, and + * md_check_recovery() can continue to reshape, + * don't restart reshape because data can be + * corrupted for raid456. + */ + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); } mddev_unlock(mddev); } diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 252ef0eab..6f5710e83 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -296,6 +296,18 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) goto abort; } + if (conf->layout == RAID0_ORIG_LAYOUT) { + for (i = 1; i < conf->nr_strip_zones; i++) { + sector_t first_sector = conf->strip_zone[i-1].zone_end; + + sector_div(first_sector, mddev->chunk_sectors); + zone = conf->strip_zone + i; + /* disk_shift is first disk index used in the zone */ + zone->disk_shift = sector_div(first_sector, + zone->nb_dev); + } + } + pr_debug("md/raid0:%s: done.\n", mdname(mddev)); *private_conf = conf; @@ -482,6 +494,20 @@ static inline int is_io_in_chunk_boundary(struct mddev *mddev, } } +/* + * Convert disk_index to the disk order in which it is read/written. + * For example, if we have 4 disks, they are numbered 0,1,2,3. If we + * write the disks starting at disk 3, then the read/write order would + * be disk 3, then 0, then 1, and then disk 2 and we want map_disk_shift() + * to map the disks as follows 0,1,2,3 => 1,2,3,0. So disk 0 would map + * to 1, 1 to 2, 2 to 3, and 3 to 0. That way we can compare disks in + * that 'output' space to understand the read/write disk ordering. + */ +static int map_disk_shift(int disk_index, int num_disks, int disk_shift) +{ + return ((disk_index + num_disks - disk_shift) % num_disks); +} + static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) { struct r0conf *conf = mddev->private; @@ -495,7 +521,9 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) sector_t end_disk_offset; unsigned int end_disk_index; unsigned int disk; + sector_t orig_start, orig_end; + orig_start = start; zone = find_zone(conf, &start); if (bio_end_sector(bio) > zone->zone_end) { @@ -509,6 +537,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) } else end = bio_end_sector(bio); + orig_end = end; if (zone != conf->strip_zone) end = end - zone[-1].zone_end; @@ -520,13 +549,26 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) last_stripe_index = end; sector_div(last_stripe_index, stripe_size); - start_disk_index = (int)(start - first_stripe_index * stripe_size) / - mddev->chunk_sectors; + /* In the first zone the original and alternate layouts are the same */ + if ((conf->layout == RAID0_ORIG_LAYOUT) && (zone != conf->strip_zone)) { + sector_div(orig_start, mddev->chunk_sectors); + start_disk_index = sector_div(orig_start, zone->nb_dev); + start_disk_index = map_disk_shift(start_disk_index, + zone->nb_dev, + zone->disk_shift); + sector_div(orig_end, mddev->chunk_sectors); + end_disk_index = sector_div(orig_end, zone->nb_dev); + end_disk_index = map_disk_shift(end_disk_index, + zone->nb_dev, zone->disk_shift); + } else { + start_disk_index = (int)(start - first_stripe_index * stripe_size) / + mddev->chunk_sectors; + end_disk_index = (int)(end - last_stripe_index * stripe_size) / + mddev->chunk_sectors; + } start_disk_offset = ((int)(start - first_stripe_index * stripe_size) % mddev->chunk_sectors) + first_stripe_index * mddev->chunk_sectors; - end_disk_index = (int)(end - last_stripe_index * stripe_size) / - mddev->chunk_sectors; end_disk_offset = ((int)(end - last_stripe_index * stripe_size) % mddev->chunk_sectors) + last_stripe_index * mddev->chunk_sectors; @@ -535,18 +577,22 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) sector_t dev_start, dev_end; struct bio *discard_bio = NULL; struct md_rdev *rdev; + int compare_disk; + + compare_disk = map_disk_shift(disk, zone->nb_dev, + zone->disk_shift); - if (disk < start_disk_index) + if (compare_disk < start_disk_index) dev_start = (first_stripe_index + 1) * mddev->chunk_sectors; - else if (disk > start_disk_index) + else if (compare_disk > start_disk_index) dev_start = first_stripe_index * mddev->chunk_sectors; else dev_start = start_disk_offset; - if (disk < end_disk_index) + if (compare_disk < end_disk_index) dev_end = (last_stripe_index + 1) * mddev->chunk_sectors; - else if (disk > end_disk_index) + else if (compare_disk > end_disk_index) dev_end = last_stripe_index * mddev->chunk_sectors; else dev_end = end_disk_offset; diff --git a/drivers/md/raid0.h b/drivers/md/raid0.h index 3816e5477..8cc761ca7 100644 --- a/drivers/md/raid0.h +++ b/drivers/md/raid0.h @@ -6,6 +6,7 @@ struct strip_zone { sector_t zone_end; /* Start of the next zone (in sectors) */ sector_t dev_start; /* Zone offset in real dev (in sectors) */ int nb_dev; /* # of devices attached to the zone */ + int disk_shift; /* start disk for the original layout */ }; /* Linux 3.14 (20d0189b101) made an unintended change to diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 0f8b1fb3d..5ff06fbcf 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1785,6 +1785,9 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev) int number = rdev->raid_disk; struct raid1_info *p = conf->mirrors + number; + if (unlikely(number >= conf->raid_disks)) + goto abort; + if (rdev != p->rdev) p = conf->mirrors + conf->raid_disks + number; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index f6d2be1d2..bee694be2 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -781,8 +781,16 @@ static struct md_rdev *read_balance(struct r10conf *conf, disk = r10_bio->devs[slot].devnum; rdev = rcu_dereference(conf->mirrors[disk].replacement); if (rdev == NULL || test_bit(Faulty, &rdev->flags) || - r10_bio->devs[slot].addr + sectors > rdev->recovery_offset) + r10_bio->devs[slot].addr + sectors > + rdev->recovery_offset) { + /* + * Read replacement first to prevent reading both rdev + * and replacement as NULL during replacement replace + * rdev. + */ + smp_mb(); rdev = rcu_dereference(conf->mirrors[disk].rdev); + } if (rdev == NULL || test_bit(Faulty, &rdev->flags)) continue; @@ -934,6 +942,7 @@ static void flush_pending_writes(struct r10conf *conf) else generic_make_request(bio); bio = next; + cond_resched(); } blk_finish_plug(&plug); } else @@ -1119,6 +1128,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) else generic_make_request(bio); bio = next; + cond_resched(); } kfree(plug); } @@ -1400,9 +1410,15 @@ retry_write: for (i = 0; i < conf->copies; i++) { int d = r10_bio->devs[i].devnum; - struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev); - struct md_rdev *rrdev = rcu_dereference( - conf->mirrors[d].replacement); + struct md_rdev *rdev, *rrdev; + + rrdev = rcu_dereference(conf->mirrors[d].replacement); + /* + * Read replacement first to prevent reading both rdev and + * replacement as NULL during replacement replace rdev. + */ + smp_mb(); + rdev = rcu_dereference(conf->mirrors[d].rdev); if (rdev == rrdev) rrdev = NULL; if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7eeae0301..b98abe927 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2603,7 +2603,7 @@ static void raid5_end_write_request(struct bio *bi) struct stripe_head *sh = bi->bi_private; struct r5conf *conf = sh->raid_conf; int disks = sh->disks, i; - struct md_rdev *uninitialized_var(rdev); + struct md_rdev *rdev; sector_t first_bad; int bad_sectors; int replacement = 0; |