diff options
Diffstat (limited to '')
-rw-r--r-- | drivers/md/dm-core.h | 2 | ||||
-rw-r--r-- | drivers/md/dm-crypt.c | 10 | ||||
-rw-r--r-- | drivers/md/dm-integrity.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-ioctl.c | 3 | ||||
-rw-r--r-- | drivers/md/dm-raid.c | 6 | ||||
-rw-r--r-- | drivers/md/dm-snap.c | 4 | ||||
-rw-r--r-- | drivers/md/dm-table.c | 9 | ||||
-rw-r--r-- | drivers/md/dm-verity.h | 4 | ||||
-rw-r--r-- | drivers/md/dm.c | 26 | ||||
-rw-r--r-- | drivers/md/md.c | 130 |
10 files changed, 134 insertions, 62 deletions
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 3db92d9a0..ff73b2c17 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -19,6 +19,8 @@ #include "dm.h" #define DM_RESERVED_MAX_IOS 1024 +#define DM_MAX_TARGETS 1048576 +#define DM_MAX_TARGET_PARAMS 1024 struct dm_kobject_holder { struct kobject kobj; diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 5d772f322..5deda6c6f 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -48,11 +48,11 @@ struct convert_context { struct completion restart; struct bio *bio_in; - struct bio *bio_out; struct bvec_iter iter_in; + struct bio *bio_out; struct bvec_iter iter_out; - u64 cc_sector; atomic_t cc_pending; + u64 cc_sector; union { struct skcipher_request *req; struct aead_request *req_aead; @@ -2064,6 +2064,12 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) io->ctx.bio_out = clone; io->ctx.iter_out = clone->bi_iter; + if (crypt_integrity_aead(cc)) { + bio_copy_data(clone, io->base_bio); + io->ctx.bio_in = clone; + io->ctx.iter_in = clone->bi_iter; + } + sector += bio_sectors(clone); crypt_inc_pending(io); diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 62cae34ca..067be1d9f 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -3937,7 +3937,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) } else if (sscanf(opt_string, "sectors_per_bit:%llu%c", &llval, &dummy) == 1) { log2_sectors_per_bitmap_bit = !llval ? 0 : __ilog2_u64(llval); } else if (sscanf(opt_string, "bitmap_flush_interval:%u%c", &val, &dummy) == 1) { - if (val >= (uint64_t)UINT_MAX * 1000 / HZ) { + if ((uint64_t)val >= (uint64_t)UINT_MAX * 1000 / HZ) { r = -EINVAL; ti->error = "Invalid bitmap_flush_interval argument"; goto bad; diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 5f9b9178c..4184c8a2d 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1760,7 +1760,8 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern if (copy_from_user(param_kernel, user, minimum_data_size)) return -EFAULT; - if (param_kernel->data_size < minimum_data_size) + if (unlikely(param_kernel->data_size < minimum_data_size) || + unlikely(param_kernel->data_size > DM_MAX_TARGETS * DM_MAX_TARGET_PARAMS)) return -EINVAL; secure_data = param_kernel->flags & DM_SECURE_DATA_FLAG; diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 140bdf2a6..99995b180 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3329,14 +3329,14 @@ static int raid_map(struct dm_target *ti, struct bio *bio) struct mddev *mddev = &rs->md; /* - * If we're reshaping to add disk(s)), ti->len and + * If we're reshaping to add disk(s), ti->len and * mddev->array_sectors will differ during the process * (ti->len > mddev->array_sectors), so we have to requeue * bios with addresses > mddev->array_sectors here or * there will occur accesses past EOD of the component * data images thus erroring the raid set. */ - if (unlikely(bio_end_sector(bio) > mddev->array_sectors)) + if (unlikely(bio_has_data(bio) && bio_end_sector(bio) > mddev->array_sectors)) return DM_MAPIO_REQUEUE; md_handle_request(mddev, bio); @@ -4019,7 +4019,9 @@ static void raid_resume(struct dm_target *ti) * Take this opportunity to check whether any failed * devices are reachable again. */ + mddev_lock_nointr(mddev); attempt_restore_of_faulty_devices(rs); + mddev_unlock(mddev); } if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) { diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 41735a25d..de73fe796 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -685,8 +685,10 @@ static void dm_exception_table_exit(struct dm_exception_table *et, for (i = 0; i < size; i++) { slot = et->table + i; - hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list) + hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list) { kmem_cache_free(mem, ex); + cond_resched(); + } } vfree(et->table); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 5c590895c..31bcdcd93 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -144,7 +144,12 @@ static int alloc_targets(struct dm_table *t, unsigned int num) int dm_table_create(struct dm_table **result, fmode_t mode, unsigned num_targets, struct mapped_device *md) { - struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL); + struct dm_table *t; + + if (num_targets > DM_MAX_TARGETS) + return -EOVERFLOW; + + t = kzalloc(sizeof(*t), GFP_KERNEL); if (!t) return -ENOMEM; @@ -158,7 +163,7 @@ int dm_table_create(struct dm_table **result, fmode_t mode, if (!num_targets) { kfree(t); - return -ENOMEM; + return -EOVERFLOW; } if (alloc_targets(t, num_targets)) { diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index 78d1e5119..f61c89c79 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -74,11 +74,11 @@ struct dm_verity_io { /* original value of bio->bi_end_io */ bio_end_io_t *orig_bi_end_io; + struct bvec_iter iter; + sector_t block; unsigned n_blocks; - struct bvec_iter iter; - struct work_struct work; /* diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 9029c1004..dc8498b4b 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2733,6 +2733,9 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_fla static void __dm_internal_resume(struct mapped_device *md) { + int r; + struct dm_table *map; + BUG_ON(!md->internal_suspend_count); if (--md->internal_suspend_count) @@ -2741,12 +2744,23 @@ static void __dm_internal_resume(struct mapped_device *md) if (dm_suspended_md(md)) goto done; /* resume from nested suspend */ - /* - * NOTE: existing callers don't need to call dm_table_resume_targets - * (which may fail -- so best to avoid it for now by passing NULL map) - */ - (void) __dm_resume(md, NULL); - + map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); + r = __dm_resume(md, map); + if (r) { + /* + * If a preresume method of some target failed, we are in a + * tricky situation. We can't return an error to the caller. We + * can't fake success because then the "resume" and + * "postsuspend" methods would not be paired correctly, and it + * would break various targets, for example it would cause list + * corruption in the "origin" target. + * + * So, we fake normal suspend here, to make sure that the + * "resume" and "postsuspend" methods will be paired correctly. + */ + DMERR("Preresume method failed: %d", r); + set_bit(DMF_SUSPENDED, &md->flags); + } done: clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); smp_mb__after_atomic(); diff --git a/drivers/md/md.c b/drivers/md/md.c index 6efe49f7b..09c7f5215 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1179,6 +1179,7 @@ struct super_type { struct md_rdev *refdev, int minor_version); int (*validate_super)(struct mddev *mddev, + struct md_rdev *freshest, struct md_rdev *rdev); void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev); @@ -1317,8 +1318,9 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor /* * validate_super for 0.90.0 + * note: we are not using "freshest" for 0.9 superblock */ -static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev) +static int super_90_validate(struct mddev *mddev, struct md_rdev *freshest, struct md_rdev *rdev) { mdp_disk_t *desc; mdp_super_t *sb = page_address(rdev->sb_page); @@ -1833,7 +1835,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ return ret; } -static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) +static int super_1_validate(struct mddev *mddev, struct md_rdev *freshest, struct md_rdev *rdev) { struct mdp_superblock_1 *sb = page_address(rdev->sb_page); __u64 ev1 = le64_to_cpu(sb->events); @@ -1929,13 +1931,15 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) } } else if (mddev->pers == NULL) { /* Insist of good event counter while assembling, except for - * spares (which don't need an event count) */ - ++ev1; + * spares (which don't need an event count). + * Similar to mdadm, we allow event counter difference of 1 + * from the freshest device. + */ if (rdev->desc_nr >= 0 && rdev->desc_nr < le32_to_cpu(sb->max_dev) && (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX || le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL)) - if (ev1 < mddev->events) + if (ev1 + 1 < mddev->events) return -EINVAL; } else if (mddev->bitmap) { /* If adding to array with a bitmap, then we can accept an @@ -1956,8 +1960,38 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) rdev->desc_nr >= le32_to_cpu(sb->max_dev)) { role = MD_DISK_ROLE_SPARE; rdev->desc_nr = -1; - } else + } else if (mddev->pers == NULL && freshest && ev1 < mddev->events) { + /* + * If we are assembling, and our event counter is smaller than the + * highest event counter, we cannot trust our superblock about the role. + * It could happen that our rdev was marked as Faulty, and all other + * superblocks were updated with +1 event counter. + * Then, before the next superblock update, which typically happens when + * remove_and_add_spares() removes the device from the array, there was + * a crash or reboot. + * If we allow current rdev without consulting the freshest superblock, + * we could cause data corruption. + * Note that in this case our event counter is smaller by 1 than the + * highest, otherwise, this rdev would not be allowed into array; + * both kernel and mdadm allow event counter difference of 1. + */ + struct mdp_superblock_1 *freshest_sb = page_address(freshest->sb_page); + u32 freshest_max_dev = le32_to_cpu(freshest_sb->max_dev); + + if (rdev->desc_nr >= freshest_max_dev) { + /* this is unexpected, better not proceed */ + pr_warn("md: %s: rdev[%pg]: desc_nr(%d) >= freshest(%pg)->sb->max_dev(%u)\n", + mdname(mddev), rdev->bdev, rdev->desc_nr, + freshest->bdev, freshest_max_dev); + return -EUCLEAN; + } + + role = le16_to_cpu(freshest_sb->dev_roles[rdev->desc_nr]); + pr_debug("md: %s: rdev[%pg]: role=%d(0x%x) according to freshest %pg\n", + mdname(mddev), rdev->bdev, role, role, freshest->bdev); + } else { role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); + } switch(role) { case MD_DISK_ROLE_SPARE: /* spare */ break; @@ -2896,7 +2930,7 @@ static int add_bound_rdev(struct md_rdev *rdev) * and should be added immediately. */ super_types[mddev->major_version]. - validate_super(mddev, rdev); + validate_super(mddev, NULL/*freshest*/, rdev); if (add_journal) mddev_suspend(mddev); err = mddev->pers->hot_add_disk(mddev, rdev); @@ -3814,7 +3848,7 @@ static int analyze_sbs(struct mddev *mddev) } super_types[mddev->major_version]. - validate_super(mddev, freshest); + validate_super(mddev, NULL/*freshest*/, freshest); i = 0; rdev_for_each_safe(rdev, tmp, mddev) { @@ -3829,7 +3863,7 @@ static int analyze_sbs(struct mddev *mddev) } if (rdev != freshest) { if (super_types[mddev->major_version]. - validate_super(mddev, rdev)) { + validate_super(mddev, freshest, rdev)) { pr_warn("md: kicking non-fresh %s from array!\n", bdevname(rdev->bdev,b)); md_kick_rdev_from_array(rdev); @@ -6209,7 +6243,15 @@ static void md_clean(struct mddev *mddev) mddev->persistent = 0; mddev->level = LEVEL_NONE; mddev->clevel[0] = 0; - mddev->flags = 0; + /* + * Don't clear MD_CLOSING, or mddev can be opened again. + * 'hold_active != 0' means mddev is still in the creation + * process and will be used later. + */ + if (mddev->hold_active) + mddev->flags = 0; + else + mddev->flags &= BIT_ULL_MASK(MD_CLOSING); mddev->sb_flags = 0; mddev->ro = 0; mddev->metadata_type[0] = 0; @@ -6817,7 +6859,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info) rdev->saved_raid_disk = rdev->raid_disk; } else super_types[mddev->major_version]. - validate_super(mddev, rdev); + validate_super(mddev, NULL/*freshest*/, rdev); if ((info->state & (1<<MD_DISK_SYNC)) && rdev->raid_disk != info->raid_disk) { /* This was a hot-add request, but events doesn't @@ -7502,7 +7544,6 @@ static inline bool md_ioctl_valid(unsigned int cmd) { switch (cmd) { case ADD_NEW_DISK: - case BLKROSET: case GET_ARRAY_INFO: case GET_BITMAP_FILE: case GET_DISK_INFO: @@ -7529,8 +7570,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, int err = 0; void __user *argp = (void __user *)arg; struct mddev *mddev = NULL; - int ro; - bool did_set_md_closing = false; if (!md_ioctl_valid(cmd)) return -ENOTTY; @@ -7617,7 +7656,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, err = -EBUSY; goto out; } - did_set_md_closing = true; mutex_unlock(&mddev->open_mutex); sync_blockdev(bdev); } @@ -7712,35 +7750,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, goto unlock; } break; - - case BLKROSET: - if (get_user(ro, (int __user *)(arg))) { - err = -EFAULT; - goto unlock; - } - err = -EINVAL; - - /* if the bdev is going readonly the value of mddev->ro - * does not matter, no writes are coming - */ - if (ro) - goto unlock; - - /* are we are already prepared for writes? */ - if (mddev->ro != 1) - goto unlock; - - /* transitioning to readauto need only happen for - * arrays that call md_write_start - */ - if (mddev->pers) { - err = restart_array(mddev); - if (err == 0) { - mddev->ro = 2; - set_disk_ro(mddev->gendisk, 0); - } - } - goto unlock; } /* @@ -7810,7 +7819,7 @@ unlock: mddev->hold_active = 0; mddev_unlock(mddev); out: - if(did_set_md_closing) + if (cmd == STOP_ARRAY_RO || (err && cmd == STOP_ARRAY)) clear_bit(MD_CLOSING, &mddev->flags); return err; } @@ -7834,6 +7843,36 @@ static int md_compat_ioctl(struct block_device *bdev, fmode_t mode, } #endif /* CONFIG_COMPAT */ +static int md_set_read_only(struct block_device *bdev, bool ro) +{ + struct mddev *mddev = bdev->bd_disk->private_data; + int err; + + err = mddev_lock(mddev); + if (err) + return err; + + if (!mddev->raid_disks && !mddev->external) { + err = -ENODEV; + goto out_unlock; + } + + /* + * Transitioning to read-auto need only happen for arrays that call + * md_write_start and which are not ready for writes yet. + */ + if (!ro && mddev->ro == 1 && mddev->pers) { + err = restart_array(mddev); + if (err) + goto out_unlock; + mddev->ro = 2; + } + +out_unlock: + mddev_unlock(mddev); + return err; +} + static int md_open(struct block_device *bdev, fmode_t mode) { /* @@ -7910,6 +7949,7 @@ const struct block_device_operations md_fops = #endif .getgeo = md_getgeo, .check_events = md_check_events, + .set_read_only = md_set_read_only, }; static int md_thread(void *arg) |