diff options
Diffstat (limited to '')
-rw-r--r-- | drivers/md/dm-raid.c | 97 |
1 files changed, 74 insertions, 23 deletions
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index eb009d6bb0..d97355e9b9 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -213,6 +213,7 @@ struct raid_dev { #define RT_FLAG_RS_IN_SYNC 6 #define RT_FLAG_RS_RESYNCING 7 #define RT_FLAG_RS_GROW 8 +#define RT_FLAG_RS_FROZEN 9 /* Array elements of 64 bit needed for rebuild/failed disk bits */ #define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8) @@ -3240,11 +3241,12 @@ size_check: rs->md.ro = 1; rs->md.in_sync = 1; - /* Keep array frozen until resume. */ - set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery); - /* Has to be held on running the array */ mddev_suspend_and_lock_nointr(&rs->md); + + /* Keep array frozen until resume. */ + md_frozen_sync_thread(&rs->md); + r = md_run(&rs->md); rs->md.in_sync = 0; /* Assume already marked dirty */ if (r) { @@ -3329,17 +3331,18 @@ static int raid_map(struct dm_target *ti, struct bio *bio) struct mddev *mddev = &rs->md; /* - * If we're reshaping to add disk(s)), ti->len and + * If we're reshaping to add disk(s), ti->len and * mddev->array_sectors will differ during the process * (ti->len > mddev->array_sectors), so we have to requeue * bios with addresses > mddev->array_sectors here or * there will occur accesses past EOD of the component * data images thus erroring the raid set. */ - if (unlikely(bio_end_sector(bio) > mddev->array_sectors)) + if (unlikely(bio_has_data(bio) && bio_end_sector(bio) > mddev->array_sectors)) return DM_MAPIO_REQUEUE; - md_handle_request(mddev, bio); + if (unlikely(!md_handle_request(mddev, bio))) + return DM_MAPIO_REQUEUE; return DM_MAPIO_SUBMITTED; } @@ -3718,21 +3721,33 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv, { struct raid_set *rs = ti->private; struct mddev *mddev = &rs->md; + int ret = 0; if (!mddev->pers || !mddev->pers->sync_request) return -EINVAL; - if (!strcasecmp(argv[0], "frozen")) - set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - else - clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + if (test_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags) || + test_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags)) + return -EBUSY; - if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) { - if (mddev->sync_thread) { - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - md_reap_sync_thread(mddev); - } - } else if (decipher_sync_action(mddev, mddev->recovery) != st_idle) + if (!strcasecmp(argv[0], "frozen")) { + ret = mddev_lock(mddev); + if (ret) + return ret; + + md_frozen_sync_thread(mddev); + mddev_unlock(mddev); + } else if (!strcasecmp(argv[0], "idle")) { + ret = mddev_lock(mddev); + if (ret) + return ret; + + md_idle_sync_thread(mddev); + mddev_unlock(mddev); + } + + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + if (decipher_sync_action(mddev, mddev->recovery) != st_idle) return -EBUSY; else if (!strcasecmp(argv[0], "resync")) ; /* MD_RECOVERY_NEEDED set below */ @@ -3791,15 +3806,46 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits) blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs)); } +static void raid_presuspend(struct dm_target *ti) +{ + struct raid_set *rs = ti->private; + struct mddev *mddev = &rs->md; + + /* + * From now on, disallow raid_message() to change sync_thread until + * resume, raid_postsuspend() is too late. + */ + set_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags); + + if (!reshape_interrupted(mddev)) + return; + + /* + * For raid456, if reshape is interrupted, IO across reshape position + * will never make progress, while caller will wait for IO to be done. + * Inform raid456 to handle those IO to prevent deadlock. + */ + if (mddev->pers && mddev->pers->prepare_suspend) + mddev->pers->prepare_suspend(mddev); +} + +static void raid_presuspend_undo(struct dm_target *ti) +{ + struct raid_set *rs = ti->private; + + clear_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags); +} + static void raid_postsuspend(struct dm_target *ti) { struct raid_set *rs = ti->private; if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) { - /* Writes have to be stopped before suspending to avoid deadlocks. */ - if (!test_bit(MD_RECOVERY_FROZEN, &rs->md.recovery)) - md_stop_writes(&rs->md); - + /* + * sync_thread must be stopped during suspend, and writes have + * to be stopped before suspending to avoid deadlocks. + */ + md_stop_writes(&rs->md); mddev_suspend(&rs->md, false); } } @@ -4012,8 +4058,6 @@ static int raid_preresume(struct dm_target *ti) } /* Check for any resize/reshape on @rs and adjust/initiate */ - /* Be prepared for mddev_resume() in raid_resume() */ - set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) { set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); mddev->resync_min = mddev->recovery_cp; @@ -4047,7 +4091,9 @@ static void raid_resume(struct dm_target *ti) * Take this opportunity to check whether any failed * devices are reachable again. */ + mddev_lock_nointr(mddev); attempt_restore_of_faulty_devices(rs); + mddev_unlock(mddev); } if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) { @@ -4055,10 +4101,13 @@ static void raid_resume(struct dm_target *ti) if (mddev->delta_disks < 0) rs_set_capacity(rs); + WARN_ON_ONCE(!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)); + WARN_ON_ONCE(test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)); + clear_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags); mddev_lock_nointr(mddev); - clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); mddev->ro = 0; mddev->in_sync = 0; + md_unfrozen_sync_thread(mddev); mddev_unlock_and_resume(mddev); } } @@ -4074,6 +4123,8 @@ static struct target_type raid_target = { .message = raid_message, .iterate_devices = raid_iterate_devices, .io_hints = raid_io_hints, + .presuspend = raid_presuspend, + .presuspend_undo = raid_presuspend_undo, .postsuspend = raid_postsuspend, .preresume = raid_preresume, .resume = raid_resume, |