From 68c1b0995e963349e50f8a8b00ebc140908f7882 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 8 May 2024 06:15:15 +0200 Subject: Merging upstream version 4.19.282. Signed-off-by: Daniel Baumann --- drivers/md/dm-cache-metadata.c | 54 ++++++++++++++++++++++++++++++++++++------ drivers/md/dm-cache-target.c | 15 ++++++++---- drivers/md/dm-crypt.c | 1 + drivers/md/dm-flakey.c | 30 +++++++++++++---------- drivers/md/dm-stats.c | 7 +++++- drivers/md/dm-stats.h | 2 +- drivers/md/dm-thin-metadata.c | 9 +++++++ drivers/md/dm-thin.c | 22 +++++++++++++---- drivers/md/dm.c | 5 ++-- drivers/md/md-bitmap.c | 47 ++++++++++++++++++++---------------- drivers/md/md.c | 12 +++++++--- drivers/md/raid1.c | 1 + 12 files changed, 150 insertions(+), 55 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index af6d4f898..2ecd0db0f 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -551,11 +551,13 @@ static int __create_persistent_data_objects(struct dm_cache_metadata *cmd, return r; } -static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd) +static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd, + bool destroy_bm) { dm_sm_destroy(cmd->metadata_sm); dm_tm_destroy(cmd->tm); - dm_block_manager_destroy(cmd->bm); + if (destroy_bm) + dm_block_manager_destroy(cmd->bm); } typedef unsigned long (*flags_mutator)(unsigned long); @@ -826,7 +828,7 @@ static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev, cmd2 = lookup(bdev); if (cmd2) { mutex_unlock(&table_lock); - __destroy_persistent_data_objects(cmd); + __destroy_persistent_data_objects(cmd, true); kfree(cmd); return cmd2; } @@ -874,7 +876,7 @@ void dm_cache_metadata_close(struct dm_cache_metadata *cmd) mutex_unlock(&table_lock); if (!cmd->fail_io) - __destroy_persistent_data_objects(cmd); + __destroy_persistent_data_objects(cmd, true); kfree(cmd); } } @@ -1808,14 +1810,52 @@ int dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd, bool *result) int dm_cache_metadata_abort(struct dm_cache_metadata *cmd) { - int r; + int r = -EINVAL; + struct dm_block_manager *old_bm = NULL, *new_bm = NULL; + + /* fail_io is double-checked with cmd->root_lock held below */ + if (unlikely(cmd->fail_io)) + return r; + + /* + * Replacement block manager (new_bm) is created and old_bm destroyed outside of + * cmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of + * shrinker associated with the block manager's bufio client vs cmd root_lock). + * - must take shrinker_rwsem without holding cmd->root_lock + */ + new_bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT, + CACHE_MAX_CONCURRENT_LOCKS); WRITE_LOCK(cmd); - __destroy_persistent_data_objects(cmd); - r = __create_persistent_data_objects(cmd, false); + if (cmd->fail_io) { + WRITE_UNLOCK(cmd); + goto out; + } + + __destroy_persistent_data_objects(cmd, false); + old_bm = cmd->bm; + if (IS_ERR(new_bm)) { + DMERR("could not create block manager during abort"); + cmd->bm = NULL; + r = PTR_ERR(new_bm); + goto out_unlock; + } + + cmd->bm = new_bm; + r = __open_or_format_metadata(cmd, false); + if (r) { + cmd->bm = NULL; + goto out_unlock; + } + new_bm = NULL; +out_unlock: if (r) cmd->fail_io = true; WRITE_UNLOCK(cmd); + dm_block_manager_destroy(old_bm); +out: + if (new_bm && !IS_ERR(new_bm)) + dm_block_manager_destroy(new_bm); return r; } diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 2ddd575e9..b3371812a 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -1010,16 +1010,16 @@ static void abort_transaction(struct cache *cache) if (get_cache_mode(cache) >= CM_READ_ONLY) return; - if (dm_cache_metadata_set_needs_check(cache->cmd)) { - DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name); - set_cache_mode(cache, CM_FAIL); - } - DMERR_LIMIT("%s: aborting current metadata transaction", dev_name); if (dm_cache_metadata_abort(cache->cmd)) { DMERR("%s: failed to abort metadata transaction", dev_name); set_cache_mode(cache, CM_FAIL); } + + if (dm_cache_metadata_set_needs_check(cache->cmd)) { + DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name); + set_cache_mode(cache, CM_FAIL); + } } static void metadata_operation_failed(struct cache *cache, const char *op, int r) @@ -1905,6 +1905,7 @@ static void process_deferred_bios(struct work_struct *ws) else commit_needed = process_bio(cache, bio) || commit_needed; + cond_resched(); } if (commit_needed) @@ -1927,6 +1928,7 @@ static void requeue_deferred_bios(struct cache *cache) while ((bio = bio_list_pop(&bios))) { bio->bi_status = BLK_STS_DM_REQUEUE; bio_endio(bio); + cond_resched(); } } @@ -1967,6 +1969,8 @@ static void check_migrations(struct work_struct *ws) r = mg_start(cache, op, NULL); if (r) break; + + cond_resched(); } } @@ -1987,6 +1991,7 @@ static void destroy(struct cache *cache) if (cache->prison) dm_bio_prison_destroy_v2(cache->prison); + cancel_delayed_work_sync(&cache->waker); if (cache->wq) destroy_workqueue(cache->wq); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index e38c713e8..908bf0768 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1661,6 +1661,7 @@ pop_from_list: io = crypt_io_from_node(rb_first(&write_tree)); rb_erase(&io->rb_node, &write_tree); kcryptd_io_write(io); + cond_resched(); } while (!RB_EMPTY_ROOT(&write_tree)); blk_finish_plug(&plug); } diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 2fcf62fb2..1f1614af5 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -301,8 +301,11 @@ static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc) */ bio_for_each_segment(bvec, bio, iter) { if (bio_iter_len(bio, iter) > corrupt_bio_byte) { - char *segment = (page_address(bio_iter_page(bio, iter)) - + bio_iter_offset(bio, iter)); + char *segment; + struct page *page = bio_iter_page(bio, iter); + if (unlikely(page == ZERO_PAGE(0))) + break; + segment = (page_address(page) + bio_iter_offset(bio, iter)); segment[corrupt_bio_byte] = fc->corrupt_bio_value; DMDEBUG("Corrupting data bio=%p by writing %u to byte %u " "(rw=%c bi_opf=%u bi_sector=%llu size=%u)\n", @@ -364,9 +367,11 @@ static int flakey_map(struct dm_target *ti, struct bio *bio) /* * Corrupt matching writes. */ - if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == WRITE)) { - if (all_corrupt_bio_flags_match(bio, fc)) - corrupt_bio_data(bio, fc); + if (fc->corrupt_bio_byte) { + if (fc->corrupt_bio_rw == WRITE) { + if (all_corrupt_bio_flags_match(bio, fc)) + corrupt_bio_data(bio, fc); + } goto map_bio; } @@ -397,13 +402,14 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, } if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) { - if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) && - all_corrupt_bio_flags_match(bio, fc)) { - /* - * Corrupt successful matching READs while in down state. - */ - corrupt_bio_data(bio, fc); - + if (fc->corrupt_bio_byte) { + if ((fc->corrupt_bio_rw == READ) && + all_corrupt_bio_flags_match(bio, fc)) { + /* + * Corrupt successful matching READs while in down state. + */ + corrupt_bio_data(bio, fc); + } } else if (!test_bit(DROP_WRITES, &fc->flags) && !test_bit(ERROR_WRITES, &fc->flags)) { /* diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index 3d59f3e20..0eb48e739 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -188,7 +188,7 @@ static int dm_stat_in_flight(struct dm_stat_shared *shared) atomic_read(&shared->in_flight[WRITE]); } -void dm_stats_init(struct dm_stats *stats) +int dm_stats_init(struct dm_stats *stats) { int cpu; struct dm_stats_last_position *last; @@ -196,11 +196,16 @@ void dm_stats_init(struct dm_stats *stats) mutex_init(&stats->mutex); INIT_LIST_HEAD(&stats->list); stats->last = alloc_percpu(struct dm_stats_last_position); + if (!stats->last) + return -ENOMEM; + for_each_possible_cpu(cpu) { last = per_cpu_ptr(stats->last, cpu); last->last_sector = (sector_t)ULLONG_MAX; last->last_rw = UINT_MAX; } + + return 0; } void dm_stats_cleanup(struct dm_stats *stats) diff --git a/drivers/md/dm-stats.h b/drivers/md/dm-stats.h index 2ddfae678..dcac11fce 100644 --- a/drivers/md/dm-stats.h +++ b/drivers/md/dm-stats.h @@ -22,7 +22,7 @@ struct dm_stats_aux { unsigned long long duration_ns; }; -void dm_stats_init(struct dm_stats *st); +int dm_stats_init(struct dm_stats *st); void dm_stats_cleanup(struct dm_stats *st); struct mapped_device; diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index a6a5cee6b..f374f593f 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -660,6 +660,15 @@ static int __open_metadata(struct dm_pool_metadata *pmd) goto bad_cleanup_data_sm; } + /* + * For pool metadata opening process, root setting is redundant + * because it will be set again in __begin_transaction(). But dm + * pool aborting process really needs to get last transaction's + * root to avoid accessing broken btree. + */ + pmd->root = le64_to_cpu(disk_super->data_mapping_root); + pmd->details_root = le64_to_cpu(disk_super->device_details_root); + __setup_btree_details(pmd); dm_bm_unlock(sblock); diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 435a2ee4a..a1bbf00e6 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2222,6 +2222,7 @@ static void process_thin_deferred_bios(struct thin_c *tc) throttle_work_update(&pool->throttle); dm_pool_issue_prefetches(pool->pmd); } + cond_resched(); } blk_finish_plug(&plug); } @@ -2305,6 +2306,7 @@ static void process_thin_deferred_cells(struct thin_c *tc) else pool->process_cell(tc, cell); } + cond_resched(); } while (!list_empty(&cells)); } @@ -2921,6 +2923,8 @@ static void __pool_destroy(struct pool *pool) dm_bio_prison_destroy(pool->prison); dm_kcopyd_client_destroy(pool->copier); + cancel_delayed_work_sync(&pool->waker); + cancel_delayed_work_sync(&pool->no_space_timeout); if (pool->wq) destroy_workqueue(pool->wq); @@ -3361,6 +3365,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) pt->low_water_blocks = low_water_blocks; pt->adjusted_pf = pt->requested_pf = pf; ti->num_flush_bios = 1; + ti->limit_swap_bios = true; /* * Only need to enable discards if the pool should pass @@ -3547,20 +3552,28 @@ static int pool_preresume(struct dm_target *ti) */ r = bind_control_target(pool, ti); if (r) - return r; + goto out; r = maybe_resize_data_dev(ti, &need_commit1); if (r) - return r; + goto out; r = maybe_resize_metadata_dev(ti, &need_commit2); if (r) - return r; + goto out; if (need_commit1 || need_commit2) (void) commit(pool); +out: + /* + * When a thin-pool is PM_FAIL, it cannot be rebuilt if + * bio is in deferred list. Therefore need to return 0 + * to allow pool_resume() to flush IO. + */ + if (r && get_pool_mode(pool) == PM_FAIL) + r = 0; - return 0; + return r; } static void pool_suspend_active_thins(struct pool *pool) @@ -4233,6 +4246,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; ti->num_flush_bios = 1; + ti->limit_swap_bios = true; ti->flush_supported = true; ti->per_io_data_size = sizeof(struct dm_thin_endio_hook); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 324d1dd58..9a9b2adcf 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -279,7 +279,6 @@ out_free_rq_tio_cache: static void local_exit(void) { - flush_scheduled_work(); destroy_workqueue(deferred_remove_workqueue); kmem_cache_destroy(_rq_cache); @@ -2022,7 +2021,9 @@ static struct mapped_device *alloc_dev(int minor) bio_set_dev(&md->flush_bio, md->bdev); md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC; - dm_stats_init(&md->stats); + r = dm_stats_init(&md->stats); + if (r < 0) + goto bad; /* Populate the mapping, nobody knows we exist yet */ spin_lock(&_minor_lock); diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index 7cf9d34ce..1c4c46278 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -488,7 +488,7 @@ void md_bitmap_print_sb(struct bitmap *bitmap) sb = kmap_atomic(bitmap->storage.sb_page); pr_debug("%s: bitmap file superblock:\n", bmname(bitmap)); pr_debug(" magic: %08x\n", le32_to_cpu(sb->magic)); - pr_debug(" version: %d\n", le32_to_cpu(sb->version)); + pr_debug(" version: %u\n", le32_to_cpu(sb->version)); pr_debug(" uuid: %08x.%08x.%08x.%08x\n", le32_to_cpu(*(__u32 *)(sb->uuid+0)), le32_to_cpu(*(__u32 *)(sb->uuid+4)), @@ -499,11 +499,11 @@ void md_bitmap_print_sb(struct bitmap *bitmap) pr_debug("events cleared: %llu\n", (unsigned long long) le64_to_cpu(sb->events_cleared)); pr_debug(" state: %08x\n", le32_to_cpu(sb->state)); - pr_debug(" chunksize: %d B\n", le32_to_cpu(sb->chunksize)); - pr_debug(" daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep)); + pr_debug(" chunksize: %u B\n", le32_to_cpu(sb->chunksize)); + pr_debug(" daemon sleep: %us\n", le32_to_cpu(sb->daemon_sleep)); pr_debug(" sync size: %llu KB\n", (unsigned long long)le64_to_cpu(sb->sync_size)/2); - pr_debug("max write behind: %d\n", le32_to_cpu(sb->write_behind)); + pr_debug("max write behind: %u\n", le32_to_cpu(sb->write_behind)); kunmap_atomic(sb); } @@ -2101,7 +2101,8 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, bytes = DIV_ROUND_UP(chunks, 8); if (!bitmap->mddev->bitmap_info.external) bytes += sizeof(bitmap_super_t); - } while (bytes > (space << 9)); + } while (bytes > (space << 9) && (chunkshift + BITMAP_BLOCK_SHIFT) < + (BITS_PER_BYTE * sizeof(((bitmap_super_t *)0)->chunksize) - 1)); } else chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT; @@ -2146,7 +2147,7 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, bitmap->counts.missing_pages = pages; bitmap->counts.chunkshift = chunkshift; bitmap->counts.chunks = chunks; - bitmap->mddev->bitmap_info.chunksize = 1 << (chunkshift + + bitmap->mddev->bitmap_info.chunksize = 1UL << (chunkshift + BITMAP_BLOCK_SHIFT); blocks = min(old_counts.chunks << old_counts.chunkshift, @@ -2172,8 +2173,8 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, bitmap->counts.missing_pages = old_counts.pages; bitmap->counts.chunkshift = old_counts.chunkshift; bitmap->counts.chunks = old_counts.chunks; - bitmap->mddev->bitmap_info.chunksize = 1 << (old_counts.chunkshift + - BITMAP_BLOCK_SHIFT); + bitmap->mddev->bitmap_info.chunksize = + 1UL << (old_counts.chunkshift + BITMAP_BLOCK_SHIFT); blocks = old_counts.chunks << old_counts.chunkshift; pr_warn("Could not pre-allocate in-memory bitmap for cluster raid\n"); break; @@ -2191,20 +2192,23 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, if (set) { bmc_new = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1); - if (*bmc_new == 0) { - /* need to set on-disk bits too. */ - sector_t end = block + new_blocks; - sector_t start = block >> chunkshift; - start <<= chunkshift; - while (start < end) { - md_bitmap_file_set_bit(bitmap, block); - start += 1 << chunkshift; + if (bmc_new) { + if (*bmc_new == 0) { + /* need to set on-disk bits too. */ + sector_t end = block + new_blocks; + sector_t start = block >> chunkshift; + + start <<= chunkshift; + while (start < end) { + md_bitmap_file_set_bit(bitmap, block); + start += 1 << chunkshift; + } + *bmc_new = 2; + md_bitmap_count_page(&bitmap->counts, block, 1); + md_bitmap_set_pending(&bitmap->counts, block); } - *bmc_new = 2; - md_bitmap_count_page(&bitmap->counts, block, 1); - md_bitmap_set_pending(&bitmap->counts, block); + *bmc_new |= NEEDED_MASK; } - *bmc_new |= NEEDED_MASK; if (new_blocks < old_blocks) old_blocks = new_blocks; } @@ -2496,6 +2500,9 @@ chunksize_store(struct mddev *mddev, const char *buf, size_t len) if (csize < 512 || !is_power_of_2(csize)) return -EINVAL; + if (BITS_PER_LONG > 32 && csize >= (1ULL << (BITS_PER_BYTE * + sizeof(((bitmap_super_t *)0)->chunksize)))) + return -EOVERFLOW; mddev->bitmap_info.chunksize = csize; return len; } diff --git a/drivers/md/md.c b/drivers/md/md.c index 38cbde906..f8c111b36 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -417,13 +417,14 @@ static void md_end_flush(struct bio *bio) struct md_rdev *rdev = bio->bi_private; struct mddev *mddev = rdev->mddev; + bio_put(bio); + rdev_dec_pending(rdev, mddev); if (atomic_dec_and_test(&mddev->flush_pending)) { /* The pre-request flush has finished */ queue_work(md_wq, &mddev->flush_work); } - bio_put(bio); } static void md_submit_flush_data(struct work_struct *ws); @@ -821,10 +822,12 @@ static void super_written(struct bio *bio) } else clear_bit(LastDev, &rdev->flags); + bio_put(bio); + + rdev_dec_pending(rdev, mddev); + if (atomic_dec_and_test(&mddev->pending_writes)) wake_up(&mddev->sb_wait); - rdev_dec_pending(rdev, mddev); - bio_put(bio); } void md_super_write(struct mddev *mddev, struct md_rdev *rdev, @@ -2988,6 +2991,9 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len) err = kstrtouint(buf, 10, (unsigned int *)&slot); if (err < 0) return err; + if (slot < 0) + /* overflow */ + return -ENOSPC; } if (rdev->mddev->pers && slot == -1) { /* Setting 'slot' on an active array requires also diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 876d3e133..0f8b1fb3d 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -3110,6 +3110,7 @@ static int raid1_run(struct mddev *mddev) * RAID1 needs at least one disk in active */ if (conf->raid_disks - mddev->degraded < 1) { + md_unregister_thread(&conf->thread); ret = -EINVAL; goto abort; } -- cgit v1.2.3