From 85c675d0d09a45a135bddd15d7b385f8758c32fb Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 18 May 2024 19:35:05 +0200 Subject: Adding upstream version 6.7.7. Signed-off-by: Daniel Baumann --- fs/ext4/mballoc.c | 613 ++++++++++++++++++++++-------------------------------- 1 file changed, 248 insertions(+), 365 deletions(-) (limited to 'fs/ext4/mballoc.c') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 9a4b73485..7497a789d 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -18,6 +18,7 @@ #include #include #include +#include /* * MUSTDO: @@ -841,7 +842,7 @@ mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp) struct ext4_sb_info *sbi = EXT4_SB(sb); int new_order; - if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_free == 0) + if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_fragments == 0) return; new_order = mb_avg_fragment_size_order(sb, @@ -1232,6 +1233,24 @@ void ext4_mb_generate_buddy(struct super_block *sb, atomic64_add(period, &sbi->s_mb_generation_time); } +static void mb_regenerate_buddy(struct ext4_buddy *e4b) +{ + int count; + int order = 1; + void *buddy; + + while ((buddy = mb_find_buddy(e4b, order++, &count))) + mb_set_bits(buddy, 0, count); + + e4b->bd_info->bb_fragments = 0; + memset(e4b->bd_info->bb_counters, 0, + sizeof(*e4b->bd_info->bb_counters) * + (e4b->bd_sb->s_blocksize_bits + 2)); + + ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy, + e4b->bd_bitmap, e4b->bd_group, e4b->bd_info); +} + /* The buddy information is attached the buddy cache inode * for convenience. The information regarding each group * is loaded via ext4_mb_load_buddy. The information involve @@ -1891,11 +1910,6 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, mb_check_buddy(e4b); mb_free_blocks_double(inode, e4b, first, count); - this_cpu_inc(discard_pa_seq); - e4b->bd_info->bb_free += count; - if (first < e4b->bd_info->bb_first_free) - e4b->bd_info->bb_first_free = first; - /* access memory sequentially: check left neighbour, * clear range and then check right neighbour */ @@ -1909,21 +1923,31 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t blocknr; + /* + * Fastcommit replay can free already freed blocks which + * corrupts allocation info. Regenerate it. + */ + if (sbi->s_mount_state & EXT4_FC_REPLAY) { + mb_regenerate_buddy(e4b); + goto check; + } + blocknr = ext4_group_first_block_no(sb, e4b->bd_group); blocknr += EXT4_C2B(sbi, block); - if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) { - ext4_grp_locked_error(sb, e4b->bd_group, - inode ? inode->i_ino : 0, - blocknr, - "freeing already freed block (bit %u); block bitmap corrupt.", - block); - ext4_mark_group_bitmap_corrupted( - sb, e4b->bd_group, + ext4_grp_locked_error(sb, e4b->bd_group, + inode ? inode->i_ino : 0, blocknr, + "freeing already freed block (bit %u); block bitmap corrupt.", + block); + ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, EXT4_GROUP_INFO_BBITMAP_CORRUPT); - } - goto done; + return; } + this_cpu_inc(discard_pa_seq); + e4b->bd_info->bb_free += count; + if (first < e4b->bd_info->bb_first_free) + e4b->bd_info->bb_first_free = first; + /* let's maintain fragments counter */ if (left_is_free && right_is_free) e4b->bd_info->bb_fragments--; @@ -1948,9 +1972,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, if (first <= last) mb_buddy_mark_free(e4b, first >> 1, last >> 1); -done: mb_set_largest_free_order(sb, e4b->bd_info); mb_update_avg_fragment_size(sb, e4b->bd_info); +check: mb_check_buddy(e4b); } @@ -2281,6 +2305,9 @@ void ext4_mb_try_best_found(struct ext4_allocation_context *ac, return; ext4_lock_group(ac->ac_sb, group); + if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) + goto out; + max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex); if (max > 0) { @@ -2288,6 +2315,7 @@ void ext4_mb_try_best_found(struct ext4_allocation_context *ac, ext4_mb_use_best_found(ac, e4b); } +out: ext4_unlock_group(ac->ac_sb, group); ext4_mb_unload_buddy(e4b); } @@ -2314,12 +2342,10 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, if (err) return err; - if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) { - ext4_mb_unload_buddy(e4b); - return 0; - } - ext4_lock_group(ac->ac_sb, group); + if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) + goto out; + max = mb_find_extent(e4b, ac->ac_g_ex.fe_start, ac->ac_g_ex.fe_len, &ex); ex.fe_logical = 0xDEADFA11; /* debug value */ @@ -2352,6 +2378,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, ac->ac_b_ex = ex; ext4_mb_use_best_found(ac, e4b); } +out: ext4_unlock_group(ac->ac_sb, group); ext4_mb_unload_buddy(e4b); @@ -3629,7 +3656,8 @@ int ext4_mb_init(struct super_block *sb) spin_lock_init(&sbi->s_md_lock); sbi->s_mb_free_pending = 0; - INIT_LIST_HEAD(&sbi->s_freed_data_list); + INIT_LIST_HEAD(&sbi->s_freed_data_list[0]); + INIT_LIST_HEAD(&sbi->s_freed_data_list[1]); INIT_LIST_HEAD(&sbi->s_discard_list); INIT_WORK(&sbi->s_discard_work, ext4_discard_work); atomic_set(&sbi->s_retry_alloc_pending, 0); @@ -3881,19 +3909,10 @@ void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid) struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_free_data *entry, *tmp; LIST_HEAD(freed_data_list); - struct list_head *cut_pos = NULL; + struct list_head *s_freed_head = &sbi->s_freed_data_list[commit_tid & 1]; bool wake; - spin_lock(&sbi->s_md_lock); - list_for_each_entry(entry, &sbi->s_freed_data_list, efd_list) { - if (entry->efd_tid != commit_tid) - break; - cut_pos = &entry->efd_list; - } - if (cut_pos) - list_cut_position(&freed_data_list, &sbi->s_freed_data_list, - cut_pos); - spin_unlock(&sbi->s_md_lock); + list_replace_init(s_freed_head, &freed_data_list); list_for_each_entry(entry, &freed_data_list, efd_list) ext4_free_data_in_buddy(sb, entry); @@ -3951,6 +3970,111 @@ void ext4_exit_mballoc(void) ext4_groupinfo_destroy_slabs(); } +#define EXT4_MB_BITMAP_MARKED_CHECK 0x0001 +#define EXT4_MB_SYNC_UPDATE 0x0002 +static int +ext4_mb_mark_context(handle_t *handle, struct super_block *sb, bool state, + ext4_group_t group, ext4_grpblk_t blkoff, + ext4_grpblk_t len, int flags, ext4_grpblk_t *ret_changed) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct buffer_head *bitmap_bh = NULL; + struct ext4_group_desc *gdp; + struct buffer_head *gdp_bh; + int err; + unsigned int i, already, changed = len; + + KUNIT_STATIC_STUB_REDIRECT(ext4_mb_mark_context, + handle, sb, state, group, blkoff, len, + flags, ret_changed); + + if (ret_changed) + *ret_changed = 0; + bitmap_bh = ext4_read_block_bitmap(sb, group); + if (IS_ERR(bitmap_bh)) + return PTR_ERR(bitmap_bh); + + if (handle) { + BUFFER_TRACE(bitmap_bh, "getting write access"); + err = ext4_journal_get_write_access(handle, sb, bitmap_bh, + EXT4_JTR_NONE); + if (err) + goto out_err; + } + + err = -EIO; + gdp = ext4_get_group_desc(sb, group, &gdp_bh); + if (!gdp) + goto out_err; + + if (handle) { + BUFFER_TRACE(gdp_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, sb, gdp_bh, + EXT4_JTR_NONE); + if (err) + goto out_err; + } + + ext4_lock_group(sb, group); + if (ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); + ext4_free_group_clusters_set(sb, gdp, + ext4_free_clusters_after_init(sb, group, gdp)); + } + + if (flags & EXT4_MB_BITMAP_MARKED_CHECK) { + already = 0; + for (i = 0; i < len; i++) + if (mb_test_bit(blkoff + i, bitmap_bh->b_data) == + state) + already++; + changed = len - already; + } + + if (state) { + mb_set_bits(bitmap_bh->b_data, blkoff, len); + ext4_free_group_clusters_set(sb, gdp, + ext4_free_group_clusters(sb, gdp) - changed); + } else { + mb_clear_bits(bitmap_bh->b_data, blkoff, len); + ext4_free_group_clusters_set(sb, gdp, + ext4_free_group_clusters(sb, gdp) + changed); + } + + ext4_block_bitmap_csum_set(sb, gdp, bitmap_bh); + ext4_group_desc_csum_set(sb, group, gdp); + ext4_unlock_group(sb, group); + if (ret_changed) + *ret_changed = changed; + + if (sbi->s_log_groups_per_flex) { + ext4_group_t flex_group = ext4_flex_group(sbi, group); + struct flex_groups *fg = sbi_array_rcu_deref(sbi, + s_flex_groups, flex_group); + + if (state) + atomic64_sub(changed, &fg->free_clusters); + else + atomic64_add(changed, &fg->free_clusters); + } + + err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); + if (err) + goto out_err; + err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh); + if (err) + goto out_err; + + if (flags & EXT4_MB_SYNC_UPDATE) { + sync_dirty_buffer(bitmap_bh); + sync_dirty_buffer(gdp_bh); + } + +out_err: + brelse(bitmap_bh); + return err; +} /* * Check quota and mark chosen space (ac->ac_b_ex) non-free in bitmaps @@ -3960,13 +4084,13 @@ static noinline_for_stack int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, handle_t *handle, unsigned int reserv_clstrs) { - struct buffer_head *bitmap_bh = NULL; struct ext4_group_desc *gdp; - struct buffer_head *gdp_bh; struct ext4_sb_info *sbi; struct super_block *sb; ext4_fsblk_t block; int err, len; + int flags = 0; + ext4_grpblk_t changed; BUG_ON(ac->ac_status != AC_STATUS_FOUND); BUG_ON(ac->ac_b_ex.fe_len <= 0); @@ -3974,32 +4098,13 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, sb = ac->ac_sb; sbi = EXT4_SB(sb); - bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group); - if (IS_ERR(bitmap_bh)) { - return PTR_ERR(bitmap_bh); - } - - BUFFER_TRACE(bitmap_bh, "getting write access"); - err = ext4_journal_get_write_access(handle, sb, bitmap_bh, - EXT4_JTR_NONE); - if (err) - goto out_err; - - err = -EIO; - gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh); + gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, NULL); if (!gdp) - goto out_err; - + return -EIO; ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group, ext4_free_group_clusters(sb, gdp)); - BUFFER_TRACE(gdp_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, sb, gdp_bh, EXT4_JTR_NONE); - if (err) - goto out_err; - block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); - len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len); if (!ext4_inode_block_valid(ac->ac_inode, block, len)) { ext4_error(sb, "Allocating blocks %llu-%llu which overlap " @@ -4008,41 +4113,29 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, * Fix the bitmap and return EFSCORRUPTED * We leak some of the blocks here. */ - ext4_lock_group(sb, ac->ac_b_ex.fe_group); - mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, - ac->ac_b_ex.fe_len); - ext4_unlock_group(sb, ac->ac_b_ex.fe_group); - err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); + err = ext4_mb_mark_context(handle, sb, true, + ac->ac_b_ex.fe_group, + ac->ac_b_ex.fe_start, + ac->ac_b_ex.fe_len, + 0, NULL); if (!err) err = -EFSCORRUPTED; - goto out_err; + return err; } - ext4_lock_group(sb, ac->ac_b_ex.fe_group); #ifdef AGGRESSIVE_CHECK - { - int i; - for (i = 0; i < ac->ac_b_ex.fe_len; i++) { - BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i, - bitmap_bh->b_data)); - } - } + flags |= EXT4_MB_BITMAP_MARKED_CHECK; #endif - mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, - ac->ac_b_ex.fe_len); - if (ext4_has_group_desc_csum(sb) && - (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); - ext4_free_group_clusters_set(sb, gdp, - ext4_free_clusters_after_init(sb, - ac->ac_b_ex.fe_group, gdp)); - } - len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len; - ext4_free_group_clusters_set(sb, gdp, len); - ext4_block_bitmap_csum_set(sb, gdp, bitmap_bh); - ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp); + err = ext4_mb_mark_context(handle, sb, true, ac->ac_b_ex.fe_group, + ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len, + flags, &changed); + + if (err && changed == 0) + return err; - ext4_unlock_group(sb, ac->ac_b_ex.fe_group); +#ifdef AGGRESSIVE_CHECK + BUG_ON(changed != ac->ac_b_ex.fe_len); +#endif percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len); /* * Now reduce the dirty block count also. Should not go negative @@ -4052,21 +4145,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, percpu_counter_sub(&sbi->s_dirtyclusters_counter, reserv_clstrs); - if (sbi->s_log_groups_per_flex) { - ext4_group_t flex_group = ext4_flex_group(sbi, - ac->ac_b_ex.fe_group); - atomic64_sub(ac->ac_b_ex.fe_len, - &sbi_array_rcu_deref(sbi, s_flex_groups, - flex_group)->free_clusters); - } - - err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); - if (err) - goto out_err; - err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh); - -out_err: - brelse(bitmap_bh); return err; } @@ -4075,17 +4153,13 @@ out_err: * blocks in bitmaps and update counters. */ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block, - int len, int state) + int len, bool state) { - struct buffer_head *bitmap_bh = NULL; - struct ext4_group_desc *gdp; - struct buffer_head *gdp_bh; struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_group_t group; ext4_grpblk_t blkoff; - int i, err = 0; - int already; - unsigned int clen, clen_changed, thisgrp_len; + int err = 0; + unsigned int clen, thisgrp_len; while (len > 0) { ext4_get_group_no_and_offset(sb, block, &group, &blkoff); @@ -4106,80 +4180,21 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block, ext4_error(sb, "Marking blocks in system zone - " "Block = %llu, len = %u", block, thisgrp_len); - bitmap_bh = NULL; break; } - bitmap_bh = ext4_read_block_bitmap(sb, group); - if (IS_ERR(bitmap_bh)) { - err = PTR_ERR(bitmap_bh); - bitmap_bh = NULL; - break; - } - - err = -EIO; - gdp = ext4_get_group_desc(sb, group, &gdp_bh); - if (!gdp) - break; - - ext4_lock_group(sb, group); - already = 0; - for (i = 0; i < clen; i++) - if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == - !state) - already++; - - clen_changed = clen - already; - if (state) - mb_set_bits(bitmap_bh->b_data, blkoff, clen); - else - mb_clear_bits(bitmap_bh->b_data, blkoff, clen); - if (ext4_has_group_desc_csum(sb) && - (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); - ext4_free_group_clusters_set(sb, gdp, - ext4_free_clusters_after_init(sb, group, gdp)); - } - if (state) - clen = ext4_free_group_clusters(sb, gdp) - clen_changed; - else - clen = ext4_free_group_clusters(sb, gdp) + clen_changed; - - ext4_free_group_clusters_set(sb, gdp, clen); - ext4_block_bitmap_csum_set(sb, gdp, bitmap_bh); - ext4_group_desc_csum_set(sb, group, gdp); - - ext4_unlock_group(sb, group); - - if (sbi->s_log_groups_per_flex) { - ext4_group_t flex_group = ext4_flex_group(sbi, group); - struct flex_groups *fg = sbi_array_rcu_deref(sbi, - s_flex_groups, flex_group); - - if (state) - atomic64_sub(clen_changed, &fg->free_clusters); - else - atomic64_add(clen_changed, &fg->free_clusters); - - } - - err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh); - if (err) - break; - sync_dirty_buffer(bitmap_bh); - err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh); - sync_dirty_buffer(gdp_bh); + err = ext4_mb_mark_context(NULL, sb, state, + group, blkoff, clen, + EXT4_MB_BITMAP_MARKED_CHECK | + EXT4_MB_SYNC_UPDATE, + NULL); if (err) break; block += thisgrp_len; len -= thisgrp_len; - brelse(bitmap_bh); BUG_ON(len < 0); } - - if (err) - brelse(bitmap_bh); } /* @@ -6107,7 +6122,7 @@ ext4_mb_new_blocks_simple(struct ext4_allocation_request *ar, int *errp) } block = ext4_group_first_block_no(sb, group) + EXT4_C2B(sbi, i); - ext4_mb_mark_bb(sb, block, 1, 1); + ext4_mb_mark_bb(sb, block, 1, true); ar->len = 1; return block; @@ -6355,7 +6370,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, } spin_lock(&sbi->s_md_lock); - list_add_tail(&new_entry->efd_list, &sbi->s_freed_data_list); + list_add_tail(&new_entry->efd_list, &sbi->s_freed_data_list[new_entry->efd_tid & 1]); sbi->s_mb_free_pending += clusters; spin_unlock(&sbi->s_md_lock); } @@ -6363,43 +6378,15 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, static void ext4_free_blocks_simple(struct inode *inode, ext4_fsblk_t block, unsigned long count) { - struct buffer_head *bitmap_bh; struct super_block *sb = inode->i_sb; - struct ext4_group_desc *gdp; - struct buffer_head *gdp_bh; ext4_group_t group; ext4_grpblk_t blkoff; - int already_freed = 0, err, i; ext4_get_group_no_and_offset(sb, block, &group, &blkoff); - bitmap_bh = ext4_read_block_bitmap(sb, group); - if (IS_ERR(bitmap_bh)) { - pr_warn("Failed to read block bitmap\n"); - return; - } - gdp = ext4_get_group_desc(sb, group, &gdp_bh); - if (!gdp) - goto err_out; - - for (i = 0; i < count; i++) { - if (!mb_test_bit(blkoff + i, bitmap_bh->b_data)) - already_freed++; - } - mb_clear_bits(bitmap_bh->b_data, blkoff, count); - err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh); - if (err) - goto err_out; - ext4_free_group_clusters_set( - sb, gdp, ext4_free_group_clusters(sb, gdp) + - count - already_freed); - ext4_block_bitmap_csum_set(sb, gdp, bitmap_bh); - ext4_group_desc_csum_set(sb, group, gdp); - ext4_handle_dirty_metadata(NULL, NULL, gdp_bh); - sync_dirty_buffer(bitmap_bh); - sync_dirty_buffer(gdp_bh); - -err_out: - brelse(bitmap_bh); + ext4_mb_mark_context(NULL, sb, false, group, blkoff, count, + EXT4_MB_BITMAP_MARKED_CHECK | + EXT4_MB_SYNC_UPDATE, + NULL); } /** @@ -6415,19 +6402,17 @@ static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode, ext4_fsblk_t block, unsigned long count, int flags) { - struct buffer_head *bitmap_bh = NULL; struct super_block *sb = inode->i_sb; - struct ext4_group_desc *gdp; struct ext4_group_info *grp; unsigned int overflow; ext4_grpblk_t bit; - struct buffer_head *gd_bh; ext4_group_t block_group; struct ext4_sb_info *sbi; struct ext4_buddy e4b; unsigned int count_clusters; int err = 0; - int ret; + int mark_flags = 0; + ext4_grpblk_t changed; sbi = EXT4_SB(sb); @@ -6436,7 +6421,7 @@ static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode, ext4_error(sb, "Freeing blocks in system zone - " "Block = %llu, count = %lu", block, count); /* err = 0. ext4_std_error should be a no op */ - goto error_return; + goto error_out; } flags |= EXT4_FREE_BLOCKS_VALIDATED; @@ -6460,55 +6445,35 @@ do_more: flags &= ~EXT4_FREE_BLOCKS_VALIDATED; } count_clusters = EXT4_NUM_B2C(sbi, count); - bitmap_bh = ext4_read_block_bitmap(sb, block_group); - if (IS_ERR(bitmap_bh)) { - err = PTR_ERR(bitmap_bh); - bitmap_bh = NULL; - goto error_return; - } - gdp = ext4_get_group_desc(sb, block_group, &gd_bh); - if (!gdp) { - err = -EIO; - goto error_return; - } + trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters); + + /* __GFP_NOFAIL: retry infinitely, ignore TIF_MEMDIE and memcg limit. */ + err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b, + GFP_NOFS|__GFP_NOFAIL); + if (err) + goto error_out; if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && !ext4_inode_block_valid(inode, block, count)) { ext4_error(sb, "Freeing blocks in system zone - " "Block = %llu, count = %lu", block, count); /* err = 0. ext4_std_error should be a no op */ - goto error_return; + goto error_clean; } - BUFFER_TRACE(bitmap_bh, "getting write access"); - err = ext4_journal_get_write_access(handle, sb, bitmap_bh, - EXT4_JTR_NONE); - if (err) - goto error_return; - - /* - * We are about to modify some metadata. Call the journal APIs - * to unshare ->b_data if a currently-committing transaction is - * using it - */ - BUFFER_TRACE(gd_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, sb, gd_bh, EXT4_JTR_NONE); - if (err) - goto error_return; #ifdef AGGRESSIVE_CHECK - { - int i; - for (i = 0; i < count_clusters; i++) - BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); - } + mark_flags |= EXT4_MB_BITMAP_MARKED_CHECK; #endif - trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters); + err = ext4_mb_mark_context(handle, sb, false, block_group, bit, + count_clusters, mark_flags, &changed); - /* __GFP_NOFAIL: retry infinitely, ignore TIF_MEMDIE and memcg limit. */ - err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b, - GFP_NOFS|__GFP_NOFAIL); - if (err) - goto error_return; + + if (err && changed == 0) + goto error_clean; + +#ifdef AGGRESSIVE_CHECK + BUG_ON(changed != count_clusters); +#endif /* * We need to make sure we don't reuse the freed block until after the @@ -6532,13 +6497,8 @@ do_more: new_entry->efd_tid = handle->h_transaction->t_tid; ext4_lock_group(sb, block_group); - mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); ext4_mb_free_metadata(handle, &e4b, new_entry); } else { - /* need to update group_info->bb_free and bitmap - * with group lock held. generate_buddy look at - * them with group lock_held - */ if (test_opt(sb, DISCARD)) { err = ext4_issue_discard(sb, block_group, bit, count_clusters, NULL); @@ -6551,23 +6511,11 @@ do_more: EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info); ext4_lock_group(sb, block_group); - mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); mb_free_blocks(inode, &e4b, bit, count_clusters); } - ret = ext4_free_group_clusters(sb, gdp) + count_clusters; - ext4_free_group_clusters_set(sb, gdp, ret); - ext4_block_bitmap_csum_set(sb, gdp, bitmap_bh); - ext4_group_desc_csum_set(sb, block_group, gdp); ext4_unlock_group(sb, block_group); - if (sbi->s_log_groups_per_flex) { - ext4_group_t flex_group = ext4_flex_group(sbi, block_group); - atomic64_add(count_clusters, - &sbi_array_rcu_deref(sbi, s_flex_groups, - flex_group)->free_clusters); - } - /* * on a bigalloc file system, defer the s_freeclusters_counter * update to the caller (ext4_remove_space and friends) so they @@ -6580,28 +6528,18 @@ do_more: count_clusters); } - ext4_mb_unload_buddy(&e4b); - - /* We dirtied the bitmap block */ - BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); - err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); - - /* And the group descriptor block */ - BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); - ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh); - if (!err) - err = ret; - if (overflow && !err) { block += count; count = overflow; - put_bh(bitmap_bh); + ext4_mb_unload_buddy(&e4b); /* The range changed so it's no longer validated */ flags &= ~EXT4_FREE_BLOCKS_VALIDATED; goto do_more; } -error_return: - brelse(bitmap_bh); + +error_clean: + ext4_mb_unload_buddy(&e4b); +error_out: ext4_std_error(sb, err); } @@ -6719,23 +6657,19 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, ext4_fsblk_t block, unsigned long count) { - struct buffer_head *bitmap_bh = NULL; - struct buffer_head *gd_bh; ext4_group_t block_group; ext4_grpblk_t bit; - unsigned int i; - struct ext4_group_desc *desc; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_buddy e4b; - int err = 0, ret, free_clusters_count; - ext4_grpblk_t clusters_freed; + int err = 0; ext4_fsblk_t first_cluster = EXT4_B2C(sbi, block); ext4_fsblk_t last_cluster = EXT4_B2C(sbi, block + count - 1); unsigned long cluster_count = last_cluster - first_cluster + 1; + ext4_grpblk_t changed; ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); - if (count == 0) + if (cluster_count == 0) return 0; ext4_get_group_no_and_offset(sb, block, &block_group, &bit); @@ -6747,99 +6681,39 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, ext4_warning(sb, "too many blocks added to group %u", block_group); err = -EINVAL; - goto error_return; + goto error_out; } - bitmap_bh = ext4_read_block_bitmap(sb, block_group); - if (IS_ERR(bitmap_bh)) { - err = PTR_ERR(bitmap_bh); - bitmap_bh = NULL; - goto error_return; - } - - desc = ext4_get_group_desc(sb, block_group, &gd_bh); - if (!desc) { - err = -EIO; - goto error_return; - } + err = ext4_mb_load_buddy(sb, block_group, &e4b); + if (err) + goto error_out; if (!ext4_sb_block_valid(sb, NULL, block, count)) { ext4_error(sb, "Adding blocks in system zones - " "Block = %llu, count = %lu", block, count); err = -EINVAL; - goto error_return; + goto error_clean; } - BUFFER_TRACE(bitmap_bh, "getting write access"); - err = ext4_journal_get_write_access(handle, sb, bitmap_bh, - EXT4_JTR_NONE); - if (err) - goto error_return; - - /* - * We are about to modify some metadata. Call the journal APIs - * to unshare ->b_data if a currently-committing transaction is - * using it - */ - BUFFER_TRACE(gd_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, sb, gd_bh, EXT4_JTR_NONE); - if (err) - goto error_return; - - for (i = 0, clusters_freed = 0; i < cluster_count; i++) { - BUFFER_TRACE(bitmap_bh, "clear bit"); - if (!mb_test_bit(bit + i, bitmap_bh->b_data)) { - ext4_error(sb, "bit already cleared for block %llu", - (ext4_fsblk_t)(block + i)); - BUFFER_TRACE(bitmap_bh, "bit already cleared"); - } else { - clusters_freed++; - } - } + err = ext4_mb_mark_context(handle, sb, false, block_group, bit, + cluster_count, EXT4_MB_BITMAP_MARKED_CHECK, + &changed); + if (err && changed == 0) + goto error_clean; - err = ext4_mb_load_buddy(sb, block_group, &e4b); - if (err) - goto error_return; + if (changed != cluster_count) + ext4_error(sb, "bit already cleared in group %u", block_group); - /* - * need to update group_info->bb_free and bitmap - * with group lock held. generate_buddy look at - * them with group lock_held - */ ext4_lock_group(sb, block_group); - mb_clear_bits(bitmap_bh->b_data, bit, cluster_count); mb_free_blocks(NULL, &e4b, bit, cluster_count); - free_clusters_count = clusters_freed + - ext4_free_group_clusters(sb, desc); - ext4_free_group_clusters_set(sb, desc, free_clusters_count); - ext4_block_bitmap_csum_set(sb, desc, bitmap_bh); - ext4_group_desc_csum_set(sb, block_group, desc); ext4_unlock_group(sb, block_group); percpu_counter_add(&sbi->s_freeclusters_counter, - clusters_freed); - - if (sbi->s_log_groups_per_flex) { - ext4_group_t flex_group = ext4_flex_group(sbi, block_group); - atomic64_add(clusters_freed, - &sbi_array_rcu_deref(sbi, s_flex_groups, - flex_group)->free_clusters); - } + changed); +error_clean: ext4_mb_unload_buddy(&e4b); - - /* We dirtied the bitmap block */ - BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); - err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); - - /* And the group descriptor block */ - BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); - ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh); - if (!err) - err = ret; - -error_return: - brelse(bitmap_bh); +error_out: ext4_std_error(sb, err); return err; } @@ -6910,13 +6784,15 @@ static int ext4_try_to_trim_range(struct super_block *sb, __acquires(ext4_group_lock_ptr(sb, e4b->bd_group)) __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) { - ext4_grpblk_t next, count, free_count; + ext4_grpblk_t next, count, free_count, last, origin_start; bool set_trimmed = false; void *bitmap; + last = ext4_last_grp_cluster(sb, e4b->bd_group); bitmap = e4b->bd_bitmap; - if (start == 0 && max >= ext4_last_grp_cluster(sb, e4b->bd_group)) + if (start == 0 && max >= last) set_trimmed = true; + origin_start = start; start = max(e4b->bd_info->bb_first_free, start); count = 0; free_count = 0; @@ -6925,7 +6801,10 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) start = mb_find_next_zero_bit(bitmap, max + 1, start); if (start > max) break; - next = mb_find_next_bit(bitmap, max + 1, start); + + next = mb_find_next_bit(bitmap, last + 1, start); + if (origin_start == 0 && next >= last) + set_trimmed = true; if ((next - start) >= minblocks) { int ret = ext4_trim_extent(sb, start, next - start, e4b); @@ -7152,3 +7031,7 @@ out_unload: return error; } + +#ifdef CONFIG_EXT4_KUNIT_TESTS +#include "mballoc-test.c" +#endif -- cgit v1.2.3