diff options
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/ext4.h | 8 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.c | 5 | ||||
-rw-r--r-- | fs/ext4/extents.c | 17 | ||||
-rw-r--r-- | fs/ext4/file.c | 2 | ||||
-rw-r--r-- | fs/ext4/indirect.c | 2 | ||||
-rw-r--r-- | fs/ext4/inline.c | 3 | ||||
-rw-r--r-- | fs/ext4/inode.c | 125 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 6 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 118 | ||||
-rw-r--r-- | fs/ext4/mballoc.h | 1 | ||||
-rw-r--r-- | fs/ext4/move_extent.c | 4 | ||||
-rw-r--r-- | fs/ext4/namei.c | 23 | ||||
-rw-r--r-- | fs/ext4/page-io.c | 2 | ||||
-rw-r--r-- | fs/ext4/resize.c | 14 | ||||
-rw-r--r-- | fs/ext4/super.c | 33 | ||||
-rw-r--r-- | fs/ext4/symlink.c | 8 |
16 files changed, 179 insertions, 192 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 3205d46bc9..4fb938ba3f 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -252,8 +252,10 @@ struct ext4_allocation_request { #define EXT4_MAP_MAPPED BIT(BH_Mapped) #define EXT4_MAP_UNWRITTEN BIT(BH_Unwritten) #define EXT4_MAP_BOUNDARY BIT(BH_Boundary) +#define EXT4_MAP_DELAYED BIT(BH_Delay) #define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\ - EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY) + EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\ + EXT4_MAP_DELAYED) struct ext4_map_blocks { ext4_fsblk_t m_pblk; @@ -2912,10 +2914,10 @@ extern const struct seq_operations ext4_mb_seq_groups_ops; extern const struct seq_operations ext4_mb_seq_structs_summary_ops; extern int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset); extern int ext4_mb_init(struct super_block *); -extern int ext4_mb_release(struct super_block *); +extern void ext4_mb_release(struct super_block *); extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, struct ext4_allocation_request *, int *); -extern void ext4_discard_preallocations(struct inode *, unsigned int); +extern void ext4_discard_preallocations(struct inode *); extern int __init ext4_init_mballoc(void); extern void ext4_exit_mballoc(void); extern ext4_group_t ext4_mb_prefetch(struct super_block *sb, diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index d1a2e66244..5d8055161a 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -235,8 +235,6 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line, might_sleep(); - ext4_check_bdev_write_error(sb); - if (ext4_handle_valid(handle)) { err = jbd2_journal_get_write_access(handle, bh); if (err) { @@ -244,7 +242,8 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line, handle, err); return err; } - } + } else + ext4_check_bdev_write_error(sb); if (trigger_type == EXT4_JTR_NONE || !ext4_has_metadata_csum(sb)) return 0; BUG_ON(trigger_type >= EXT4_JOURNAL_TRIGGER_COUNT); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index fadfca75c3..7669d154c0 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -100,7 +100,7 @@ static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped) * i_rwsem. So we can safely drop the i_data_sem here. */ BUG_ON(EXT4_JOURNAL(inode) == NULL); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); up_write(&EXT4_I(inode)->i_data_sem); *dropped = 1; return 0; @@ -4076,8 +4076,11 @@ again: /* * The delalloc extent containing lblk, it must have been * added after ext4_map_blocks() checked the extent status - * tree, adjust the length to the delalloc extent's after - * lblk. + * tree so we are not holding i_rwsem and delalloc info is + * only stabilized by i_data_sem we are going to release + * soon. Don't modify the extent status tree and report + * extent as a hole, just adjust the length to the delalloc + * extent's after lblk. */ len = es.es_lblk + es.es_len - lblk; return len; @@ -4342,7 +4345,7 @@ got_allocated_blocks: * not a good idea to call discard here directly, * but otherwise we'd need to call it every free(). */ - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) fb_flags = EXT4_FREE_BLOCKS_NO_QUOT_UPDATE; ext4_free_blocks(handle, inode, NULL, newblock, @@ -5386,7 +5389,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len) ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle); down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); ext4_es_remove_extent(inode, punch_start, EXT_MAX_BLOCKS - punch_start); ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1); @@ -5394,7 +5397,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len) up_write(&EXT4_I(inode)->i_data_sem); goto out_stop; } - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); ret = ext4_ext_shift_extents(inode, handle, punch_stop, punch_stop - punch_start, SHIFT_LEFT); @@ -5526,7 +5529,7 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len) goto out_stop; down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); path = ext4_find_extent(inode, offset_lblk, NULL, 0); if (IS_ERR(path)) { diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 6aa15dafc6..54d6ff2258 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -174,7 +174,7 @@ static int ext4_release_file(struct inode *inode, struct file *filp) (atomic_read(&inode->i_writecount) == 1) && !EXT4_I(inode)->i_reserved_data_blocks) { down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); up_write(&EXT4_I(inode)->i_data_sem); } if (is_dx(inode) && filp->private_data) diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index a9f3716119..d8ca7f64f9 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -714,7 +714,7 @@ static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode, * i_rwsem. So we can safely drop the i_data_sem here. */ BUG_ON(EXT4_JOURNAL(inode) == NULL); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); up_write(&EXT4_I(inode)->i_data_sem); *dropped = 1; return 0; diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 9a84a5f9fe..d5bd1e3a5d 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -502,9 +502,8 @@ static int ext4_read_inline_folio(struct inode *inode, struct folio *folio) BUG_ON(len > PAGE_SIZE); kaddr = kmap_local_folio(folio, 0); ret = ext4_read_inline_data(inode, kaddr, len, &iloc); - flush_dcache_folio(folio); + kaddr = folio_zero_tail(folio, len, kaddr + len); kunmap_local(kaddr); - folio_zero_segment(folio, len, folio_size(folio)); folio_mark_uptodate(folio); brelse(iloc.bh); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 61277f7f87..2ccf3b5e3a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -371,7 +371,7 @@ void ext4_da_update_reserve_space(struct inode *inode, */ if ((ei->i_reserved_data_blocks == 0) && !inode_is_open_for_write(inode)) - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); } static int __check_block_validity(struct inode *inode, const char *func, @@ -515,6 +515,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, map->m_len = retval; } else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) { map->m_pblk = 0; + map->m_flags |= ext4_es_is_delayed(&es) ? + EXT4_MAP_DELAYED : 0; retval = es.es_len - (map->m_lblk - es.es_lblk); if (retval > map->m_len) retval = map->m_len; @@ -1261,7 +1263,7 @@ static int write_end_fn(handle_t *handle, struct inode *inode, * We need to pick up the new inode size which generic_commit_write gave us * `file' can be NULL - eg, when called from page_symlink(). * - * ext4 never places buffers on inode->i_mapping->private_list. metadata + * ext4 never places buffers on inode->i_mapping->i_private_list. metadata * buffers are managed internally. */ static int ext4_write_end(struct file *file, @@ -1703,11 +1705,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, /* Lookup extent status tree firstly */ if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) { - if (ext4_es_is_hole(&es)) { - retval = 0; - down_read(&EXT4_I(inode)->i_data_sem); + if (ext4_es_is_hole(&es)) goto add_delayed; - } /* * Delayed extent could be allocated by fallocate. @@ -1749,26 +1748,11 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, retval = ext4_ext_map_blocks(NULL, inode, map, 0); else retval = ext4_ind_map_blocks(NULL, inode, map, 0); - -add_delayed: - if (retval == 0) { - int ret; - - /* - * XXX: __block_prepare_write() unmaps passed block, - * is it OK? - */ - - ret = ext4_insert_delayed_block(inode, map->m_lblk); - if (ret != 0) { - retval = ret; - goto out_unlock; - } - - map_bh(bh, inode->i_sb, invalid_block); - set_buffer_new(bh); - set_buffer_delay(bh); - } else if (retval > 0) { + if (retval < 0) { + up_read(&EXT4_I(inode)->i_data_sem); + return retval; + } + if (retval > 0) { unsigned int status; if (unlikely(retval != map->m_len)) { @@ -1783,11 +1767,21 @@ add_delayed: EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; ext4_es_insert_extent(inode, map->m_lblk, map->m_len, map->m_pblk, status); + up_read(&EXT4_I(inode)->i_data_sem); + return retval; } + up_read(&EXT4_I(inode)->i_data_sem); -out_unlock: - up_read((&EXT4_I(inode)->i_data_sem)); +add_delayed: + down_write(&EXT4_I(inode)->i_data_sem); + retval = ext4_insert_delayed_block(inode, map->m_lblk); + up_write(&EXT4_I(inode)->i_data_sem); + if (retval) + return retval; + map_bh(bh, inode->i_sb, invalid_block); + set_buffer_new(bh); + set_buffer_delay(bh); return retval; } @@ -2947,7 +2941,7 @@ static int ext4_da_should_update_i_disksize(struct folio *folio, static int ext4_da_do_write_end(struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, - struct page *page) + struct folio *folio) { struct inode *inode = mapping->host; loff_t old_size = inode->i_size; @@ -2958,12 +2952,13 @@ static int ext4_da_do_write_end(struct address_space *mapping, * block_write_end() will mark the inode as dirty with I_DIRTY_PAGES * flag, which all that's needed to trigger page writeback. */ - copied = block_write_end(NULL, mapping, pos, len, copied, page, NULL); + copied = block_write_end(NULL, mapping, pos, len, copied, + &folio->page, NULL); new_i_size = pos + copied; /* - * It's important to update i_size while still holding page lock, - * because page writeout could otherwise come in and zero beyond + * It's important to update i_size while still holding folio lock, + * because folio writeout could otherwise come in and zero beyond * i_size. * * Since we are holding inode lock, we are sure i_disksize <= @@ -2981,14 +2976,14 @@ static int ext4_da_do_write_end(struct address_space *mapping, i_size_write(inode, new_i_size); end = (new_i_size - 1) & (PAGE_SIZE - 1); - if (copied && ext4_da_should_update_i_disksize(page_folio(page), end)) { + if (copied && ext4_da_should_update_i_disksize(folio, end)) { ext4_update_i_disksize(inode, new_i_size); disksize_changed = true; } } - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); if (old_size < pos) pagecache_isize_extended(inode, old_size, pos); @@ -3027,10 +3022,10 @@ static int ext4_da_write_end(struct file *file, return ext4_write_inline_data_end(inode, pos, len, copied, folio); - if (unlikely(copied < len) && !PageUptodate(page)) + if (unlikely(copied < len) && !folio_test_uptodate(folio)) copied = 0; - return ext4_da_do_write_end(mapping, pos, len, copied, &folio->page); + return ext4_da_do_write_end(mapping, pos, len, copied, folio); } /* @@ -3213,7 +3208,7 @@ static bool ext4_inode_datasync_dirty(struct inode *inode) } /* Any metadata buffers to write? */ - if (!list_empty(&inode->i_mapping->private_list)) + if (!list_empty(&inode->i_mapping->i_private_list)) return true; return inode->i_state & I_DIRTY_DATASYNC; } @@ -3267,6 +3262,9 @@ static void ext4_set_iomap(struct inode *inode, struct iomap *iomap, iomap->addr = (u64) map->m_pblk << blkbits; if (flags & IOMAP_DAX) iomap->addr += EXT4_SB(inode->i_sb)->s_dax_part_off; + } else if (map->m_flags & EXT4_MAP_DELAYED) { + iomap->type = IOMAP_DELALLOC; + iomap->addr = IOMAP_NULL_ADDR; } else { iomap->type = IOMAP_HOLE; iomap->addr = IOMAP_NULL_ADDR; @@ -3429,35 +3427,11 @@ const struct iomap_ops ext4_iomap_overwrite_ops = { .iomap_end = ext4_iomap_end, }; -static bool ext4_iomap_is_delalloc(struct inode *inode, - struct ext4_map_blocks *map) -{ - struct extent_status es; - ext4_lblk_t offset = 0, end = map->m_lblk + map->m_len - 1; - - ext4_es_find_extent_range(inode, &ext4_es_is_delayed, - map->m_lblk, end, &es); - - if (!es.es_len || es.es_lblk > end) - return false; - - if (es.es_lblk > map->m_lblk) { - map->m_len = es.es_lblk - map->m_lblk; - return false; - } - - offset = map->m_lblk - es.es_lblk; - map->m_len = es.es_len - offset; - - return true; -} - static int ext4_iomap_begin_report(struct inode *inode, loff_t offset, loff_t length, unsigned int flags, struct iomap *iomap, struct iomap *srcmap) { int ret; - bool delalloc = false; struct ext4_map_blocks map; u8 blkbits = inode->i_blkbits; @@ -3498,13 +3472,8 @@ static int ext4_iomap_begin_report(struct inode *inode, loff_t offset, ret = ext4_map_blocks(NULL, inode, &map, 0); if (ret < 0) return ret; - if (ret == 0) - delalloc = ext4_iomap_is_delalloc(inode, &map); - set_iomap: ext4_set_iomap(inode, iomap, &map, offset, length, flags); - if (delalloc && iomap->type == IOMAP_HOLE) - iomap->type = IOMAP_DELALLOC; return 0; } @@ -3564,7 +3533,7 @@ static const struct address_space_operations ext4_aops = { .direct_IO = noop_direct_IO, .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, - .error_remove_page = generic_error_remove_page, + .error_remove_folio = generic_error_remove_folio, .swap_activate = ext4_iomap_swap_activate, }; @@ -3581,7 +3550,7 @@ static const struct address_space_operations ext4_journalled_aops = { .direct_IO = noop_direct_IO, .migrate_folio = buffer_migrate_folio_norefs, .is_partially_uptodate = block_is_partially_uptodate, - .error_remove_page = generic_error_remove_page, + .error_remove_folio = generic_error_remove_folio, .swap_activate = ext4_iomap_swap_activate, }; @@ -3598,7 +3567,7 @@ static const struct address_space_operations ext4_da_aops = { .direct_IO = noop_direct_IO, .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, - .error_remove_page = generic_error_remove_page, + .error_remove_folio = generic_error_remove_folio, .swap_activate = ext4_iomap_swap_activate, }; @@ -3630,6 +3599,12 @@ void ext4_set_aops(struct inode *inode) inode->i_mapping->a_ops = &ext4_aops; } +/* + * Here we can't skip an unwritten buffer even though it usually reads zero + * because it might have data in pagecache (eg, if called from ext4_zero_range, + * ext4_punch_hole, etc) which needs to be properly zeroed out. Otherwise a + * racing writeback can come later and flush the stale pagecache to disk. + */ static int __ext4_block_zero_page_range(handle_t *handle, struct address_space *mapping, loff_t from, loff_t length) { @@ -4008,12 +3983,12 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) /* If there are blocks to remove, do it */ if (stop_block > first_block) { + ext4_lblk_t hole_len = stop_block - first_block; down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); - ext4_es_remove_extent(inode, first_block, - stop_block - first_block); + ext4_es_remove_extent(inode, first_block, hole_len); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) ret = ext4_ext_remove_space(inode, first_block, @@ -4022,6 +3997,8 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) ret = ext4_ind_remove_space(handle, inode, first_block, stop_block); + ext4_es_insert_extent(inode, first_block, hole_len, ~0, + EXTENT_STATUS_HOLE); up_write(&EXT4_I(inode)->i_data_sem); } ext4_fc_track_range(handle, inode, first_block, stop_block); @@ -4163,7 +4140,7 @@ int ext4_truncate(struct inode *inode) down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) err = ext4_ext_truncate(handle, inode); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 4f931f80cb..7160a71044 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -467,7 +467,7 @@ static long swap_inode_boot_loader(struct super_block *sb, ext4_reset_inode_seed(inode); ext4_reset_inode_seed(inode_bl); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); err = ext4_mark_inode_dirty(handle, inode); if (err < 0) { @@ -819,11 +819,11 @@ int ext4_force_shutdown(struct super_block *sb, u32 flags) switch (flags) { case EXT4_GOING_FLAGS_DEFAULT: - ret = freeze_bdev(sb->s_bdev); + ret = bdev_freeze(sb->s_bdev); if (ret) return ret; set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags); - thaw_bdev(sb->s_bdev); + bdev_thaw(sb->s_bdev); break; case EXT4_GOING_FLAGS_LOGFLUSH: set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 38ec0fdb33..cf407425d0 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -564,14 +564,14 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, blocknr = ext4_group_first_block_no(sb, e4b->bd_group); blocknr += EXT4_C2B(EXT4_SB(sb), first + i); + ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, + EXT4_GROUP_INFO_BBITMAP_CORRUPT); ext4_grp_locked_error(sb, e4b->bd_group, inode ? inode->i_ino : 0, blocknr, "freeing block already freed " "(bit %u)", first + i); - ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, - EXT4_GROUP_INFO_BBITMAP_CORRUPT); } mb_clear_bit(first + i, e4b->bd_info->bb_bitmap); } @@ -677,7 +677,7 @@ do { \ } \ } while (0) -static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, +static void __mb_check_buddy(struct ext4_buddy *e4b, char *file, const char *function, int line) { struct super_block *sb = e4b->bd_sb; @@ -696,7 +696,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, void *buddy2; if (e4b->bd_info->bb_check_counter++ % 10) - return 0; + return; while (order > 1) { buddy = mb_find_buddy(e4b, order, &max); @@ -758,7 +758,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, grp = ext4_get_group_info(sb, e4b->bd_group); if (!grp) - return NULL; + return; list_for_each(cur, &grp->bb_prealloc_list) { ext4_group_t groupnr; struct ext4_prealloc_space *pa; @@ -768,7 +768,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, for (i = 0; i < pa->pa_len; i++) MB_CHECK_ASSERT(mb_test_bit(k + i, buddy)); } - return 0; } #undef MB_CHECK_ASSERT #define mb_check_buddy(e4b) __mb_check_buddy(e4b, \ @@ -871,7 +870,7 @@ mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp) * cr level needs an update. */ static void ext4_mb_choose_next_group_p2_aligned(struct ext4_allocation_context *ac, - enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups) + enum criteria *new_cr, ext4_group_t *group) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_group_info *iter; @@ -945,7 +944,7 @@ ext4_mb_find_good_group_avg_frag_lists(struct ext4_allocation_context *ac, int o * order. Updates *new_cr if cr level needs an update. */ static void ext4_mb_choose_next_group_goal_fast(struct ext4_allocation_context *ac, - enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups) + enum criteria *new_cr, ext4_group_t *group) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_group_info *grp = NULL; @@ -990,7 +989,7 @@ static void ext4_mb_choose_next_group_goal_fast(struct ext4_allocation_context * * much and fall to CR_GOAL_LEN_SLOW in that case. */ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context *ac, - enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups) + enum criteria *new_cr, ext4_group_t *group) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_group_info *grp = NULL; @@ -1125,11 +1124,11 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac, } if (*new_cr == CR_POWER2_ALIGNED) { - ext4_mb_choose_next_group_p2_aligned(ac, new_cr, group, ngroups); + ext4_mb_choose_next_group_p2_aligned(ac, new_cr, group); } else if (*new_cr == CR_GOAL_LEN_FAST) { - ext4_mb_choose_next_group_goal_fast(ac, new_cr, group, ngroups); + ext4_mb_choose_next_group_goal_fast(ac, new_cr, group); } else if (*new_cr == CR_BEST_AVAIL_LEN) { - ext4_mb_choose_next_group_best_avail(ac, new_cr, group, ngroups); + ext4_mb_choose_next_group_best_avail(ac, new_cr, group); } else { /* * TODO: For CR=2, we can arrange groups in an rb tree sorted by @@ -1474,9 +1473,8 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb, return 0; } - block++; - pnum = block / blocks_per_page; - page = find_or_create_page(inode->i_mapping, pnum, gfp); + /* blocks_per_page == 1, hence we need another page for the buddy */ + page = find_or_create_page(inode->i_mapping, block + 1, gfp); if (!page) return -ENOMEM; BUG_ON(page->mapping != inode->i_mapping); @@ -1934,12 +1932,12 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, blocknr = ext4_group_first_block_no(sb, e4b->bd_group); blocknr += EXT4_C2B(sbi, block); + ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, + EXT4_GROUP_INFO_BBITMAP_CORRUPT); ext4_grp_locked_error(sb, e4b->bd_group, inode ? inode->i_ino : 0, blocknr, "freeing already freed block (bit %u); block bitmap corrupt.", block); - ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, - EXT4_GROUP_INFO_BBITMAP_CORRUPT); return; } @@ -1981,8 +1979,7 @@ check: static int mb_find_extent(struct ext4_buddy *e4b, int block, int needed, struct ext4_free_extent *ex) { - int next = block; - int max, order; + int max, order, next; void *buddy; assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); @@ -2000,16 +1997,12 @@ static int mb_find_extent(struct ext4_buddy *e4b, int block, /* find actual order */ order = mb_find_order_for_block(e4b, block); - block = block >> order; - ex->fe_len = 1 << order; - ex->fe_start = block << order; + ex->fe_len = (1 << order) - (block & ((1 << order) - 1)); + ex->fe_start = block; ex->fe_group = e4b->bd_group; - /* calc difference from given start */ - next = next - ex->fe_start; - ex->fe_len -= next; - ex->fe_start += next; + block = block >> order; while (needed > ex->fe_len && mb_find_buddy(e4b, order, &max)) { @@ -2412,12 +2405,12 @@ void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac, k = mb_find_next_zero_bit(buddy, max, 0); if (k >= max) { + ext4_mark_group_bitmap_corrupted(ac->ac_sb, + e4b->bd_group, + EXT4_GROUP_INFO_BBITMAP_CORRUPT); ext4_grp_locked_error(ac->ac_sb, e4b->bd_group, 0, 0, "%d free clusters of order %d. But found 0", grp->bb_counters[i], i); - ext4_mark_group_bitmap_corrupted(ac->ac_sb, - e4b->bd_group, - EXT4_GROUP_INFO_BBITMAP_CORRUPT); break; } ac->ac_found++; @@ -2468,12 +2461,12 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, * free blocks even though group info says we * have free blocks */ + ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, + EXT4_GROUP_INFO_BBITMAP_CORRUPT); ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, "%d free clusters as per " "group info. But bitmap says 0", free); - ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, - EXT4_GROUP_INFO_BBITMAP_CORRUPT); break; } @@ -2499,12 +2492,12 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, if (WARN_ON(ex.fe_len <= 0)) break; if (free < ex.fe_len) { + ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, + EXT4_GROUP_INFO_BBITMAP_CORRUPT); ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, "%d free clusters as per " "group info. But got %d blocks", free, ex.fe_len); - ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, - EXT4_GROUP_INFO_BBITMAP_CORRUPT); /* * The number of free blocks differs. This mostly * indicate that the bitmap is corrupt. So exit @@ -2921,14 +2914,19 @@ repeat: ac->ac_groups_scanned++; if (cr == CR_POWER2_ALIGNED) ext4_mb_simple_scan_group(ac, &e4b); - else if ((cr == CR_GOAL_LEN_FAST || - cr == CR_BEST_AVAIL_LEN) && - sbi->s_stripe && - !(ac->ac_g_ex.fe_len % - EXT4_B2C(sbi, sbi->s_stripe))) - ext4_mb_scan_aligned(ac, &e4b); - else - ext4_mb_complex_scan_group(ac, &e4b); + else { + bool is_stripe_aligned = sbi->s_stripe && + !(ac->ac_g_ex.fe_len % + EXT4_B2C(sbi, sbi->s_stripe)); + + if ((cr == CR_GOAL_LEN_FAST || + cr == CR_BEST_AVAIL_LEN) && + is_stripe_aligned) + ext4_mb_scan_aligned(ac, &e4b); + + if (ac->ac_status == AC_STATUS_CONTINUE) + ext4_mb_complex_scan_group(ac, &e4b); + } ext4_unlock_group(sb, group); ext4_mb_unload_buddy(&e4b); @@ -3061,7 +3059,10 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) for (i = 0; i <= 13; i++) seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ? sg.info.bb_counters[i] : 0); - seq_puts(seq, " ]\n"); + seq_puts(seq, " ]"); + if (EXT4_MB_GRP_BBITMAP_CORRUPT(&sg.info)) + seq_puts(seq, " Block bitmap corrupted!"); + seq_puts(seq, "\n"); return 0; } @@ -3752,7 +3753,7 @@ static int ext4_mb_cleanup_pa(struct ext4_group_info *grp) return count; } -int ext4_mb_release(struct super_block *sb) +void ext4_mb_release(struct super_block *sb) { ext4_group_t ngroups = ext4_get_groups_count(sb); ext4_group_t i; @@ -3828,8 +3829,6 @@ int ext4_mb_release(struct super_block *sb) } free_percpu(sbi->s_locality_groups); - - return 0; } static inline int ext4_issue_discard(struct super_block *sb, @@ -5316,7 +5315,7 @@ static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac) * the caller MUST hold group/inode locks. * TODO: optimize the case when there are no in-core structures yet */ -static noinline_for_stack int +static noinline_for_stack void ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, struct ext4_prealloc_space *pa) { @@ -5366,11 +5365,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, */ } atomic_add(free, &sbi->s_mb_discarded); - - return 0; } -static noinline_for_stack int +static noinline_for_stack void ext4_mb_release_group_pa(struct ext4_buddy *e4b, struct ext4_prealloc_space *pa) { @@ -5384,13 +5381,11 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, if (unlikely(group != e4b->bd_group && pa->pa_len != 0)) { ext4_warning(sb, "bad group: expected %u, group %u, pa_start %llu", e4b->bd_group, group, pa->pa_pstart); - return 0; + return; } mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len); - - return 0; } /* @@ -5511,7 +5506,7 @@ out_dbg: * * FIXME!! Make sure it is valid at all the call sites */ -void ext4_discard_preallocations(struct inode *inode, unsigned int needed) +void ext4_discard_preallocations(struct inode *inode) { struct ext4_inode_info *ei = EXT4_I(inode); struct super_block *sb = inode->i_sb; @@ -5523,9 +5518,8 @@ void ext4_discard_preallocations(struct inode *inode, unsigned int needed) struct rb_node *iter; int err; - if (!S_ISREG(inode->i_mode)) { + if (!S_ISREG(inode->i_mode)) return; - } if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY) return; @@ -5533,15 +5527,12 @@ void ext4_discard_preallocations(struct inode *inode, unsigned int needed) mb_debug(sb, "discard preallocation for inode %lu\n", inode->i_ino); trace_ext4_discard_preallocations(inode, - atomic_read(&ei->i_prealloc_active), needed); - - if (needed == 0) - needed = UINT_MAX; + atomic_read(&ei->i_prealloc_active)); repeat: /* first, collect all pa's in the inode */ write_lock(&ei->i_prealloc_lock); - for (iter = rb_first(&ei->i_prealloc_node); iter && needed; + for (iter = rb_first(&ei->i_prealloc_node); iter; iter = rb_next(iter)) { pa = rb_entry(iter, struct ext4_prealloc_space, pa_node.inode_node); @@ -5565,7 +5556,6 @@ repeat: spin_unlock(&pa->pa_lock); rb_erase(&pa->pa_node.inode_node, &ei->i_prealloc_node); list_add(&pa->u.pa_tmp_list, &list); - needed--; continue; } @@ -5975,7 +5965,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) /* * release all resource we used in allocation */ -static int ext4_mb_release_context(struct ext4_allocation_context *ac) +static void ext4_mb_release_context(struct ext4_allocation_context *ac) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_prealloc_space *pa = ac->ac_pa; @@ -6012,7 +6002,6 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) mutex_unlock(&ac->ac_lg->lg_mutex); ext4_mb_collect_stats(ac); - return 0; } static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) @@ -6793,6 +6782,9 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) bool set_trimmed = false; void *bitmap; + if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) + return 0; + last = ext4_last_grp_cluster(sb, e4b->bd_group); bitmap = e4b->bd_bitmap; if (start == 0 && max >= last) diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index d7aeb5da7d..56938532b4 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -192,7 +192,6 @@ struct ext4_allocation_context { */ ext4_grpblk_t ac_orig_goal_len; - __u32 ac_groups_considered; __u32 ac_flags; /* allocation hints */ __u16 ac_groups_scanned; __u16 ac_groups_linear_remaining; diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 391efa6d4c..7cd4afa4de 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -686,8 +686,8 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, out: if (*moved_len) { - ext4_discard_preallocations(orig_inode, 0); - ext4_discard_preallocations(donor_inode, 0); + ext4_discard_preallocations(orig_inode); + ext4_discard_preallocations(donor_inode); } ext4_free_ext_path(path); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index d252935f9c..05b647e6bc 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2388,8 +2388,6 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, sb = dir->i_sb; blocksize = sb->s_blocksize; - if (!dentry->d_name.len) - return -EINVAL; if (fscrypt_is_nokey_name(dentry)) return -ENOKEY; @@ -3591,10 +3589,14 @@ struct ext4_renament { int dir_inlined; }; -static int ext4_rename_dir_prepare(handle_t *handle, struct ext4_renament *ent) +static int ext4_rename_dir_prepare(handle_t *handle, struct ext4_renament *ent, bool is_cross) { int retval; + ent->is_dir = true; + if (!is_cross) + return 0; + ent->dir_bh = ext4_get_first_dir_block(handle, ent->inode, &retval, &ent->parent_de, &ent->dir_inlined); @@ -3612,6 +3614,9 @@ static int ext4_rename_dir_finish(handle_t *handle, struct ext4_renament *ent, { int retval; + if (!ent->dir_bh) + return 0; + ent->parent_de->inode = cpu_to_le32(dir_ino); BUFFER_TRACE(ent->dir_bh, "call ext4_handle_dirty_metadata"); if (!ent->dir_inlined) { @@ -3900,7 +3905,7 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir)) goto end_rename; } - retval = ext4_rename_dir_prepare(handle, &old); + retval = ext4_rename_dir_prepare(handle, &old, new.dir != old.dir); if (retval) goto end_rename; } @@ -3964,7 +3969,7 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir, } inode_set_mtime_to_ts(old.dir, inode_set_ctime_current(old.dir)); ext4_update_dx_flag(old.dir); - if (old.dir_bh) { + if (old.is_dir) { retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino); if (retval) goto end_rename; @@ -3987,7 +3992,7 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (unlikely(retval)) goto end_rename; - if (S_ISDIR(old.inode->i_mode)) { + if (old.is_dir) { /* * We disable fast commits here that's because the * replay code is not yet capable of changing dot dot @@ -4114,14 +4119,12 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, ext4_handle_sync(handle); if (S_ISDIR(old.inode->i_mode)) { - old.is_dir = true; - retval = ext4_rename_dir_prepare(handle, &old); + retval = ext4_rename_dir_prepare(handle, &old, new.dir != old.dir); if (retval) goto end_rename; } if (S_ISDIR(new.inode->i_mode)) { - new.is_dir = true; - retval = ext4_rename_dir_prepare(handle, &new); + retval = ext4_rename_dir_prepare(handle, &new, new.dir != old.dir); if (retval) goto end_rename; } diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index dfdd7e5cf0..312bc68133 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -444,7 +444,7 @@ int ext4_bio_write_folio(struct ext4_io_submit *io, struct folio *folio, folio_clear_error(folio); /* - * Comments copied from block_write_full_page: + * Comments copied from block_write_full_folio: * * The folio straddles i_size. It must be zeroed out on each and every * writepage invocation because it may be mmapped. "A file is mapped diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 9a39596d2a..3c0d12382e 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -235,8 +235,10 @@ struct ext4_new_flex_group_data { * * Returns NULL on failure otherwise address of the allocated structure. */ -static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned int flexbg_size) +static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned int flexbg_size, + ext4_group_t o_group, ext4_group_t n_group) { + ext4_group_t last_group; struct ext4_new_flex_group_data *flex_gd; flex_gd = kmalloc(sizeof(*flex_gd), GFP_NOFS); @@ -248,6 +250,14 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned int flexbg_size) else flex_gd->resize_bg = flexbg_size; + /* Avoid allocating large 'groups' array if not needed */ + last_group = o_group | (flex_gd->resize_bg - 1); + if (n_group <= last_group) + flex_gd->resize_bg = 1 << fls(n_group - o_group + 1); + else if (n_group - last_group < flex_gd->resize_bg) + flex_gd->resize_bg = 1 << max(fls(last_group - o_group + 1), + fls(n_group - last_group)); + flex_gd->groups = kmalloc_array(flex_gd->resize_bg, sizeof(struct ext4_new_group_data), GFP_NOFS); @@ -2132,7 +2142,7 @@ retry: if (err) goto out; - flex_gd = alloc_flex_gd(flexbg_size); + flex_gd = alloc_flex_gd(flexbg_size, o_group, n_group); if (flex_gd == NULL) { err = -ENOMEM; goto out; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3f1fa5d182..4866657c6d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1525,7 +1525,7 @@ void ext4_clear_inode(struct inode *inode) ext4_fc_del(inode); invalidate_inode_buffers(inode); clear_inode(inode); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); dquot_drop(inode); if (EXT4_I(inode)->jinode) { @@ -2793,15 +2793,6 @@ static int ext4_check_opt_consistency(struct fs_context *fc, return -EINVAL; } - if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DIOREAD_NOLOCK)) { - int blocksize = - BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); - if (blocksize < PAGE_SIZE) - ext4_msg(NULL, KERN_WARNING, "Warning: mounting with an " - "experimental mount option 'dioread_nolock' " - "for blocksize < PAGE_SIZE"); - } - err = ext4_check_test_dummy_encryption(fc, sb); if (err) return err; @@ -4410,7 +4401,7 @@ static void ext4_set_def_opts(struct super_block *sb, ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) set_opt(sb, DELALLOC); - if (sb->s_blocksize == PAGE_SIZE) + if (sb->s_blocksize <= PAGE_SIZE) set_opt(sb, DIOREAD_NOLOCK); } @@ -5864,11 +5855,9 @@ static struct bdev_handle *ext4_get_journal_blkdev(struct super_block *sb, struct ext4_super_block *es; int errno; - /* see get_tree_bdev why this is needed and safe */ - up_write(&sb->s_umount); - bdev_handle = bdev_open_by_dev(j_dev, BLK_OPEN_READ | BLK_OPEN_WRITE, - sb, &fs_holder_ops); - down_write(&sb->s_umount); + bdev_handle = bdev_open_by_dev(j_dev, + BLK_OPEN_READ | BLK_OPEN_WRITE | BLK_OPEN_RESTRICT_WRITES, + sb, &fs_holder_ops); if (IS_ERR(bdev_handle)) { ext4_msg(sb, KERN_ERR, "failed to open journal device unknown-block(%u,%u) %ld", @@ -6875,6 +6864,10 @@ static int ext4_write_dquot(struct dquot *dquot) if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_commit(dquot); + if (ret < 0) + ext4_error_err(dquot->dq_sb, -ret, + "Failed to commit dquot type %d", + dquot->dq_id.type); err = ext4_journal_stop(handle); if (!ret) ret = err; @@ -6891,6 +6884,10 @@ static int ext4_acquire_dquot(struct dquot *dquot) if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_acquire(dquot); + if (ret < 0) + ext4_error_err(dquot->dq_sb, -ret, + "Failed to acquire dquot type %d", + dquot->dq_id.type); err = ext4_journal_stop(handle); if (!ret) ret = err; @@ -6910,6 +6907,10 @@ static int ext4_release_dquot(struct dquot *dquot) return PTR_ERR(handle); } ret = dquot_release(dquot); + if (ret < 0) + ext4_error_err(dquot->dq_sb, -ret, + "Failed to release dquot type %d", + dquot->dq_id.type); err = ext4_journal_stop(handle); if (!ret) ret = err; diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index 75bf1f8884..645240cc02 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -92,10 +92,12 @@ static const char *ext4_get_link(struct dentry *dentry, struct inode *inode, if (!dentry) { bh = ext4_getblk(NULL, inode, 0, EXT4_GET_BLOCKS_CACHED_NOWAIT); - if (IS_ERR(bh)) - return ERR_CAST(bh); - if (!bh || !ext4_buffer_uptodate(bh)) + if (IS_ERR(bh) || !bh) return ERR_PTR(-ECHILD); + if (!ext4_buffer_uptodate(bh)) { + brelse(bh); + return ERR_PTR(-ECHILD); + } } else { bh = ext4_bread(NULL, inode, 0, 0); if (IS_ERR(bh)) |