/*
 * readahead.c -- Prefetch filesystem metadata to speed up fsck.
 *
 * Copyright (C) 2014 Oracle.
 *
 * %Begin-Header%
 * This file may be redistributed under the terms of the GNU Library
 * General Public License, version 2.
 * %End-Header%
 */

#include "config.h"
#include <string.h>

#include "e2fsck.h"

#undef DEBUG

#ifdef DEBUG
# define dbg_printf(f, a...)  do {printf(f, ## a); fflush(stdout); } while (0)
#else
# define dbg_printf(f, a...)
#endif

/* State carried between calls of the dblist iteration callback. */
struct read_dblist {
	errcode_t err;		/* first error returned by a readahead request */
	blk64_t run_start;	/* first block of the run being accumulated */
	blk64_t run_len;	/* length of that run; 0 means no run is open */
	int flags;		/* E2FSCK_RA_DBLIST_* flags from the caller */
};

/*
 * Callback for ext2fs_dblist_iterate3().  Consecutive dblist entries whose
 * blocks are physically contiguous are merged into one run; when an entry
 * does not continue the open run, the run is submitted for readahead and a
 * new run is started at this entry.
 *
 * Returns DBLIST_ABORT once a readahead request has failed, 0 otherwise.
 * The final run is left open in *priv_data for the caller to submit.
 */
static int readahead_dir_block(ext2_filsys fs, struct ext2_db_entry2 *db,
			       void *priv_data)
{
	struct read_dblist *pr = priv_data;
	/* With IGNORE_BLOCKCNT every entry counts as exactly one block. */
	e2_blkcnt_t count = (pr->flags & E2FSCK_RA_DBLIST_IGNORE_BLOCKCNT ?
			     1 : db->blockcnt);

	if (!pr->run_len || db->blk != pr->run_start + pr->run_len) {
		if (pr->run_len) {
			pr->err = io_channel_cache_readahead(fs->io,
							     pr->run_start,
							     pr->run_len);
			dbg_printf("readahead start=%llu len=%llu err=%d\n",
				   pr->run_start, pr->run_len,
				   (int)pr->err);
		}
		pr->run_start = db->blk;
		pr->run_len = 0;
	}
	pr->run_len += count;

	return pr->err ? DBLIST_ABORT : 0;
}

/*
 * Issue readahead for the blocks of @count entries of @dblist, beginning at
 * entry @start.
 *
 * @flags may only contain bits from E2FSCK_RA_DBLIST_ALL_FLAGS; anything
 * else yields EXT2_ET_INVALID_ARGUMENT.  Returns 0 on success, otherwise
 * the first error seen from a readahead request or from the iteration.
 */
errcode_t e2fsck_readahead_dblist(ext2_filsys fs, int flags,
				  ext2_dblist dblist,
				  unsigned long long start,
				  unsigned long long count)
{
	errcode_t err;
	struct read_dblist pr;

	dbg_printf("%s: flags=0x%x\n", __func__, flags);
	if (flags & ~E2FSCK_RA_DBLIST_ALL_FLAGS)
		return EXT2_ET_INVALID_ARGUMENT;

	memset(&pr, 0, sizeof(pr));
	pr.flags = flags;
	err = ext2fs_dblist_iterate3(dblist, readahead_dir_block, start,
				     count, &pr);
	/* An error recorded by the callback wins over the iterator's own. */
	if (pr.err)
		return pr.err;
	if (err)
		return err;

	/* The callback never submits the last run it was building. */
	if (pr.run_len)
		err = io_channel_cache_readahead(fs->io, pr.run_start,
						 pr.run_len);

	return err;
}

/*
 * Walk @ra_map from block 1 to the last block of the filesystem and issue
 * one readahead request per maximal run of set bits.
 *
 * Returns 0 on success (including an empty map), otherwise the first error
 * from the bitmap search or from a readahead request.
 */
static errcode_t e2fsck_readahead_bitmap(ext2_filsys fs,
					 ext2fs_block_bitmap ra_map)
{
	blk64_t start, end, out;
	errcode_t err;

	start = 1;
	end = ext2fs_blocks_count(fs->super) - 1;

	err = ext2fs_find_first_set_block_bitmap2(ra_map, start, end, &out);
	while (err == 0) {
		start = out;
		err = ext2fs_find_first_zero_block_bitmap2(ra_map, start, end,
							   &out);
		if (err == ENOENT) {
			/*
			 * No clear bit between start and end, so the run
			 * covers block end as well: read end - start + 1
			 * blocks and stop, there is nothing left to scan.
			 */
			err = io_channel_cache_readahead(fs->io, start,
							 end - start + 1);
			break;
		}
		if (err)
			break;

		/* [start, out) is a run of set bits. */
		err = io_channel_cache_readahead(fs->io, start, out - start);
		if (err)
			break;
		start = out;
		err = ext2fs_find_first_set_block_bitmap2(ra_map, start, end,
							  &out);
	}

	/* ENOENT from the search only means there are no more runs. */
	if (err == ENOENT)
		err = 0;

	return err;
}

/*
 * Mark @num blocks starting at @blk in @map, but only if the range passes
 * the bounds check below.  Try not to spew bitmap range errors for
 * readahead: an out-of-range request is rejected with
 * EXT2_ET_INVALID_ARGUMENT instead of being handed to the bitmap code.
 */
static errcode_t mark_bmap_range(ext2fs_block_bitmap map,
				 blk64_t blk, unsigned int num)
{
	blk64_t first = ext2fs_get_generic_bmap_start(map);
	blk64_t last = ext2fs_get_generic_bmap_end(map);

	/*
	 * Same acceptance rule as "blk >= first && blk + num <= last", but
	 * written so that a huge blk cannot wrap the addition and slip past
	 * the check.
	 */
	if (blk < first || blk > last || num > last - blk)
		return EXT2_ET_INVALID_ARGUMENT;

	ext2fs_mark_block_bitmap_range2(map, blk, num);
	return 0;
}

/*
 * Mark the single block @blk in @map if it lies between the bitmap's start
 * and end (both inclusive); otherwise return EXT2_ET_INVALID_ARGUMENT.
 */
static errcode_t mark_bmap(ext2fs_block_bitmap map, blk64_t blk)
{
	if (blk < ext2fs_get_generic_bmap_start(map) ||
	    blk > ext2fs_get_generic_bmap_end(map))
		return EXT2_ET_INVALID_ARGUMENT;

	ext2fs_mark_block_bitmap2(map, blk);
	return 0;
}

/*
 * Read ahead the metadata selected by @flags for @ngroups block groups
 * beginning at group @start.  Groups past the end of the filesystem are
 * silently ignored.
 *
 * All wanted blocks are first collected in a temporary block bitmap, which
 * is then turned into readahead requests, one per contiguous run.
 *
 * Returns EXT2_ET_INVALID_ARGUMENT if @flags has bits outside
 * E2FSCK_READA_ALL_FLAGS, 0 if @flags is 0 or everything succeeded, and
 * otherwise the first error encountered.
 */
errcode_t e2fsck_readahead(ext2_filsys fs, int flags, dgrp_t start,
			   dgrp_t ngroups)
{
	blk64_t		super, old_gdt, new_gdt;
	blk_t		blocks;
	dgrp_t		i;
	ext2fs_block_bitmap		ra_map = NULL;
	dgrp_t		end;
	errcode_t	err = 0;

	dbg_printf("%s: flags=0x%x start=%d groups=%d\n", __func__, flags,
		   start, ngroups);
	if (flags & ~E2FSCK_READA_ALL_FLAGS)
		return EXT2_ET_INVALID_ARGUMENT;

	/*
	 * end = min(start + ngroups, group_desc_count), computed without
	 * the addition so that a large ngroups cannot wrap dgrp_t and
	 * leave end below start.
	 */
	end = fs->group_desc_count;
	if (start < end && ngroups < end - start)
		end = start + ngroups;

	if (flags == 0)
		return 0;

	err = ext2fs_allocate_block_bitmap(fs, "readahead bitmap",
					   &ra_map);
	if (err)
		return err;

	for (i = start; i < end; i++) {
		err = ext2fs_super_and_bgd_loc2(fs, i, &super, &old_gdt,
						&new_gdt, &blocks);
		if (err)
			break;

		if (flags & E2FSCK_READA_SUPER) {
			err = mark_bmap(ra_map, super);
			if (err)
				break;
		}

		if (flags & E2FSCK_READA_GDT) {
			err = mark_bmap_range(ra_map,
					      old_gdt ? old_gdt : new_gdt,
					      blocks);
			if (err)
				break;
		}

		/*
		 * From here on "super" and "blocks" are reused as scratch
		 * variables for the location and length of other metadata.
		 */

		/* Skip bitmaps flagged uninit or with nothing allocated. */
		if ((flags & E2FSCK_READA_BBITMAP) &&
		    !ext2fs_bg_flags_test(fs, i, EXT2_BG_BLOCK_UNINIT) &&
		    ext2fs_bg_free_blocks_count(fs, i) <
				fs->super->s_blocks_per_group) {
			super = ext2fs_block_bitmap_loc(fs, i);
			err = mark_bmap(ra_map, super);
			if (err)
				break;
		}

		if ((flags & E2FSCK_READA_IBITMAP) &&
		    !ext2fs_bg_flags_test(fs, i, EXT2_BG_INODE_UNINIT) &&
		    ext2fs_bg_free_inodes_count(fs, i) <
				fs->super->s_inodes_per_group) {
			super = ext2fs_inode_bitmap_loc(fs, i);
			err = mark_bmap(ra_map, super);
			if (err)
				break;
		}

		if ((flags & E2FSCK_READA_ITABLE) &&
		    ext2fs_bg_free_inodes_count(fs, i) <
				fs->super->s_inodes_per_group) {
			super = ext2fs_inode_table_loc(fs, i);
			/* Leave out the blocks covered by unused inodes. */
			blocks = fs->inode_blocks_per_group -
				 (ext2fs_bg_itable_unused(fs, i) *
				  EXT2_INODE_SIZE(fs->super) / fs->blocksize);
			err = mark_bmap_range(ra_map, super, blocks);
			if (err)
				break;
		}
	}

	if (!err)
		err = e2fsck_readahead_bitmap(fs, ra_map);

	ext2fs_free_block_bitmap(ra_map);
	return err;
}

/*
 * Probe whether the I/O channel of @fs supports readahead by requesting a
 * one-block readahead at block 0.  Returns 0 only when that request fails
 * with EXT2_ET_OP_NOT_SUPPORTED; any other outcome returns nonzero.
 */
int e2fsck_can_readahead(ext2_filsys fs)
{
	errcode_t err;

	err = io_channel_cache_readahead(fs->io, 0, 1);
	dbg_printf("%s: supp=%d\n", __func__, err != EXT2_ET_OP_NOT_SUPPORTED);
	return err != EXT2_ET_OP_NOT_SUPPORTED;
}

/*
 * Pick a default readahead size for @fs.  Returns the guess divided by
 * 1024 (presumably KiB, as the guess is built from fs->blocksize), or 0
 * to disable readahead when the guess is too large for this machine.
 */
unsigned long long e2fsck_guess_readahead(ext2_filsys fs)
{
	unsigned long long guess;

	/*
	 * The optimal readahead sizes were experimentally determined by
	 * djwong in August 2014.  Setting the RA size to two block groups'
	 * worth of inode table blocks seems to yield the largest reductions
	 * in e2fsck runtime.
	 */
	guess = 2ULL * fs->blocksize * fs->inode_blocks_per_group;

	/* Disable RA if it'd use more than 1/50th of RAM. */
	if (get_memory_size() > (guess * 50))
		return guess / 1024;

	return 0;
}