diff options
Diffstat (limited to '')
-rw-r--r-- | fs/xfs/scrub/dabtree.c | 596 |
1 files changed, 596 insertions, 0 deletions
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c new file mode 100644 index 000000000..84fe3d33d --- /dev/null +++ b/fs/xfs/scrub/dabtree.c @@ -0,0 +1,596 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <darrick.wong@oracle.com> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_inode.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" +#include "xfs_attr_leaf.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/trace.h" +#include "scrub/dabtree.h" + +/* Directory/Attribute Btree */ + +/* + * Check for da btree operation errors. See the section about handling + * operational errors in common.c. Returns true only when *error is zero. Any nonzero code is traced and yields false: -EFSBADCRC and -EFSCORRUPTED also set XFS_SCRUB_OFLAG_CORRUPT and clear *error to 0, while -EDEADLOCK and all other codes are left in *error for the caller. + */ +bool +xchk_da_process_error( + struct xchk_da_btree *ds, + int level, + int *error) +{ + struct xfs_scrub *sc = ds->sc; + + if (*error == 0) + return true; + + switch (*error) { + case -EDEADLOCK: + /* Used to restart an op with deadlock avoidance. */ + trace_xchk_deadlock_retry(sc->ip, sc->sm, *error); + break; + case -EFSBADCRC: + case -EFSCORRUPTED: + /* Note the badness but don't abort. */ + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + *error = 0; + fallthrough; /* corruption is still traced below, with *error already cleared */ + default: + trace_xchk_file_op_error(sc, ds->dargs.whichfork, + xfs_dir2_da_to_db(ds->dargs.geo, + ds->state->path.blk[level].blkno), + *error, __return_address); + break; + } + return false; +} + +/* + * Check for da btree corruption. See the section about handling + * operational errors in common.c. 
+ */ +void +xchk_da_set_corrupt( + struct xchk_da_btree *ds, + int level) +{ + struct xfs_scrub *sc = ds->sc; + + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + + /* Trace the finding against this level's path block, its blkno converted with xfs_dir2_da_to_db. */ trace_xchk_fblock_error(sc, ds->dargs.whichfork, + xfs_dir2_da_to_db(ds->dargs.geo, + ds->state->path.blk[level].blkno), + __return_address); +} + +/* Return the entry at the path's current index in the da node block held at this level; that block's magic must already be XFS_DA_NODE_MAGIC. */ static struct xfs_da_node_entry * +xchk_da_btree_node_entry( + struct xchk_da_btree *ds, + int level) +{ + struct xfs_da_state_blk *blk = &ds->state->path.blk[level]; + struct xfs_da3_icnode_hdr hdr; + + ASSERT(blk->magic == XFS_DA_NODE_MAGIC); + + xfs_da3_node_hdr_from_disk(ds->sc->mp, &hdr, blk->bp->b_addr); + return hdr.btree + blk->index; +} + +/* Scrub a da btree hash (key). Flags corruption if the hash is smaller than the previous one recorded for this level or, below the root, larger than the parent node entry's hashval. Always returns 0. */ +int +xchk_da_btree_hash( + struct xchk_da_btree *ds, + int level, + __be32 *hashp) +{ + struct xfs_da_node_entry *entry; + xfs_dahash_t hash; + xfs_dahash_t parent_hash; + + /* Is this hash in order? */ + hash = be32_to_cpu(*hashp); + if (hash < ds->hashes[level]) + xchk_da_set_corrupt(ds, level); + ds->hashes[level] = hash; /* remembered for the next call at this level, even if just flagged */ + + if (level == 0) + return 0; + + /* Is this hash no larger than the parent hash? */ + entry = xchk_da_btree_node_entry(ds, level - 1); + parent_hash = be32_to_cpu(entry->hashval); + if (parent_hash < hash) + xchk_da_set_corrupt(ds, level); + + return 0; +} + +/* + * Check a da btree pointer. Returns true if it's ok to use this + * pointer. A pointer below ds->lowest, or at/above ds->highest when highest is nonzero, is flagged corrupt and rejected. + */ +STATIC bool +xchk_da_btree_ptr_ok( + struct xchk_da_btree *ds, + int level, + xfs_dablk_t blkno) +{ + if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) { + xchk_da_set_corrupt(ds, level); + return false; + } + + return true; +} + +/* + * The da btree scrubber can handle leaf1 blocks as a degenerate + * form of leafn blocks. Since the regular da code doesn't handle + * leaf1, we must multiplex the verifiers. 
+ */ +static void +xchk_da_btree_read_verify( + struct xfs_buf *bp) +{ + struct xfs_da_blkinfo *info = bp->b_addr; + + /* Dispatch on the on-disk magic; the chosen ops are also installed as bp->b_ops. */ switch (be16_to_cpu(info->magic)) { + case XFS_DIR2_LEAF1_MAGIC: + case XFS_DIR3_LEAF1_MAGIC: + bp->b_ops = &xfs_dir3_leaf1_buf_ops; + bp->b_ops->verify_read(bp); + return; + default: + /* + * xfs_da3_node_buf_ops already know how to handle + * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks. + */ + bp->b_ops = &xfs_da3_node_buf_ops; + bp->b_ops->verify_read(bp); + return; + } +} +/* Write-time counterpart of the read verifier: same magic dispatch, calling verify_write on the selected ops. */ static void +xchk_da_btree_write_verify( + struct xfs_buf *bp) +{ + struct xfs_da_blkinfo *info = bp->b_addr; + + switch (be16_to_cpu(info->magic)) { + case XFS_DIR2_LEAF1_MAGIC: + case XFS_DIR3_LEAF1_MAGIC: + bp->b_ops = &xfs_dir3_leaf1_buf_ops; + bp->b_ops->verify_write(bp); + return; + default: + /* + * xfs_da3_node_buf_ops already know how to handle + * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks. + */ + bp->b_ops = &xfs_da3_node_buf_ops; + bp->b_ops->verify_write(bp); + return; + } +} +/* Structure-check counterpart: same magic dispatch, returning whatever the selected ops' verify_struct returns. */ static void * +xchk_da_btree_verify( + struct xfs_buf *bp) +{ + struct xfs_da_blkinfo *info = bp->b_addr; + + switch (be16_to_cpu(info->magic)) { + case XFS_DIR2_LEAF1_MAGIC: + case XFS_DIR3_LEAF1_MAGIC: + bp->b_ops = &xfs_dir3_leaf1_buf_ops; + return bp->b_ops->verify_struct(bp); + default: + bp->b_ops = &xfs_da3_node_buf_ops; + return bp->b_ops->verify_struct(bp); + } +} + +/* Multiplexing buffer ops built from the three verifiers above; xchk_da_btree_block passes these to xfs_da_read_buf. */ static const struct xfs_buf_ops xchk_da_btree_buf_ops = { + .name = "xchk_da_btree", + .verify_read = xchk_da_btree_read_verify, + .verify_write = xchk_da_btree_write_verify, + .verify_struct = xchk_da_btree_verify, +}; + +/* Check a block's sibling. 
*/ +STATIC int +xchk_da_btree_block_check_sibling( + struct xchk_da_btree *ds, + int level, + int direction, + xfs_dablk_t sibling) +{ + struct xfs_da_state_path *path = &ds->state->path; + struct xfs_da_state_path *altpath = &ds->state->altpath; + int retval; + int plevel; + int error; + + /* Work on a copy of the path: every shift below is applied to altpath, never to path. */ memcpy(altpath, path, sizeof(ds->state->altpath)); + + /* + * If the pointer is null, we shouldn't be able to move the upper + * level pointer anywhere. + */ + if (sibling == 0) { + error = xfs_da3_path_shift(ds->state, altpath, direction, + false, &retval); + if (error == 0 && retval == 0) + xchk_da_set_corrupt(ds, level); + error = 0; /* any shift error is discarded on this path */ + goto out; + } + + /* Move the alternate cursor one block in the direction given. */ + error = xfs_da3_path_shift(ds->state, altpath, direction, false, + &retval); + if (!xchk_da_process_error(ds, level, &error)) + goto out; + /* NOTE(review): a nonzero retval presumably means the shift could not reach a neighbouring block - confirm against xfs_da3_path_shift. */ if (retval) { + xchk_da_set_corrupt(ds, level); + goto out; + } + if (altpath->blk[level].bp) + xchk_buffer_recheck(ds->sc, altpath->blk[level].bp); + + /* Compare upper level pointer to sibling pointer. */ + if (altpath->blk[level].blkno != sibling) + xchk_da_set_corrupt(ds, level); + +out: + /* Free all buffers in the altpath that aren't referenced from path. */ + for (plevel = 0; plevel < altpath->active; plevel++) { + if (altpath->blk[plevel].bp == NULL || + (plevel < path->active && + altpath->blk[plevel].bp == path->blk[plevel].bp)) + continue; + + xfs_trans_brelse(ds->dargs.trans, altpath->blk[plevel].bp); + altpath->blk[plevel].bp = NULL; + } + + return error; +} + +/* Check a block's sibling pointers. Level 0 must have none; deeper blocks have back checked with direction 0 and forw with direction 1. */ +STATIC int +xchk_da_btree_block_check_siblings( + struct xchk_da_btree *ds, + int level, + struct xfs_da_blkinfo *hdr) +{ + xfs_dablk_t forw; + xfs_dablk_t back; + int error = 0; + + forw = be32_to_cpu(hdr->forw); + back = be32_to_cpu(hdr->back); + + /* Top level blocks should not have sibling pointers. 
*/ + if (level == 0) { + if (forw != 0 || back != 0) + xchk_da_set_corrupt(ds, level); + return 0; + } + + /* + * Check back (left) and forw (right) pointers. These functions + * absorb error codes for us. + */ + error = xchk_da_btree_block_check_sibling(ds, level, 0, back); + if (error) + goto out; + error = xchk_da_btree_block_check_sibling(ds, level, 1, forw); + +out: + /* Zero the alternate path; check_sibling has already released the buffers that were not shared with path. */ memset(&ds->state->altpath, 0, sizeof(ds->state->altpath)); + return error; +} + +/* Load a dir/attribute block from a btree into path.blk[level], then check its header padding, owner, siblings, magic and (below the root) its last hashval against the parent entry. Corruption is reported through the scrub flags, not the return value; a nonzero return comes from the read or the sibling check. Paths that reject the block reset blk->blkno to 0, and out_freebp also releases the buffer. */ +STATIC int +xchk_da_btree_block( + struct xchk_da_btree *ds, + int level, + xfs_dablk_t blkno) +{ + struct xfs_da_state_blk *blk; + struct xfs_da_intnode *node; + struct xfs_da_node_entry *btree; + struct xfs_da3_blkinfo *hdr3; + struct xfs_da_args *dargs = &ds->dargs; + struct xfs_inode *ip = ds->dargs.dp; + xfs_ino_t owner; + int *pmaxrecs; + struct xfs_da3_icnode_hdr nodehdr; + int error = 0; + + blk = &ds->state->path.blk[level]; + ds->state->path.active = level + 1; /* the path now extends down to this level */ + + /* Release old block. */ + if (blk->bp) { + xfs_trans_brelse(dargs->trans, blk->bp); + blk->bp = NULL; + } + + /* Check the pointer. */ + blk->blkno = blkno; + if (!xchk_da_btree_ptr_ok(ds, level, blkno)) + goto out_nobuf; + + /* Read the buffer. NOTE(review): XFS_DABUF_MAP_HOLE_OK presumably lets a hole come back as success with a NULL bp, which the NULL checks below rely on - confirm. */ + error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno, + XFS_DABUF_MAP_HOLE_OK, &blk->bp, dargs->whichfork, + &xchk_da_btree_buf_ops); + if (!xchk_da_process_error(ds, level, &error)) + goto out_nobuf; + if (blk->bp) + xchk_buffer_recheck(ds->sc, blk->bp); + + /* + * We didn't find a dir btree root block, which means that + * there's no LEAF1/LEAFN tree (at least not where it's supposed + * to be), so jump out now. + */ + if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 && + blk->bp == NULL) + goto out_nobuf; + + /* It's /not/ ok for attr trees not to have a da btree. 
*/ + if (blk->bp == NULL) { + xchk_da_set_corrupt(ds, level); + goto out_nobuf; + } + + hdr3 = blk->bp->b_addr; + blk->magic = be16_to_cpu(hdr3->hdr.magic); + pmaxrecs = &ds->maxrecs[level]; /* each case below stores this block's record count here */ + + /* We only started zeroing the header on v5 filesystems. */ + if (xfs_has_crc(ds->sc->mp) && hdr3->hdr.pad) + xchk_da_set_corrupt(ds, level); + + /* Check the owner. */ + if (xfs_has_crc(ip->i_mount)) { + owner = be64_to_cpu(hdr3->owner); + if (owner != ip->i_ino) + xchk_da_set_corrupt(ds, level); + } + + /* Check the siblings. */ + error = xchk_da_btree_block_check_siblings(ds, level, &hdr3->hdr); + if (error) + goto out; + + /* Interpret the buffer. Each case rewrites blk->magic to the first magic of its pair, so later tests such as the XFS_DA_NODE_MAGIC comparisons only need one value. */ + switch (blk->magic) { + case XFS_ATTR_LEAF_MAGIC: + case XFS_ATTR3_LEAF_MAGIC: + xfs_trans_buf_set_type(dargs->trans, blk->bp, + XFS_BLFT_ATTR_LEAF_BUF); + blk->magic = XFS_ATTR_LEAF_MAGIC; + blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs); + /* a leaf is only expected once tree_level has counted down to 0 */ if (ds->tree_level != 0) + xchk_da_set_corrupt(ds, level); + break; + case XFS_DIR2_LEAFN_MAGIC: + case XFS_DIR3_LEAFN_MAGIC: + xfs_trans_buf_set_type(dargs->trans, blk->bp, + XFS_BLFT_DIR_LEAFN_BUF); + blk->magic = XFS_DIR2_LEAFN_MAGIC; + blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs); + if (ds->tree_level != 0) + xchk_da_set_corrupt(ds, level); + break; + case XFS_DIR2_LEAF1_MAGIC: + case XFS_DIR3_LEAF1_MAGIC: + xfs_trans_buf_set_type(dargs->trans, blk->bp, + XFS_BLFT_DIR_LEAF1_BUF); + blk->magic = XFS_DIR2_LEAF1_MAGIC; + blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs); + if (ds->tree_level != 0) + xchk_da_set_corrupt(ds, level); + break; + case XFS_DA_NODE_MAGIC: + case XFS_DA3_NODE_MAGIC: + xfs_trans_buf_set_type(dargs->trans, blk->bp, + XFS_BLFT_DA_NODE_BUF); + blk->magic = XFS_DA_NODE_MAGIC; + node = blk->bp->b_addr; + xfs_da3_node_hdr_from_disk(ip->i_mount, &nodehdr, node); + btree = nodehdr.btree; + *pmaxrecs = nodehdr.count; + /* hashval of the last entry. NOTE(review): this indexes btree[-1] if count were 0; presumably the read verifier rejects empty nodes - confirm. */ blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval); + if (level == 0) { + if (nodehdr.level >= 
XFS_DA_NODE_MAXDEPTH) { + xchk_da_set_corrupt(ds, level); + goto out_freebp; + } + ds->tree_level = nodehdr.level; /* the root's level seeds the expected level for the rest of the walk */ + } else { + /* non-root nodes must match the level tracked by the walk */ if (ds->tree_level != nodehdr.level) { + xchk_da_set_corrupt(ds, level); + goto out_freebp; + } + } + + /* XXX: Check hdr3.pad32 once we know how to fix it. */ + break; + default: + xchk_da_set_corrupt(ds, level); + goto out_freebp; + } + + /* + * If we've been handed a block that is below the dabtree root, does + * its hashval match what the parent block expected to see? + */ + if (level > 0) { + struct xfs_da_node_entry *key; + + key = xchk_da_btree_node_entry(ds, level - 1); + if (be32_to_cpu(key->hashval) != blk->hashval) { + xchk_da_set_corrupt(ds, level); + goto out_freebp; + } + } + +out: + return error; +out_freebp: + xfs_trans_brelse(dargs->trans, blk->bp); + blk->bp = NULL; +out_nobuf: + blk->blkno = 0; + return error; +} + +/* Visit all nodes and leaves of a da btree in the given fork, calling scrub_fn(ds, level) for each leaf record index. Returns 0 without scanning when the fork has no extents, -ENOMEM if the walk state cannot be allocated, otherwise 0 or the error that stopped the walk. The walk also stops once XFS_SCRUB_OFLAG_CORRUPT is set. */ +int +xchk_da_btree( + struct xfs_scrub *sc, + int whichfork, + xchk_da_btree_rec_fn scrub_fn, + void *private) +{ + struct xchk_da_btree *ds; + struct xfs_mount *mp = sc->mp; + struct xfs_da_state_blk *blks; + struct xfs_da_node_entry *key; + xfs_dablk_t blkno; + int level; + int error; + + /* Skip short format data structures; no btree to scan. */ + if (!xfs_ifork_has_extents(xfs_ifork_ptr(sc->ip, whichfork))) + return 0; + + /* Set up initial da state. 
*/ + ds = kmem_zalloc(sizeof(struct xchk_da_btree), KM_NOFS | KM_MAYFAIL); + if (!ds) + return -ENOMEM; + ds->dargs.dp = sc->ip; + ds->dargs.whichfork = whichfork; + ds->dargs.trans = sc->tp; + ds->dargs.op_flags = XFS_DA_OP_OKNOENT; + /* NOTE(review): the result is not NULL-checked; presumably xfs_da_state_alloc cannot fail - confirm. */ ds->state = xfs_da_state_alloc(&ds->dargs); + ds->sc = sc; + ds->private = private; + if (whichfork == XFS_ATTR_FORK) { + ds->dargs.geo = mp->m_attr_geo; + ds->lowest = 0; + ds->highest = 0; /* 0 disables the upper-bound test in xchk_da_btree_ptr_ok */ + } else { + ds->dargs.geo = mp->m_dir_geo; + ds->lowest = ds->dargs.geo->leafblk; + ds->highest = ds->dargs.geo->freeblk; + } + blkno = ds->lowest; + level = 0; + + /* Find the root of the da tree, if present. */ + blks = ds->state->path.blk; + error = xchk_da_btree_block(ds, level, blkno); + /* NOTE(review): this jumps past the buffer-release loop at out:, yet xchk_da_btree_block can return nonzero from its sibling check with blk->bp still held - confirm the buffer is released when sc->tp is torn down. */ if (error) + goto out_state; + /* + * We didn't find a block at ds->lowest, which means that there's + * no LEAF1/LEAFN tree (at least not where it's supposed to be), + * so jump out now. + */ + if (blks[level].bp == NULL) + goto out_state; + + blks[level].index = 0; + /* Iterative walk: blks[level].index is the position within the block at each level; level++ descends into a child and level-- pops back to the parent. */ while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) { + /* Handle leaf block. */ + if (blks[level].magic != XFS_DA_NODE_MAGIC) { + /* End of leaf, pop back towards the root. */ + if (blks[level].index >= ds->maxrecs[level]) { + if (level > 0) + blks[level - 1].index++; + ds->tree_level++; + level--; + continue; + } + + /* Dispatch record scrubbing. */ + error = scrub_fn(ds, level); + if (error) + break; + if (xchk_should_terminate(sc, &error) || + (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) + break; + + blks[level].index++; + continue; + } + + + /* End of node, pop back towards the root. */ + if (blks[level].index >= ds->maxrecs[level]) { + if (level > 0) + blks[level - 1].index++; + ds->tree_level++; + level--; + continue; + } + + /* Hashes in order for scrub? */ + key = xchk_da_btree_node_entry(ds, level); + error = xchk_da_btree_hash(ds, level, &key->hashval); + if (error) + goto out; + + /* Drill another level deeper. 
*/ + blkno = be32_to_cpu(key->before); + level++; + if (level >= XFS_DA_NODE_MAXDEPTH) { + /* Too deep! */ + xchk_da_set_corrupt(ds, level - 1); + break; + } + ds->tree_level--; /* expected header level of the child about to be loaded */ + error = xchk_da_btree_block(ds, level, blkno); + if (error) + goto out; + /* the child was rejected or missing; stop the walk */ if (blks[level].bp == NULL) + goto out; + + blks[level].index = 0; + } + +out: + /* Release all the buffers we're tracking. */ + for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) { + if (blks[level].bp == NULL) + continue; + xfs_trans_brelse(sc->tp, blks[level].bp); + blks[level].bp = NULL; + } + +out_state: + xfs_da_state_free(ds->state); + kmem_free(ds); + return error; +} |