summaryrefslogtreecommitdiffstats
path: root/fs/xfs/scrub/dabtree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/scrub/dabtree.c')
-rw-r--r--fs/xfs/scrub/dabtree.c597
1 files changed, 597 insertions, 0 deletions
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c
new file mode 100644
index 0000000000..82b150d3b8
--- /dev/null
+++ b/fs/xfs/scrub/dabtree.c
@@ -0,0 +1,597 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2017-2023 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_inode.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_attr_leaf.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/dabtree.h"
+
+/* Directory/Attribute Btree */
+
+/*
+ * Check for da btree operation errors. See the section about handling
+ * operational errors in common.c.
+ */
+bool
+xchk_da_process_error(
+ struct xchk_da_btree *ds,
+ int level,
+ int *error)
+{
+ struct xfs_scrub *sc = ds->sc;
+
+ if (*error == 0)
+ return true;
+
+ switch (*error) {
+ case -EDEADLOCK:
+ case -ECHRNG:
+ /* Used to restart an op with deadlock avoidance. */
+ trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
+ break;
+ case -EFSBADCRC:
+ case -EFSCORRUPTED:
+ /* Note the badness but don't abort. */
+ sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+ *error = 0;
+ fallthrough;
+ default:
+ trace_xchk_file_op_error(sc, ds->dargs.whichfork,
+ xfs_dir2_da_to_db(ds->dargs.geo,
+ ds->state->path.blk[level].blkno),
+ *error, __return_address);
+ break;
+ }
+ return false;
+}
+
+/*
+ * Check for da btree corruption. See the section about handling
+ * operational errors in common.c.
+ */
+void
+xchk_da_set_corrupt(
+ struct xchk_da_btree *ds,
+ int level)
+{
+ struct xfs_scrub *sc = ds->sc;
+
+ sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+
+ trace_xchk_fblock_error(sc, ds->dargs.whichfork,
+ xfs_dir2_da_to_db(ds->dargs.geo,
+ ds->state->path.blk[level].blkno),
+ __return_address);
+}
+
+static struct xfs_da_node_entry *
+xchk_da_btree_node_entry(
+ struct xchk_da_btree *ds,
+ int level)
+{
+ struct xfs_da_state_blk *blk = &ds->state->path.blk[level];
+ struct xfs_da3_icnode_hdr hdr;
+
+ ASSERT(blk->magic == XFS_DA_NODE_MAGIC);
+
+ xfs_da3_node_hdr_from_disk(ds->sc->mp, &hdr, blk->bp->b_addr);
+ return hdr.btree + blk->index;
+}
+
+/* Scrub a da btree hash (key). */
+int
+xchk_da_btree_hash(
+ struct xchk_da_btree *ds,
+ int level,
+ __be32 *hashp)
+{
+ struct xfs_da_node_entry *entry;
+ xfs_dahash_t hash;
+ xfs_dahash_t parent_hash;
+
+ /* Is this hash in order? */
+ hash = be32_to_cpu(*hashp);
+ if (hash < ds->hashes[level])
+ xchk_da_set_corrupt(ds, level);
+ ds->hashes[level] = hash;
+
+ if (level == 0)
+ return 0;
+
+ /* Is this hash no larger than the parent hash? */
+ entry = xchk_da_btree_node_entry(ds, level - 1);
+ parent_hash = be32_to_cpu(entry->hashval);
+ if (parent_hash < hash)
+ xchk_da_set_corrupt(ds, level);
+
+ return 0;
+}
+
+/*
+ * Check a da btree pointer. Returns true if it's ok to use this
+ * pointer.
+ */
+STATIC bool
+xchk_da_btree_ptr_ok(
+ struct xchk_da_btree *ds,
+ int level,
+ xfs_dablk_t blkno)
+{
+ if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) {
+ xchk_da_set_corrupt(ds, level);
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * The da btree scrubber can handle leaf1 blocks as a degenerate
+ * form of leafn blocks. Since the regular da code doesn't handle
+ * leaf1, we must multiplex the verifiers.
+ */
+static void
+xchk_da_btree_read_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_da_blkinfo *info = bp->b_addr;
+
+ switch (be16_to_cpu(info->magic)) {
+ case XFS_DIR2_LEAF1_MAGIC:
+ case XFS_DIR3_LEAF1_MAGIC:
+ bp->b_ops = &xfs_dir3_leaf1_buf_ops;
+ bp->b_ops->verify_read(bp);
+ return;
+ default:
+ /*
+ * xfs_da3_node_buf_ops already know how to handle
+ * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
+ */
+ bp->b_ops = &xfs_da3_node_buf_ops;
+ bp->b_ops->verify_read(bp);
+ return;
+ }
+}
+static void
+xchk_da_btree_write_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_da_blkinfo *info = bp->b_addr;
+
+ switch (be16_to_cpu(info->magic)) {
+ case XFS_DIR2_LEAF1_MAGIC:
+ case XFS_DIR3_LEAF1_MAGIC:
+ bp->b_ops = &xfs_dir3_leaf1_buf_ops;
+ bp->b_ops->verify_write(bp);
+ return;
+ default:
+ /*
+ * xfs_da3_node_buf_ops already know how to handle
+ * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
+ */
+ bp->b_ops = &xfs_da3_node_buf_ops;
+ bp->b_ops->verify_write(bp);
+ return;
+ }
+}
+static void *
+xchk_da_btree_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_da_blkinfo *info = bp->b_addr;
+
+ switch (be16_to_cpu(info->magic)) {
+ case XFS_DIR2_LEAF1_MAGIC:
+ case XFS_DIR3_LEAF1_MAGIC:
+ bp->b_ops = &xfs_dir3_leaf1_buf_ops;
+ return bp->b_ops->verify_struct(bp);
+ default:
+ bp->b_ops = &xfs_da3_node_buf_ops;
+ return bp->b_ops->verify_struct(bp);
+ }
+}
+
+static const struct xfs_buf_ops xchk_da_btree_buf_ops = {
+ .name = "xchk_da_btree",
+ .verify_read = xchk_da_btree_read_verify,
+ .verify_write = xchk_da_btree_write_verify,
+ .verify_struct = xchk_da_btree_verify,
+};
+
+/* Check a block's sibling. */
+STATIC int
+xchk_da_btree_block_check_sibling(
+ struct xchk_da_btree *ds,
+ int level,
+ int direction,
+ xfs_dablk_t sibling)
+{
+ struct xfs_da_state_path *path = &ds->state->path;
+ struct xfs_da_state_path *altpath = &ds->state->altpath;
+ int retval;
+ int plevel;
+ int error;
+
+ memcpy(altpath, path, sizeof(ds->state->altpath));
+
+ /*
+ * If the pointer is null, we shouldn't be able to move the upper
+ * level pointer anywhere.
+ */
+ if (sibling == 0) {
+ error = xfs_da3_path_shift(ds->state, altpath, direction,
+ false, &retval);
+ if (error == 0 && retval == 0)
+ xchk_da_set_corrupt(ds, level);
+ error = 0;
+ goto out;
+ }
+
+ /* Move the alternate cursor one block in the direction given. */
+ error = xfs_da3_path_shift(ds->state, altpath, direction, false,
+ &retval);
+ if (!xchk_da_process_error(ds, level, &error))
+ goto out;
+ if (retval) {
+ xchk_da_set_corrupt(ds, level);
+ goto out;
+ }
+ if (altpath->blk[level].bp)
+ xchk_buffer_recheck(ds->sc, altpath->blk[level].bp);
+
+ /* Compare upper level pointer to sibling pointer. */
+ if (altpath->blk[level].blkno != sibling)
+ xchk_da_set_corrupt(ds, level);
+
+out:
+ /* Free all buffers in the altpath that aren't referenced from path. */
+ for (plevel = 0; plevel < altpath->active; plevel++) {
+ if (altpath->blk[plevel].bp == NULL ||
+ (plevel < path->active &&
+ altpath->blk[plevel].bp == path->blk[plevel].bp))
+ continue;
+
+ xfs_trans_brelse(ds->dargs.trans, altpath->blk[plevel].bp);
+ altpath->blk[plevel].bp = NULL;
+ }
+
+ return error;
+}
+
+/* Check a block's sibling pointers. */
+STATIC int
+xchk_da_btree_block_check_siblings(
+ struct xchk_da_btree *ds,
+ int level,
+ struct xfs_da_blkinfo *hdr)
+{
+ xfs_dablk_t forw;
+ xfs_dablk_t back;
+ int error = 0;
+
+ forw = be32_to_cpu(hdr->forw);
+ back = be32_to_cpu(hdr->back);
+
+ /* Top level blocks should not have sibling pointers. */
+ if (level == 0) {
+ if (forw != 0 || back != 0)
+ xchk_da_set_corrupt(ds, level);
+ return 0;
+ }
+
+ /*
+ * Check back (left) and forw (right) pointers. These functions
+ * absorb error codes for us.
+ */
+ error = xchk_da_btree_block_check_sibling(ds, level, 0, back);
+ if (error)
+ goto out;
+ error = xchk_da_btree_block_check_sibling(ds, level, 1, forw);
+
+out:
+ memset(&ds->state->altpath, 0, sizeof(ds->state->altpath));
+ return error;
+}
+
+/* Load a dir/attribute block from a btree. */
+STATIC int
+xchk_da_btree_block(
+ struct xchk_da_btree *ds,
+ int level,
+ xfs_dablk_t blkno)
+{
+ struct xfs_da_state_blk *blk;
+ struct xfs_da_intnode *node;
+ struct xfs_da_node_entry *btree;
+ struct xfs_da3_blkinfo *hdr3;
+ struct xfs_da_args *dargs = &ds->dargs;
+ struct xfs_inode *ip = ds->dargs.dp;
+ xfs_ino_t owner;
+ int *pmaxrecs;
+ struct xfs_da3_icnode_hdr nodehdr;
+ int error = 0;
+
+ blk = &ds->state->path.blk[level];
+ ds->state->path.active = level + 1;
+
+ /* Release old block. */
+ if (blk->bp) {
+ xfs_trans_brelse(dargs->trans, blk->bp);
+ blk->bp = NULL;
+ }
+
+ /* Check the pointer. */
+ blk->blkno = blkno;
+ if (!xchk_da_btree_ptr_ok(ds, level, blkno))
+ goto out_nobuf;
+
+ /* Read the buffer. */
+ error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno,
+ XFS_DABUF_MAP_HOLE_OK, &blk->bp, dargs->whichfork,
+ &xchk_da_btree_buf_ops);
+ if (!xchk_da_process_error(ds, level, &error))
+ goto out_nobuf;
+ if (blk->bp)
+ xchk_buffer_recheck(ds->sc, blk->bp);
+
+ /*
+ * We didn't find a dir btree root block, which means that
+ * there's no LEAF1/LEAFN tree (at least not where it's supposed
+ * to be), so jump out now.
+ */
+ if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 &&
+ blk->bp == NULL)
+ goto out_nobuf;
+
+ /* It's /not/ ok for attr trees not to have a da btree. */
+ if (blk->bp == NULL) {
+ xchk_da_set_corrupt(ds, level);
+ goto out_nobuf;
+ }
+
+ hdr3 = blk->bp->b_addr;
+ blk->magic = be16_to_cpu(hdr3->hdr.magic);
+ pmaxrecs = &ds->maxrecs[level];
+
+ /* We only started zeroing the header on v5 filesystems. */
+ if (xfs_has_crc(ds->sc->mp) && hdr3->hdr.pad)
+ xchk_da_set_corrupt(ds, level);
+
+ /* Check the owner. */
+ if (xfs_has_crc(ip->i_mount)) {
+ owner = be64_to_cpu(hdr3->owner);
+ if (owner != ip->i_ino)
+ xchk_da_set_corrupt(ds, level);
+ }
+
+ /* Check the siblings. */
+ error = xchk_da_btree_block_check_siblings(ds, level, &hdr3->hdr);
+ if (error)
+ goto out;
+
+ /* Interpret the buffer. */
+ switch (blk->magic) {
+ case XFS_ATTR_LEAF_MAGIC:
+ case XFS_ATTR3_LEAF_MAGIC:
+ xfs_trans_buf_set_type(dargs->trans, blk->bp,
+ XFS_BLFT_ATTR_LEAF_BUF);
+ blk->magic = XFS_ATTR_LEAF_MAGIC;
+ blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs);
+ if (ds->tree_level != 0)
+ xchk_da_set_corrupt(ds, level);
+ break;
+ case XFS_DIR2_LEAFN_MAGIC:
+ case XFS_DIR3_LEAFN_MAGIC:
+ xfs_trans_buf_set_type(dargs->trans, blk->bp,
+ XFS_BLFT_DIR_LEAFN_BUF);
+ blk->magic = XFS_DIR2_LEAFN_MAGIC;
+ blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
+ if (ds->tree_level != 0)
+ xchk_da_set_corrupt(ds, level);
+ break;
+ case XFS_DIR2_LEAF1_MAGIC:
+ case XFS_DIR3_LEAF1_MAGIC:
+ xfs_trans_buf_set_type(dargs->trans, blk->bp,
+ XFS_BLFT_DIR_LEAF1_BUF);
+ blk->magic = XFS_DIR2_LEAF1_MAGIC;
+ blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
+ if (ds->tree_level != 0)
+ xchk_da_set_corrupt(ds, level);
+ break;
+ case XFS_DA_NODE_MAGIC:
+ case XFS_DA3_NODE_MAGIC:
+ xfs_trans_buf_set_type(dargs->trans, blk->bp,
+ XFS_BLFT_DA_NODE_BUF);
+ blk->magic = XFS_DA_NODE_MAGIC;
+ node = blk->bp->b_addr;
+ xfs_da3_node_hdr_from_disk(ip->i_mount, &nodehdr, node);
+ btree = nodehdr.btree;
+ *pmaxrecs = nodehdr.count;
+ blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval);
+ if (level == 0) {
+ if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
+ xchk_da_set_corrupt(ds, level);
+ goto out_freebp;
+ }
+ ds->tree_level = nodehdr.level;
+ } else {
+ if (ds->tree_level != nodehdr.level) {
+ xchk_da_set_corrupt(ds, level);
+ goto out_freebp;
+ }
+ }
+
+ /* XXX: Check hdr3.pad32 once we know how to fix it. */
+ break;
+ default:
+ xchk_da_set_corrupt(ds, level);
+ goto out_freebp;
+ }
+
+ /*
+ * If we've been handed a block that is below the dabtree root, does
+ * its hashval match what the parent block expected to see?
+ */
+ if (level > 0) {
+ struct xfs_da_node_entry *key;
+
+ key = xchk_da_btree_node_entry(ds, level - 1);
+ if (be32_to_cpu(key->hashval) != blk->hashval) {
+ xchk_da_set_corrupt(ds, level);
+ goto out_freebp;
+ }
+ }
+
+out:
+ return error;
+out_freebp:
+ xfs_trans_brelse(dargs->trans, blk->bp);
+ blk->bp = NULL;
+out_nobuf:
+ blk->blkno = 0;
+ return error;
+}
+
+/* Visit all nodes and leaves of a da btree. */
+int
+xchk_da_btree(
+ struct xfs_scrub *sc,
+ int whichfork,
+ xchk_da_btree_rec_fn scrub_fn,
+ void *private)
+{
+ struct xchk_da_btree *ds;
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_da_state_blk *blks;
+ struct xfs_da_node_entry *key;
+ xfs_dablk_t blkno;
+ int level;
+ int error;
+
+ /* Skip short format data structures; no btree to scan. */
+ if (!xfs_ifork_has_extents(xfs_ifork_ptr(sc->ip, whichfork)))
+ return 0;
+
+ /* Set up initial da state. */
+ ds = kzalloc(sizeof(struct xchk_da_btree), XCHK_GFP_FLAGS);
+ if (!ds)
+ return -ENOMEM;
+ ds->dargs.dp = sc->ip;
+ ds->dargs.whichfork = whichfork;
+ ds->dargs.trans = sc->tp;
+ ds->dargs.op_flags = XFS_DA_OP_OKNOENT;
+ ds->state = xfs_da_state_alloc(&ds->dargs);
+ ds->sc = sc;
+ ds->private = private;
+ if (whichfork == XFS_ATTR_FORK) {
+ ds->dargs.geo = mp->m_attr_geo;
+ ds->lowest = 0;
+ ds->highest = 0;
+ } else {
+ ds->dargs.geo = mp->m_dir_geo;
+ ds->lowest = ds->dargs.geo->leafblk;
+ ds->highest = ds->dargs.geo->freeblk;
+ }
+ blkno = ds->lowest;
+ level = 0;
+
+ /* Find the root of the da tree, if present. */
+ blks = ds->state->path.blk;
+ error = xchk_da_btree_block(ds, level, blkno);
+ if (error)
+ goto out_state;
+ /*
+ * We didn't find a block at ds->lowest, which means that there's
+ * no LEAF1/LEAFN tree (at least not where it's supposed to be),
+ * so jump out now.
+ */
+ if (blks[level].bp == NULL)
+ goto out_state;
+
+ blks[level].index = 0;
+ while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) {
+ /* Handle leaf block. */
+ if (blks[level].magic != XFS_DA_NODE_MAGIC) {
+ /* End of leaf, pop back towards the root. */
+ if (blks[level].index >= ds->maxrecs[level]) {
+ if (level > 0)
+ blks[level - 1].index++;
+ ds->tree_level++;
+ level--;
+ continue;
+ }
+
+ /* Dispatch record scrubbing. */
+ error = scrub_fn(ds, level);
+ if (error)
+ break;
+ if (xchk_should_terminate(sc, &error) ||
+ (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
+ break;
+
+ blks[level].index++;
+ continue;
+ }
+
+
+ /* End of node, pop back towards the root. */
+ if (blks[level].index >= ds->maxrecs[level]) {
+ if (level > 0)
+ blks[level - 1].index++;
+ ds->tree_level++;
+ level--;
+ continue;
+ }
+
+ /* Hashes in order for scrub? */
+ key = xchk_da_btree_node_entry(ds, level);
+ error = xchk_da_btree_hash(ds, level, &key->hashval);
+ if (error)
+ goto out;
+
+ /* Drill another level deeper. */
+ blkno = be32_to_cpu(key->before);
+ level++;
+ if (level >= XFS_DA_NODE_MAXDEPTH) {
+ /* Too deep! */
+ xchk_da_set_corrupt(ds, level - 1);
+ break;
+ }
+ ds->tree_level--;
+ error = xchk_da_btree_block(ds, level, blkno);
+ if (error)
+ goto out;
+ if (blks[level].bp == NULL)
+ goto out;
+
+ blks[level].index = 0;
+ }
+
+out:
+ /* Release all the buffers we're tracking. */
+ for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) {
+ if (blks[level].bp == NULL)
+ continue;
+ xfs_trans_brelse(sc->tp, blks[level].bp);
+ blks[level].bp = NULL;
+ }
+
+out_state:
+ xfs_da_state_free(ds->state);
+ kfree(ds);
+ return error;
+}