summaryrefslogtreecommitdiffstats
path: root/fs/xfs/scrub/btree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/scrub/btree.c')
-rw-r--r--fs/xfs/scrub/btree.c810
1 files changed, 810 insertions, 0 deletions
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
new file mode 100644
index 0000000000..1935b9ce18
--- /dev/null
+++ b/fs/xfs/scrub/btree.c
@@ -0,0 +1,810 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2017-2023 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/btree.h"
+#include "scrub/trace.h"
+
+/* btree scrubbing */
+
+/*
+ * Check for btree operation errors. See the section about handling
+ * operational errors in common.c.
+ */
+static bool
+__xchk_btree_process_error(
+ struct xfs_scrub *sc,
+ struct xfs_btree_cur *cur,
+ int level,
+ int *error,
+ __u32 errflag,
+ void *ret_ip)
+{
+ if (*error == 0)
+ return true;
+
+ switch (*error) {
+ case -EDEADLOCK:
+ case -ECHRNG:
+ /* Used to restart an op with deadlock avoidance. */
+ trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
+ break;
+ case -EFSBADCRC:
+ case -EFSCORRUPTED:
+ /* Note the badness but don't abort. */
+ sc->sm->sm_flags |= errflag;
+ *error = 0;
+ fallthrough;
+ default:
+ if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
+ trace_xchk_ifork_btree_op_error(sc, cur, level,
+ *error, ret_ip);
+ else
+ trace_xchk_btree_op_error(sc, cur, level,
+ *error, ret_ip);
+ break;
+ }
+ return false;
+}
+
+bool
+xchk_btree_process_error(
+ struct xfs_scrub *sc,
+ struct xfs_btree_cur *cur,
+ int level,
+ int *error)
+{
+ return __xchk_btree_process_error(sc, cur, level, error,
+ XFS_SCRUB_OFLAG_CORRUPT, __return_address);
+}
+
+bool
+xchk_btree_xref_process_error(
+ struct xfs_scrub *sc,
+ struct xfs_btree_cur *cur,
+ int level,
+ int *error)
+{
+ return __xchk_btree_process_error(sc, cur, level, error,
+ XFS_SCRUB_OFLAG_XFAIL, __return_address);
+}
+
+/* Record btree block corruption. */
+static void
+__xchk_btree_set_corrupt(
+ struct xfs_scrub *sc,
+ struct xfs_btree_cur *cur,
+ int level,
+ __u32 errflag,
+ void *ret_ip)
+{
+ sc->sm->sm_flags |= errflag;
+
+ if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
+ trace_xchk_ifork_btree_error(sc, cur, level,
+ ret_ip);
+ else
+ trace_xchk_btree_error(sc, cur, level,
+ ret_ip);
+}
+
+void
+xchk_btree_set_corrupt(
+ struct xfs_scrub *sc,
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ __xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT,
+ __return_address);
+}
+
+void
+xchk_btree_xref_set_corrupt(
+ struct xfs_scrub *sc,
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ __xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT,
+ __return_address);
+}
+
+void
+xchk_btree_set_preen(
+ struct xfs_scrub *sc,
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ __xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_PREEN,
+ __return_address);
+}
+
+/*
+ * Make sure this record is in order and doesn't stray outside of the parent
+ * keys.
+ */
+STATIC void
+xchk_btree_rec(
+ struct xchk_btree *bs)
+{
+ struct xfs_btree_cur *cur = bs->cur;
+ union xfs_btree_rec *rec;
+ union xfs_btree_key key;
+ union xfs_btree_key hkey;
+ union xfs_btree_key *keyp;
+ struct xfs_btree_block *block;
+ struct xfs_btree_block *keyblock;
+ struct xfs_buf *bp;
+
+ block = xfs_btree_get_block(cur, 0, &bp);
+ rec = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr, block);
+
+ trace_xchk_btree_rec(bs->sc, cur, 0);
+
+ /* Are all records across all record blocks in order? */
+ if (bs->lastrec_valid &&
+ !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec))
+ xchk_btree_set_corrupt(bs->sc, cur, 0);
+ memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len);
+ bs->lastrec_valid = true;
+
+ if (cur->bc_nlevels == 1)
+ return;
+
+ /* Is low_key(rec) at least as large as the parent low key? */
+ cur->bc_ops->init_key_from_rec(&key, rec);
+ keyblock = xfs_btree_get_block(cur, 1, &bp);
+ keyp = xfs_btree_key_addr(cur, cur->bc_levels[1].ptr, keyblock);
+ if (xfs_btree_keycmp_lt(cur, &key, keyp))
+ xchk_btree_set_corrupt(bs->sc, cur, 1);
+
+ if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
+ return;
+
+ /* Is high_key(rec) no larger than the parent high key? */
+ cur->bc_ops->init_high_key_from_rec(&hkey, rec);
+ keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[1].ptr, keyblock);
+ if (xfs_btree_keycmp_lt(cur, keyp, &hkey))
+ xchk_btree_set_corrupt(bs->sc, cur, 1);
+}
+
+/*
+ * Make sure this key is in order and doesn't stray outside of the parent
+ * keys.
+ */
+STATIC void
+xchk_btree_key(
+ struct xchk_btree *bs,
+ int level)
+{
+ struct xfs_btree_cur *cur = bs->cur;
+ union xfs_btree_key *key;
+ union xfs_btree_key *keyp;
+ struct xfs_btree_block *block;
+ struct xfs_btree_block *keyblock;
+ struct xfs_buf *bp;
+
+ block = xfs_btree_get_block(cur, level, &bp);
+ key = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block);
+
+ trace_xchk_btree_key(bs->sc, cur, level);
+
+ /* Are all low keys across all node blocks in order? */
+ if (bs->lastkey[level - 1].valid &&
+ !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level - 1].key, key))
+ xchk_btree_set_corrupt(bs->sc, cur, level);
+ memcpy(&bs->lastkey[level - 1].key, key, cur->bc_ops->key_len);
+ bs->lastkey[level - 1].valid = true;
+
+ if (level + 1 >= cur->bc_nlevels)
+ return;
+
+ /* Is this block's low key at least as large as the parent low key? */
+ keyblock = xfs_btree_get_block(cur, level + 1, &bp);
+ keyp = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr, keyblock);
+ if (xfs_btree_keycmp_lt(cur, key, keyp))
+ xchk_btree_set_corrupt(bs->sc, cur, level);
+
+ if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
+ return;
+
+ /* Is this block's high key no larger than the parent high key? */
+ key = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr, block);
+ keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr,
+ keyblock);
+ if (xfs_btree_keycmp_lt(cur, keyp, key))
+ xchk_btree_set_corrupt(bs->sc, cur, level);
+}
+
+/*
+ * Check a btree pointer. Returns true if it's ok to use this pointer.
+ * Callers do not need to set the corrupt flag.
+ */
+static bool
+xchk_btree_ptr_ok(
+ struct xchk_btree *bs,
+ int level,
+ union xfs_btree_ptr *ptr)
+{
+ bool res;
+
+ /* A btree rooted in an inode has no block pointer to the root. */
+ if ((bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+ level == bs->cur->bc_nlevels)
+ return true;
+
+ /* Otherwise, check the pointers. */
+ if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ res = xfs_btree_check_lptr(bs->cur, be64_to_cpu(ptr->l), level);
+ else
+ res = xfs_btree_check_sptr(bs->cur, be32_to_cpu(ptr->s), level);
+ if (!res)
+ xchk_btree_set_corrupt(bs->sc, bs->cur, level);
+
+ return res;
+}
+
+/* Check that a btree block's sibling matches what we expect it. */
+STATIC int
+xchk_btree_block_check_sibling(
+ struct xchk_btree *bs,
+ int level,
+ int direction,
+ union xfs_btree_ptr *sibling)
+{
+ struct xfs_btree_cur *cur = bs->cur;
+ struct xfs_btree_block *pblock;
+ struct xfs_buf *pbp;
+ struct xfs_btree_cur *ncur = NULL;
+ union xfs_btree_ptr *pp;
+ int success;
+ int error;
+
+ error = xfs_btree_dup_cursor(cur, &ncur);
+ if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error) ||
+ !ncur)
+ return error;
+
+ /*
+ * If the pointer is null, we shouldn't be able to move the upper
+ * level pointer anywhere.
+ */
+ if (xfs_btree_ptr_is_null(cur, sibling)) {
+ if (direction > 0)
+ error = xfs_btree_increment(ncur, level + 1, &success);
+ else
+ error = xfs_btree_decrement(ncur, level + 1, &success);
+ if (error == 0 && success)
+ xchk_btree_set_corrupt(bs->sc, cur, level);
+ error = 0;
+ goto out;
+ }
+
+ /* Increment upper level pointer. */
+ if (direction > 0)
+ error = xfs_btree_increment(ncur, level + 1, &success);
+ else
+ error = xfs_btree_decrement(ncur, level + 1, &success);
+ if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error))
+ goto out;
+ if (!success) {
+ xchk_btree_set_corrupt(bs->sc, cur, level + 1);
+ goto out;
+ }
+
+ /* Compare upper level pointer to sibling pointer. */
+ pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
+ pp = xfs_btree_ptr_addr(ncur, ncur->bc_levels[level + 1].ptr, pblock);
+ if (!xchk_btree_ptr_ok(bs, level + 1, pp))
+ goto out;
+ if (pbp)
+ xchk_buffer_recheck(bs->sc, pbp);
+
+ if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
+ xchk_btree_set_corrupt(bs->sc, cur, level);
+out:
+ xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
+ return error;
+}
+
+/* Check the siblings of a btree block. */
+STATIC int
+xchk_btree_block_check_siblings(
+ struct xchk_btree *bs,
+ struct xfs_btree_block *block)
+{
+ struct xfs_btree_cur *cur = bs->cur;
+ union xfs_btree_ptr leftsib;
+ union xfs_btree_ptr rightsib;
+ int level;
+ int error = 0;
+
+ xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB);
+ xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB);
+ level = xfs_btree_get_level(block);
+
+ /* Root block should never have siblings. */
+ if (level == cur->bc_nlevels - 1) {
+ if (!xfs_btree_ptr_is_null(cur, &leftsib) ||
+ !xfs_btree_ptr_is_null(cur, &rightsib))
+ xchk_btree_set_corrupt(bs->sc, cur, level);
+ goto out;
+ }
+
+ /*
+ * Does the left & right sibling pointers match the adjacent
+ * parent level pointers?
+ * (These function absorbs error codes for us.)
+ */
+ error = xchk_btree_block_check_sibling(bs, level, -1, &leftsib);
+ if (error)
+ return error;
+ error = xchk_btree_block_check_sibling(bs, level, 1, &rightsib);
+ if (error)
+ return error;
+out:
+ return error;
+}
+
+struct check_owner {
+ struct list_head list;
+ xfs_daddr_t daddr;
+ int level;
+};
+
+/*
+ * Make sure this btree block isn't in the free list and that there's
+ * an rmap record for it.
+ */
+STATIC int
+xchk_btree_check_block_owner(
+ struct xchk_btree *bs,
+ int level,
+ xfs_daddr_t daddr)
+{
+ xfs_agnumber_t agno;
+ xfs_agblock_t agbno;
+ xfs_btnum_t btnum;
+ bool init_sa;
+ int error = 0;
+
+ if (!bs->cur)
+ return 0;
+
+ btnum = bs->cur->bc_btnum;
+ agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr);
+ agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr);
+
+ init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS;
+ if (init_sa) {
+ error = xchk_ag_init_existing(bs->sc, agno, &bs->sc->sa);
+ if (!xchk_btree_xref_process_error(bs->sc, bs->cur,
+ level, &error))
+ goto out_free;
+ }
+
+ xchk_xref_is_used_space(bs->sc, agbno, 1);
+ /*
+ * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we
+ * have to nullify it (to shut down further block owner checks) if
+ * self-xref encounters problems.
+ */
+ if (!bs->sc->sa.bno_cur && btnum == XFS_BTNUM_BNO)
+ bs->cur = NULL;
+
+ xchk_xref_is_only_owned_by(bs->sc, agbno, 1, bs->oinfo);
+ if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP)
+ bs->cur = NULL;
+
+out_free:
+ if (init_sa)
+ xchk_ag_free(bs->sc, &bs->sc->sa);
+
+ return error;
+}
+
+/* Check the owner of a btree block. */
+STATIC int
+xchk_btree_check_owner(
+ struct xchk_btree *bs,
+ int level,
+ struct xfs_buf *bp)
+{
+ struct xfs_btree_cur *cur = bs->cur;
+
+ /*
+ * In theory, xfs_btree_get_block should only give us a null buffer
+ * pointer for the root of a root-in-inode btree type, but we need
+ * to check defensively here in case the cursor state is also screwed
+ * up.
+ */
+ if (bp == NULL) {
+ if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE))
+ xchk_btree_set_corrupt(bs->sc, bs->cur, level);
+ return 0;
+ }
+
+ /*
+ * We want to cross-reference each btree block with the bnobt
+ * and the rmapbt. We cannot cross-reference the bnobt or
+ * rmapbt while scanning the bnobt or rmapbt, respectively,
+ * because we cannot alter the cursor and we'd prefer not to
+ * duplicate cursors. Therefore, save the buffer daddr for
+ * later scanning.
+ */
+ if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) {
+ struct check_owner *co;
+
+ co = kmalloc(sizeof(struct check_owner), XCHK_GFP_FLAGS);
+ if (!co)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&co->list);
+ co->level = level;
+ co->daddr = xfs_buf_daddr(bp);
+ list_add_tail(&co->list, &bs->to_check);
+ return 0;
+ }
+
+ return xchk_btree_check_block_owner(bs, level, xfs_buf_daddr(bp));
+}
+
+/* Decide if we want to check minrecs of a btree block in the inode root. */
+static inline bool
+xchk_btree_check_iroot_minrecs(
+ struct xchk_btree *bs)
+{
+ /*
+ * xfs_bmap_add_attrfork_btree had an implementation bug wherein it
+ * would miscalculate the space required for the data fork bmbt root
+ * when adding an attr fork, and promote the iroot contents to an
+ * external block unnecessarily. This went unnoticed for many years
+ * until scrub found filesystems in this state. Inode rooted btrees are
+ * not supposed to have immediate child blocks that are small enough
+ * that the contents could fit in the inode root, but we can't fail
+ * existing filesystems, so instead we disable the check for data fork
+ * bmap btrees when there's an attr fork.
+ */
+ if (bs->cur->bc_btnum == XFS_BTNUM_BMAP &&
+ bs->cur->bc_ino.whichfork == XFS_DATA_FORK &&
+ xfs_inode_has_attr_fork(bs->sc->ip))
+ return false;
+
+ return true;
+}
+
+/*
+ * Check that this btree block has at least minrecs records or is one of the
+ * special blocks that don't require that.
+ */
+STATIC void
+xchk_btree_check_minrecs(
+ struct xchk_btree *bs,
+ int level,
+ struct xfs_btree_block *block)
+{
+ struct xfs_btree_cur *cur = bs->cur;
+ unsigned int root_level = cur->bc_nlevels - 1;
+ unsigned int numrecs = be16_to_cpu(block->bb_numrecs);
+
+ /* More records than minrecs means the block is ok. */
+ if (numrecs >= cur->bc_ops->get_minrecs(cur, level))
+ return;
+
+ /*
+ * For btrees rooted in the inode, it's possible that the root block
+ * contents spilled into a regular ondisk block because there wasn't
+ * enough space in the inode root. The number of records in that
+ * child block might be less than the standard minrecs, but that's ok
+ * provided that there's only one direct child of the root.
+ */
+ if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+ level == cur->bc_nlevels - 2) {
+ struct xfs_btree_block *root_block;
+ struct xfs_buf *root_bp;
+ int root_maxrecs;
+
+ root_block = xfs_btree_get_block(cur, root_level, &root_bp);
+ root_maxrecs = cur->bc_ops->get_dmaxrecs(cur, root_level);
+ if (xchk_btree_check_iroot_minrecs(bs) &&
+ (be16_to_cpu(root_block->bb_numrecs) != 1 ||
+ numrecs <= root_maxrecs))
+ xchk_btree_set_corrupt(bs->sc, cur, level);
+ return;
+ }
+
+ /*
+ * Otherwise, only the root level is allowed to have fewer than minrecs
+ * records or keyptrs.
+ */
+ if (level < root_level)
+ xchk_btree_set_corrupt(bs->sc, cur, level);
+}
+
+/*
+ * If this btree block has a parent, make sure that the parent's keys capture
+ * the keyspace contained in this block.
+ */
+STATIC void
+xchk_btree_block_check_keys(
+ struct xchk_btree *bs,
+ int level,
+ struct xfs_btree_block *block)
+{
+ union xfs_btree_key block_key;
+ union xfs_btree_key *block_high_key;
+ union xfs_btree_key *parent_low_key, *parent_high_key;
+ struct xfs_btree_cur *cur = bs->cur;
+ struct xfs_btree_block *parent_block;
+ struct xfs_buf *bp;
+
+ if (level == cur->bc_nlevels - 1)
+ return;
+
+ xfs_btree_get_keys(cur, block, &block_key);
+
+ /* Make sure the low key of this block matches the parent. */
+ parent_block = xfs_btree_get_block(cur, level + 1, &bp);
+ parent_low_key = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr,
+ parent_block);
+ if (xfs_btree_keycmp_ne(cur, &block_key, parent_low_key)) {
+ xchk_btree_set_corrupt(bs->sc, bs->cur, level);
+ return;
+ }
+
+ if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
+ return;
+
+ /* Make sure the high key of this block matches the parent. */
+ parent_high_key = xfs_btree_high_key_addr(cur,
+ cur->bc_levels[level + 1].ptr, parent_block);
+ block_high_key = xfs_btree_high_key_from_key(cur, &block_key);
+ if (xfs_btree_keycmp_ne(cur, block_high_key, parent_high_key))
+ xchk_btree_set_corrupt(bs->sc, bs->cur, level);
+}
+
+/*
+ * Grab and scrub a btree block given a btree pointer. Returns block
+ * and buffer pointers (if applicable) if they're ok to use.
+ */
+STATIC int
+xchk_btree_get_block(
+ struct xchk_btree *bs,
+ int level,
+ union xfs_btree_ptr *pp,
+ struct xfs_btree_block **pblock,
+ struct xfs_buf **pbp)
+{
+ xfs_failaddr_t failed_at;
+ int error;
+
+ *pblock = NULL;
+ *pbp = NULL;
+
+ error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock);
+ if (!xchk_btree_process_error(bs->sc, bs->cur, level, &error) ||
+ !*pblock)
+ return error;
+
+ xfs_btree_get_block(bs->cur, level, pbp);
+ if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ failed_at = __xfs_btree_check_lblock(bs->cur, *pblock,
+ level, *pbp);
+ else
+ failed_at = __xfs_btree_check_sblock(bs->cur, *pblock,
+ level, *pbp);
+ if (failed_at) {
+ xchk_btree_set_corrupt(bs->sc, bs->cur, level);
+ return 0;
+ }
+ if (*pbp)
+ xchk_buffer_recheck(bs->sc, *pbp);
+
+ xchk_btree_check_minrecs(bs, level, *pblock);
+
+ /*
+ * Check the block's owner; this function absorbs error codes
+ * for us.
+ */
+ error = xchk_btree_check_owner(bs, level, *pbp);
+ if (error)
+ return error;
+
+ /*
+ * Check the block's siblings; this function absorbs error codes
+ * for us.
+ */
+ error = xchk_btree_block_check_siblings(bs, *pblock);
+ if (error)
+ return error;
+
+ xchk_btree_block_check_keys(bs, level, *pblock);
+ return 0;
+}
+
+/*
+ * Check that the low and high keys of this block match the keys stored
+ * in the parent block.
+ */
+STATIC void
+xchk_btree_block_keys(
+ struct xchk_btree *bs,
+ int level,
+ struct xfs_btree_block *block)
+{
+ union xfs_btree_key block_keys;
+ struct xfs_btree_cur *cur = bs->cur;
+ union xfs_btree_key *high_bk;
+ union xfs_btree_key *parent_keys;
+ union xfs_btree_key *high_pk;
+ struct xfs_btree_block *parent_block;
+ struct xfs_buf *bp;
+
+ if (level >= cur->bc_nlevels - 1)
+ return;
+
+ /* Calculate the keys for this block. */
+ xfs_btree_get_keys(cur, block, &block_keys);
+
+ /* Obtain the parent's copy of the keys for this block. */
+ parent_block = xfs_btree_get_block(cur, level + 1, &bp);
+ parent_keys = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr,
+ parent_block);
+
+ if (xfs_btree_keycmp_ne(cur, &block_keys, parent_keys))
+ xchk_btree_set_corrupt(bs->sc, cur, 1);
+
+ if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
+ return;
+
+ /* Get high keys */
+ high_bk = xfs_btree_high_key_from_key(cur, &block_keys);
+ high_pk = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr,
+ parent_block);
+
+ if (xfs_btree_keycmp_ne(cur, high_bk, high_pk))
+ xchk_btree_set_corrupt(bs->sc, cur, 1);
+}
+
+/*
+ * Visit all nodes and leaves of a btree. Check that all pointers and
+ * records are in order, that the keys reflect the records, and use a callback
+ * so that the caller can verify individual records.
+ */
+int
+xchk_btree(
+ struct xfs_scrub *sc,
+ struct xfs_btree_cur *cur,
+ xchk_btree_rec_fn scrub_fn,
+ const struct xfs_owner_info *oinfo,
+ void *private)
+{
+ union xfs_btree_ptr ptr;
+ struct xchk_btree *bs;
+ union xfs_btree_ptr *pp;
+ union xfs_btree_rec *recp;
+ struct xfs_btree_block *block;
+ struct xfs_buf *bp;
+ struct check_owner *co;
+ struct check_owner *n;
+ size_t cur_sz;
+ int level;
+ int error = 0;
+
+ /*
+ * Allocate the btree scrub context from the heap, because this
+ * structure can get rather large. Don't let a caller feed us a
+ * totally absurd size.
+ */
+ cur_sz = xchk_btree_sizeof(cur->bc_nlevels);
+ if (cur_sz > PAGE_SIZE) {
+ xchk_btree_set_corrupt(sc, cur, 0);
+ return 0;
+ }
+ bs = kzalloc(cur_sz, XCHK_GFP_FLAGS);
+ if (!bs)
+ return -ENOMEM;
+ bs->cur = cur;
+ bs->scrub_rec = scrub_fn;
+ bs->oinfo = oinfo;
+ bs->private = private;
+ bs->sc = sc;
+
+ /* Initialize scrub state */
+ INIT_LIST_HEAD(&bs->to_check);
+
+ /*
+ * Load the root of the btree. The helper function absorbs
+ * error codes for us.
+ */
+ level = cur->bc_nlevels - 1;
+ cur->bc_ops->init_ptr_from_cur(cur, &ptr);
+ if (!xchk_btree_ptr_ok(bs, cur->bc_nlevels, &ptr))
+ goto out;
+ error = xchk_btree_get_block(bs, level, &ptr, &block, &bp);
+ if (error || !block)
+ goto out;
+
+ cur->bc_levels[level].ptr = 1;
+
+ while (level < cur->bc_nlevels) {
+ block = xfs_btree_get_block(cur, level, &bp);
+
+ if (level == 0) {
+ /* End of leaf, pop back towards the root. */
+ if (cur->bc_levels[level].ptr >
+ be16_to_cpu(block->bb_numrecs)) {
+ xchk_btree_block_keys(bs, level, block);
+ if (level < cur->bc_nlevels - 1)
+ cur->bc_levels[level + 1].ptr++;
+ level++;
+ continue;
+ }
+
+ /* Records in order for scrub? */
+ xchk_btree_rec(bs);
+
+ /* Call out to the record checker. */
+ recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr,
+ block);
+ error = bs->scrub_rec(bs, recp);
+ if (error)
+ break;
+ if (xchk_should_terminate(sc, &error) ||
+ (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
+ break;
+
+ cur->bc_levels[level].ptr++;
+ continue;
+ }
+
+ /* End of node, pop back towards the root. */
+ if (cur->bc_levels[level].ptr >
+ be16_to_cpu(block->bb_numrecs)) {
+ xchk_btree_block_keys(bs, level, block);
+ if (level < cur->bc_nlevels - 1)
+ cur->bc_levels[level + 1].ptr++;
+ level++;
+ continue;
+ }
+
+ /* Keys in order for scrub? */
+ xchk_btree_key(bs, level);
+
+ /* Drill another level deeper. */
+ pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block);
+ if (!xchk_btree_ptr_ok(bs, level, pp)) {
+ cur->bc_levels[level].ptr++;
+ continue;
+ }
+ level--;
+ error = xchk_btree_get_block(bs, level, pp, &block, &bp);
+ if (error || !block)
+ goto out;
+
+ cur->bc_levels[level].ptr = 1;
+ }
+
+out:
+ /* Process deferred owner checks on btree blocks. */
+ list_for_each_entry_safe(co, n, &bs->to_check, list) {
+ if (!error && bs->cur)
+ error = xchk_btree_check_block_owner(bs, co->level,
+ co->daddr);
+ list_del(&co->list);
+ kfree(co);
+ }
+ kfree(bs);
+
+ return error;
+}