summaryrefslogtreecommitdiffstats
path: root/fs/xfs/scrub/inode_repair.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/scrub/inode_repair.c')
-rw-r--r--fs/xfs/scrub/inode_repair.c1525
1 files changed, 1525 insertions, 0 deletions
diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c
new file mode 100644
index 000000000..0ca62d59f
--- /dev/null
+++ b/fs/xfs/scrub/inode_repair.c
@@ -0,0 +1,1525 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2018-2023 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_icache.h"
+#include "xfs_inode_buf.h"
+#include "xfs_inode_fork.h"
+#include "xfs_ialloc.h"
+#include "xfs_da_format.h"
+#include "xfs_reflink.h"
+#include "xfs_alloc.h"
+#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_bmap_util.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_quota_defs.h"
+#include "xfs_quota.h"
+#include "xfs_ag.h"
+#include "xfs_rtbitmap.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_log_priv.h"
+#include "xfs_health.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/btree.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+
+/*
+ * Inode Record Repair
+ * ===================
+ *
+ * Roughly speaking, inode problems can be classified based on whether or not
+ * they trip the dinode verifiers. If those trip, then we won't be able to
+ * xfs_iget ourselves the inode.
+ *
+ * Therefore, the xrep_dinode_* functions fix anything that will cause the
+ * inode buffer verifier or the dinode verifier. The xrep_inode_* functions
+ * fix things on live incore inodes. The inode repair functions make decisions
+ * with security and usability implications when reviving a file:
+ *
+ * - Files with zero di_mode or a garbage di_mode are converted to regular file
+ * that only root can read. This file may not actually contain user data,
+ * if the file was not previously a regular file. Setuid and setgid bits
+ * are cleared.
+ *
+ * - Zero-size directories can be truncated to look empty. It is necessary to
+ * run the bmapbtd and directory repair functions to fully rebuild the
+ * directory.
+ *
+ * - Zero-size symbolic link targets can be truncated to '?'. It is necessary
+ * to run the bmapbtd and symlink repair functions to salvage the symlink.
+ *
+ * - Invalid extent size hints will be removed.
+ *
+ * - Quotacheck will be scheduled if we repaired an inode that was so badly
+ * damaged that the ondisk inode had to be rebuilt.
+ *
+ * - Invalid user, group, or project IDs (aka -1U) will be reset to zero.
+ * Setuid and setgid bits are cleared.
+ *
+ * - Data and attr forks are reset to extents format with zero extents if the
+ * fork data is inconsistent. It is necessary to run the bmapbtd or bmapbta
+ * repair functions to recover the space mapping.
+ *
+ * - ACLs will not be recovered if the attr fork is zapped or the extended
+ * attribute structure itself requires salvaging.
+ *
+ * - If the attr fork is zapped, the user and group ids are reset to root and
+ * the setuid and setgid bits are removed.
+ */
+
+/*
+ * All the information we need to repair the ondisk inode if we can't iget the
+ * incore inode. We don't allocate this buffer unless we're going to perform
+ * a repair to the ondisk inode cluster buffer.
+ */
+struct xrep_inode {
+ /* Inode mapping that we saved from the initial lookup attempt. */
+ struct xfs_imap imap;
+
+ struct xfs_scrub *sc;
+
+ /* Blocks in use on the data device by data extents or bmbt blocks. */
+ xfs_rfsblock_t data_blocks;
+
+ /* Blocks in use on the rt device. */
+ xfs_rfsblock_t rt_blocks;
+
+ /* Blocks in use by the attr fork. */
+ xfs_rfsblock_t attr_blocks;
+
+ /* Number of data device extents for the data fork. */
+ xfs_extnum_t data_extents;
+
+ /*
+ * Number of realtime device extents for the data fork. If
+ * data_extents and rt_extents indicate that the data fork has extents
+ * on both devices, we'll just back away slowly.
+ */
+ xfs_extnum_t rt_extents;
+
+ /* Number of (data device) extents for the attr fork. */
+ xfs_aextnum_t attr_extents;
+
+ /* Sick state to set after zapping parts of the inode. */
+ unsigned int ino_sick_mask;
+
+ /* Must we remove all access from this file? */
+ bool zap_acls;
+};
+
+/*
+ * Setup function for inode repair. @imap contains the ondisk inode mapping
+ * information so that we can correct the ondisk inode cluster buffer if
+ * necessary to make iget work.
+ */
+int
+xrep_setup_inode(
+ struct xfs_scrub *sc,
+ const struct xfs_imap *imap)
+{
+ struct xrep_inode *ri;
+
+ sc->buf = kzalloc(sizeof(struct xrep_inode), XCHK_GFP_FLAGS);
+ if (!sc->buf)
+ return -ENOMEM;
+
+ ri = sc->buf;
+ memcpy(&ri->imap, imap, sizeof(struct xfs_imap));
+ ri->sc = sc;
+ return 0;
+}
+
+/*
+ * Make sure this ondisk inode can pass the inode buffer verifier. This is
+ * not the same as the dinode verifier.
+ */
+STATIC void
+xrep_dinode_buf_core(
+ struct xfs_scrub *sc,
+ struct xfs_buf *bp,
+ unsigned int ioffset)
+{
+ struct xfs_dinode *dip = xfs_buf_offset(bp, ioffset);
+ struct xfs_trans *tp = sc->tp;
+ struct xfs_mount *mp = sc->mp;
+ xfs_agino_t agino;
+ bool crc_ok = false;
+ bool magic_ok = false;
+ bool unlinked_ok = false;
+
+ agino = be32_to_cpu(dip->di_next_unlinked);
+
+ if (xfs_verify_agino_or_null(bp->b_pag, agino))
+ unlinked_ok = true;
+
+ if (dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
+ xfs_dinode_good_version(mp, dip->di_version))
+ magic_ok = true;
+
+ if (xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
+ XFS_DINODE_CRC_OFF))
+ crc_ok = true;
+
+ if (magic_ok && unlinked_ok && crc_ok)
+ return;
+
+ if (!magic_ok) {
+ dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
+ dip->di_version = 3;
+ }
+ if (!unlinked_ok)
+ dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
+ xfs_dinode_calc_crc(mp, dip);
+ xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
+ xfs_trans_log_buf(tp, bp, ioffset,
+ ioffset + sizeof(struct xfs_dinode) - 1);
+}
+
+/* Make sure this inode cluster buffer can pass the inode buffer verifier. */
+STATIC void
+xrep_dinode_buf(
+ struct xfs_scrub *sc,
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = sc->mp;
+ int i;
+ int ni;
+
+ ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
+ for (i = 0; i < ni; i++)
+ xrep_dinode_buf_core(sc, bp, i << mp->m_sb.sb_inodelog);
+}
+
+/* Reinitialize things that never change in an inode. */
+STATIC void
+xrep_dinode_header(
+ struct xfs_scrub *sc,
+ struct xfs_dinode *dip)
+{
+ trace_xrep_dinode_header(sc, dip);
+
+ dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
+ if (!xfs_dinode_good_version(sc->mp, dip->di_version))
+ dip->di_version = 3;
+ dip->di_ino = cpu_to_be64(sc->sm->sm_ino);
+ uuid_copy(&dip->di_uuid, &sc->mp->m_sb.sb_meta_uuid);
+ dip->di_gen = cpu_to_be32(sc->sm->sm_gen);
+}
+
+/* Turn di_mode into /something/ recognizable. */
+STATIC void
+xrep_dinode_mode(
+ struct xrep_inode *ri,
+ struct xfs_dinode *dip)
+{
+ struct xfs_scrub *sc = ri->sc;
+ uint16_t mode = be16_to_cpu(dip->di_mode);
+
+ trace_xrep_dinode_mode(sc, dip);
+
+ if (mode == 0 || xfs_mode_to_ftype(mode) != XFS_DIR3_FT_UNKNOWN)
+ return;
+
+ /* bad mode, so we set it to a file that only root can read */
+ mode = S_IFREG;
+ dip->di_mode = cpu_to_be16(mode);
+ dip->di_uid = 0;
+ dip->di_gid = 0;
+ ri->zap_acls = true;
+}
+
+/* Fix any conflicting flags that the verifiers complain about. */
+STATIC void
+xrep_dinode_flags(
+ struct xfs_scrub *sc,
+ struct xfs_dinode *dip,
+ bool isrt)
+{
+ struct xfs_mount *mp = sc->mp;
+ uint64_t flags2 = be64_to_cpu(dip->di_flags2);
+ uint16_t flags = be16_to_cpu(dip->di_flags);
+ uint16_t mode = be16_to_cpu(dip->di_mode);
+
+ trace_xrep_dinode_flags(sc, dip);
+
+ if (isrt)
+ flags |= XFS_DIFLAG_REALTIME;
+ else
+ flags &= ~XFS_DIFLAG_REALTIME;
+
+ /*
+ * For regular files on a reflink filesystem, set the REFLINK flag to
+ * protect shared extents. A later stage will actually check those
+ * extents and clear the flag if possible.
+ */
+ if (xfs_has_reflink(mp) && S_ISREG(mode))
+ flags2 |= XFS_DIFLAG2_REFLINK;
+ else
+ flags2 &= ~(XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE);
+ if (flags & XFS_DIFLAG_REALTIME)
+ flags2 &= ~XFS_DIFLAG2_REFLINK;
+ if (!xfs_has_bigtime(mp))
+ flags2 &= ~XFS_DIFLAG2_BIGTIME;
+ if (!xfs_has_large_extent_counts(mp))
+ flags2 &= ~XFS_DIFLAG2_NREXT64;
+ if (flags2 & XFS_DIFLAG2_NREXT64)
+ dip->di_nrext64_pad = 0;
+ else if (dip->di_version >= 3)
+ dip->di_v3_pad = 0;
+ dip->di_flags = cpu_to_be16(flags);
+ dip->di_flags2 = cpu_to_be64(flags2);
+}
+
+/*
+ * Blow out symlink; now it points nowhere. We don't have to worry about
+ * incore state because this inode is failing the verifiers.
+ */
+STATIC void
+xrep_dinode_zap_symlink(
+ struct xrep_inode *ri,
+ struct xfs_dinode *dip)
+{
+ struct xfs_scrub *sc = ri->sc;
+ char *p;
+
+ trace_xrep_dinode_zap_symlink(sc, dip);
+
+ dip->di_format = XFS_DINODE_FMT_LOCAL;
+ dip->di_size = cpu_to_be64(1);
+ p = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
+ *p = '?';
+ ri->ino_sick_mask |= XFS_SICK_INO_SYMLINK_ZAPPED;
+}
+
+/*
+ * Blow out dir, make the parent point to the root. In the future repair will
+ * reconstruct this directory for us. Note that there's no in-core directory
+ * inode because the sf verifier tripped, so we don't have to worry about the
+ * dentry cache.
+ */
+STATIC void
+xrep_dinode_zap_dir(
+ struct xrep_inode *ri,
+ struct xfs_dinode *dip)
+{
+ struct xfs_scrub *sc = ri->sc;
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_dir2_sf_hdr *sfp;
+ int i8count;
+
+ trace_xrep_dinode_zap_dir(sc, dip);
+
+ dip->di_format = XFS_DINODE_FMT_LOCAL;
+ i8count = mp->m_sb.sb_rootino > XFS_DIR2_MAX_SHORT_INUM;
+ sfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
+ sfp->count = 0;
+ sfp->i8count = i8count;
+ xfs_dir2_sf_put_parent_ino(sfp, mp->m_sb.sb_rootino);
+ dip->di_size = cpu_to_be64(xfs_dir2_sf_hdr_size(i8count));
+ ri->ino_sick_mask |= XFS_SICK_INO_DIR_ZAPPED;
+}
+
+/* Make sure we don't have a garbage file size. */
+STATIC void
+xrep_dinode_size(
+ struct xrep_inode *ri,
+ struct xfs_dinode *dip)
+{
+ struct xfs_scrub *sc = ri->sc;
+ uint64_t size = be64_to_cpu(dip->di_size);
+ uint16_t mode = be16_to_cpu(dip->di_mode);
+
+ trace_xrep_dinode_size(sc, dip);
+
+ switch (mode & S_IFMT) {
+ case S_IFIFO:
+ case S_IFCHR:
+ case S_IFBLK:
+ case S_IFSOCK:
+ /* di_size can't be nonzero for special files */
+ dip->di_size = 0;
+ break;
+ case S_IFREG:
+ /* Regular files can't be larger than 2^63-1 bytes. */
+ dip->di_size = cpu_to_be64(size & ~(1ULL << 63));
+ break;
+ case S_IFLNK:
+ /*
+ * Truncate ridiculously oversized symlinks. If the size is
+ * zero, reset it to point to the current directory. Both of
+ * these conditions trigger dinode verifier errors, so there
+ * is no in-core state to reset.
+ */
+ if (size > XFS_SYMLINK_MAXLEN)
+ dip->di_size = cpu_to_be64(XFS_SYMLINK_MAXLEN);
+ else if (size == 0)
+ xrep_dinode_zap_symlink(ri, dip);
+ break;
+ case S_IFDIR:
+ /*
+ * Directories can't have a size larger than 32G. If the size
+ * is zero, reset it to an empty directory. Both of these
+ * conditions trigger dinode verifier errors, so there is no
+ * in-core state to reset.
+ */
+ if (size > XFS_DIR2_SPACE_SIZE)
+ dip->di_size = cpu_to_be64(XFS_DIR2_SPACE_SIZE);
+ else if (size == 0)
+ xrep_dinode_zap_dir(ri, dip);
+ break;
+ }
+}
+
+/* Fix extent size hints. */
+STATIC void
+xrep_dinode_extsize_hints(
+ struct xfs_scrub *sc,
+ struct xfs_dinode *dip)
+{
+ struct xfs_mount *mp = sc->mp;
+ uint64_t flags2 = be64_to_cpu(dip->di_flags2);
+ uint16_t flags = be16_to_cpu(dip->di_flags);
+ uint16_t mode = be16_to_cpu(dip->di_mode);
+
+ xfs_failaddr_t fa;
+
+ trace_xrep_dinode_extsize_hints(sc, dip);
+
+ fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
+ mode, flags);
+ if (fa) {
+ dip->di_extsize = 0;
+ dip->di_flags &= ~cpu_to_be16(XFS_DIFLAG_EXTSIZE |
+ XFS_DIFLAG_EXTSZINHERIT);
+ }
+
+ if (dip->di_version < 3)
+ return;
+
+ fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
+ mode, flags, flags2);
+ if (fa) {
+ dip->di_cowextsize = 0;
+ dip->di_flags2 &= ~cpu_to_be64(XFS_DIFLAG2_COWEXTSIZE);
+ }
+}
+
+/* Count extents and blocks for an inode given an rmap. */
+STATIC int
+xrep_dinode_walk_rmap(
+ struct xfs_btree_cur *cur,
+ const struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xrep_inode *ri = priv;
+ int error = 0;
+
+ if (xchk_should_terminate(ri->sc, &error))
+ return error;
+
+ /* We only care about this inode. */
+ if (rec->rm_owner != ri->sc->sm->sm_ino)
+ return 0;
+
+ if (rec->rm_flags & XFS_RMAP_ATTR_FORK) {
+ ri->attr_blocks += rec->rm_blockcount;
+ if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
+ ri->attr_extents++;
+
+ return 0;
+ }
+
+ ri->data_blocks += rec->rm_blockcount;
+ if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
+ ri->data_extents++;
+
+ return 0;
+}
+
+/* Count extents and blocks for an inode from all AG rmap data. */
+STATIC int
+xrep_dinode_count_ag_rmaps(
+ struct xrep_inode *ri,
+ struct xfs_perag *pag)
+{
+ struct xfs_btree_cur *cur;
+ struct xfs_buf *agf;
+ int error;
+
+ error = xfs_alloc_read_agf(pag, ri->sc->tp, 0, &agf);
+ if (error)
+ return error;
+
+ cur = xfs_rmapbt_init_cursor(ri->sc->mp, ri->sc->tp, agf, pag);
+ error = xfs_rmap_query_all(cur, xrep_dinode_walk_rmap, ri);
+ xfs_btree_del_cursor(cur, error);
+ xfs_trans_brelse(ri->sc->tp, agf);
+ return error;
+}
+
+/* Count extents and blocks for a given inode from all rmap data. */
+STATIC int
+xrep_dinode_count_rmaps(
+ struct xrep_inode *ri)
+{
+ struct xfs_perag *pag;
+ xfs_agnumber_t agno;
+ int error;
+
+ if (!xfs_has_rmapbt(ri->sc->mp) || xfs_has_realtime(ri->sc->mp))
+ return -EOPNOTSUPP;
+
+ for_each_perag(ri->sc->mp, agno, pag) {
+ error = xrep_dinode_count_ag_rmaps(ri, pag);
+ if (error) {
+ xfs_perag_rele(pag);
+ return error;
+ }
+ }
+
+ /* Can't have extents on both the rt and the data device. */
+ if (ri->data_extents && ri->rt_extents)
+ return -EFSCORRUPTED;
+
+ trace_xrep_dinode_count_rmaps(ri->sc,
+ ri->data_blocks, ri->rt_blocks, ri->attr_blocks,
+ ri->data_extents, ri->rt_extents, ri->attr_extents);
+ return 0;
+}
+
+/* Return true if this extents-format ifork looks like garbage. */
+STATIC bool
+xrep_dinode_bad_extents_fork(
+ struct xfs_scrub *sc,
+ struct xfs_dinode *dip,
+ unsigned int dfork_size,
+ int whichfork)
+{
+ struct xfs_bmbt_irec new;
+ struct xfs_bmbt_rec *dp;
+ xfs_extnum_t nex;
+ bool isrt;
+ unsigned int i;
+
+ nex = xfs_dfork_nextents(dip, whichfork);
+ if (nex > dfork_size / sizeof(struct xfs_bmbt_rec))
+ return true;
+
+ dp = XFS_DFORK_PTR(dip, whichfork);
+
+ isrt = dip->di_flags & cpu_to_be16(XFS_DIFLAG_REALTIME);
+ for (i = 0; i < nex; i++, dp++) {
+ xfs_failaddr_t fa;
+
+ xfs_bmbt_disk_get_all(dp, &new);
+ fa = xfs_bmap_validate_extent_raw(sc->mp, isrt, whichfork,
+ &new);
+ if (fa)
+ return true;
+ }
+
+ return false;
+}
+
+/* Return true if this btree-format ifork looks like garbage. */
+STATIC bool
+xrep_dinode_bad_bmbt_fork(
+ struct xfs_scrub *sc,
+ struct xfs_dinode *dip,
+ unsigned int dfork_size,
+ int whichfork)
+{
+ struct xfs_bmdr_block *dfp;
+ xfs_extnum_t nex;
+ unsigned int i;
+ unsigned int dmxr;
+ unsigned int nrecs;
+ unsigned int level;
+
+ nex = xfs_dfork_nextents(dip, whichfork);
+ if (nex <= dfork_size / sizeof(struct xfs_bmbt_rec))
+ return true;
+
+ if (dfork_size < sizeof(struct xfs_bmdr_block))
+ return true;
+
+ dfp = XFS_DFORK_PTR(dip, whichfork);
+ nrecs = be16_to_cpu(dfp->bb_numrecs);
+ level = be16_to_cpu(dfp->bb_level);
+
+ if (nrecs == 0 || XFS_BMDR_SPACE_CALC(nrecs) > dfork_size)
+ return true;
+ if (level == 0 || level >= XFS_BM_MAXLEVELS(sc->mp, whichfork))
+ return true;
+
+ dmxr = xfs_bmdr_maxrecs(dfork_size, 0);
+ for (i = 1; i <= nrecs; i++) {
+ struct xfs_bmbt_key *fkp;
+ xfs_bmbt_ptr_t *fpp;
+ xfs_fileoff_t fileoff;
+ xfs_fsblock_t fsbno;
+
+ fkp = XFS_BMDR_KEY_ADDR(dfp, i);
+ fileoff = be64_to_cpu(fkp->br_startoff);
+ if (!xfs_verify_fileoff(sc->mp, fileoff))
+ return true;
+
+ fpp = XFS_BMDR_PTR_ADDR(dfp, i, dmxr);
+ fsbno = be64_to_cpu(*fpp);
+ if (!xfs_verify_fsbno(sc->mp, fsbno))
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Check the data fork for things that will fail the ifork verifiers or the
+ * ifork formatters.
+ */
+STATIC bool
+xrep_dinode_check_dfork(
+ struct xfs_scrub *sc,
+ struct xfs_dinode *dip,
+ uint16_t mode)
+{
+ void *dfork_ptr;
+ int64_t data_size;
+ unsigned int fmt;
+ unsigned int dfork_size;
+
+ /*
+ * Verifier functions take signed int64_t, so check for bogus negative
+ * values first.
+ */
+ data_size = be64_to_cpu(dip->di_size);
+ if (data_size < 0)
+ return true;
+
+ fmt = XFS_DFORK_FORMAT(dip, XFS_DATA_FORK);
+ switch (mode & S_IFMT) {
+ case S_IFIFO:
+ case S_IFCHR:
+ case S_IFBLK:
+ case S_IFSOCK:
+ if (fmt != XFS_DINODE_FMT_DEV)
+ return true;
+ break;
+ case S_IFREG:
+ if (fmt == XFS_DINODE_FMT_LOCAL)
+ return true;
+ fallthrough;
+ case S_IFLNK:
+ case S_IFDIR:
+ switch (fmt) {
+ case XFS_DINODE_FMT_LOCAL:
+ case XFS_DINODE_FMT_EXTENTS:
+ case XFS_DINODE_FMT_BTREE:
+ break;
+ default:
+ return true;
+ }
+ break;
+ default:
+ return true;
+ }
+
+ dfork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_DATA_FORK);
+ dfork_ptr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
+
+ switch (fmt) {
+ case XFS_DINODE_FMT_DEV:
+ break;
+ case XFS_DINODE_FMT_LOCAL:
+ /* dir/symlink structure cannot be larger than the fork */
+ if (data_size > dfork_size)
+ return true;
+ /* directory structure must pass verification. */
+ if (S_ISDIR(mode) &&
+ xfs_dir2_sf_verify(sc->mp, dfork_ptr, data_size) != NULL)
+ return true;
+ /* symlink structure must pass verification. */
+ if (S_ISLNK(mode) &&
+ xfs_symlink_shortform_verify(dfork_ptr, data_size) != NULL)
+ return true;
+ break;
+ case XFS_DINODE_FMT_EXTENTS:
+ if (xrep_dinode_bad_extents_fork(sc, dip, dfork_size,
+ XFS_DATA_FORK))
+ return true;
+ break;
+ case XFS_DINODE_FMT_BTREE:
+ if (xrep_dinode_bad_bmbt_fork(sc, dip, dfork_size,
+ XFS_DATA_FORK))
+ return true;
+ break;
+ default:
+ return true;
+ }
+
+ return false;
+}
+
+static void
+xrep_dinode_set_data_nextents(
+ struct xfs_dinode *dip,
+ xfs_extnum_t nextents)
+{
+ if (xfs_dinode_has_large_extent_counts(dip))
+ dip->di_big_nextents = cpu_to_be64(nextents);
+ else
+ dip->di_nextents = cpu_to_be32(nextents);
+}
+
+static void
+xrep_dinode_set_attr_nextents(
+ struct xfs_dinode *dip,
+ xfs_extnum_t nextents)
+{
+ if (xfs_dinode_has_large_extent_counts(dip))
+ dip->di_big_anextents = cpu_to_be32(nextents);
+ else
+ dip->di_anextents = cpu_to_be16(nextents);
+}
+
+/* Reset the data fork to something sane. */
+STATIC void
+xrep_dinode_zap_dfork(
+ struct xrep_inode *ri,
+ struct xfs_dinode *dip,
+ uint16_t mode)
+{
+ struct xfs_scrub *sc = ri->sc;
+
+ trace_xrep_dinode_zap_dfork(sc, dip);
+
+ ri->ino_sick_mask |= XFS_SICK_INO_BMBTD_ZAPPED;
+
+ xrep_dinode_set_data_nextents(dip, 0);
+ ri->data_blocks = 0;
+ ri->rt_blocks = 0;
+
+ /* Special files always get reset to DEV */
+ switch (mode & S_IFMT) {
+ case S_IFIFO:
+ case S_IFCHR:
+ case S_IFBLK:
+ case S_IFSOCK:
+ dip->di_format = XFS_DINODE_FMT_DEV;
+ dip->di_size = 0;
+ return;
+ }
+
+ /*
+ * If we have data extents, reset to an empty map and hope the user
+ * will run the bmapbtd checker next.
+ */
+ if (ri->data_extents || ri->rt_extents || S_ISREG(mode)) {
+ dip->di_format = XFS_DINODE_FMT_EXTENTS;
+ return;
+ }
+
+ /* Otherwise, reset the local format to the minimum. */
+ switch (mode & S_IFMT) {
+ case S_IFLNK:
+ xrep_dinode_zap_symlink(ri, dip);
+ break;
+ case S_IFDIR:
+ xrep_dinode_zap_dir(ri, dip);
+ break;
+ }
+}
+
+/*
+ * Check the attr fork for things that will fail the ifork verifiers or the
+ * ifork formatters.
+ */
+STATIC bool
+xrep_dinode_check_afork(
+ struct xfs_scrub *sc,
+ struct xfs_dinode *dip)
+{
+ struct xfs_attr_sf_hdr *afork_ptr;
+ size_t attr_size;
+ unsigned int afork_size;
+
+ if (XFS_DFORK_BOFF(dip) == 0)
+ return dip->di_aformat != XFS_DINODE_FMT_EXTENTS ||
+ xfs_dfork_attr_extents(dip) != 0;
+
+ afork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
+ afork_ptr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
+
+ switch (XFS_DFORK_FORMAT(dip, XFS_ATTR_FORK)) {
+ case XFS_DINODE_FMT_LOCAL:
+ /* Fork has to be large enough to extract the xattr size. */
+ if (afork_size < sizeof(struct xfs_attr_sf_hdr))
+ return true;
+
+ /* xattr structure cannot be larger than the fork */
+ attr_size = be16_to_cpu(afork_ptr->totsize);
+ if (attr_size > afork_size)
+ return true;
+
+ /* xattr structure must pass verification. */
+ return xfs_attr_shortform_verify(afork_ptr, attr_size) != NULL;
+ case XFS_DINODE_FMT_EXTENTS:
+ if (xrep_dinode_bad_extents_fork(sc, dip, afork_size,
+ XFS_ATTR_FORK))
+ return true;
+ break;
+ case XFS_DINODE_FMT_BTREE:
+ if (xrep_dinode_bad_bmbt_fork(sc, dip, afork_size,
+ XFS_ATTR_FORK))
+ return true;
+ break;
+ default:
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Reset the attr fork to empty. Since the attr fork could have contained
+ * ACLs, make the file readable only by root.
+ */
+STATIC void
+xrep_dinode_zap_afork(
+ struct xrep_inode *ri,
+ struct xfs_dinode *dip,
+ uint16_t mode)
+{
+ struct xfs_scrub *sc = ri->sc;
+
+ trace_xrep_dinode_zap_afork(sc, dip);
+
+ ri->ino_sick_mask |= XFS_SICK_INO_BMBTA_ZAPPED;
+
+ dip->di_aformat = XFS_DINODE_FMT_EXTENTS;
+ xrep_dinode_set_attr_nextents(dip, 0);
+ ri->attr_blocks = 0;
+
+ /*
+ * If the data fork is in btree format, removing the attr fork entirely
+ * might cause verifier failures if the next level down in the bmbt
+ * could now fit in the data fork area.
+ */
+ if (dip->di_format != XFS_DINODE_FMT_BTREE)
+ dip->di_forkoff = 0;
+ dip->di_mode = cpu_to_be16(mode & ~0777);
+ dip->di_uid = 0;
+ dip->di_gid = 0;
+}
+
+/* Make sure the fork offset is a sensible value. */
+STATIC void
+xrep_dinode_ensure_forkoff(
+ struct xrep_inode *ri,
+ struct xfs_dinode *dip,
+ uint16_t mode)
+{
+ struct xfs_bmdr_block *bmdr;
+ struct xfs_scrub *sc = ri->sc;
+ xfs_extnum_t attr_extents, data_extents;
+ size_t bmdr_minsz = XFS_BMDR_SPACE_CALC(1);
+ unsigned int lit_sz = XFS_LITINO(sc->mp);
+ unsigned int afork_min, dfork_min;
+
+ trace_xrep_dinode_ensure_forkoff(sc, dip);
+
+ /*
+ * Before calling this function, xrep_dinode_core ensured that both
+ * forks actually fit inside their respective literal areas. If this
+ * was not the case, the fork was reset to FMT_EXTENTS with zero
+ * records. If the rmapbt scan found attr or data fork blocks, this
+ * will be noted in the dinode_stats, and we must leave enough room
+ * for the bmap repair code to reconstruct the mapping structure.
+ *
+ * First, compute the minimum space required for the attr fork.
+ */
+ switch (dip->di_aformat) {
+ case XFS_DINODE_FMT_LOCAL:
+ /*
+ * If we still have a shortform xattr structure at all, that
+ * means the attr fork area was exactly large enough to fit
+ * the sf structure.
+ */
+ afork_min = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
+ break;
+ case XFS_DINODE_FMT_EXTENTS:
+ attr_extents = xfs_dfork_attr_extents(dip);
+ if (attr_extents) {
+ /*
+ * We must maintain sufficient space to hold the entire
+ * extent map array in the data fork. Note that we
+ * previously zapped the fork if it had no chance of
+ * fitting in the inode.
+ */
+ afork_min = sizeof(struct xfs_bmbt_rec) * attr_extents;
+ } else if (ri->attr_extents > 0) {
+ /*
+ * The attr fork thinks it has zero extents, but we
+ * found some xattr extents. We need to leave enough
+ * empty space here so that the incore attr fork will
+ * get created (and hence trigger the attr fork bmap
+ * repairer).
+ */
+ afork_min = bmdr_minsz;
+ } else {
+ /* No extents on disk or found in rmapbt. */
+ afork_min = 0;
+ }
+ break;
+ case XFS_DINODE_FMT_BTREE:
+ /* Must have space for btree header and key/pointers. */
+ bmdr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
+ afork_min = XFS_BMAP_BROOT_SPACE(sc->mp, bmdr);
+ break;
+ default:
+ /* We should never see any other formats. */
+ afork_min = 0;
+ break;
+ }
+
+ /* Compute the minimum space required for the data fork. */
+ switch (dip->di_format) {
+ case XFS_DINODE_FMT_DEV:
+ dfork_min = sizeof(__be32);
+ break;
+ case XFS_DINODE_FMT_UUID:
+ dfork_min = sizeof(uuid_t);
+ break;
+ case XFS_DINODE_FMT_LOCAL:
+ /*
+ * If we still have a shortform data fork at all, that means
+ * the data fork area was large enough to fit whatever was in
+ * there.
+ */
+ dfork_min = be64_to_cpu(dip->di_size);
+ break;
+ case XFS_DINODE_FMT_EXTENTS:
+ data_extents = xfs_dfork_data_extents(dip);
+ if (data_extents) {
+ /*
+ * We must maintain sufficient space to hold the entire
+ * extent map array in the data fork. Note that we
+ * previously zapped the fork if it had no chance of
+ * fitting in the inode.
+ */
+ dfork_min = sizeof(struct xfs_bmbt_rec) * data_extents;
+ } else if (ri->data_extents > 0 || ri->rt_extents > 0) {
+ /*
+ * The data fork thinks it has zero extents, but we
+ * found some data extents. We need to leave enough
+ * empty space here so that the data fork bmap repair
+ * will recover the mappings.
+ */
+ dfork_min = bmdr_minsz;
+ } else {
+ /* No extents on disk or found in rmapbt. */
+ dfork_min = 0;
+ }
+ break;
+ case XFS_DINODE_FMT_BTREE:
+ /* Must have space for btree header and key/pointers. */
+ bmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
+ dfork_min = XFS_BMAP_BROOT_SPACE(sc->mp, bmdr);
+ break;
+ default:
+ dfork_min = 0;
+ break;
+ }
+
+ /*
+ * Round all values up to the nearest 8 bytes, because that is the
+ * precision of di_forkoff.
+ */
+ afork_min = roundup(afork_min, 8);
+ dfork_min = roundup(dfork_min, 8);
+ bmdr_minsz = roundup(bmdr_minsz, 8);
+
+ ASSERT(dfork_min <= lit_sz);
+ ASSERT(afork_min <= lit_sz);
+
+ /*
+ * If the data fork was zapped and we don't have enough space for the
+ * recovery fork, move the attr fork up.
+ */
+ if (dip->di_format == XFS_DINODE_FMT_EXTENTS &&
+ xfs_dfork_data_extents(dip) == 0 &&
+ (ri->data_extents > 0 || ri->rt_extents > 0) &&
+ bmdr_minsz > XFS_DFORK_DSIZE(dip, sc->mp)) {
+ if (bmdr_minsz + afork_min > lit_sz) {
+ /*
+ * The attr for and the stub fork we need to recover
+ * the data fork won't both fit. Zap the attr fork.
+ */
+ xrep_dinode_zap_afork(ri, dip, mode);
+ afork_min = bmdr_minsz;
+ } else {
+ void *before, *after;
+
+ /* Otherwise, just slide the attr fork up. */
+ before = XFS_DFORK_APTR(dip);
+ dip->di_forkoff = bmdr_minsz >> 3;
+ after = XFS_DFORK_APTR(dip);
+ memmove(after, before, XFS_DFORK_ASIZE(dip, sc->mp));
+ }
+ }
+
+ /*
+ * If the attr fork was zapped and we don't have enough space for the
+ * recovery fork, move the attr fork down.
+ */
+ if (dip->di_aformat == XFS_DINODE_FMT_EXTENTS &&
+ xfs_dfork_attr_extents(dip) == 0 &&
+ ri->attr_extents > 0 &&
+ bmdr_minsz > XFS_DFORK_ASIZE(dip, sc->mp)) {
+ if (dip->di_format == XFS_DINODE_FMT_BTREE) {
+ /*
+ * If the data fork is in btree format then we can't
+ * adjust forkoff because that runs the risk of
+ * violating the extents/btree format transition rules.
+ */
+ } else if (bmdr_minsz + dfork_min > lit_sz) {
+ /*
+ * If we can't move the attr fork, too bad, we lose the
+ * attr fork and leak its blocks.
+ */
+ xrep_dinode_zap_afork(ri, dip, mode);
+ } else {
+ /*
+ * Otherwise, just slide the attr fork down. The attr
+ * fork is empty, so we don't have any old contents to
+ * move here.
+ */
+ dip->di_forkoff = (lit_sz - bmdr_minsz) >> 3;
+ }
+ }
+}
+
+/*
+ * Zap the data/attr forks if we spot anything that isn't going to pass the
+ * ifork verifiers or the ifork formatters, because we need to get the inode
+ * into good enough shape that the higher level repair functions can run.
+ */
+STATIC void
+xrep_dinode_zap_forks(
+ struct xrep_inode *ri,
+ struct xfs_dinode *dip)
+{
+ struct xfs_scrub *sc = ri->sc;
+ xfs_extnum_t data_extents;
+ xfs_extnum_t attr_extents;
+ xfs_filblks_t nblocks;
+ uint16_t mode;
+ bool zap_datafork = false;
+ bool zap_attrfork = ri->zap_acls;
+
+ trace_xrep_dinode_zap_forks(sc, dip);
+
+ mode = be16_to_cpu(dip->di_mode);
+
+ data_extents = xfs_dfork_data_extents(dip);
+ attr_extents = xfs_dfork_attr_extents(dip);
+ nblocks = be64_to_cpu(dip->di_nblocks);
+
+ /* Inode counters don't make sense? */
+ if (data_extents > nblocks)
+ zap_datafork = true;
+ if (attr_extents > nblocks)
+ zap_attrfork = true;
+ if (data_extents + attr_extents > nblocks)
+ zap_datafork = zap_attrfork = true;
+
+ if (!zap_datafork)
+ zap_datafork = xrep_dinode_check_dfork(sc, dip, mode);
+ if (!zap_attrfork)
+ zap_attrfork = xrep_dinode_check_afork(sc, dip);
+
+ /* Zap whatever's bad. */
+ if (zap_attrfork)
+ xrep_dinode_zap_afork(ri, dip, mode);
+ if (zap_datafork)
+ xrep_dinode_zap_dfork(ri, dip, mode);
+ xrep_dinode_ensure_forkoff(ri, dip, mode);
+
+ /*
+ * Zero di_nblocks if we don't have any extents at all to satisfy the
+ * buffer verifier.
+ */
+ data_extents = xfs_dfork_data_extents(dip);
+ attr_extents = xfs_dfork_attr_extents(dip);
+ if (data_extents + attr_extents == 0)
+ dip->di_nblocks = 0;
+}
+
+/* Inode didn't pass dinode verifiers, so fix the raw buffer and retry iget. */
+STATIC int
+xrep_dinode_core(
+ struct xrep_inode *ri)
+{
+ struct xfs_scrub *sc = ri->sc;
+ struct xfs_buf *bp;
+ struct xfs_dinode *dip;
+ xfs_ino_t ino = sc->sm->sm_ino;
+ int error;
+ int iget_error;
+
+ /* Figure out what this inode had mapped in both forks. */
+ error = xrep_dinode_count_rmaps(ri);
+ if (error)
+ return error;
+
+ /* Read the inode cluster buffer. */
+ error = xfs_trans_read_buf(sc->mp, sc->tp, sc->mp->m_ddev_targp,
+ ri->imap.im_blkno, ri->imap.im_len, XBF_UNMAPPED, &bp,
+ NULL);
+ if (error)
+ return error;
+
+ /* Make sure we can pass the inode buffer verifier. */
+ xrep_dinode_buf(sc, bp);
+ bp->b_ops = &xfs_inode_buf_ops;
+
+ /* Fix everything the verifier will complain about. */
+ dip = xfs_buf_offset(bp, ri->imap.im_boffset);
+ xrep_dinode_header(sc, dip);
+ xrep_dinode_mode(ri, dip);
+ xrep_dinode_flags(sc, dip, ri->rt_extents > 0);
+ xrep_dinode_size(ri, dip);
+ xrep_dinode_extsize_hints(sc, dip);
+ xrep_dinode_zap_forks(ri, dip);
+
+ /* Write out the inode. */
+ trace_xrep_dinode_fixed(sc, dip);
+ xfs_dinode_calc_crc(sc->mp, dip);
+ xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_DINO_BUF);
+ xfs_trans_log_buf(sc->tp, bp, ri->imap.im_boffset,
+ ri->imap.im_boffset + sc->mp->m_sb.sb_inodesize - 1);
+
+ /*
+ * In theory, we've fixed the ondisk inode record enough that we should
+ * be able to load the inode into the cache. Try to iget that inode
+ * now while we hold the AGI and the inode cluster buffer and take the
+ * IOLOCK so that we can continue with repairs without anyone else
+ * accessing the inode. If iget fails, we still need to commit the
+ * changes.
+ */
+ iget_error = xchk_iget(sc, ino, &sc->ip);
+ if (!iget_error)
+ xchk_ilock(sc, XFS_IOLOCK_EXCL);
+
+ /*
+ * Commit the inode cluster buffer updates and drop the AGI buffer that
+ * we've been holding since scrub setup. From here on out, repairs
+ * deal only with the cached inode.
+ */
+ error = xrep_trans_commit(sc);
+ if (error)
+ return error;
+
+ if (iget_error)
+ return iget_error;
+
+ error = xchk_trans_alloc(sc, 0);
+ if (error)
+ return error;
+
+ error = xrep_ino_dqattach(sc);
+ if (error)
+ return error;
+
+ xchk_ilock(sc, XFS_ILOCK_EXCL);
+ if (ri->ino_sick_mask)
+ xfs_inode_mark_sick(sc->ip, ri->ino_sick_mask);
+ return 0;
+}
+
+/* Fix everything xfs_dinode_verify cares about. */
+STATIC int
+xrep_dinode_problems(
+ struct xrep_inode *ri)
+{
+ struct xfs_scrub *sc = ri->sc;
+ int error;
+
+ error = xrep_dinode_core(ri);
+ if (error)
+ return error;
+
+ /* We had to fix a totally busted inode, schedule quotacheck. */
+ if (XFS_IS_UQUOTA_ON(sc->mp))
+ xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
+ if (XFS_IS_GQUOTA_ON(sc->mp))
+ xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
+ if (XFS_IS_PQUOTA_ON(sc->mp))
+ xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
+
+ return 0;
+}
+
+/*
+ * Fix problems that the verifiers don't care about. In general these are
+ * errors that don't cause problems elsewhere in the kernel that we can easily
+ * detect, so we don't check them all that rigorously.
+ */
+
+/* Make sure block and extent counts are ok. */
+STATIC int
+xrep_inode_blockcounts(
+ struct xfs_scrub *sc)
+{
+ struct xfs_ifork *ifp;
+ xfs_filblks_t count;
+ xfs_filblks_t acount;
+ xfs_extnum_t nextents;
+ int error;
+
+ trace_xrep_inode_blockcounts(sc);
+
+ /* Set data fork counters from the data fork mappings. */
+ error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK,
+ &nextents, &count);
+ if (error)
+ return error;
+ if (xfs_is_reflink_inode(sc->ip)) {
+ /*
+ * data fork blockcount can exceed physical storage if a user
+ * reflinks the same block over and over again.
+ */
+ ;
+ } else if (XFS_IS_REALTIME_INODE(sc->ip)) {
+ if (count >= sc->mp->m_sb.sb_rblocks)
+ return -EFSCORRUPTED;
+ } else {
+ if (count >= sc->mp->m_sb.sb_dblocks)
+ return -EFSCORRUPTED;
+ }
+ error = xrep_ino_ensure_extent_count(sc, XFS_DATA_FORK, nextents);
+ if (error)
+ return error;
+ sc->ip->i_df.if_nextents = nextents;
+
+ /* Set attr fork counters from the attr fork mappings. */
+ ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK);
+ if (ifp) {
+ error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
+ &nextents, &acount);
+ if (error)
+ return error;
+ if (count >= sc->mp->m_sb.sb_dblocks)
+ return -EFSCORRUPTED;
+ error = xrep_ino_ensure_extent_count(sc, XFS_ATTR_FORK,
+ nextents);
+ if (error)
+ return error;
+ ifp->if_nextents = nextents;
+ } else {
+ acount = 0;
+ }
+
+ sc->ip->i_nblocks = count + acount;
+ return 0;
+}
+
+/* Check for invalid uid/gid/prid. */
+STATIC void
+xrep_inode_ids(
+ struct xfs_scrub *sc)
+{
+ bool dirty = false;
+
+ trace_xrep_inode_ids(sc);
+
+ if (!uid_valid(VFS_I(sc->ip)->i_uid)) {
+ i_uid_write(VFS_I(sc->ip), 0);
+ dirty = true;
+ if (XFS_IS_UQUOTA_ON(sc->mp))
+ xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
+ }
+
+ if (!gid_valid(VFS_I(sc->ip)->i_gid)) {
+ i_gid_write(VFS_I(sc->ip), 0);
+ dirty = true;
+ if (XFS_IS_GQUOTA_ON(sc->mp))
+ xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
+ }
+
+ if (sc->ip->i_projid == -1U) {
+ sc->ip->i_projid = 0;
+ dirty = true;
+ if (XFS_IS_PQUOTA_ON(sc->mp))
+ xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
+ }
+
+ /* strip setuid/setgid if we touched any of the ids */
+ if (dirty)
+ VFS_I(sc->ip)->i_mode &= ~(S_ISUID | S_ISGID);
+}
+
+static inline void
+xrep_clamp_timestamp(
+ struct xfs_inode *ip,
+ struct timespec64 *ts)
+{
+ ts->tv_nsec = clamp_t(long, ts->tv_nsec, 0, NSEC_PER_SEC);
+ *ts = timestamp_truncate(*ts, VFS_I(ip));
+}
+
+/* Nanosecond counters can't have more than 1 billion. */
+STATIC void
+xrep_inode_timestamps(
+ struct xfs_inode *ip)
+{
+ struct timespec64 tstamp;
+ struct inode *inode = VFS_I(ip);
+
+ tstamp = inode_get_atime(inode);
+ xrep_clamp_timestamp(ip, &tstamp);
+ inode_set_atime_to_ts(inode, tstamp);
+
+ tstamp = inode_get_mtime(inode);
+ xrep_clamp_timestamp(ip, &tstamp);
+ inode_set_mtime_to_ts(inode, tstamp);
+
+ tstamp = inode_get_ctime(inode);
+ xrep_clamp_timestamp(ip, &tstamp);
+ inode_set_ctime_to_ts(inode, tstamp);
+
+ xrep_clamp_timestamp(ip, &ip->i_crtime);
+}
+
+/* Fix inode flags that don't make sense together. */
+STATIC void
+xrep_inode_flags(
+ struct xfs_scrub *sc)
+{
+ uint16_t mode;
+
+ trace_xrep_inode_flags(sc);
+
+ mode = VFS_I(sc->ip)->i_mode;
+
+ /* Clear junk flags */
+ if (sc->ip->i_diflags & ~XFS_DIFLAG_ANY)
+ sc->ip->i_diflags &= ~XFS_DIFLAG_ANY;
+
+ /* NEWRTBM only applies to realtime bitmaps */
+ if (sc->ip->i_ino == sc->mp->m_sb.sb_rbmino)
+ sc->ip->i_diflags |= XFS_DIFLAG_NEWRTBM;
+ else
+ sc->ip->i_diflags &= ~XFS_DIFLAG_NEWRTBM;
+
+ /* These only make sense for directories. */
+ if (!S_ISDIR(mode))
+ sc->ip->i_diflags &= ~(XFS_DIFLAG_RTINHERIT |
+ XFS_DIFLAG_EXTSZINHERIT |
+ XFS_DIFLAG_PROJINHERIT |
+ XFS_DIFLAG_NOSYMLINKS);
+
+ /* These only make sense for files. */
+ if (!S_ISREG(mode))
+ sc->ip->i_diflags &= ~(XFS_DIFLAG_REALTIME |
+ XFS_DIFLAG_EXTSIZE);
+
+ /* These only make sense for non-rt files. */
+ if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME)
+ sc->ip->i_diflags &= ~XFS_DIFLAG_FILESTREAM;
+
+ /* Immutable and append only? Drop the append. */
+ if ((sc->ip->i_diflags & XFS_DIFLAG_IMMUTABLE) &&
+ (sc->ip->i_diflags & XFS_DIFLAG_APPEND))
+ sc->ip->i_diflags &= ~XFS_DIFLAG_APPEND;
+
+ /* Clear junk flags. */
+ if (sc->ip->i_diflags2 & ~XFS_DIFLAG2_ANY)
+ sc->ip->i_diflags2 &= ~XFS_DIFLAG2_ANY;
+
+ /* No reflink flag unless we support it and it's a file. */
+ if (!xfs_has_reflink(sc->mp) || !S_ISREG(mode))
+ sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
+
+ /* DAX only applies to files and dirs. */
+ if (!(S_ISREG(mode) || S_ISDIR(mode)))
+ sc->ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
+
+ /* No reflink files on the realtime device. */
+ if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME)
+ sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
+}
+
+/*
+ * Fix size problems with block/node format directories. If we fail to find
+ * the extent list, just bail out and let the bmapbtd repair functions clean
+ * up that mess.
+ */
+STATIC void
+xrep_inode_blockdir_size(
+ struct xfs_scrub *sc)
+{
+ struct xfs_iext_cursor icur;
+ struct xfs_bmbt_irec got;
+ struct xfs_ifork *ifp;
+ xfs_fileoff_t off;
+ int error;
+
+ trace_xrep_inode_blockdir_size(sc);
+
+ error = xfs_iread_extents(sc->tp, sc->ip, XFS_DATA_FORK);
+ if (error)
+ return;
+
+ /* Find the last block before 32G; this is the dir size. */
+ ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
+ off = XFS_B_TO_FSB(sc->mp, XFS_DIR2_SPACE_SIZE);
+ if (!xfs_iext_lookup_extent_before(sc->ip, ifp, &off, &icur, &got)) {
+ /* zero-extents directory? */
+ return;
+ }
+
+ off = got.br_startoff + got.br_blockcount;
+ sc->ip->i_disk_size = min_t(loff_t, XFS_DIR2_SPACE_SIZE,
+ XFS_FSB_TO_B(sc->mp, off));
+}
+
+/* Fix size problems with short format directories. */
+STATIC void
+xrep_inode_sfdir_size(
+ struct xfs_scrub *sc)
+{
+ struct xfs_ifork *ifp;
+
+ trace_xrep_inode_sfdir_size(sc);
+
+ ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
+ sc->ip->i_disk_size = ifp->if_bytes;
+}
+
+/*
+ * Fix any irregularities in a directory inode's size now that we can iterate
+ * extent maps and access other regular inode data.
+ */
+STATIC void
+xrep_inode_dir_size(
+ struct xfs_scrub *sc)
+{
+ trace_xrep_inode_dir_size(sc);
+
+ switch (sc->ip->i_df.if_format) {
+ case XFS_DINODE_FMT_EXTENTS:
+ case XFS_DINODE_FMT_BTREE:
+ xrep_inode_blockdir_size(sc);
+ break;
+ case XFS_DINODE_FMT_LOCAL:
+ xrep_inode_sfdir_size(sc);
+ break;
+ }
+}
+
+/* Fix extent size hint problems. */
+STATIC void
+xrep_inode_extsize(
+ struct xfs_scrub *sc)
+{
+ /* Fix misaligned extent size hints on a directory. */
+ if ((sc->ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
+ (sc->ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
+ xfs_extlen_to_rtxmod(sc->mp, sc->ip->i_extsize) > 0) {
+ sc->ip->i_extsize = 0;
+ sc->ip->i_diflags &= ~XFS_DIFLAG_EXTSZINHERIT;
+ }
+}
+
+/* Fix any irregularities in an inode that the verifiers don't catch. */
+STATIC int
+xrep_inode_problems(
+ struct xfs_scrub *sc)
+{
+ int error;
+
+ error = xrep_inode_blockcounts(sc);
+ if (error)
+ return error;
+ xrep_inode_timestamps(sc->ip);
+ xrep_inode_flags(sc);
+ xrep_inode_ids(sc);
+ /*
+ * We can now do a better job fixing the size of a directory now that
+ * we can scan the data fork extents than we could in xrep_dinode_size.
+ */
+ if (S_ISDIR(VFS_I(sc->ip)->i_mode))
+ xrep_inode_dir_size(sc);
+ xrep_inode_extsize(sc);
+
+ trace_xrep_inode_fixed(sc);
+ xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
+ return xrep_roll_trans(sc);
+}
+
+/* Repair an inode's fields. */
+int
+xrep_inode(
+ struct xfs_scrub *sc)
+{
+ int error = 0;
+
+ /*
+ * No inode? That means we failed the _iget verifiers. Repair all
+ * the things that the inode verifiers care about, then retry _iget.
+ */
+ if (!sc->ip) {
+ struct xrep_inode *ri = sc->buf;
+
+ ASSERT(ri != NULL);
+
+ error = xrep_dinode_problems(ri);
+ if (error)
+ return error;
+
+ /* By this point we had better have a working incore inode. */
+ if (!sc->ip)
+ return -EFSCORRUPTED;
+ }
+
+ xfs_trans_ijoin(sc->tp, sc->ip, 0);
+
+ /* If we found corruption of any kind, try to fix it. */
+ if ((sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) ||
+ (sc->sm->sm_flags & XFS_SCRUB_OFLAG_XCORRUPT)) {
+ error = xrep_inode_problems(sc);
+ if (error)
+ return error;
+ }
+
+ /* See if we can clear the reflink flag. */
+ if (xfs_is_reflink_inode(sc->ip)) {
+ error = xfs_reflink_clear_inode_flag(sc->ip, &sc->tp);
+ if (error)
+ return error;
+ }
+
+ return xrep_defer_finish(sc);
+}