// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2022-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_trace.h" #include "xfs_bmap.h" #include "xfs_trans.h" #include "xfs_error.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/readdir.h" /* Call a function for every entry in a shortform directory. */ STATIC int xchk_dir_walk_sf( struct xfs_scrub *sc, struct xfs_inode *dp, xchk_dirent_fn dirent_fn, void *priv) { struct xfs_name name = { .name = ".", .len = 1, .type = XFS_DIR3_FT_DIR, }; struct xfs_mount *mp = dp->i_mount; struct xfs_da_geometry *geo = mp->m_dir_geo; struct xfs_dir2_sf_entry *sfep; struct xfs_dir2_sf_hdr *sfp = dp->i_df.if_data; xfs_ino_t ino; xfs_dir2_dataptr_t dapos; unsigned int i; int error; ASSERT(dp->i_df.if_bytes == dp->i_disk_size); ASSERT(sfp != NULL); /* dot entry */ dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, geo->data_entry_offset); error = dirent_fn(sc, dp, dapos, &name, dp->i_ino, priv); if (error) return error; /* dotdot entry */ dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, geo->data_entry_offset + xfs_dir2_data_entsize(mp, sizeof(".") - 1)); ino = xfs_dir2_sf_get_parent_ino(sfp); name.name = ".."; name.len = 2; error = dirent_fn(sc, dp, dapos, &name, ino, priv); if (error) return error; /* iterate everything else */ sfep = xfs_dir2_sf_firstentry(sfp); for (i = 0; i < sfp->count; i++) { dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, xfs_dir2_sf_get_offset(sfep)); ino = xfs_dir2_sf_get_ino(mp, sfp, sfep); name.name = sfep->name; name.len = sfep->namelen; name.type = xfs_dir2_sf_get_ftype(mp, sfep); error = dirent_fn(sc, dp, dapos, &name, ino, priv); if (error) return error; sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep); } return 0; } /* Call a function for every entry in a block directory. */ STATIC int xchk_dir_walk_block( struct xfs_scrub *sc, struct xfs_inode *dp, xchk_dirent_fn dirent_fn, void *priv) { struct xfs_mount *mp = dp->i_mount; struct xfs_da_geometry *geo = mp->m_dir_geo; struct xfs_buf *bp; unsigned int off, next_off, end; int error; error = xfs_dir3_block_read(sc->tp, dp, dp->i_ino, &bp); if (error) return error; /* Walk each directory entry. */ end = xfs_dir3_data_end_offset(geo, bp->b_addr); for (off = geo->data_entry_offset; off < end; off = next_off) { struct xfs_name name = { }; struct xfs_dir2_data_unused *dup = bp->b_addr + off; struct xfs_dir2_data_entry *dep = bp->b_addr + off; xfs_ino_t ino; xfs_dir2_dataptr_t dapos; /* Skip an empty entry. */ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { next_off = off + be16_to_cpu(dup->length); continue; } /* Otherwise, find the next entry and report it. */ next_off = off + xfs_dir2_data_entsize(mp, dep->namelen); if (next_off > end) break; dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, off); ino = be64_to_cpu(dep->inumber); name.name = dep->name; name.len = dep->namelen; name.type = xfs_dir2_data_get_ftype(mp, dep); error = dirent_fn(sc, dp, dapos, &name, ino, priv); if (error) break; } xfs_trans_brelse(sc->tp, bp); return error; } /* Read a leaf-format directory buffer. */ STATIC int xchk_read_leaf_dir_buf( struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_da_geometry *geo, xfs_dir2_off_t *curoff, struct xfs_buf **bpp) { struct xfs_iext_cursor icur; struct xfs_bmbt_irec map; struct xfs_ifork *ifp = xfs_ifork_ptr(dp, XFS_DATA_FORK); xfs_dablk_t last_da; xfs_dablk_t map_off; xfs_dir2_off_t new_off; *bpp = NULL; /* * Look for mapped directory blocks at or above the current offset. * Truncate down to the nearest directory block to start the scanning * operation. */ last_da = xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET); map_off = xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, *curoff)); if (!xfs_iext_lookup_extent(dp, ifp, map_off, &icur, &map)) return 0; if (map.br_startoff >= last_da) return 0; xfs_trim_extent(&map, map_off, last_da - map_off); /* Read the directory block of that first mapping. */ new_off = xfs_dir2_da_to_byte(geo, map.br_startoff); if (new_off > *curoff) *curoff = new_off; return xfs_dir3_data_read(tp, dp, dp->i_ino, map.br_startoff, 0, bpp); } /* Call a function for every entry in a leaf directory. */ STATIC int xchk_dir_walk_leaf( struct xfs_scrub *sc, struct xfs_inode *dp, xchk_dirent_fn dirent_fn, void *priv) { struct xfs_mount *mp = dp->i_mount; struct xfs_da_geometry *geo = mp->m_dir_geo; struct xfs_buf *bp = NULL; xfs_dir2_off_t curoff = 0; unsigned int offset = 0; int error; /* Iterate every directory offset in this directory. */ while (curoff < XFS_DIR2_LEAF_OFFSET) { struct xfs_name name = { }; struct xfs_dir2_data_unused *dup; struct xfs_dir2_data_entry *dep; xfs_ino_t ino; unsigned int length; xfs_dir2_dataptr_t dapos; /* * If we have no buffer, or we're off the end of the * current buffer, need to get another one. */ if (!bp || offset >= geo->blksize) { if (bp) { xfs_trans_brelse(sc->tp, bp); bp = NULL; } error = xchk_read_leaf_dir_buf(sc->tp, dp, geo, &curoff, &bp); if (error || !bp) break; /* * Find our position in the block. */ offset = geo->data_entry_offset; curoff += geo->data_entry_offset; } /* Skip an empty entry. */ dup = bp->b_addr + offset; if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { length = be16_to_cpu(dup->length); offset += length; curoff += length; continue; } /* Otherwise, find the next entry and report it. */ dep = bp->b_addr + offset; length = xfs_dir2_data_entsize(mp, dep->namelen); dapos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff; ino = be64_to_cpu(dep->inumber); name.name = dep->name; name.len = dep->namelen; name.type = xfs_dir2_data_get_ftype(mp, dep); error = dirent_fn(sc, dp, dapos, &name, ino, priv); if (error) break; /* Advance to the next entry. */ offset += length; curoff += length; } if (bp) xfs_trans_brelse(sc->tp, bp); return error; } /* * Call a function for every entry in a directory. * * Callers must hold the ILOCK. File types are XFS_DIR3_FT_*. */ int xchk_dir_walk( struct xfs_scrub *sc, struct xfs_inode *dp, xchk_dirent_fn dirent_fn, void *priv) { struct xfs_da_args args = { .dp = dp, .geo = dp->i_mount->m_dir_geo, .trans = sc->tp, .owner = dp->i_ino, }; int error; if (xfs_is_shutdown(dp->i_mount)) return -EIO; ASSERT(S_ISDIR(VFS_I(dp)->i_mode)); xfs_assert_ilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL); switch (xfs_dir2_format(&args, &error)) { case XFS_DIR2_FMT_SF: return xchk_dir_walk_sf(sc, dp, dirent_fn, priv); case XFS_DIR2_FMT_BLOCK: return xchk_dir_walk_block(sc, dp, dirent_fn, priv); case XFS_DIR2_FMT_LEAF: case XFS_DIR2_FMT_NODE: return xchk_dir_walk_leaf(sc, dp, dirent_fn, priv); default: return error; } } /* * Look up the inode number for an exact name in a directory. * * Callers must hold the ILOCK. File types are XFS_DIR3_FT_*. Names are not * checked for correctness. */ int xchk_dir_lookup( struct xfs_scrub *sc, struct xfs_inode *dp, const struct xfs_name *name, xfs_ino_t *ino) { struct xfs_da_args args = { .dp = dp, .geo = dp->i_mount->m_dir_geo, .trans = sc->tp, .name = name->name, .namelen = name->len, .filetype = name->type, .hashval = xfs_dir2_hashname(dp->i_mount, name), .whichfork = XFS_DATA_FORK, .op_flags = XFS_DA_OP_OKNOENT, .owner = dp->i_ino, }; int error; if (xfs_is_shutdown(dp->i_mount)) return -EIO; /* * A temporary directory's block headers are written with the owner * set to sc->ip, so we must switch the owner here for the lookup. */ if (dp == sc->tempip) args.owner = sc->ip->i_ino; ASSERT(S_ISDIR(VFS_I(dp)->i_mode)); xfs_assert_ilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL); error = xfs_dir_lookup_args(&args); if (!error) *ino = args.inumber; return error; } /* * Try to grab the IOLOCK and ILOCK of sc->ip and ip, returning @ip's lock * state. The caller may have a transaction, so we must use trylock for both * IOLOCKs. */ static inline unsigned int xchk_dir_trylock_both( struct xfs_scrub *sc, struct xfs_inode *ip) { if (!xchk_ilock_nowait(sc, XFS_IOLOCK_EXCL)) return 0; if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) goto parent_iolock; xchk_ilock(sc, XFS_ILOCK_EXCL); if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) goto parent_ilock; return XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL; parent_ilock: xchk_iunlock(sc, XFS_ILOCK_EXCL); xfs_iunlock(ip, XFS_IOLOCK_SHARED); parent_iolock: xchk_iunlock(sc, XFS_IOLOCK_EXCL); return 0; } /* * Try for a limited time to grab the IOLOCK and ILOCK of both the scrub target * (@sc->ip) and the inode at the other end (@ip) of a directory or parent * pointer link so that we can check that link. * * We do not know ahead of time that the directory tree is /not/ corrupt, so we * cannot use the "lock two inode" functions because we do not know that there * is not a racing thread trying to take the locks in opposite order. First * take IOLOCK_EXCL of the scrub target, and then try to take IOLOCK_SHARED * of @ip to synchronize with the VFS. Next, take ILOCK_EXCL of the scrub * target and @ip to synchronize with XFS. * * If the trylocks succeed, *lockmode will be set to the locks held for @ip; * @sc->ilock_flags will be set for the locks held for @sc->ip; and zero will * be returned. If not, returns -EDEADLOCK to try again; or -ETIMEDOUT if * XCHK_TRY_HARDER was set. Returns -EINTR if the process has been killed. */ int xchk_dir_trylock_for_pptrs( struct xfs_scrub *sc, struct xfs_inode *ip, unsigned int *lockmode) { unsigned int nr; int error = 0; ASSERT(sc->ilock_flags == 0); for (nr = 0; nr < HZ; nr++) { *lockmode = xchk_dir_trylock_both(sc, ip); if (*lockmode) return 0; if (xchk_should_terminate(sc, &error)) return error; delay(1); } if (sc->flags & XCHK_TRY_HARDER) { xchk_set_incomplete(sc); return -ETIMEDOUT; } return -EDEADLOCK; }